4 Commits

Author SHA1 Message Date
eugenefischer
4099ec2623 update ToDos 2025-04-15 15:50:55 -05:00
eugenefischer
7744586e79 change frequency of garbage collection requests 2025-04-10 20:07:34 -05:00
eugenefischer
83eff0d1e7 remove output to stdout that was added for testing 2025-04-10 15:08:33 -05:00
eugenefischer
d1810c453d Even more efficient graph creation (my initial scheme, but this time without accidentally changing what's in the sequence records) 2025-04-10 15:03:10 -05:00
2 changed files with 18 additions and 28 deletions

View File

@@ -634,7 +634,10 @@ a means of exploring some very beautiful math.
## TODO
* Update CLI option text in this readme to include Zipf distribution options
* Consider whether a graph database might be a better option than keeping things in memory.
* Look at fastutil for more performant maps and arrays. Note that JGraphT has an optional module that works with fastutil (see FastutilMapIntVertexGraph, for example).
* Consider implementing an option to use the JGraphT sparse graph representation for a lower memory cost with very large graphs (tens or hundreds of thousands of distinct sequences).
* ~~Update CLI option text in this readme to include Zipf distribution options~~
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* ~~Hold graph data in memory until another graph is read in? ABANDONED UNABANDONED~~ DONE
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~

View File

@@ -1,3 +1,4 @@
import org.jgrapht.Graphs;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.graph.DefaultWeightedEdge;
@@ -147,39 +148,25 @@ public class Simulator implements GraphModificationFunctions {
vertexLabelValue++;
}
betaVertices.forEach(graph::addVertex);
//add edges
//add edges (best so far)
int edgesAddedCount = 0;
for(Vertex a: alphaVertices) {
Set<Integer> a_wells = a.getRecord().getWells();
for(Vertex b: betaVertices) {
double weight = 0.0;
for (Integer well: a.getRecord().getWells()) {
if (b.getRecord().isInWell(well)) {
weight += 1.0;
}
Set<Integer> sharedWells = new HashSet<>(a_wells);
sharedWells.retainAll(b.getRecord().getWells());
if (!sharedWells.isEmpty()) {
Graphs.addEdge(graph, a, b, (double) sharedWells.size());
}
if (weight != 0.0) {
DefaultWeightedEdge edge = graph.addEdge(a, b);
graph.setEdgeWeight(edge, weight);
edgesAddedCount++;
if (edgesAddedCount % 10000000 == 0) { //collect garbage every 10,000,000 edges
System.out.println(edgesAddedCount + " edges added");
//request garbage collection
System.gc();
System.out.println("Garbage collection requested");
}
}
}
// //add edges
// for(Vertex a: alphaVertices) {
// for(Integer well: a.getRecord().getWells()) {
// for (Vertex b: betaVertices) {
// if (b.getRecord().isInWell(well)) {
// DefaultWeightedEdge edge = graph.getEdge(a, b);
// if (edge == null) {
// edge = graph.addEdge(a, b);
// graph.setEdgeWeight(edge, 1.0);
// }
// else {
// double weight = graph.getEdgeWeight(edge);
// graph.setEdgeWeight(edge, weight + 1.0);
// }
// }
// }
// }
// }
if(verbose){System.out.println("Graph created");}
//stop timing
Instant stop = Instant.now();