8 Commits

Author SHA1 Message Date
eugenefischer 4099ec2623 update ToDos 2025-04-15 15:50:55 -05:00
eugenefischer 7744586e79 change frequency of garbage collection requests 2025-04-10 20:07:34 -05:00
eugenefischer 83eff0d1e7 remove output to stdout that was added for testing 2025-04-10 15:08:33 -05:00
eugenefischer d1810c453d Even more efficient graph creation (my initial scheme, but this time without accidentally changing what's in the sequence records) 2025-04-10 15:03:10 -05:00
eugenefischer 187401f2d6 More efficient graph creation 2025-04-10 14:06:11 -05:00
eugenefischer 678ce99424 iterate over vertex wells correctly 2025-04-10 13:34:04 -05:00
eugenefischer c21e375303 fix concurrent modification bug 2025-04-10 13:33:47 -05:00
eugenefischer 57fe9c1619 Update graph modification functions to work with edges directly 2025-04-10 12:42:19 -05:00
4 changed files with 60 additions and 73 deletions

View File

@@ -634,7 +634,10 @@ a means of exploring some very beautiful math.
## TODO
* Update CLI option text in this readme to include Zipf distribution options
* Consider whether a graph database might be a better option than keeping things in memory.
* Look at fastutil for more performant maps and arrays. Note that JGraphT ships an optional fastutil-backed module (see FastutilMapIntVertexGraph, for example).
* Consider implementing an option to use the JGraphT sparse graph representation for a lower memory cost with very large graphs (tens or hundreds of thousands of distinct sequences); a sketch follows this list.
* ~~Update CLI option text in this readme to include Zipf distribution options~~
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* ~~Hold graph data in memory until another graph is read in? ABANDONED, then UNABANDONED~~ DONE
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
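
The sparse-representation item above could look roughly like the sketch below. This is only a minimal sketch: it assumes the optional jgrapht-opt module is on the classpath, the class and constructor names should be verified against the JGraphT version in use, and the mapping from sequence vertices to integer indices is hypothetical. Note also that the sparse graph is immutable, so BiGpairSEQ's filtering steps would have to run before construction, or the graph would need to be rebuilt per experiment.

```java
// Sketch only: build a compact sparse weighted graph from integer vertex indices.
// Assumes the optional jgrapht-opt artifact; verify class and constructor names
// against the JGraphT version actually in use.
import org.jgrapht.alg.util.Triple;
import org.jgrapht.opt.graph.sparse.SparseIntUndirectedWeightedGraph;

import java.util.ArrayList;
import java.util.List;

public class SparseGraphSketch {
    // Each int[] is {alphaIndex, betaIndex, sharedWellCount}, computed elsewhere.
    public static SparseIntUndirectedWeightedGraph buildSparse(int numVertices,
                                                               List<int[]> overlaps) {
        List<Triple<Integer, Integer, Double>> edges = new ArrayList<>(overlaps.size());
        for (int[] o : overlaps) {
            edges.add(Triple.of(o[0], o[1], (double) o[2]));
        }
        // Edges are stored in primitive arrays (CSR-like), trading mutability
        // for a much smaller footprint on very large graphs.
        return new SparseIntUndirectedWeightedGraph(numVertices, edges);
    }
}
```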

View File

@@ -13,7 +13,7 @@ public class BiGpairSEQ {
private static boolean cacheCells = false;
private static boolean cachePlate = false;
private static boolean cacheGraph = false;
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
private static AlgorithmType matchingAlgorithmType = AlgorithmType.HUNGARIAN;
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
private static DistributionType distributionType = DistributionType.ZIPF;
private static boolean outputBinary = true;
@@ -166,13 +166,13 @@ public class BiGpairSEQ {
return priorityQueueHeapType;
}
public static AlgorithmType getMatchingAlgoritmType() { return matchingAlgoritmType; }
public static AlgorithmType getMatchingAlgorithmType() { return matchingAlgorithmType; }
public static void setHungarianAlgorithm() { matchingAlgoritmType = AlgorithmType.HUNGARIAN; }
public static void setHungarianAlgorithm() { matchingAlgorithmType = AlgorithmType.HUNGARIAN; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgoritmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgorithmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setAuctionAlgorithm() { matchingAlgoritmType = AlgorithmType.AUCTION; }
public static void setAuctionAlgorithm() { matchingAlgorithmType = AlgorithmType.AUCTION; }
public static void setPairingHeap() {
priorityQueueHeapType = HeapType.PAIRING;

View File

@@ -1,72 +1,54 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
public interface GraphModificationFunctions {
//remove over- and under-weight edges, return removed edges
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
int low, int high, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
if(saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs with large occupancy discrepancy, return removed edges
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer maxOccupancyDifference, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer minOverlapPercent,
boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
@@ -74,22 +56,13 @@ public interface GraphModificationFunctions {
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer intWeight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, intWeight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
@@ -126,10 +99,10 @@ public interface GraphModificationFunctions {
}
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Map<Vertex[], Integer> removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
graph.setEdgeWeight(e, removedEdges.get(edge));
Map<DefaultWeightedEdge, Vertex[]> removedEdges) {
for (DefaultWeightedEdge edge : removedEdges.keySet()) {
Vertex[] vertices = removedEdges.get(edge);
graph.addEdge(vertices[0], vertices[1], edge);
}
}
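
A note on why the rewritten filter functions above work: keying the removed-edge cache by the DefaultWeightedEdge object itself (rather than by a Vertex[]/weight pair) is enough to restore weights later, because DefaultWeightedEdge stores its weight internally; and deferring removal to a separate set avoids modifying edgeSet() while iterating over it, which is the concurrent-modification bug fixed in c21e375303. A standalone sketch of the pattern, using a String vertex type and names that are illustrative rather than taken from the repository:

```java
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.*;

public class EdgeCacheSketch {

    // Remove edges below minWeight, caching them keyed by the edge object itself.
    static Map<DefaultWeightedEdge, String[]> removeLightEdges(
            SimpleWeightedGraph<String, DefaultWeightedEdge> graph, double minWeight) {
        Map<DefaultWeightedEdge, String[]> removed = new HashMap<>();
        Set<DefaultWeightedEdge> toRemove = new HashSet<>();
        for (DefaultWeightedEdge e : graph.edgeSet()) {       // iterate without modifying
            if (graph.getEdgeWeight(e) < minWeight) {
                removed.put(e, new String[]{graph.getEdgeSource(e), graph.getEdgeTarget(e)});
                toRemove.add(e);                              // defer removal
            }
        }
        toRemove.forEach(graph::removeEdge);                  // mutate only after iteration
        return removed;
    }

    // Re-insert the cached edge objects; DefaultWeightedEdge carries its weight
    // with it, so no setEdgeWeight call is needed on restore.
    static void restore(SimpleWeightedGraph<String, DefaultWeightedEdge> graph,
                        Map<DefaultWeightedEdge, String[]> removed) {
        removed.forEach((e, endpoints) -> graph.addEdge(endpoints[0], endpoints[1], e));
    }

    public static void main(String[] args) {
        SimpleWeightedGraph<String, DefaultWeightedEdge> g =
                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
        g.addVertex("a1"); g.addVertex("b1");
        g.setEdgeWeight(g.addEdge("a1", "b1"), 2.0);

        Map<DefaultWeightedEdge, String[]> removed = removeLightEdges(g, 3.0);
        restore(g, removed);
        System.out.println(g.getEdgeWeight(g.getEdge("a1", "b1"))); // 2.0 again
    }
}
```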

View File

@@ -1,9 +1,8 @@
import org.jgrapht.Graphs;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.tree.FibonacciHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
@@ -149,18 +148,25 @@ public class Simulator implements GraphModificationFunctions {
vertexLabelValue++;
}
betaVertices.forEach(graph::addVertex);
//add edges
for(Vertex alpha: alphaVertices){
for(Vertex beta: betaVertices) {
Set<Integer> sharedWells = alpha.getRecord().getWells();
sharedWells.retainAll(beta.getRecord().getWells());
if(!sharedWells.isEmpty()) {
DefaultWeightedEdge edge = graph.addEdge(alpha, beta);
graph.setEdgeWeight(edge, sharedWells.size());
//add edges (best so far)
int edgesAddedCount = 0;
for(Vertex a: alphaVertices) {
Set<Integer> a_wells = a.getRecord().getWells();
for(Vertex b: betaVertices) {
Set<Integer> sharedWells = new HashSet<>(a_wells);
sharedWells.retainAll(b.getRecord().getWells());
if (!sharedWells.isEmpty()) {
Graphs.addEdge(graph, a, b, (double) sharedWells.size());
}
edgesAddedCount++;
if (edgesAddedCount % 10000000 == 0) { //collect garbage every 10,000,000 edges
System.out.println(edgesAddedCount + " edges added");
//request garbage collection
System.gc();
System.out.println("Garbage collection requested");
}
}
}
if(verbose){System.out.println("Graph created");}
//stop timing
Instant stop = Instant.now();
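
The reworked edge-creation loop above also explains commit d1810c453d: retainAll mutates the set it is called on, so intersecting the set returned by getRecord().getWells() directly would shrink the record's own well set; copying into a fresh HashSet first leaves the records intact. A minimal illustration (the well values are made up):

```java
import java.util.HashSet;
import java.util.Set;

public class RetainAllPitfall {
    public static void main(String[] args) {
        Set<Integer> alphaWells = new HashSet<>(Set.of(1, 2, 3, 4));
        Set<Integer> betaWells = new HashSet<>(Set.of(3, 4, 5));

        // Buggy pattern: retainAll mutates the receiver, silently shrinking
        // alphaWells for every later comparison.
        // alphaWells.retainAll(betaWells);

        // Fixed pattern: intersect a defensive copy instead.
        Set<Integer> shared = new HashSet<>(alphaWells);
        shared.retainAll(betaWells);

        System.out.println(shared);      // [3, 4]
        System.out.println(alphaWells);  // still [1, 2, 3, 4]
    }
}
```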
@@ -184,7 +190,7 @@ public class Simulator implements GraphModificationFunctions {
Integer minOverlapPercent, boolean verbose, boolean calculatePValue) {
Instant start = Instant.now();
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
boolean saveEdges = BiGpairSEQ.cacheGraph();
int numWells = data.getNumWells();
//Integer alphaCount = data.getAlphaCount();
@@ -202,6 +208,7 @@ public class Simulator implements GraphModificationFunctions {
}
Integer graphAlphaCount = alphas.size();
Integer graphBetaCount = betas.size();
Integer graphEdgeCount = graph.edgeSet().size();
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
@@ -221,12 +228,14 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"removed");}
Integer filteredGraphEdgeCount = graph.edgeSet().size();
//Find Maximum Weight Matching
if(verbose){System.out.println("Finding maximum weight matching");}
//The matching object
MatchingAlgorithm<Vertex, DefaultWeightedEdge> maxWeightMatching;
//Determine algorithm type
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgoritmType();
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType();
switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way
case AUCTION -> {
//create a new MaximumIntegerWeightBipartiteAuctionMatching
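
For the non-auction cases, the imports visible at the top of this file (MaximumWeightBipartiteMatching, PairingHeap/FibonacciHeap, BigDecimal) point at JGraphT's maximum-weight bipartite matcher. A minimal sketch of how such a matcher is typically constructed and run is below; the partition sets and the optional heap-supplier overload should be checked against the JGraphT and JHeaps versions actually on the classpath.

```java
// Sketch: running JGraphT's maximum-weight bipartite matching.
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.Set;

public class MatchingSketch {
    static MatchingAlgorithm.Matching<String, DefaultWeightedEdge> match(
            SimpleWeightedGraph<String, DefaultWeightedEdge> graph,
            Set<String> alphas, Set<String> betas) {
        // Basic form: the partition sets identify the two sides of the bipartite graph.
        MatchingAlgorithm<String, DefaultWeightedEdge> matcher =
                new MaximumWeightBipartiteMatching<>(graph, alphas, betas);
        // Recent JGraphT releases also offer a heap-supplier overload (weights are
        // handled as BigDecimal internally), e.g. roughly:
        // new MaximumWeightBipartiteMatching<>(graph, alphas, betas, PairingHeap::new);
        return matcher.getMatching();
    }
}
```

The AUCTION branch above uses the project's own MaximumIntegerWeightBipartiteAuctionMatching, whose API is not shown in this diff, so it is not sketched here.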
@@ -372,8 +381,10 @@ public class Simulator implements GraphModificationFunctions {
metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString());
metadata.put("total alphas read from plate", data.getAlphaCount().toString());
metadata.put("total betas read from plate", data.getBetaCount().toString());
metadata.put("initial edges in graph", graphEdgeCount.toString());
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString());
metadata.put("high overlap threshold for pairing", highThreshold.toString());
metadata.put("low overlap threshold for pairing", lowThreshold.toString());
metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString());