Implemented storing graphs in memory for multiple pairing experiments

This commit is contained in:
2022-02-22 21:30:00 -06:00
parent fd2ec76b71
commit 68ee9e4bb6
6 changed files with 208 additions and 99 deletions

View File

@@ -1,3 +1,4 @@
import org.jgrapht.Graph;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
@@ -49,6 +50,7 @@ public class Simulator {
Instant start = Instant.now();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
int numWells = samplePlate.getSize();
if(verbose){System.out.println("Making cell maps");}
@@ -63,15 +65,11 @@ public class Simulator {
if(verbose){System.out.println("All alphas count: " + alphaCount);}
int betaCount = allBetas.size();
if(verbose){System.out.println("All betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy sequences because they have no signal value.
//Remove sequences with total occupancy below minimum pair overlap threshold
if(verbose){System.out.println("Removing sequences present in all wells.");}
//if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
filterByOccupancyThreshold(allBetas, 1, numWells - 1);
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
if(verbose){System.out.println("Sequences removed");}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
@@ -136,6 +134,7 @@ public class Simulator {
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
//Set source file name in graph to name of sample plate
output.setSourceFilename(samplePlate.getSourceFileName());
//return GraphWithMapData object
return output;
@@ -146,6 +145,8 @@ public class Simulator {
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
//Integer arrays will contain TO VERTEX, FROM VERTEX, and WEIGHT (which I'll need to cast to double)
List<Integer[]> removedEdges = new ArrayList<>();
int numWells = data.getNumWells();
Integer alphaCount = data.getAlphaCount();
Integer betaCount = data.getBetaCount();
@@ -156,24 +157,26 @@ public class Simulator {
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
//remove weights outside given overlap thresholds
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold));
if(verbose){System.out.println("Over- and under-weight edges removed");}
//Filter by overlap size
//remove edges between vertices with too small an overlap size, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
" percent of vertex occupancy value.");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts,
plateVtoAMap, plateVtoBMap, minOverlapPercent));
if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
+ maxOccupancyDifference);}
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
maxOccupancyDifference);
removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts,
plateVtoAMap, plateVtoBMap, maxOccupancyDifference));
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"set to 0.0");}
"removed");}
//Find Maximum Weighted Matching
//using jheaps library class PairingHeap for improved efficiency
if(verbose){System.out.println("Finding maximum weighted matching");}
@@ -239,8 +242,10 @@ public class Simulator {
//Metadata comments for CSV file
int min = Math.min(alphaCount, betaCount);
//rate of attempted matching
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
//rate of pairing error
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
//get list of well concentrations
@@ -272,13 +277,19 @@ public class Simulator {
metadata.put("incorrect pairing count", Integer.toString(falseCount));
metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
metadata.put("simulation time", nf.format(time.toSeconds()));
MatchingResult output = new MatchingResult(data.getSourceFilename(), metadata, header, allResults, matchMap, time);
//create MatchingResult object
MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
if(verbose){
for(String s: output.getComments()){
System.out.println(s);
}
}
//put the removed edges back on the graph
System.out.println("Restoring removed edges to graph.");
GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
//return MatchingResult object
return output;
}
@@ -587,6 +598,18 @@ public class Simulator {
// return output;
// }
//Remove sequences based on occupancy.
//Every entry of wellMap whose value (the occupancy count) is greater than high or less than low
//is deleted; entries with a value in the inclusive range [low, high] are kept.
//The map is modified in place and nothing is returned.
public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
    //values() is a live view of the map, so removing a value here removes its entry from wellMap.
    //This replaces the former collect-keys-then-remove pass and avoids two lookups per key.
    wellMap.values().removeIf(occupancy -> (occupancy > high) || (occupancy < low));
}
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
@@ -630,62 +653,6 @@ public class Simulator {
}
}
//Set the weight of every edge whose weight is above high or below low to 0.0.
//Edges are not removed from the graph; only their weights are overwritten.
private static void filterByOccupancyThreshold(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
        int low, int high) {
    for (DefaultWeightedEdge edge : graph.edgeSet()) {
        double weight = graph.getEdgeWeight(edge);
        boolean outsideThresholds = (weight > high) || (weight < low);
        if (!outsideThresholds) {
            continue;
        }
        graph.setEdgeWeight(edge, 0.0);
    }
}
//Delete from wellMap every entry whose value is greater than high or less than low.
//Keys to delete are gathered first and removed afterwards, so the map is never
//modified while it is being iterated.
private static void filterByOccupancyThreshold(Map<Integer, Integer> wellMap, int low, int high){
    List<Integer> outOfRange = new ArrayList<>();
    for (Map.Entry<Integer, Integer> entry : wellMap.entrySet()) {
        int occupancy = entry.getValue();
        boolean tooHigh = occupancy > high;
        boolean tooLow = occupancy < low;
        if (tooHigh || tooLow) {
            outOfRange.add(entry.getKey());
        }
    }
    outOfRange.forEach(wellMap::remove);
}
//Suppress edges for pairs with a large occupancy discrepancy.
//For each edge, the source vertex is translated through plateVtoAMap and looked up in alphaWellCounts,
//and the target vertex is translated through plateVtoBMap and looked up in betaWellCounts.
//When the absolute difference of the two counts is maxOccupancyDifference or more,
//the edge weight is set to 0.0 (the edge itself stays in the graph).
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
        Map<Integer, Integer> alphaWellCounts,
        Map<Integer, Integer> betaWellCounts,
        Map<Integer, Integer> plateVtoAMap,
        Map<Integer, Integer> plateVtoBMap,
        Integer maxOccupancyDifference) {
    for (DefaultWeightedEdge edge : graph.edgeSet()) {
        Integer alphaKey = plateVtoAMap.get(graph.getEdgeSource(edge));
        Integer alphaOccupancy = alphaWellCounts.get(alphaKey);
        Integer betaKey = plateVtoBMap.get(graph.getEdgeTarget(edge));
        Integer betaOccupancy = betaWellCounts.get(betaKey);
        //Adjust this to something cleverer later
        int occupancyGap = Math.abs(alphaOccupancy - betaOccupancy);
        if (occupancyGap < maxOccupancyDifference) {
            continue;
        }
        graph.setEdgeWeight(edge, 0.0);
    }
}
//Suppress edges for pairs where the overlap size is significantly lower than the well occupancy.
//The edge weight is divided by the occupancy count of each endpoint (source looked up via
//plateVtoAMap/alphaWellCounts, target via plateVtoBMap/betaWellCounts). If either ratio is below
//minOverlapPercent / 100.0, the edge weight is set to 0.0; the edge itself stays in the graph.
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
        Map<Integer, Integer> alphaWellCounts,
        Map<Integer, Integer> betaWellCounts,
        Map<Integer, Integer> plateVtoAMap,
        Map<Integer, Integer> plateVtoBMap,
        Integer minOverlapPercent) {
    for (DefaultWeightedEdge edge : graph.edgeSet()) {
        Integer alphaOccupancy = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(edge)));
        Integer betaOccupancy = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(edge)));
        double overlap = graph.getEdgeWeight(edge);
        double minimumFraction = minOverlapPercent / 100.0;
        //The alpha ratio is tested first; the beta ratio is only evaluated when the alpha ratio passes
        boolean alphaShareTooSmall = overlap / alphaOccupancy < minimumFraction;
        if (alphaShareTooSmall || (overlap / betaOccupancy < minimumFraction)) {
            graph.setEdgeWeight(edge, 0.0);
        }
    }
}
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
int valueSequenceIndex){
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();