Refactoring to allow graphs from file

This commit is contained in:
2022-02-19 17:23:55 -06:00
parent cfa473c7ce
commit 568a6be3c7
8 changed files with 730 additions and 244 deletions

View File

@@ -45,6 +45,244 @@ public class Simulator {
return new CellSample(distinctCells, cdr1Freq);
}
// Version that reads in a graph? Possibly should just separate graph-making into its own function
// public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
// Plate samplePlate, Integer lowThreshold,
// Integer highThreshold, Integer maxOccupancyDifference,
// Integer minOverlapPercent, boolean verbose, boolean importGraph,
// SimpleWeightedGraph graph){
//
// }
//Make the maps needed for matching CDR3s
public static MapData makeMaps(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold, boolean verbose) {
Instant start = Instant.now();
int numWells = samplePlate.getSize();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
if(verbose){System.out.println("Making well maps");}
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
int alphaCount = allAlphas.size();
if(verbose){System.out.println("all alphas count: " + alphaCount);}
int betaCount = allBetas.size();
if(verbose){System.out.println("all betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy peptides because they have no signal value.
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
//peptide that's above the threshold.
if(verbose){System.out.println("Removing peptides present in all wells.");}
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");}
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
if(verbose){System.out.println("Peptides removed");}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alpha count: " + pairableAlphaCount);}
int pairableBetaCount = allBetas.size();
if(verbose){System.out.println("Remaining beta count: " + pairableBetaCount);}
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from betas to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
if(verbose){System.out.println("Vertex maps made");}
Instant stop = Instant.now();
Duration time = Duration.between(start, stop);
return new MapData(distCellsMapAlphaKey, allAlphas, allBetas, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, time);
}
//Make the graph needed for matching CDR3s
public static GraphWithMapData makeGraph(Plate samplePlate, MapData maps, Integer lowThreshold,
Integer highThreshold, boolean verbose) {
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
Instant start = Instant.now();
Map<Integer, Integer> plateVtoAMap = maps.getPlateVtoAMap();
Map<Integer, Integer> plateVtoBMap = maps.getPlateVtoBMap();
Map<Integer, Integer> plateAtoVMap = maps.getPlateAtoVMap();
Map<Integer, Integer> plateBtoVMap = maps.getPlateBtoVMap();
Map<Integer, Integer> allAlphas = maps.getAllAlphas();
Map<Integer, Integer> allBetas = maps.getAllBetas();
if(verbose){System.out.println("Creating adjacency matrix");}
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//the adjacency matrix to be used by the graph generator
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("matrix created");}
//create bipartite graph
if(verbose){System.out.println("creating graph");}
//the graph object
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
//the list of beta vertices
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
Instant stop = Instant.now();
Duration time = Duration.between(start, stop);
time = time.plus(maps.getTime());
return new GraphWithMapData(graph, maps, alphaWellCounts, betaWellCounts, time);
}
//match CDR3s
public static MatchingResult matchCDR3s(Plate samplePlate, GraphWithMapData data, Integer lowThreshold,
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
int numWells = samplePlate.getSize();
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
Map<Integer, Integer> allAlphas = data.getAllAlphas();
Map<Integer, Integer> allBetas = data.getAllBetas();
Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
Integer alphaCount = allAlphas.size();
Integer betaCount = allBetas.size();
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
//Find Maximum Weighted Matching
// if(verbose){System.out.println("Finding maximum weighted matching");}
// MaximumWeightBipartiteMatching maxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
// if(verbose){System.out.println("Matching completed");}
// Instant stop = Instant.now();
//trying with jheaps addressable now to improve performance
if(verbose){System.out.println("Finding maximum weighted matching");}
//Attempting to use addressable heap to improve performance
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet(),
i -> new PairingHeap(Comparator.naturalOrder()));
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
if(verbose){System.out.println("Matching completed");}
Instant stop = Instant.now();
//Header for CSV file
List<String> header = new ArrayList<>();
header.add("Alpha");
header.add("Alpha well count");
header.add("Beta");
header.add("Beta well count");
header.add("Overlap well count");
header.add("Matched correctly?");
header.add("P-value");
//Results for csv file
List<List<String>> allResults = new ArrayList<>();
NumberFormat nf = NumberFormat.getInstance(Locale.US);
MathContext mc = new MathContext(3);
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e;
int trueCount = 0;
int falseCount = 0;
boolean check;
Map<Integer, Integer> matchMap = new HashMap<>();
while(weightIter.hasNext()) {
e = weightIter.next();
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
//The match map is all matches found, not just true matches!
matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target));
check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
if(check) {
trueCount++;
}
else {
falseCount++;
}
List<String> result = new ArrayList<>();
result.add(plateVtoAMap.get(source).toString());
//alpha well count
result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
result.add(plateVtoBMap.get(target).toString());
//beta well count
result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
//overlap count
result.add(Double.toString(graph.getEdgeWeight(e)));
result.add(Boolean.toString(check));
double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)),
betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e));
BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
result.add(pValueTrunc.toString());
allResults.add(result);
}
//Metadate comments for CSV file
int min = Math.min(alphaCount, betaCount);
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
List<String> comments = new ArrayList<>();
comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount);
comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold);
comments.add("Minimum overlap percent: " + minOverlapPercent);
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
comments.add("Pairing attempt rate: " + attemptRateTrunc);
comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount);
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
Duration time = Duration.between(start, stop);
time = time.plus(data.getTime());
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(verbose){
for(String s: comments){
System.out.println(s);
}
}
return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
}
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, Integer maxOccupancyDifference,
@@ -58,7 +296,7 @@ public class Simulator {
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
//HashMap from cells, keyed to Alphas, values Betas, for checking if matches are correct
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
@@ -132,10 +370,16 @@ public class Simulator {
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
//write graph to file
GraphFileWriter writer = new GraphFileWriter("graph", graph);
writer.writeGraphToFile();
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);