Refactoring to allow graphs from file
This commit is contained in:
@@ -45,6 +45,244 @@ public class Simulator {
|
||||
return new CellSample(distinctCells, cdr1Freq);
|
||||
}
|
||||
|
||||
// Version that reads in a graph? Possibly should just separate graph-making into its own function
|
||||
// public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
|
||||
// Plate samplePlate, Integer lowThreshold,
|
||||
// Integer highThreshold, Integer maxOccupancyDifference,
|
||||
// Integer minOverlapPercent, boolean verbose, boolean importGraph,
|
||||
// SimpleWeightedGraph graph){
|
||||
//
|
||||
// }
|
||||
|
||||
|
||||
//Make the maps needed for matching CDR3s
|
||||
public static MapData makeMaps(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
int numWells = samplePlate.getSize();
|
||||
int[] alphaIndex = {cdr3AlphaIndex};
|
||||
int[] betaIndex = {cdr3BetaIndex};
|
||||
|
||||
if(verbose){System.out.println("Making cell maps");}
|
||||
//HashMap keyed to Alphas, values Betas
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
|
||||
if(verbose){System.out.println("Cell maps made");}
|
||||
|
||||
if(verbose){System.out.println("Making well maps");}
|
||||
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
|
||||
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
|
||||
int alphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("all alphas count: " + alphaCount);}
|
||||
int betaCount = allBetas.size();
|
||||
if(verbose){System.out.println("all betas count: " + betaCount);}
|
||||
|
||||
if(verbose){System.out.println("Well maps made");}
|
||||
|
||||
//Remove saturating-occupancy peptides because they have no signal value.
|
||||
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
|
||||
//peptide that's above the threshold.
|
||||
if(verbose){System.out.println("Removing peptides present in all wells.");}
|
||||
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");}
|
||||
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
|
||||
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
|
||||
if(verbose){System.out.println("Peptides removed");}
|
||||
int pairableAlphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("Remaining alpha count: " + pairableAlphaCount);}
|
||||
int pairableBetaCount = allBetas.size();
|
||||
if(verbose){System.out.println("Remaining beta count: " + pairableBetaCount);}
|
||||
|
||||
if(verbose){System.out.println("Making vertex maps");}
|
||||
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
// distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
// subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||
Integer vertexStartValue = 0;
|
||||
//keys are sequential integer vertices, values are alphas
|
||||
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
|
||||
//New start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
vertexStartValue += plateVtoAMap.size();
|
||||
//keys are sequential integers vertices, values are betas
|
||||
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
|
||||
//keys are alphas, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
|
||||
//keys are betas, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
|
||||
if(verbose){System.out.println("Vertex maps made");}
|
||||
Instant stop = Instant.now();
|
||||
Duration time = Duration.between(start, stop);
|
||||
return new MapData(distCellsMapAlphaKey, allAlphas, allBetas, plateVtoAMap, plateVtoBMap, plateAtoVMap,
|
||||
plateBtoVMap, time);
|
||||
}
|
||||
|
||||
//Make the graph needed for matching CDR3s
|
||||
public static GraphWithMapData makeGraph(Plate samplePlate, MapData maps, Integer lowThreshold,
|
||||
Integer highThreshold, boolean verbose) {
|
||||
int[] alphaIndex = {cdr3AlphaIndex};
|
||||
int[] betaIndex = {cdr3BetaIndex};
|
||||
Instant start = Instant.now();
|
||||
Map<Integer, Integer> plateVtoAMap = maps.getPlateVtoAMap();
|
||||
Map<Integer, Integer> plateVtoBMap = maps.getPlateVtoBMap();
|
||||
Map<Integer, Integer> plateAtoVMap = maps.getPlateAtoVMap();
|
||||
Map<Integer, Integer> plateBtoVMap = maps.getPlateBtoVMap();
|
||||
Map<Integer, Integer> allAlphas = maps.getAllAlphas();
|
||||
Map<Integer, Integer> allBetas = maps.getAllBetas();
|
||||
|
||||
if(verbose){System.out.println("Creating adjacency matrix");}
|
||||
//Count how many wells each alpha appears in
|
||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||
//count how many wells each beta appears in
|
||||
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
||||
//the adjacency matrix to be used by the graph generator
|
||||
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
||||
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
|
||||
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
|
||||
if(verbose){System.out.println("matrix created");}
|
||||
|
||||
//create bipartite graph
|
||||
if(verbose){System.out.println("creating graph");}
|
||||
//the graph object
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
//the graph generator
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
//the list of alpha vertices
|
||||
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(alphaVertices);
|
||||
//the list of beta vertices
|
||||
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
|
||||
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
if(verbose){System.out.println("Graph created");}
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
|
||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
|
||||
Instant stop = Instant.now();
|
||||
Duration time = Duration.between(start, stop);
|
||||
time = time.plus(maps.getTime());
|
||||
return new GraphWithMapData(graph, maps, alphaWellCounts, betaWellCounts, time);
|
||||
}
|
||||
|
||||
//match CDR3s
|
||||
public static MatchingResult matchCDR3s(Plate samplePlate, GraphWithMapData data, Integer lowThreshold,
|
||||
Integer highThreshold, Integer maxOccupancyDifference,
|
||||
Integer minOverlapPercent, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
int numWells = samplePlate.getSize();
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
|
||||
Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
|
||||
Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
|
||||
Map<Integer, Integer> allAlphas = data.getAllAlphas();
|
||||
Map<Integer, Integer> allBetas = data.getAllBetas();
|
||||
Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
|
||||
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
|
||||
Integer alphaCount = allAlphas.size();
|
||||
Integer betaCount = allBetas.size();
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
|
||||
|
||||
|
||||
//Find Maximum Weighted Matching
|
||||
|
||||
// if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
// MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
|
||||
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
// if(verbose){System.out.println("Matching completed");}
|
||||
// Instant stop = Instant.now();
|
||||
|
||||
//trying with jheaps addressable now to improve performance
|
||||
if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
//Attempting to use addressable heap to improve performance
|
||||
MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph,
|
||||
plateVtoAMap.keySet(),
|
||||
plateVtoBMap.keySet(),
|
||||
i -> new PairingHeap(Comparator.naturalOrder()));
|
||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
if(verbose){System.out.println("Matching completed");}
|
||||
Instant stop = Instant.now();
|
||||
|
||||
//Header for CSV file
|
||||
List<String> header = new ArrayList<>();
|
||||
header.add("Alpha");
|
||||
header.add("Alpha well count");
|
||||
header.add("Beta");
|
||||
header.add("Beta well count");
|
||||
header.add("Overlap well count");
|
||||
header.add("Matched correctly?");
|
||||
header.add("P-value");
|
||||
|
||||
|
||||
//Results for csv file
|
||||
List<List<String>> allResults = new ArrayList<>();
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
MathContext mc = new MathContext(3);
|
||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
DefaultWeightedEdge e;
|
||||
int trueCount = 0;
|
||||
int falseCount = 0;
|
||||
boolean check;
|
||||
Map<Integer, Integer> matchMap = new HashMap<>();
|
||||
while(weightIter.hasNext()) {
|
||||
e = weightIter.next();
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
//The match map is all matches found, not just true matches!
|
||||
matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target));
|
||||
check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
|
||||
if(check) {
|
||||
trueCount++;
|
||||
}
|
||||
else {
|
||||
falseCount++;
|
||||
}
|
||||
List<String> result = new ArrayList<>();
|
||||
result.add(plateVtoAMap.get(source).toString());
|
||||
//alpha well count
|
||||
result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
|
||||
result.add(plateVtoBMap.get(target).toString());
|
||||
//beta well count
|
||||
result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
|
||||
//overlap count
|
||||
result.add(Double.toString(graph.getEdgeWeight(e)));
|
||||
result.add(Boolean.toString(check));
|
||||
double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)),
|
||||
betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e));
|
||||
BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
|
||||
result.add(pValueTrunc.toString());
|
||||
allResults.add(result);
|
||||
}
|
||||
|
||||
//Metadate comments for CSV file
|
||||
int min = Math.min(alphaCount, betaCount);
|
||||
double attemptRate = (double) (trueCount + falseCount) / min;
|
||||
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
|
||||
List<String> comments = new ArrayList<>();
|
||||
comments.add("Total alphas found: " + alphaCount);
|
||||
comments.add("Total betas found: " + betaCount);
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("Minimum overlap percent: " + minOverlapPercent);
|
||||
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
|
||||
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
||||
comments.add("Correct pairings: " + trueCount);
|
||||
comments.add("Incorrect pairings: " + falseCount);
|
||||
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
|
||||
Duration time = Duration.between(start, stop);
|
||||
time = time.plus(data.getTime());
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
if(verbose){
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
|
||||
}
|
||||
|
||||
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold,
|
||||
Integer highThreshold, Integer maxOccupancyDifference,
|
||||
@@ -58,7 +296,7 @@ public class Simulator {
|
||||
|
||||
|
||||
if(verbose){System.out.println("Making cell maps");}
|
||||
//HashMap keyed to Alphas, values Betas
|
||||
//HashMap from cells, keyed to Alphas, values Betas, for checking if matches are correct
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
|
||||
if(verbose){System.out.println("Cell maps made");}
|
||||
|
||||
@@ -132,10 +370,16 @@ public class Simulator {
|
||||
graphGenerator.generateGraph(graph);
|
||||
if(verbose){System.out.println("Graph created");}
|
||||
|
||||
//write graph to file
|
||||
GraphFileWriter writer = new GraphFileWriter("graph", graph);
|
||||
writer.writeGraphToFile();
|
||||
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
|
||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
|
||||
|
||||
|
||||
|
||||
//Filter by overlap size
|
||||
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
|
||||
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
|
||||
|
||||
Reference in New Issue
Block a user