Improved efficiency by pre-filtering peptide lists before making and processing graphs
This commit is contained in:
@@ -4,12 +4,13 @@ import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jgrapht.util.ArrayUnenforcedSet;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.MathContext;
|
||||
import java.text.NumberFormat;
|
||||
//import java.time.Duration;
|
||||
//import java.time.Instant;
|
||||
import java.time.Instant;
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
@@ -74,6 +75,8 @@ public class Simulator {
|
||||
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold, Integer highThreshold){
|
||||
System.out.println("Cells: " + distinctCells.size());
|
||||
Instant start = Instant.now();
|
||||
|
||||
int numWells = samplePlate.getSize();
|
||||
System.out.println("Making cell maps");
|
||||
//HashMap keyed to Alphas, values Betas
|
||||
@@ -98,6 +101,34 @@ public class Simulator {
|
||||
|
||||
System.out.println("Well maps made");
|
||||
|
||||
//Remove saturating-occupancy peptides because they have no signal value.
|
||||
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
|
||||
//peptide that's above the threshold.
|
||||
System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy peptides");
|
||||
List<Integer> noiseAlphas = new ArrayList<>();
|
||||
List<Integer> noiseBetas = new ArrayList<>();
|
||||
for(Integer k: allAlphas.keySet()){
|
||||
if((allAlphas.get(k)>numWells - 1) || (allAlphas.get(k) < lowThreshold)){
|
||||
noiseAlphas.add(k);
|
||||
}
|
||||
}
|
||||
for(Integer k: allBetas.keySet()){
|
||||
if((allBetas.get(k)> numWells - 1 ) || (allBetas.get(k) < lowThreshold)){
|
||||
noiseBetas.add(k);
|
||||
}
|
||||
}
|
||||
for(Integer p: noiseAlphas){
|
||||
allAlphas.remove(p);
|
||||
}
|
||||
for(Integer p: noiseBetas){
|
||||
allBetas.remove(p);
|
||||
}
|
||||
System.out.println("Peptides removed");
|
||||
int pairableAlphaCount = allAlphas.size();
|
||||
System.out.println("Remaining alpha count: " + pairableAlphaCount);
|
||||
int pairableBetaCount = allBetas.size();
|
||||
System.out.println("Remaining beta count: " + pairableBetaCount);
|
||||
|
||||
System.out.println("Making vertex maps");
|
||||
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
|
||||
// when I refactor to use a 2d array to make the graph
|
||||
@@ -115,7 +146,7 @@ public class Simulator {
|
||||
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
|
||||
System.out.println("Vertex maps made");
|
||||
|
||||
System.out.println("Creating Graph");
|
||||
System.out.println("Creating adjacency matrix");
|
||||
//Count how many wells each alpha appears in
|
||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||
//count how many wells each beta appears in
|
||||
@@ -130,18 +161,30 @@ public class Simulator {
|
||||
for (int n = 0; n < numWells; n++) {
|
||||
wellNAlphas = samplePlate.assayWellsCDR3Alpha(n);
|
||||
for (Integer a : wellNAlphas.keySet()) {
|
||||
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
if(allAlphas.containsKey(a)){
|
||||
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
wellNBetas = samplePlate.assayWellsCDR3Beta(n);
|
||||
for (Integer b : wellNBetas.keySet()) {
|
||||
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
if(allBetas.containsKey(b)){
|
||||
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
for (Integer i : wellNAlphas.keySet()) {
|
||||
for (Integer j : wellNBetas.keySet()) {
|
||||
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
|
||||
if(allAlphas.containsKey(i)){
|
||||
for (Integer j : wellNBetas.keySet()) {
|
||||
if(allBetas.containsKey(j)){
|
||||
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("matrix created");
|
||||
|
||||
System.out.println("creating graph");
|
||||
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
List<Integer> alphaVertices = new ArrayList<>();
|
||||
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
@@ -153,11 +196,21 @@ public class Simulator {
|
||||
graphGenerator.generateGraph(graph);
|
||||
System.out.println("Graph created");
|
||||
|
||||
System.out.println("Eliminating edges with weights outside threshold values");
|
||||
for(DefaultWeightedEdge e: graph.edgeSet()){
|
||||
if ((graph.getEdgeWeight(e) > highThreshold) || (graph.getEdgeWeight(e) < lowThreshold)){
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
System.out.println("Over- and under-weight edges set to 0.0");
|
||||
|
||||
|
||||
System.out.println("Finding maximum weighted matching");
|
||||
MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
|
||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
System.out.println("Matching completed");
|
||||
Instant stop = Instant.now();
|
||||
|
||||
//Header for CSV file
|
||||
List<String> header = new ArrayList<>();
|
||||
@@ -182,9 +235,6 @@ public class Simulator {
|
||||
Map<Integer, Integer> matchMap = new HashMap<>();
|
||||
while(weightIter.hasNext()) {
|
||||
e = weightIter.next();
|
||||
if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
continue;
|
||||
}
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
//The match map is all matches found, not just true matches!
|
||||
@@ -223,10 +273,15 @@ public class Simulator {
|
||||
List<String> comments = new ArrayList<>();
|
||||
comments.add("Total alphas found: " + alphaCount);
|
||||
comments.add("Total betas found: " + betaCount);
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
||||
comments.add("Correct pairings: " + trueCount);
|
||||
comments.add("Incorrect pairings: " + falseCount);
|
||||
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
|
||||
Duration time = Duration.between(start, stop);
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
|
||||
return new MatchingResult(comments, header, allResults, matchMap);
|
||||
}
|
||||
@@ -234,6 +289,8 @@ public class Simulator {
|
||||
public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold,
|
||||
Integer highThreshold, Map<Integer, Integer> previousMatches){
|
||||
Instant start = Instant.now();
|
||||
|
||||
int numWells = samplePlate.getSize();
|
||||
System.out.println("Making previous match maps");
|
||||
Map<Integer, Integer> CDR3AtoBMap = previousMatches;
|
||||
@@ -261,6 +318,34 @@ public class Simulator {
|
||||
System.out.println("all CDR1s count: " + CDR1Count);
|
||||
System.out.println("Well maps made");
|
||||
|
||||
//NEW FILTERING CODE TO TEST
|
||||
System.out.println("Removing unpaired CDR3s from well maps");
|
||||
List<Integer> unpairedCDR3s = new ArrayList<>();
|
||||
for(Integer i: allCDR3s.keySet()){
|
||||
if(!(CDR3AtoBMap.containsKey(i) || CDR3BtoAMap.containsKey(i))){
|
||||
unpairedCDR3s.add(i);
|
||||
}
|
||||
}
|
||||
for(Integer i: unpairedCDR3s){
|
||||
allCDR3s.remove(i);
|
||||
}
|
||||
System.out.println("Unpaired CDR3s removed.");
|
||||
System.out.println("Remaining CDR3 count: " + allCDR3s.size());
|
||||
|
||||
System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
|
||||
List<Integer> noiseCDR1s = new ArrayList<>();
|
||||
for(Integer i: allCDR1s.keySet()){
|
||||
if((allCDR1s.get(i) < lowThreshold) || (allCDR1s.get(i)) > numWells - 1){
|
||||
noiseCDR1s.add(i);
|
||||
}
|
||||
}
|
||||
for(Integer i: noiseCDR1s){
|
||||
allCDR1s.remove(i);
|
||||
}
|
||||
System.out.println("CDR1s removed.");
|
||||
System.out.println("Remaining CDR1 count: " + allCDR1s.size());
|
||||
//END NEW CODE
|
||||
|
||||
System.out.println("Making vertex maps");
|
||||
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
|
||||
// when I refactor to use a 2d array to make the graph
|
||||
@@ -300,9 +385,11 @@ public class Simulator {
|
||||
CDR1WellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
for (Integer i : wellNCDR3s.keySet()) {
|
||||
if(CDR3AtoBMap.containsKey(i)||CDR3BtoAMap.containsKey(i)){//only consider ones we have matches for
|
||||
if(allCDR3s.containsKey(i)){//only consider CDR3s we have matches for - rest filtered out
|
||||
for (Integer j : wellNCDR1s.keySet()) {
|
||||
weights[plateCDR3toVMap.get(i)][plateCDR1toVMap.get(j) - vertexStartValue] += 1.0;
|
||||
if(allCDR1s.containsKey(j)){ //only those CDR1s we didn't filter out
|
||||
weights[plateCDR3toVMap.get(i)][plateCDR1toVMap.get(j) - vertexStartValue] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -319,6 +406,14 @@ public class Simulator {
|
||||
System.out.println("Graph created");
|
||||
System.out.println("Number of edges: " + graph.edgeSet().size());
|
||||
|
||||
System.out.println("Removing edges outside of weight thresholds");
|
||||
for(DefaultWeightedEdge e: graph.edgeSet()){
|
||||
if ((graph.getEdgeWeight(e) > highThreshold) || (graph.getEdgeWeight(e) < lowThreshold)){
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
System.out.println("Over- and under-weight edges set to 0.0");
|
||||
|
||||
System.out.println("Finding first maximum weighted matching");
|
||||
MaximumWeightBipartiteMatching firstMaxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
||||
@@ -411,17 +506,22 @@ public class Simulator {
|
||||
}
|
||||
}
|
||||
|
||||
//results for dual map
|
||||
Instant stop = Instant.now();
|
||||
|
||||
|
||||
//results for first map
|
||||
System.out.println("Results for first pass");
|
||||
List<List<String>> allResults = new ArrayList<>();
|
||||
Integer trueCount = 0;
|
||||
Iterator iter = dualMatchesMap.keySet().iterator();
|
||||
Iterator iter = firstMatchesMap.keySet().iterator();
|
||||
|
||||
while(iter.hasNext()){
|
||||
Boolean proven = false;
|
||||
List<String> tmp = new ArrayList<>();
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
proven = true;
|
||||
@@ -449,6 +549,10 @@ public class Simulator {
|
||||
double correctRate = (double) trueCount / allResults.size();
|
||||
comments.add("Correct matching rate: " + correctRate);
|
||||
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
|
||||
List<String> headers = new ArrayList<>();
|
||||
headers.add("CDR3 alpha");
|
||||
headers.add("CDR3 beta");
|
||||
@@ -456,24 +560,20 @@ public class Simulator {
|
||||
headers.add("second matched CDR1");
|
||||
headers.add("Correct match?");
|
||||
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
MatchingResult firstTest = new MatchingResult(comments, headers, allResults, dualMatchesMap);
|
||||
|
||||
MatchingResult dualTest = new MatchingResult(comments, headers, allResults, dualMatchesMap);
|
||||
|
||||
|
||||
//results for first map
|
||||
//results for dual map
|
||||
System.out.println("Results for second pass");
|
||||
allResults = new ArrayList<>();
|
||||
trueCount = 0;
|
||||
iter = firstMatchesMap.keySet().iterator();
|
||||
iter = dualMatchesMap.keySet().iterator();
|
||||
while(iter.hasNext()){
|
||||
Boolean proven = false;
|
||||
List<String> tmp = new ArrayList<>();
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
proven = true;
|
||||
@@ -494,6 +594,8 @@ public class Simulator {
|
||||
comments = new ArrayList<>();
|
||||
comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
||||
comments.add("Previous pairs found: " + previousMatches.size());
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("CDR1 matches attempted: " + allResults.size());
|
||||
attemptRate = (double) allResults.size() / previousMatches.size();
|
||||
comments.add("Matching attempt rate: " + attemptRate);
|
||||
@@ -501,10 +603,19 @@ public class Simulator {
|
||||
correctRate = (double) trueCount / allResults.size();
|
||||
comments.add("Correct matching rate: " + correctRate);
|
||||
|
||||
|
||||
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
MatchingResult firstTest = new MatchingResult(comments, headers, allResults, dualMatchesMap);
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
Duration time = Duration.between(start, stop);
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
|
||||
MatchingResult dualTest = new MatchingResult(comments, headers, allResults, dualMatchesMap);
|
||||
|
||||
|
||||
MatchingResult[] output = {firstTest, dualTest};
|
||||
return output;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user