diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 81a2e02..7380ba6 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -4,78 +4,22 @@ import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching; import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator; import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; -import org.jheaps.AddressableHeap; import org.jheaps.tree.PairingHeap; import java.math.BigDecimal; -import java.math.BigInteger; import java.math.MathContext; import java.text.NumberFormat; import java.time.Instant; import java.time.Duration; import java.util.*; -import java.util.function.Function; -import java.util.function.Supplier; import java.util.stream.IntStream; public class Simulator { - private static int cdr3AlphaIndex = 0; - private static int cdr3BetaIndex = 1; - private static int cdr1AlphaIndex = 2; - private static int cdr1BetaIndex = 3; + private static final int cdr3AlphaIndex = 0; + private static final int cdr3BetaIndex = 1; + private static final int cdr1AlphaIndex = 2; + private static final int cdr1BetaIndex = 3; - - //Tested generating the cell sample file itself with a set distribution, but it wasn't working well - //realized I'd have to change matching algos as well, so abandoned it. -// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){ -// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s -// //this could be wrong, though. could be less diversity than that. -// //previous sim was only CDR3s -// //Integer numDistinctCells = 1000000; -// //int cdr1Freq = 3; -// -// List numbersCDR3 = new ArrayList<>(); -// List numbersCDR1 = new ArrayList<>(); -// Integer numDistCDR3s = 2 * numDistinctCells + 1; -// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i)); -// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i)); -// Collections.shuffle(numbersCDR3); -// Collections.shuffle(numbersCDR1); -// -// //Each cell represented by 4 values -// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s -// List distinctCells = new ArrayList<>(); -// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ -// Integer tmpCDR3a = numbersCDR3.get(i); -// Integer tmpCDR3b = numbersCDR3.get(i+1); -// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size()); -// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size()); -// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b}; -// distinctCells.add(tmp); -// } -// ListsampleCells = new ArrayList<>(); -// int count; -// int lastFactor = 0; -// int factor = 10; -// int index = 0; -// while(numDistinctCells / factor >= 1){ -// int i = index; -// while(i < index + factor - lastFactor){ -// count = 0; -// while(count < (numDistinctCells/(factor * 10))){ -// sampleCells.add(distinctCells.get(i)); -// count++; -// } -// i++; -// } -// index = i; -// lastFactor = factor; -// factor *=10; -// } -// System.out.println("Total cells in sample: " + sampleCells.size()); -// System.out.println("Distinct cells in sample: " + index); -// return new CellSample(sampleCells, cdr1Freq); -// } public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) { //In real T cells, CDR1s have about one third the diversity of CDR3s //previous sim was only CDR3s @@ -103,7 +47,8 @@ public class Simulator { public static MatchingResult matchCDR3s(List distinctCells, Plate samplePlate, Integer lowThreshold, - Integer highThreshold, boolean verbose){ + Integer highThreshold, Integer maxOccupancyDifference, + Integer minOverlapPercent, boolean verbose){ if(verbose){System.out.println("Cells: " + distinctCells.size());} Instant start = Instant.now(); @@ -111,6 +56,7 @@ public class Simulator { int[] alphaIndex = {cdr3AlphaIndex}; int[] betaIndex = {cdr3BetaIndex}; + if(verbose){System.out.println("Making cell maps");} //HashMap keyed to Alphas, values Betas Map distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1); @@ -169,9 +115,7 @@ public class Simulator { plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); if(verbose){System.out.println("matrix created");} - /** - * This is where the bipartite graph is created - */ + //create bipartite graph if(verbose){System.out.println("creating graph");} //the graph object SimpleWeightedGraph graph = @@ -179,12 +123,10 @@ public class Simulator { //the graph generator SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); //the list of alpha vertices - List alphaVertices = new ArrayList<>(); - alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry graphGenerator.first(alphaVertices); //the list of beta vertices - List betaVertices = new ArrayList<>(); - betaVertices.addAll(plateVtoBMap.keySet()); + List betaVertices = new ArrayList<>(plateVtoBMap.keySet()); graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry graphGenerator.weights(weights); graphGenerator.generateGraph(graph); @@ -196,18 +138,17 @@ public class Simulator { //Filter by overlap size if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");} - filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap); + filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent); if(verbose){System.out.println("Edges with weights much less than occupancy values set to 0.0");} //Filter by relative occupancy if(verbose){System.out.println("Eliminating edges between vertices of massively different occupancy");} - filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap); + filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, + maxOccupancyDifference); if(verbose){System.out.println("Edges between vertices of massively different occupancy set to 0.0");} + //Find Maximum Weighted Matching - /** - * This is where the maximum weighted matching is found - */ // if(verbose){System.out.println("Finding maximum weighted matching");} // MaximumWeightBipartiteMatching maxWeightMatching = // new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet()); @@ -215,14 +156,14 @@ public class Simulator { // if(verbose){System.out.println("Matching completed");} // Instant stop = Instant.now(); - //trying with jheaps now + //trying with jheaps addressable now to improve performance if(verbose){System.out.println("Finding maximum weighted matching");} //Attempting to use addressable heap to improve performance MaximumWeightBipartiteMatching maxWeightMatching = new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet(), - (i) -> new PairingHeap(Comparator.naturalOrder())); + i -> new PairingHeap(Comparator.naturalOrder())); MatchingAlgorithm.Matching graphMatching = maxWeightMatching.getMatching(); if(verbose){System.out.println("Matching completed");} Instant stop = Instant.now(); @@ -243,10 +184,10 @@ public class Simulator { NumberFormat nf = NumberFormat.getInstance(Locale.US); MathContext mc = new MathContext(3); Iterator weightIter = graphMatching.iterator(); - DefaultWeightedEdge e = null; + DefaultWeightedEdge e; int trueCount = 0; int falseCount = 0; - boolean check = false; + boolean check; Map matchMap = new HashMap<>(); while(weightIter.hasNext()) { e = weightIter.next(); @@ -279,7 +220,7 @@ public class Simulator { } //Metadate comments for CSV file - int min = alphaCount > betaCount ? betaCount : alphaCount; + int min = Math.min(alphaCount, betaCount); double attemptRate = (double) (trueCount + falseCount) / min; BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc); double pairingErrorRate = (double) falseCount / (trueCount + falseCount); @@ -290,6 +231,8 @@ public class Simulator { comments.add("Total betas found: " + betaCount); comments.add("High overlap threshold: " + highThreshold); comments.add("Low overlap threshold: " + lowThreshold); + comments.add("Minimum overlap percent: " + minOverlapPercent); + comments.add("Maximum occupancy difference: " + maxOccupancyDifference); comments.add("Pairing attempt rate: " + attemptRateTrunc); comments.add("Correct pairings: " + trueCount); comments.add("Incorrect pairings: " + falseCount); @@ -393,11 +336,9 @@ public class Simulator { new SimpleWeightedGraph<>(DefaultWeightedEdge.class); SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); - List cdr3Vertices = new ArrayList<>(); - cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry + List cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry graphGenerator.first(cdr3Vertices); - List cdr1Vertices = new ArrayList<>(); - cdr1Vertices.addAll(plateVtoCDR1Map.keySet()); + List cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet()); graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry graphGenerator.weights(weights); graphGenerator.generateGraph(graph); @@ -417,7 +358,7 @@ public class Simulator { //first processing run Map firstMatchCDR3toCDR1Map = new HashMap<>(); Iterator weightIter = graphMatching.iterator(); - DefaultWeightedEdge e = null; + DefaultWeightedEdge e; while(weightIter.hasNext()){ e = weightIter.next(); // if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { @@ -541,7 +482,7 @@ public class Simulator { comments.add("Correct matching rate: " + correctRate); NumberFormat nf = NumberFormat.getInstance(Locale.US); Duration time = Duration.between(start, stop); - time.plus(previousTime); + time = time.plus(previousTime); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); for(String s: comments){ System.out.println(s); @@ -681,12 +622,13 @@ public class Simulator { Map alphaWellCounts, Map betaWellCounts, Map plateVtoAMap, - Map plateVtoBMap) { + Map plateVtoBMap, + Integer maxOccupancyDifference) { for (DefaultWeightedEdge e : graph.edgeSet()) { Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); //Adjust this to something cleverer later - if (Math.abs(alphaOcc - betaOcc) >= 20) { + if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) { graph.setEdgeWeight(e, 0.0); } } @@ -697,13 +639,14 @@ public class Simulator { Map alphaWellCounts, Map betaWellCounts, Map plateVtoAMap, - Map plateVtoBMap) { + Map plateVtoBMap, + Integer minOverlapPercent) { for (DefaultWeightedEdge e : graph.edgeSet()) { Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); - //Adjust this to something cleverer later - Integer min = alphaOcc > betaOcc ? betaOcc : alphaOcc; - if (min - graph.getEdgeWeight(e) >= 15) { + double weight = graph.getEdgeWeight(e); + double min = minOverlapPercent / 100.0; + if ((weight / alphaOcc < min) || (weight / betaOcc < min)) { graph.setEdgeWeight(e, 0.0); } } diff --git a/src/main/java/UserInterface.java b/src/main/java/UserInterface.java index 0265f6e..86b64c7 100644 --- a/src/main/java/UserInterface.java +++ b/src/main/java/UserInterface.java @@ -166,6 +166,20 @@ public class UserInterface { .desc("Sets the maximum occupancy overlap to attempt matching") .build(); mainOptions.addOption(highThresh); + Option occDiff = Option.builder("occdiff") + .longOpt("occupancy-difference") + .hasArg() + .argName("Number") + .desc("Maximum difference in alpha/beta occupancy to attempt matching") + .build(); + mainOptions.addOption(occDiff); + Option overlapPer = Option.builder("ovper") + .longOpt("overlap-percent") + .hasArg() + .argName("Percent") + .desc("Minimum overlap percent to attempt matching (0 -100)") + .build(); + mainOptions.addOption(overlapPer); Option inputPlates = Option.builder("p") .longOpt("plate-files") .hasArgs() @@ -183,8 +197,10 @@ public class UserInterface { String cellFile = line.getOptionValue("c"); Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh)); Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh)); + Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff)); + Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer)); for(String plate: line.getOptionValues("p")) { - matchCDR3s(cellFile, plate, lowThreshold, highThreshold); + matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent); } } else if(line.hasOption("cells")){ @@ -432,7 +448,8 @@ public class UserInterface { } } - private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold){ + private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold, + Integer maxOccupancyDifference, Integer minOverlapPercent){ CellFileReader cellReader = new CellFileReader(cellFile); PlateFileReader plateReader = new PlateFileReader(plateFile); Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells()); @@ -448,7 +465,7 @@ public class UserInterface { highThreshold = plate.getSize() - 1; } List cells = cellReader.getCells(); - MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, false); + MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, maxOccupancyDifference, minOverlapPercent, false); //result writer MatchingFileWriter writer = new MatchingFileWriter("", results); writer.writeErrorRateToTerminal(); @@ -462,6 +479,8 @@ public class UserInterface { String plateFile = null; Integer lowThreshold = 0; Integer highThreshold = Integer.MAX_VALUE; + Integer maxOccupancyDiff = 96; //no filtering if max difference is all wells by default + Integer minOverlapPercent = 0; //no filtering if min percentage is zero by default try { System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file."); System.out.print("Please enter name of an existing cell sample file: "); @@ -478,6 +497,13 @@ public class UserInterface { } System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?"); highThreshold = sc.nextInt(); + System.out.println("What is the maximum difference in alpha/beta occupancy to attempt matching?"); + maxOccupancyDiff = sc.nextInt(); + System.out.println("What is the minimum overlap percentage to attempt matching? (0 - 100)"); + minOverlapPercent = sc.nextInt(); + if (minOverlapPercent < 0 || minOverlapPercent > 100) { + throw new InputMismatchException("Value outside range. Minimum percent set to 0"); + } } catch (InputMismatchException ex) { System.out.println(ex); sc.next(); @@ -499,7 +525,7 @@ public class UserInterface { highThreshold = plate.getSize() - 1; } List cells = cellReader.getCells(); - MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, true); + MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, maxOccupancyDiff, minOverlapPercent, true); //result writer MatchingFileWriter writer = new MatchingFileWriter(filename, results); writer.writeResultsToFile(); @@ -518,6 +544,8 @@ public class UserInterface { String plateFile = null; Integer lowThresholdCDR3 = 0; Integer highThresholdCDR3 = Integer.MAX_VALUE; + Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default + Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default Integer lowThresholdCDR1 = 0; Integer highThresholdCDR1 = Integer.MAX_VALUE; boolean outputCDR3Matches = false; @@ -537,6 +565,13 @@ public class UserInterface { } System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?"); highThresholdCDR3 = sc.nextInt(); + System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?"); + maxOccupancyDiffCDR3 = sc.nextInt(); + System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)"); + minOverlapPercentCDR3 = sc.nextInt(); + if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) { + throw new InputMismatchException("Value outside range. Minimum percent set to 0"); + } System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?"); lowThresholdCDR1 = sc.nextInt(); if(lowThresholdCDR1 < 1){ @@ -583,7 +618,8 @@ public class UserInterface { highThresholdCDR1 = plate.getSize() - 1; } List cells = cellReader.getCells(); - MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3, true); + MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3, + maxOccupancyDiffCDR3, minOverlapPercentCDR3, true); MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1, highThresholdCDR1, preliminaryResults); MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);