From 906c06062f11a0aedc01db3ef3c804b4f71bc550 Mon Sep 17 00:00:00 2001 From: efischer Date: Tue, 22 Feb 2022 18:36:30 -0600 Subject: [PATCH] Added metadata to MatchingResult to enable CLI options --- src/main/java/BiGpairSEQ.java | 4 +- src/main/java/CommandLineInterface.java | 10 +- src/main/java/InteractiveInterface.java | 2 +- src/main/java/MatchingFileWriter.java | 2 - src/main/java/MatchingResult.java | 42 +- src/main/java/Plate.java | 28 +- src/main/java/PlateFileWriter.java | 30 +- src/main/java/Simulator.java | 669 ++++++++++++------------ 8 files changed, 417 insertions(+), 370 deletions(-) diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java index 2cd066a..72b7176 100644 --- a/src/main/java/BiGpairSEQ.java +++ b/src/main/java/BiGpairSEQ.java @@ -6,7 +6,9 @@ public class BiGpairSEQ { InteractiveInterface.startInteractive(); } else { - CommandLineInterface.startCLI(args); + //This will be uncommented when command line arguments are fixed. + //CommandLineInterface.startCLI(args); + System.out.println("Command line arguments are still being re-implemented."); } } } diff --git a/src/main/java/CommandLineInterface.java b/src/main/java/CommandLineInterface.java index e7e6609..da9efd4 100644 --- a/src/main/java/CommandLineInterface.java +++ b/src/main/java/CommandLineInterface.java @@ -188,13 +188,14 @@ public class CommandLineInterface { CommandLine line = parser.parse(mainOptions, args); if(line.hasOption("match")){ //line = parser.parse(mainOptions, args); - String cellFile = line.getOptionValue("c"); + //String cellFile = line.getOptionValue("c"); + String graphFile = line.getOptionValue("g"); Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh)); Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh)); Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff)); Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer)); for(String plate: line.getOptionValues("p")) { - matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent); + matchCDR3s(graphFile, lowThreshold, highThreshold, occupancyDifference, overlapPercent); } } else if(line.hasOption("cells")){ @@ -280,4 +281,9 @@ public class CommandLineInterface { PlateFileWriter writer = new PlateFileWriter(filename, samplePlate); writer.writePlateFile(); } + + private static void matchCDR3s(String graphFile, Integer lowThreshold, Integer highThreshold, + Integer occupancyDifference, Integer overlapPercent) { + + } } diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java index eceefd4..48853ba 100644 --- a/src/main/java/InteractiveInterface.java +++ b/src/main/java/InteractiveInterface.java @@ -223,7 +223,7 @@ public class InteractiveInterface { System.out.println("No cell sample found."); System.out.println("Returning to main menu."); } - else if(plate.getWells().size() == 0 || plate.getConcentrations().length == 0){ + else if(plate.getWells().size() == 0 || plate.getPopulations().length == 0){ System.out.println("No sample plate found."); System.out.println("Returning to main menu."); } diff --git a/src/main/java/MatchingFileWriter.java b/src/main/java/MatchingFileWriter.java index b797cfe..83fb774 100644 --- a/src/main/java/MatchingFileWriter.java +++ b/src/main/java/MatchingFileWriter.java @@ -7,8 +7,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.List; -import java.util.regex.Pattern; - public class MatchingFileWriter { diff --git a/src/main/java/MatchingResult.java b/src/main/java/MatchingResult.java index df87422..6ce7d52 100644 --- a/src/main/java/MatchingResult.java +++ b/src/main/java/MatchingResult.java @@ -1,18 +1,42 @@ import java.time.Duration; +import java.util.ArrayList; import java.util.List; import java.util.Map; public class MatchingResult { - private String sourceFile; - private List comments; - private List headers; - private List> allResults; - private Map matchMap; - private Duration time; + private final String sourceFile; + private final Map metadata; + private final List comments; + private final List headers; + private final List> allResults; + private final Map matchMap; + private final Duration time; - public MatchingResult(String sourceFileName, List comments, List headers, List> allResults, MapmatchMap, Duration time){ + public MatchingResult(String sourceFileName, Map metadata, List headers, + List> allResults, MapmatchMap, Duration time){ this.sourceFile = sourceFileName; - this.comments = comments; + /* + * POSSIBLE KEYS FOR METADATA MAP ARE: + * sample plate filename + * graph filename + * well populations + * total alphas found + * total betas found + * high overlap threshold + * low overlap threshold + * maximum occupancy difference + * minimum overlap percent + * pairing attempt rate + * correct pairing count + * incorrect pairing count + * pairing error rate + * simulation time + */ + this.metadata = metadata; + this.comments = new ArrayList<>(); + for (String key : metadata.keySet()) { + comments.add(key +": " + metadata.get(key)); + } this.headers = headers; this.allResults = allResults; this.matchMap = matchMap; @@ -20,6 +44,8 @@ public class MatchingResult { } + public Map getMetadata() {return metadata;} + public List getComments() { return comments; } diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 3a264d7..a65b9bd 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -1,26 +1,28 @@ -import java.util.*; + /* TODO: Implement exponential distribution using inversion method - DONE TODO: Implement discrete frequency distributions using Vose's Alias Method */ +import java.util.*; + public class Plate { private String sourceFile; private List> wells; private Random rand = new Random(); private int size; private double error; - private Integer[] concentrations; + private Integer[] populations; private double stdDev; private double lambda; boolean exponential = false; - public Plate(int size, double error, Integer[] concentrations) { + public Plate(int size, double error, Integer[] populations) { this.size = size; this.error = error; - this.concentrations = concentrations; + this.populations = populations; wells = new ArrayList<>(); } @@ -35,9 +37,9 @@ public class Plate { concentrations.add(w.size()); } } - this.concentrations = new Integer[concentrations.size()]; - for (int i = 0; i < this.concentrations.length; i++) { - this.concentrations[i] = concentrations.get(i); + this.populations = new Integer[concentrations.size()]; + for (int i = 0; i < this.populations.length; i++) { + this.populations[i] = concentrations.get(i); } } @@ -45,7 +47,7 @@ public class Plate { this.lambda = lambda; exponential = true; sourceFile = sourceFileName; - int numSections = concentrations.length; + int numSections = populations.length; int section = 0; double m; int n; @@ -53,7 +55,7 @@ public class Plate { while (section < numSections){ for (int i = 0; i < (size / numSections); i++) { List well = new ArrayList<>(); - for (int j = 0; j < concentrations[section]; j++) { + for (int j = 0; j < populations[section]; j++) { do { //inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda) m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size()); @@ -84,14 +86,14 @@ public class Plate { public void fillWells(String sourceFileName, List cells, double stdDev) { this.stdDev = stdDev; sourceFile = sourceFileName; - int numSections = concentrations.length; + int numSections = populations.length; int section = 0; double m; int n; while (section < numSections){ for (int i = 0; i < (size / numSections); i++) { List well = new ArrayList<>(); - for (int j = 0; j < concentrations[section]; j++) { + for (int j = 0; j < populations[section]; j++) { do { m = (rand.nextGaussian() * stdDev) + (cells.size() / 2); } while (m >= cells.size() || m < 0); @@ -110,8 +112,8 @@ public class Plate { } } - public Integer[] getConcentrations(){ - return concentrations; + public Integer[] getPopulations(){ + return populations; } public int getSize(){ diff --git a/src/main/java/PlateFileWriter.java b/src/main/java/PlateFileWriter.java index 0a671b3..317a244 100644 --- a/src/main/java/PlateFileWriter.java +++ b/src/main/java/PlateFileWriter.java @@ -7,7 +7,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.*; -import java.util.regex.Pattern; public class PlateFileWriter { private int size; @@ -18,7 +17,7 @@ public class PlateFileWriter { private String filename; private String sourceFileName; private String[] headers; - private List concentrations; + private Integer[] concentrations; private boolean isExponential = false; public PlateFileWriter(String filename, Plate plate) { @@ -37,8 +36,8 @@ public class PlateFileWriter { } this.error = plate.getError(); this.wells = plate.getWells(); - this.concentrations = Arrays.asList(plate.getConcentrations()); - concentrations.sort(Comparator.reverseOrder()); + this.concentrations = plate.getPopulations(); + Arrays.sort(concentrations); } public void writePlateFile(){ @@ -59,7 +58,7 @@ public class PlateFileWriter { } } - //this took forever + //this took forever and I don't use it List> rows = new ArrayList<>(); List tmp = new ArrayList<>(); for(int i = 0; i < wellsAsStrings.size(); i++){//List w: wells){ @@ -73,14 +72,19 @@ public class PlateFileWriter { } rows.add(tmp); } - //build string of well concentrations - StringBuilder concen = new StringBuilder(); - for(Integer i: concentrations){ - concen.append(i.toString()); - concen.append(" "); - } - String concenString = concen.toString(); + //get list of well populations + List wellPopulations = Arrays.asList(concentrations); + //make string out of populations list + StringBuilder populationsStringBuilder = new StringBuilder(); + populationsStringBuilder.append(wellPopulations.remove(0).toString()); + for(Integer i: wellPopulations){ + populationsStringBuilder.append(", "); + populationsStringBuilder.append(i.toString()); + } + String wellPopulationsString = populationsStringBuilder.toString(); + + //set CSV format CSVFormat plateFileFormat = CSVFormat.Builder.create() .setCommentMarker('#') .build(); @@ -92,7 +96,7 @@ public class PlateFileWriter { printer.printComment("Each row represents one well on the plate."); printer.printComment("Plate size: " + size); printer.printComment("Error rate: " + error); - printer.printComment("Concentrations: " + concenString); + printer.printComment("Well populations: " + wellPopulationsString); if(isExponential){ printer.printComment("Lambda: " + lambda); } diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index f7fc6ee..abc9e94 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -131,10 +131,14 @@ public class Simulator { Instant stop = Instant.now(); Duration time = Duration.between(start, stop); - //return GraphWithMapData object - return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount, + + //create GraphWithMapData object + GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount, distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap, plateBtoVMap, alphaWellCounts, betaWellCounts, time); + output.setSourceFilename(samplePlate.getSourceFileName()); + //return GraphWithMapData object + return output; } //match CDR3s. @@ -233,351 +237,356 @@ public class Simulator { allResults.add(result); } - //Metadate comments for CSV file + //Metadata comments for CSV file int min = Math.min(alphaCount, betaCount); double attemptRate = (double) (trueCount + falseCount) / min; BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc); double pairingErrorRate = (double) falseCount / (trueCount + falseCount); BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc); //get list of well concentrations - List wellConcentrations = Arrays.asList(data.getWellConcentrations()); + List wellPopulations = Arrays.asList(data.getWellConcentrations()); //make string out of concentrations list - StringBuilder concentrationStringBuilder = new StringBuilder(); - for(Integer i: wellConcentrations){ - concentrationStringBuilder.append(i.toString()); - concentrationStringBuilder.append(" "); + StringBuilder populationsStringBuilder = new StringBuilder(); + populationsStringBuilder.append(wellPopulations.remove(0).toString()); + for(Integer i: wellPopulations){ + populationsStringBuilder.append(", "); + populationsStringBuilder.append(i.toString()); } - String concentrationString = concentrationStringBuilder.toString(); - - List comments = new ArrayList<>(); - comments.add("Source Sample Plate filename: " + data.getSourceFilename()); - comments.add("Source Graph and Data filename: " + dataFilename); - comments.add("T cell counts in sample plate wells: " + concentrationString); - comments.add("Total alphas found: " + alphaCount); - comments.add("Total betas found: " + betaCount); - comments.add("High overlap threshold: " + highThreshold); - comments.add("Low overlap threshold: " + lowThreshold); - comments.add("Minimum overlap percent: " + minOverlapPercent); - comments.add("Maximum occupancy difference: " + maxOccupancyDifference); - comments.add("Pairing attempt rate: " + attemptRateTrunc); - comments.add("Correct pairings: " + trueCount); - comments.add("Incorrect pairings: " + falseCount); - comments.add("Pairing error rate: " + pairingErrorRateTrunc); + String wellPopulationsString = populationsStringBuilder.toString(); + //total simulation time Duration time = Duration.between(start, stop); time = time.plus(data.getTime()); - comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); + + Map metadata = new LinkedHashMap<>(); + metadata.put("sample plate filename", data.getSourceFilename()); + metadata.put("graph filename", dataFilename); + metadata.put("well populations", wellPopulationsString); + metadata.put("total alphas found", alphaCount.toString()); + metadata.put("total betas found", betaCount.toString()); + metadata.put("high overlap threshold", highThreshold.toString()); + metadata.put("low overlap threshold", lowThreshold.toString()); + metadata.put("maximum occupancy difference", maxOccupancyDifference.toString()); + metadata.put("minimum overlap percent", minOverlapPercent.toString()); + metadata.put("pairing attempt rate", attemptRateTrunc.toString()); + metadata.put("correct pairing count", Integer.toString(trueCount)); + metadata.put("incorrect pairing count", Integer.toString(falseCount)); + metadata.put("pairing error rate", pairingErrorRateTrunc.toString()); + metadata.put("simulation time", nf.format(time.toSeconds())); + + MatchingResult output = new MatchingResult(data.getSourceFilename(), metadata, header, allResults, matchMap, time); if(verbose){ - for(String s: comments){ + for(String s: output.getComments()){ System.out.println(s); } } - return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time); - } - - //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s. - public static MatchingResult[] matchCDR1s(List distinctCells, - Plate samplePlate, Integer lowThreshold, - Integer highThreshold, MatchingResult priorResult){ - Instant start = Instant.now(); - Duration previousTime = priorResult.getTime(); - Map previousMatches = priorResult.getMatchMap(); - int numWells = samplePlate.getSize(); - int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex}; - int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex}; - - System.out.println("Making previous match maps"); - Map cdr3AtoBMap = previousMatches; - Map cdr3BtoAMap = invertVertexMap(cdr3AtoBMap); - System.out.println("Previous match maps made"); - - System.out.println("Making cell maps"); - Map alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex); - Map betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex); - System.out.println("Cell maps made"); - - System.out.println("Making well maps"); - Map allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices); - Map allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices); - int CDR3Count = allCDR3s.size(); - System.out.println("all CDR3s count: " + CDR3Count); - int CDR1Count = allCDR1s.size(); - System.out.println("all CDR1s count: " + CDR1Count); - System.out.println("Well maps made"); - - System.out.println("Removing unpaired CDR3s from well maps"); - List unpairedCDR3s = new ArrayList<>(); - for(Integer i: allCDR3s.keySet()){ - if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){ - unpairedCDR3s.add(i); - } - } - for(Integer i: unpairedCDR3s){ - allCDR3s.remove(i); - } - System.out.println("Unpaired CDR3s removed."); - System.out.println("Remaining CDR3 count: " + allCDR3s.size()); - - System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s"); - filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1); - System.out.println("CDR1s removed."); - System.out.println("Remaining CDR1 count: " + allCDR1s.size()); - - System.out.println("Making vertex maps"); - - //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have - // distinct numbers associated with them. Since I'm using a 2D array, that means - // distinct indices between the rows and columns. vertexStartValue lets me track where I switch - // from numbering rows to columns, so I can assign unique numbers to every vertex, and then - // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices - Integer vertexStartValue = 0; - //keys are sequential integer vertices, values are CDR3s - Map plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue); - //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map - vertexStartValue += plateVtoCDR3Map.size(); - //keys are sequential integers vertices, values are CDR1s - Map plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue); - //keys are CDR3s, values are sequential integer vertices from previous map - Map plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map); - //keys are CDR1s, values are sequential integer vertices from previous map - Map plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map); - System.out.println("Vertex maps made"); - - System.out.println("Creating adjacency matrix"); - //Count how many wells each CDR3 appears in - Map cdr3WellCounts = new HashMap<>(); - //count how many wells each CDR1 appears in - Map cdr1WellCounts = new HashMap<>(); - //add edges, where weights are number of wells the peptides share in common. - //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class - Map wellNCDR3s = null; - Map wellNCDR1s = null; - double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; - countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap, - cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights); - System.out.println("Matrix created"); - - System.out.println("Creating graph"); - SimpleWeightedGraph graph = - new SimpleWeightedGraph<>(DefaultWeightedEdge.class); - - SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); - List cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry - graphGenerator.first(cdr3Vertices); - List cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet()); - graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry - graphGenerator.weights(weights); - graphGenerator.generateGraph(graph); - System.out.println("Graph created"); - - System.out.println("Removing edges outside of weight thresholds"); - filterByOccupancyThreshold(graph, lowThreshold, highThreshold); - System.out.println("Over- and under-weight edges set to 0.0"); - - System.out.println("Finding first maximum weighted matching"); - MaximumWeightBipartiteMatching firstMaxWeightMatching = - new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); - MatchingAlgorithm.Matching graphMatching = firstMaxWeightMatching.getMatching(); - System.out.println("First maximum weighted matching found"); - - - //first processing run - Map firstMatchCDR3toCDR1Map = new HashMap<>(); - Iterator weightIter = graphMatching.iterator(); - DefaultWeightedEdge e; - while(weightIter.hasNext()){ - e = weightIter.next(); -// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { -// continue; -// } - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); - firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); - } - System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size()); - - System.out.println("Removing edges from first maximum weighted matching"); - //zero out the edge weights in the matching - weightIter = graphMatching.iterator(); - while(weightIter.hasNext()){ - graph.removeEdge(weightIter.next()); - } - System.out.println("Edges removed"); - - //Generate a new matching - System.out.println("Finding second maximum weighted matching"); - MaximumWeightBipartiteMatching secondMaxWeightMatching = - new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); - graphMatching = secondMaxWeightMatching.getMatching(); - System.out.println("Second maximum weighted matching found"); - - - //second processing run - Map secondMatchCDR3toCDR1Map = new HashMap<>(); - weightIter = graphMatching.iterator(); - while(weightIter.hasNext()){ - e = weightIter.next(); -// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { -// continue; -// } - Integer source = graph.getEdgeSource(e); -// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){ -// continue; -// } - Integer target = graph.getEdgeTarget(e); - secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); - } - System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size()); - - System.out.println("Mapping first pass CDR3 alpha/beta pairs"); - //get linked map for first matching attempt - Map firstMatchesMap = new LinkedHashMap<>(); - for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { - if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { - continue; - } - Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); - if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) { - continue; - } - firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); - firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); - } - System.out.println("First pass CDR3 alpha/beta pairs mapped"); - - System.out.println("Mapping second pass CDR3 alpha/beta pairs."); - System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass."); - //Look for matches that simply swapped already-matched alpha and beta CDR3s - Map dualMatchesMap = new LinkedHashMap<>(); - for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { - if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { - continue; - } - Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); - if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) { - continue; - } - if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){ - if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){ - dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); - dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); - } - } - } - System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found."); - - Instant stop = Instant.now(); - //results for first map - System.out.println("RESULTS FOR FIRST PASS MATCHING"); - List> allResults = new ArrayList<>(); - Integer trueCount = 0; - Iterator iter = firstMatchesMap.keySet().iterator(); - - while(iter.hasNext()){ - Boolean proven = false; - List tmp = new ArrayList<>(); - tmp.add(iter.next().toString()); - tmp.add(iter.next().toString()); - tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); - tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); - if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ - if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ - proven = true; - } - } - else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ - if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ - proven = true; - } - } - tmp.add(proven.toString()); - allResults.add(tmp); - if(proven){ - trueCount++; - } - } - - List comments = new ArrayList<>(); - comments.add("Plate size: " + samplePlate.getSize() + " wells"); - comments.add("Previous pairs found: " + previousMatches.size()); - comments.add("CDR1 matches attempted: " + allResults.size()); - double attemptRate = (double) allResults.size() / previousMatches.size(); - comments.add("Matching attempt rate: " + attemptRate); - comments.add("Number of correct matches: " + trueCount); - double correctRate = (double) trueCount / allResults.size(); - comments.add("Correct matching rate: " + correctRate); - NumberFormat nf = NumberFormat.getInstance(Locale.US); - Duration time = Duration.between(start, stop); - time = time.plus(previousTime); - comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); - for(String s: comments){ - System.out.println(s); - } - - - - List headers = new ArrayList<>(); - headers.add("CDR3 alpha"); - headers.add("CDR3 beta"); - headers.add("first matched CDR1"); - headers.add("second matched CDR1"); - headers.add("Correct match?"); - - MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(), - comments, headers, allResults, dualMatchesMap, time); - - //results for dual map - System.out.println("RESULTS FOR SECOND PASS MATCHING"); - allResults = new ArrayList<>(); - trueCount = 0; - iter = dualMatchesMap.keySet().iterator(); - while(iter.hasNext()){ - Boolean proven = false; - List tmp = new ArrayList<>(); - tmp.add(iter.next().toString()); - tmp.add(iter.next().toString()); - tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); - tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); - if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ - if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ - proven = true; - } - } - else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ - if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ - proven = true; - } - } - tmp.add(proven.toString()); - allResults.add(tmp); - if(proven){ - trueCount++; - } - } - - comments = new ArrayList<>(); - comments.add("Plate size: " + samplePlate.getSize() + " wells"); - comments.add("Previous pairs found: " + previousMatches.size()); - comments.add("High overlap threshold: " + highThreshold); - comments.add("Low overlap threshold: " + lowThreshold); - comments.add("CDR1 matches attempted: " + allResults.size()); - attemptRate = (double) allResults.size() / previousMatches.size(); - comments.add("Matching attempt rate: " + attemptRate); - comments.add("Number of correct matches: " + trueCount); - correctRate = (double) trueCount / allResults.size(); - comments.add("Correct matching rate: " + correctRate); - comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); - - for(String s: comments){ - System.out.println(s); - } - - System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); - MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers, - allResults, dualMatchesMap, time); - MatchingResult[] output = {firstTest, dualTest}; return output; } + //Commented out CDR1 matching until it's time to re-implement it +// //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s. +// public static MatchingResult[] matchCDR1s(List distinctCells, +// Plate samplePlate, Integer lowThreshold, +// Integer highThreshold, MatchingResult priorResult){ +// Instant start = Instant.now(); +// Duration previousTime = priorResult.getTime(); +// Map previousMatches = priorResult.getMatchMap(); +// int numWells = samplePlate.getSize(); +// int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex}; +// int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex}; +// +// System.out.println("Making previous match maps"); +// Map cdr3AtoBMap = previousMatches; +// Map cdr3BtoAMap = invertVertexMap(cdr3AtoBMap); +// System.out.println("Previous match maps made"); +// +// System.out.println("Making cell maps"); +// Map alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex); +// Map betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex); +// System.out.println("Cell maps made"); +// +// System.out.println("Making well maps"); +// Map allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices); +// Map allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices); +// int CDR3Count = allCDR3s.size(); +// System.out.println("all CDR3s count: " + CDR3Count); +// int CDR1Count = allCDR1s.size(); +// System.out.println("all CDR1s count: " + CDR1Count); +// System.out.println("Well maps made"); +// +// System.out.println("Removing unpaired CDR3s from well maps"); +// List unpairedCDR3s = new ArrayList<>(); +// for(Integer i: allCDR3s.keySet()){ +// if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){ +// unpairedCDR3s.add(i); +// } +// } +// for(Integer i: unpairedCDR3s){ +// allCDR3s.remove(i); +// } +// System.out.println("Unpaired CDR3s removed."); +// System.out.println("Remaining CDR3 count: " + allCDR3s.size()); +// +// System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s"); +// filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1); +// System.out.println("CDR1s removed."); +// System.out.println("Remaining CDR1 count: " + allCDR1s.size()); +// +// System.out.println("Making vertex maps"); +// +// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have +// // distinct numbers associated with them. Since I'm using a 2D array, that means +// // distinct indices between the rows and columns. vertexStartValue lets me track where I switch +// // from numbering rows to columns, so I can assign unique numbers to every vertex, and then +// // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices +// Integer vertexStartValue = 0; +// //keys are sequential integer vertices, values are CDR3s +// Map plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue); +// //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map +// vertexStartValue += plateVtoCDR3Map.size(); +// //keys are sequential integers vertices, values are CDR1s +// Map plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue); +// //keys are CDR3s, values are sequential integer vertices from previous map +// Map plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map); +// //keys are CDR1s, values are sequential integer vertices from previous map +// Map plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map); +// System.out.println("Vertex maps made"); +// +// System.out.println("Creating adjacency matrix"); +// //Count how many wells each CDR3 appears in +// Map cdr3WellCounts = new HashMap<>(); +// //count how many wells each CDR1 appears in +// Map cdr1WellCounts = new HashMap<>(); +// //add edges, where weights are number of wells the peptides share in common. +// //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class +// Map wellNCDR3s = null; +// Map wellNCDR1s = null; +// double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; +// countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap, +// cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights); +// System.out.println("Matrix created"); +// +// System.out.println("Creating graph"); +// SimpleWeightedGraph graph = +// new SimpleWeightedGraph<>(DefaultWeightedEdge.class); +// +// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); +// List cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry +// graphGenerator.first(cdr3Vertices); +// List cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet()); +// graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry +// graphGenerator.weights(weights); +// graphGenerator.generateGraph(graph); +// System.out.println("Graph created"); +// +// System.out.println("Removing edges outside of weight thresholds"); +// filterByOccupancyThreshold(graph, lowThreshold, highThreshold); +// System.out.println("Over- and under-weight edges set to 0.0"); +// +// System.out.println("Finding first maximum weighted matching"); +// MaximumWeightBipartiteMatching firstMaxWeightMatching = +// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); +// MatchingAlgorithm.Matching graphMatching = firstMaxWeightMatching.getMatching(); +// System.out.println("First maximum weighted matching found"); +// +// +// //first processing run +// Map firstMatchCDR3toCDR1Map = new HashMap<>(); +// Iterator weightIter = graphMatching.iterator(); +// DefaultWeightedEdge e; +// while(weightIter.hasNext()){ +// e = weightIter.next(); +//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { +//// continue; +//// } +// Integer source = graph.getEdgeSource(e); +// Integer target = graph.getEdgeTarget(e); +// firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); +// } +// System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size()); +// +// System.out.println("Removing edges from first maximum weighted matching"); +// //zero out the edge weights in the matching +// weightIter = graphMatching.iterator(); +// while(weightIter.hasNext()){ +// graph.removeEdge(weightIter.next()); +// } +// System.out.println("Edges removed"); +// +// //Generate a new matching +// System.out.println("Finding second maximum weighted matching"); +// MaximumWeightBipartiteMatching secondMaxWeightMatching = +// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); +// graphMatching = secondMaxWeightMatching.getMatching(); +// System.out.println("Second maximum weighted matching found"); +// +// +// //second processing run +// Map secondMatchCDR3toCDR1Map = new HashMap<>(); +// weightIter = graphMatching.iterator(); +// while(weightIter.hasNext()){ +// e = weightIter.next(); +//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { +//// continue; +//// } +// Integer source = graph.getEdgeSource(e); +//// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){ +//// continue; +//// } +// Integer target = graph.getEdgeTarget(e); +// secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); +// } +// System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size()); +// +// System.out.println("Mapping first pass CDR3 alpha/beta pairs"); +// //get linked map for first matching attempt +// Map firstMatchesMap = new LinkedHashMap<>(); +// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { +// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { +// continue; +// } +// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); +// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) { +// continue; +// } +// firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); +// firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); +// } +// System.out.println("First pass CDR3 alpha/beta pairs mapped"); +// +// System.out.println("Mapping second pass CDR3 alpha/beta pairs."); +// System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass."); +// //Look for matches that simply swapped already-matched alpha and beta CDR3s +// Map dualMatchesMap = new LinkedHashMap<>(); +// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { +// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { +// continue; +// } +// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); +// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) { +// continue; +// } +// if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){ +// if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){ +// dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); +// dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); +// } +// } +// } +// System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found."); +// +// Instant stop = Instant.now(); +// //results for first map +// System.out.println("RESULTS FOR FIRST PASS MATCHING"); +// List> allResults = new ArrayList<>(); +// Integer trueCount = 0; +// Iterator iter = firstMatchesMap.keySet().iterator(); +// +// while(iter.hasNext()){ +// Boolean proven = false; +// List tmp = new ArrayList<>(); +// tmp.add(iter.next().toString()); +// tmp.add(iter.next().toString()); +// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); +// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); +// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ +// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ +// proven = true; +// } +// } +// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ +// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ +// proven = true; +// } +// } +// tmp.add(proven.toString()); +// allResults.add(tmp); +// if(proven){ +// trueCount++; +// } +// } +// +// List comments = new ArrayList<>(); +// comments.add("Plate size: " + samplePlate.getSize() + " wells"); +// comments.add("Previous pairs found: " + previousMatches.size()); +// comments.add("CDR1 matches attempted: " + allResults.size()); +// double attemptRate = (double) allResults.size() / previousMatches.size(); +// comments.add("Matching attempt rate: " + attemptRate); +// comments.add("Number of correct matches: " + trueCount); +// double correctRate = (double) trueCount / allResults.size(); +// comments.add("Correct matching rate: " + correctRate); +// NumberFormat nf = NumberFormat.getInstance(Locale.US); +// Duration time = Duration.between(start, stop); +// time = time.plus(previousTime); +// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); +// for(String s: comments){ +// System.out.println(s); +// } +// +// +// +// List headers = new ArrayList<>(); +// headers.add("CDR3 alpha"); +// headers.add("CDR3 beta"); +// headers.add("first matched CDR1"); +// headers.add("second matched CDR1"); +// headers.add("Correct match?"); +// +// MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(), +// comments, headers, allResults, dualMatchesMap, time); +// +// //results for dual map +// System.out.println("RESULTS FOR SECOND PASS MATCHING"); +// allResults = new ArrayList<>(); +// trueCount = 0; +// iter = dualMatchesMap.keySet().iterator(); +// while(iter.hasNext()){ +// Boolean proven = false; +// List tmp = new ArrayList<>(); +// tmp.add(iter.next().toString()); +// tmp.add(iter.next().toString()); +// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); +// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); +// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ +// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ +// proven = true; +// } +// } +// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ +// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ +// proven = true; +// } +// } +// tmp.add(proven.toString()); +// allResults.add(tmp); +// if(proven){ +// trueCount++; +// } +// } +// +// comments = new ArrayList<>(); +// comments.add("Plate size: " + samplePlate.getSize() + " wells"); +// comments.add("Previous pairs found: " + previousMatches.size()); +// comments.add("High overlap threshold: " + highThreshold); +// comments.add("Low overlap threshold: " + lowThreshold); +// comments.add("CDR1 matches attempted: " + allResults.size()); +// attemptRate = (double) allResults.size() / previousMatches.size(); +// comments.add("Matching attempt rate: " + attemptRate); +// comments.add("Number of correct matches: " + trueCount); +// correctRate = (double) trueCount / allResults.size(); +// comments.add("Correct matching rate: " + correctRate); +// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); +// +// for(String s: comments){ +// System.out.println(s); +// } +// +// System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); +// MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers, +// allResults, dualMatchesMap, time); +// MatchingResult[] output = {firstTest, dualTest}; +// return output; +// } + //Counts the well occupancy of the row peptides and column peptides into given maps, and //fills weights in the given 2D array