diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index a33c769..2da18b3 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -1,6 +1,8 @@ import java.util.*; -//Need to write function to output plate to a file that I can read in. +/* +TODO: Implement discrete frequency distributions using Vose's Alias Method + */ public class Plate { private List> wells; @@ -28,8 +30,6 @@ public class Plate { int section = 0; double m; int n; - //testing - //System.out.println("Cell size: " + cells.get(0).length); while (section < numSections){ for (int i = 0; i < (size / numSections); i++) { List well = new ArrayList<>(); @@ -52,11 +52,6 @@ public class Plate { } } - public void writePlateToFile(String filename) { - - - } - public Integer[] getConcentrations(){ return concentrations; } @@ -77,142 +72,32 @@ public class Plate { return wells; } - - //returns a map of counts of all the CDR3s (alphas and betas) in all wells - public MapassayWellsCDR3(){ - return this.assayWellsCDR3(0, size); - } - //returns a map of counts of all the CDR3 alphas in all wells - public Map assayWellsCDR3Alpha() { - return this.assayWellsCDR3Alpha(0, size); - } - //returns a map of counts of all the CDR3 betas in all wells - public Map assayWellsCDR3Beta() { - return this.assayWellsCDR3Beta(0, size); - } - //returns a map of counts of all CDR1s (alphas and betas) in all wells - public Map assayWellsCDR1(){ - return this.assayWellsCDR1(0, size); - } - //returns a map of counts of all the CDR1 alphas in all wells - public Map assayWellsCDR1Alpha() { - return this.assayWellsCDR1Alpha(0, size); - } - //returns a map of counts of all the CDR1 betas in all wells - public Map assayWellsCDR1Beta() { - return this.assayWellsCDR1Beta(0, size); + //returns a map of the counts of the peptide at cell index pIndex, in all wells + public Map assayWellsPeptideP(int... pIndices){ + return this.assayWellsPeptideP(0, size, pIndices); } - //returns a map of counts of the CDR3s (alphas and betas) in a specific well - public MapassayWellsCDR3(int n){ - return this.assayWellsCDR3(n, n+1); - } - //returns a map of counts of the CDR1s (alphas and betas) in a specific well - public Map assayWellsCDR1(int n){ - return this.assayWellsCDR1(n, n+1); - } - //returns a map of counts of the CDR3 alphas in a specific well - public Map assayWellsCDR3Alpha(int n) { - return this.assayWellsCDR3Alpha(n, n+1); - } - //returns a map of counts of the CDR3 betas in a specific well - public Map assayWellsCDR3Beta(int n) { - return this.assayWellsCDR3Beta(n, n+1); - } - //returns a map of counts of the CDR1 alphas in a specific well - public Map assayWellsCDR1Alpha(int n) { - return this.assayWellsCDR1Alpha(n, n+1); - } - //returns a map of counts of the CDR1 betas in a specific well - public Map assayWellsCDR1Beta(int n) { - return this.assayWellsCDR1Beta(n, n+1); - } + //returns a map of the counts of the peptide at cell index pIndex, in a specific well + public Map assayWellsPeptideP(int n, int... pIndices) { return this.assayWellsPeptideP(n, n+1, pIndices);} - - //returns a map of the counts of the CDR3s (alphas and betas) in a range of wells - public MapassayWellsCDR3(int start, int end){ + //returns a map of the counts of the peptide at cell index pIndex, in a range of wells + public Map assayWellsPeptideP(int start, int end, int... pIndices) { Map assay = new HashMap<>(); - for(int i = start; i < end; i++){ - countCDR3Alphas(assay, wells.get(i)); - countCDR3Betas(assay,wells.get(i)); + for(int pIndex: pIndices){ + for(int i = start; i < end; i++){ + countPeptides(assay, wells.get(i), pIndex); + } } return assay; } - //returns a map of the counts of the CDR1s (alphas and betas) in a range of wells - public MapassayWellsCDR1(int start, int end){ - Map assay = new HashMap<>(); - for(int i = start; i < end; i++){ - countCDR1Alphas(assay, wells.get(i)); - countCDR1Betas(assay,wells.get(i)); - } - return assay; - } - //returns a map of the counts of the CDR3 alphas in a range of wells - public Map assayWellsCDR3Alpha(int start, int end) { - Map assay = new HashMap<>(); - for(int i = start; i < end; i++){ - countCDR3Alphas(assay, wells.get(i)); - } - return assay; - } - //returns a map of the counts of the CDR3 betas in a range of wells - public Map assayWellsCDR3Beta(int start, int end) { - Map assay = new HashMap<>(); - for(int i = start; i < end; i++){ - countCDR3Betas(assay, wells.get(i)); - } - return assay; - } - //returns a map of the counts of the CDR1 alphas in a range of wells - public Map assayWellsCDR1Alpha(int start, int end) { - Map assay = new HashMap<>(); - for(int i = start; i < end; i++){ - countCDR1Alphas(assay, wells.get(i)); - } - return assay; - } - //returns a map of the counts of the CDR1 betas in a range of wells - public Map assayWellsCDR1Beta(int start, int end) { - Map assay = new HashMap<>(); - for(int i = start; i < end; i++){ - countCDR1Betas(assay, wells.get(i)); - } - return assay; - } - - - //given a map, counts distinct CDR3 alphas in a well - private void countCDR3Alphas(Map wellMap, List well){ + //For the peptides at cell indices pIndices, counts number of unique peptides in the given well into the given map + private void countPeptides(Map wellMap, List well, int... pIndices) { for(Integer[] cell : well) { - if(cell[0] != -1){ - //keys are alphas, value is how many of them have been assayed - wellMap.merge(cell[0], 1, (oldValue, newValue) -> oldValue + newValue); + for(int pIndex: pIndices){ + if(cell[pIndex] != -1){ + wellMap.merge(cell[pIndex], 1, (oldValue, newValue) -> oldValue + newValue); + } } } } - //given a map, counts distinct CDR3 betas in a well - private void countCDR3Betas(Map wellMap, List well){ - for(Integer[] cell : well) { - if(cell[1] != -1){ - wellMap.merge(cell[1], 1, (oldValue, newValue) -> oldValue + newValue); - } - } - } - //given a map, counts distinct CDR1 alphas in a well - private void countCDR1Alphas(Map wellMap, List well){ - for(Integer[] cell: well){ - if(cell[2] != -1){ - wellMap.merge(cell[2], 1, (oldValue, newValue) -> oldValue + newValue); - } - } - } - //given a map, counts distinct CDR1 betas in a well - private void countCDR1Betas(Map wellMap, List well){ - for(Integer[] cell: well){ - if(cell[3] != -1){ - wellMap.merge(cell[3], 1, (oldValue, newValue) -> oldValue + newValue); - } - } - } - } diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 85f94fb..5083ffd 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -14,20 +14,13 @@ import java.util.*; import java.util.stream.IntStream; public class Simulator { - /* - * Old instance fields from before object-oriented refactor - * - private static Integer numDistinctCells = 2_000_000; - private static double stdDeviation = 200; //square root of numDistCells would approximate poisson dist - private static int numWells; - private static int numConcentrations = 1; - private static double errorRate = 0.1; - private static Integer[] concentrations = {500}; - private static int lowThreshold = 2; //min number of shared wells to attempt pairing - private static int highThreshold = numWells - 3; //max number of shared wells to attempt pairing - private static boolean use2DArrayForGraph = true; //Doing this is much faster for larger graphs - private static boolean useJGraphTGraphMatrixGenerator = true; //fastest option - */ + private static int cdr3AlphaIndex = 0; + private static int cdr3BetaIndex = 1; + private static int cdr1AlphaIndex = 2; + private static int cdr1BetaIndex = 3; + + + public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) { //In real T cells, CDR1s have about one third the diversity of CDR3s @@ -57,25 +50,20 @@ public class Simulator { public static MatchingResult matchCDR3s(List distinctCells, Plate samplePlate, Integer lowThreshold, Integer highThreshold){ System.out.println("Cells: " + distinctCells.size()); - Instant start = Instant.now(); + Instant start = Instant.now(); int numWells = samplePlate.getSize(); + int[] alphaIndex = {cdr3AlphaIndex}; + int[] betaIndex = {cdr3BetaIndex}; + System.out.println("Making cell maps"); //HashMap keyed to Alphas, values Betas - Map distCellsMapAlphaKey = new HashMap<>(); - for (Integer[] cell : distinctCells) { - distCellsMapAlphaKey.put(cell[0], cell[1]); - } - //HashMap keyed to Betas, values Alphas - Map distCellsMapBetaKey = new HashMap<>(); - for (Integer[] cell : distinctCells) { - distCellsMapBetaKey.put(cell[1], cell[0]); - } + Map distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1); System.out.println("Cell maps made"); System.out.println("Making well maps"); - Map allAlphas = samplePlate.assayWellsCDR3Alpha(); - Map allBetas = samplePlate.assayWellsCDR3Beta(); + Map allAlphas = samplePlate.assayWellsPeptideP(alphaIndex); + Map allBetas = samplePlate.assayWellsPeptideP(betaIndex); int alphaCount = allAlphas.size(); System.out.println("all alphas count: " + alphaCount); int betaCount = allBetas.size(); @@ -86,25 +74,10 @@ public class Simulator { //Remove saturating-occupancy peptides because they have no signal value. //Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another //peptide that's above the threshold. - System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy peptides"); - List noiseAlphas = new ArrayList<>(); - List noiseBetas = new ArrayList<>(); - for(Integer k: allAlphas.keySet()){ - if((allAlphas.get(k)>numWells - 1) || (allAlphas.get(k) < lowThreshold)){ - noiseAlphas.add(k); - } - } - for(Integer k: allBetas.keySet()){ - if((allBetas.get(k)> numWells - 1 ) || (allBetas.get(k) < lowThreshold)){ - noiseBetas.add(k); - } - } - for(Integer p: noiseAlphas){ - allAlphas.remove(p); - } - for(Integer p: noiseBetas){ - allBetas.remove(p); - } + System.out.println("Removing peptides present in all wells."); + System.out.println("Removing peptides with occupancy below the minimum overlap threshold"); + filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1); + filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1); System.out.println("Peptides removed"); int pairableAlphaCount = allAlphas.size(); System.out.println("Remaining alpha count: " + pairableAlphaCount); @@ -112,16 +85,18 @@ public class Simulator { System.out.println("Remaining beta count: " + pairableBetaCount); System.out.println("Making vertex maps"); - //Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to - // when I refactor to use a 2d array to make the graph - //For the autogenerator, all vertices must have distinct numbers associated with them + //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have + // distinct numbers associated with them. Since I'm using a 2D array, that means + // distinct indices between the rows and columns. vertexStartValue lets me track where I switch + // from numbering rows to columns, so I can assign unique numbers to every vertex, and then + // subtract the vertexStartValue from betas to use their vertex labels as array indices Integer vertexStartValue = 0; //keys are sequential integer vertices, values are alphas - Map plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue); + Map plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue); //New start value for vertex to beta map should be one more than final vertex value in alpha map vertexStartValue += plateVtoAMap.size(); //keys are sequential integers vertices, values are betas - Map plateVtoBMap = getVertexToPeptideMap(allBetas, vertexStartValue); + Map plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue); //keys are alphas, values are sequential integer vertices from previous map Map plateAtoVMap = invertVertexMap(plateVtoAMap); //keys are betas, values are sequential integer vertices from previous map @@ -133,40 +108,15 @@ public class Simulator { Map alphaWellCounts = new HashMap<>(); //count how many wells each beta appears in Map betaWellCounts = new HashMap<>(); - //add edges, where weights are number of wells the peptides share in common. - //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class - Map wellNAlphas = null; - Map wellNBetas = null; - SimpleWeightedGraph graph = - new SimpleWeightedGraph<>(DefaultWeightedEdge.class); double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; - for (int n = 0; n < numWells; n++) { - wellNAlphas = samplePlate.assayWellsCDR3Alpha(n); - for (Integer a : wellNAlphas.keySet()) { - if(allAlphas.containsKey(a)){ - alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); - } - } - wellNBetas = samplePlate.assayWellsCDR3Beta(n); - for (Integer b : wellNBetas.keySet()) { - if(allBetas.containsKey(b)){ - betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); - } - } - for (Integer i : wellNAlphas.keySet()) { - if(allAlphas.containsKey(i)){ - for (Integer j : wellNBetas.keySet()) { - if(allBetas.containsKey(j)){ - weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0; - } - } - } - } - } + + countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap, + plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); System.out.println("matrix created"); System.out.println("creating graph"); - + SimpleWeightedGraph graph = + new SimpleWeightedGraph<>(DefaultWeightedEdge.class); SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); List alphaVertices = new ArrayList<>(); alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry @@ -179,11 +129,7 @@ public class Simulator { System.out.println("Graph created"); System.out.println("Eliminating edges with weights outside threshold values"); - for(DefaultWeightedEdge e: graph.edgeSet()){ - if ((graph.getEdgeWeight(e) > highThreshold) || (graph.getEdgeWeight(e) < lowThreshold)){ - graph.setEdgeWeight(e, 0.0); - } - } + filterByOccupancyThreshold(graph, lowThreshold, highThreshold); System.out.println("Over- and under-weight edges set to 0.0"); @@ -263,48 +209,47 @@ public class Simulator { comments.add("Pairing error rate: " + pairingErrorRateTrunc); Duration time = Duration.between(start, stop); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); - System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); + for(String s: comments){ + System.out.println(s); + } return new MatchingResult(comments, header, allResults, matchMap, time); } + //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s. public static MatchingResult[] matchCDR1s(List distinctCells, Plate samplePlate, Integer lowThreshold, - Integer highThreshold, Map previousMatches, Duration previousTime){ + Integer highThreshold, MatchingResult priorResult){ Instant start = Instant.now(); - + Duration previousTime = priorResult.getTime(); + Map previousMatches = priorResult.getMatchMap(); int numWells = samplePlate.getSize(); + int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex}; + int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex}; + System.out.println("Making previous match maps"); - Map CDR3AtoBMap = previousMatches; - Map CDR3BtoAMap = invertVertexMap(CDR3AtoBMap); + Map cdr3AtoBMap = previousMatches; + Map cdr3BtoAMap = invertVertexMap(cdr3AtoBMap); System.out.println("Previous match maps made"); System.out.println("Making cell maps"); - Map alphaCDR3toCDR1Map = new HashMap<>(); - for (Integer[] cell : distinctCells) { - alphaCDR3toCDR1Map.put(cell[0], cell[2]); - } - //HashMap keyed to Betas, values Alphas - Map betaCDR3toCDR1Map = new HashMap<>(); - for (Integer[] cell : distinctCells) { - betaCDR3toCDR1Map.put(cell[1], cell[3]); - } + Map alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex); + Map betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex); System.out.println("Cell maps made"); System.out.println("Making well maps"); - Map allCDR3s = samplePlate.assayWellsCDR3(); - Map allCDR1s = samplePlate.assayWellsCDR1(); + Map allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices); + Map allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices); int CDR3Count = allCDR3s.size(); System.out.println("all CDR3s count: " + CDR3Count); int CDR1Count = allCDR1s.size(); System.out.println("all CDR1s count: " + CDR1Count); System.out.println("Well maps made"); - //NEW FILTERING CODE TO TEST System.out.println("Removing unpaired CDR3s from well maps"); List unpairedCDR3s = new ArrayList<>(); for(Integer i: allCDR3s.keySet()){ - if(!(CDR3AtoBMap.containsKey(i) || CDR3BtoAMap.containsKey(i))){ + if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){ unpairedCDR3s.add(i); } } @@ -315,85 +260,61 @@ public class Simulator { System.out.println("Remaining CDR3 count: " + allCDR3s.size()); System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s"); - List noiseCDR1s = new ArrayList<>(); - for(Integer i: allCDR1s.keySet()){ - if((allCDR1s.get(i) < lowThreshold) || (allCDR1s.get(i)) > numWells - 1){ - noiseCDR1s.add(i); - } - } - for(Integer i: noiseCDR1s){ - allCDR1s.remove(i); - } + filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1); System.out.println("CDR1s removed."); System.out.println("Remaining CDR1 count: " + allCDR1s.size()); - //END NEW CODE System.out.println("Making vertex maps"); - //Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to - // when I refactor to use a 2d array to make the graph - //For the autogenerator, all vertices must have distinct numbers associated with them + + //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have + // distinct numbers associated with them. Since I'm using a 2D array, that means + // distinct indices between the rows and columns. vertexStartValue lets me track where I switch + // from numbering rows to columns, so I can assign unique numbers to every vertex, and then + // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices Integer vertexStartValue = 0; //keys are sequential integer vertices, values are CDR3s - Map plateVtoCDR3Map = getVertexToPeptideMap(allCDR3s, vertexStartValue); - //New start value for vertex to beta map should be one more than final vertex value in alpha map + Map plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue); + //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map vertexStartValue += plateVtoCDR3Map.size(); //keys are sequential integers vertices, values are CDR1s - Map plateVtoCDR1Map = getVertexToPeptideMap(allCDR1s, vertexStartValue); + Map plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue); //keys are CDR3s, values are sequential integer vertices from previous map Map plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map); //keys are CDR1s, values are sequential integer vertices from previous map Map plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map); System.out.println("Vertex maps made"); - System.out.println("Creating Graph"); + System.out.println("Creating adjacency matrix"); //Count how many wells each CDR3 appears in - Map CDR3WellCounts = new HashMap<>(); + Map cdr3WellCounts = new HashMap<>(); //count how many wells each CDR1 appears in - Map CDR1WellCounts = new HashMap<>(); + Map cdr1WellCounts = new HashMap<>(); //add edges, where weights are number of wells the peptides share in common. //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class Map wellNCDR3s = null; Map wellNCDR1s = null; + double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; + countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap, + cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights); + System.out.println("Matrix created"); + + System.out.println("Creating graph"); SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); - double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; - for (int n = 0; n < numWells; n++) { - wellNCDR3s = samplePlate.assayWellsCDR3(n); - for (Integer a : wellNCDR3s.keySet()) { - CDR3WellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); - } - wellNCDR1s = samplePlate.assayWellsCDR1(n); - for (Integer b : wellNCDR1s.keySet()) { - CDR1WellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); - } - for (Integer i : wellNCDR3s.keySet()) { - if(allCDR3s.containsKey(i)){//only consider CDR3s we have matches for - rest filtered out - for (Integer j : wellNCDR1s.keySet()) { - if(allCDR1s.containsKey(j)){ //only those CDR1s we didn't filter out - weights[plateCDR3toVMap.get(i)][plateCDR1toVMap.get(j) - vertexStartValue] += 1.0; - } - } - } - } - } + SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); - List CDR3Vertices = new ArrayList<>(); - CDR3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry - graphGenerator.first(CDR3Vertices); - List CDR1Vertices = new ArrayList<>(); - CDR1Vertices.addAll(plateVtoCDR1Map.keySet()); - graphGenerator.second(CDR1Vertices); //This will work because LinkedHashMap preserves order of entry + List cdr3Vertices = new ArrayList<>(); + cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry + graphGenerator.first(cdr3Vertices); + List cdr1Vertices = new ArrayList<>(); + cdr1Vertices.addAll(plateVtoCDR1Map.keySet()); + graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry graphGenerator.weights(weights); graphGenerator.generateGraph(graph); System.out.println("Graph created"); - System.out.println("Number of edges: " + graph.edgeSet().size()); System.out.println("Removing edges outside of weight thresholds"); - for(DefaultWeightedEdge e: graph.edgeSet()){ - if ((graph.getEdgeWeight(e) > highThreshold) || (graph.getEdgeWeight(e) < lowThreshold)){ - graph.setEdgeWeight(e, 0.0); - } - } + filterByOccupancyThreshold(graph, lowThreshold, highThreshold); System.out.println("Over- and under-weight edges set to 0.0"); System.out.println("Finding first maximum weighted matching"); @@ -409,24 +330,22 @@ public class Simulator { DefaultWeightedEdge e = null; while(weightIter.hasNext()){ e = weightIter.next(); - if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { - continue; - } - Integer source = graph.getEdgeSource(e); -// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){ +// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { // continue; // } + Integer source = graph.getEdgeSource(e); Integer target = graph.getEdgeTarget(e); firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); } - System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size()); + System.out.println("Removing edges from first maximum weighted matching"); //zero out the edge weights in the matching weightIter = graphMatching.iterator(); while(weightIter.hasNext()){ graph.removeEdge(weightIter.next()); } + System.out.println("Edges removed"); //Generate a new matching System.out.println("Finding second maximum weighted matching"); @@ -441,9 +360,9 @@ public class Simulator { weightIter = graphMatching.iterator(); while(weightIter.hasNext()){ e = weightIter.next(); - if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { - continue; - } +// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { +// continue; +// } Integer source = graph.getEdgeSource(e); // if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){ // continue; @@ -451,32 +370,33 @@ public class Simulator { Integer target = graph.getEdgeTarget(e); secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); } - System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size()); - + System.out.println("Mapping first pass CDR3 alpha/beta pairs"); //get linked map for first matching attempt Map firstMatchesMap = new LinkedHashMap<>(); - for(Integer alphaCDR3: CDR3AtoBMap.keySet()) { + for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { continue; } - Integer betaCDR3 = CDR3AtoBMap.get(alphaCDR3); + Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) { continue; } firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); } + System.out.println("First pass CDR3 alpha/beta pairs mapped"); - + System.out.println("Mapping second pass CDR3 alpha/beta pairs."); + System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass."); //Look for matches that simply swapped already-matched alpha and beta CDR3s Map dualMatchesMap = new LinkedHashMap<>(); - for(Integer alphaCDR3: CDR3AtoBMap.keySet()) { + for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { continue; } - Integer betaCDR3 = CDR3AtoBMap.get(alphaCDR3); + Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) { continue; } @@ -487,12 +407,11 @@ public class Simulator { } } } + System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found."); Instant stop = Instant.now(); - - //results for first map - System.out.println("Results for first pass"); + System.out.println("RESULTS FOR FIRST PASS MATCHING"); List> allResults = new ArrayList<>(); Integer trueCount = 0; Iterator iter = firstMatchesMap.keySet().iterator(); @@ -550,7 +469,7 @@ public class Simulator { MatchingResult firstTest = new MatchingResult(comments, headers, allResults, dualMatchesMap, time); //results for dual map - System.out.println("Results for second pass"); + System.out.println("RESULTS FOR SECOND PASS MATCHING"); allResults = new ArrayList<>(); trueCount = 0; iter = dualMatchesMap.keySet().iterator(); @@ -596,260 +515,85 @@ public class Simulator { } System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); - MatchingResult dualTest = new MatchingResult(comments, headers, allResults, dualMatchesMap, time); - - MatchingResult[] output = {firstTest, dualTest}; return output; } - /* - ** Old main method before object-oriented refactor - * - public static void Simulate() { - Instant start = Instant.now(); - //Four things to try to improve this - //1. Run it on hardware with more memory - //2. implement p-values and just check exhaustively for strictly-bounded weights - // If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe - //3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient - // 3.5 Could also do a version where I make the graph one edge at a time, but from a 2D array so I have the - // weights already - // Tried it. It was way better than doing the graph directly, but slightly slower than the built-in - //4. Implement a discrete distribution function (along the lines of the alias method) - // - //With array method, could also try simply zeroing high-well-count peptides before graph creation - - //stdDeviation = Math.sqrt(numDistinctCells.doubleValue()); - //concentrations = new int[numConcentrations]; - - //OUTPUT - System.out.println("Generating cells"); - - List numbers = new ArrayList<>(); - IntStream.range(1, (2 * numDistinctCells) + 1).forEach(i -> numbers.add(i)); - Collections.shuffle(numbers); - - //Each cell represented by two numbers from the random permutation - //These represent unique alpha and beta peptides - List distinctCells = new ArrayList<>(); - for(int i = 0; i < numbers.size() - 1; i = i + 2) { - Integer tmp1 = numbers.get(i); - Integer tmp2 = numbers.get(i+1); - Integer[] tmp = {tmp1, tmp2}; - distinctCells.add(tmp); - } - - //OUTPUT - System.out.println("Cells generated"); - System.out.println("Filling wells"); - - //HashMap keyed to Alphas, values Betas - Map distCellsMapAlphaKey = new HashMap<>(); - for (Integer[] cell : distinctCells) { - distCellsMapAlphaKey.put(cell[0], cell[1]); - } - - //HashMap keyed to Betas, values Alphas - Map distCellsMapBetaKey = new HashMap<>(); - for (Integer[] cell : distinctCells) { - distCellsMapBetaKey.put(cell[1], cell[0]); - } - - Plate samplePlate = new Plate(numWells, errorRate, concentrations, stdDeviation); - samplePlate.fillWells(distinctCells); - - //OUTPUT - System.out.println("Wells filled"); - System.out.println("Making maps"); - - //All alphas on plate, with - Map allAlphas = samplePlate.assayWellsAlpha(); - Map allBetas = samplePlate.assayWellsBeta(); - int alphaCount = allAlphas.size(); - int betaCount = allBetas.size(); - - //Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to - // when I refactor to use a 2d array to make the graph - //For the autogenerator, all vertices must have distinct numbers associated with them - Integer vertexStartValue = 0; - //keys are sequential integer vertices, values are alphas - Map plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue); - //New start value for vertex to beta map should be one more than final vertex value in alpha map - vertexStartValue += plateVtoAMap.size(); - //keys are sequential integers vertices, values are betas - Map plateVtoBMap = getVertexToPeptideMap(allBetas, vertexStartValue); - //keys are alphas, values are sequential integer vertices from previous map - Map plateAtoVMap = invertVertexMap(plateVtoAMap); - //keys are betas, values are sequential integer vertices from previous map - Map plateBtoVMap = invertVertexMap(plateVtoBMap); - - //OUTPUT - System.out.println("Maps made"); - System.out.println("Creating Graph"); - - //Count how many wells each alpha appears in - Map alphaWellCounts = new HashMap<>(); - //count how many wells each beta appears in - Map betaWellCounts = new HashMap<>(); - //add edges, where weights are number of wells the peptides share in common. - //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class - Map wellNAlphas = null; - Map wellNBetas = null; - SimpleWeightedGraph graph = - new SimpleWeightedGraph(DefaultWeightedEdge.class); - - //Flag in global variables determines if edges are added to graph one at a time directly, or if a graph - //is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class - if(use2DArrayForGraph) { - double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; - for (int n = 0; n < numWells; n++) { - wellNAlphas = samplePlate.assayWellsAlpha(n); - for(Integer a: wellNAlphas.keySet()){ - alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); + //Counts the well occupancy of the row peptides and column peptides into given maps, and + //fills weights in the given 2D array + private static void countPeptidesAndFillMatrix(Plate samplePlate, + Map allRowPeptides, + Map allColumnPeptides, + Map rowPeptideToVertexMap, + Map columnPeptideToVertexMap, + int[] rowPeptideIndices, + int[] colPeptideIndices, + Map rowPeptideCounts, + Map columnPeptideCounts, + double[][] weights){ + Map wellNRowPeptides = null; + Map wellNColumnPeptides = null; + int vertexStartValue = rowPeptideToVertexMap.size(); + int numWells = samplePlate.getSize(); + for (int n = 0; n < numWells; n++) { + wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices); + for (Integer a : wellNRowPeptides.keySet()) { + if(allRowPeptides.containsKey(a)){ + rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); } - wellNBetas = samplePlate.assayWellsBeta(n); - for(Integer b: wellNBetas.keySet()){ - betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); + } + wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices); + for (Integer b : wellNColumnPeptides.keySet()) { + if(allColumnPeptides.containsKey(b)){ + columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); } - - for(Integer i: wellNAlphas.keySet()) { - for(Integer j: wellNBetas.keySet()){ - weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0; - } - } - } - if(useJGraphTGraphMatrixGenerator) { - SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); - List alphaVertices = new ArrayList<>(); - alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry - graphGenerator.first(alphaVertices); - List betaVertices = new ArrayList<>(); - betaVertices.addAll(plateVtoBMap.keySet()); - graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry - graphGenerator.weights(weights); - graphGenerator.generateGraph(graph); - } else { - for(int i = 0; i < alphaCount; i++) { - for (int j = vertexStartValue; j < betaCount + vertexStartValue; j++){ - graph.addVertex(i); - graph.addVertex(j); - addEdgeWithWeight(graph, i, (j), weights[i][j - vertexStartValue]); + } + for (Integer i : wellNRowPeptides.keySet()) { + if(allRowPeptides.containsKey(i)){ + for (Integer j : wellNColumnPeptides.keySet()) { + if(allColumnPeptides.containsKey(j)){ + weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0; + } } } } + } - else { - //construct graph - //add vertices - for(Integer v: plateVtoAMap.keySet()) { - graph.addVertex(v); - } - for(Integer v: plateVtoBMap.keySet()) { - graph.addVertex(v); - } - - for (int n = 0; n < numWells; n++) { - wellNAlphas = samplePlate.assayWellsAlpha(n); - for (Integer a : wellNAlphas.keySet()) { - alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); - } - wellNBetas = samplePlate.assayWellsBeta(n); - for (Integer b : wellNBetas.keySet()) { - betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); - } - - for (Integer i : wellNAlphas.keySet()) { - for (Integer j : wellNBetas.keySet()) { - addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j)); - } - } - } - } - - //OUTPUT - System.out.println("Graph created"); - System.out.println("Finding maximum weighted matching"); - - MaximumWeightBipartiteMatching maxWeightMatching = - new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet()); - - MatchingAlgorithm.Matching graphMatching = maxWeightMatching.getMatching(); - - Instant stop = Instant.now(); - - //OUTPUT - System.out.println("Matching completed"); - - - Iterator weightIter = graphMatching.iterator(); - DefaultWeightedEdge e = null; - int trueCount = 0; - int falseCount = 0; - boolean check = false; - while(weightIter.hasNext()) { - e = weightIter.next(); - if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { - continue; - } - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); - //Need map of each peptide and number of wells it appears in. - check = printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap); - if(check) { - trueCount++; - } - else { - falseCount++; - } - } - - //RESULTS SUMMARY - NumberFormat nf = NumberFormat.getInstance(Locale.US); - System.out.println(""); - System.out.println("Distinct cell population: " + nf.format(numDistinctCells)); - System.out.println("1 standard deviation of distribution: " + stdDeviation + " most common cells"); - System.out.print("Sample plate: " + numWells +" wells, "); - System.out.print("divided into " + concentrations.length + " sections of "); - System.out.println(Arrays.toString(concentrations) + " cells per well."); - System.out.print("Attempted to match alpha/beta overlaps of at least " + lowThreshold + " wells "); - System.out.println("and no more than " + highThreshold + " wells."); - System.out.println(nf.format(alphaCount) + " alpha peptides found"); - System.out.println(nf.format(betaCount) + " beta peptides found"); - MathContext mc = new MathContext(3); - int min = alphaCount > betaCount ? betaCount : alphaCount; - double attemptRate = (double) (trueCount + falseCount) / min; - BigDecimal attR = new BigDecimal(attemptRate, mc); - System.out.println("Pairing attempt rate: " + attR); - System.out.println("Correct pairings: " + nf.format(trueCount)); - System.out.println("Incorrect pairings: " + nf.format(falseCount)); - double experimentalError = (double) falseCount / (trueCount + falseCount); - BigDecimal expErr = new BigDecimal(experimentalError, mc); - System.out.println("Pairing error rate: " + expErr); - Duration time = Duration.between(start, stop); - System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); - if(use2DArrayForGraph) { - System.out.print("Graph generated from 2D array of weights"); - if(useJGraphTGraphMatrixGenerator){ - System.out.print(" using JGraphT's SimpleWeightedBipartiteGraphMatrixGenerator"); - } - else { - System.out.print(" directly."); - } - System.out.println(""); - } - else { - System.out.println("Graph generated directly from individual well maps"); - } - System.out.println(""); - - } - */ - private static Map getVertexToPeptideMap(Map peptides, Integer startValue) { + + private static void filterByOccupancyThreshold(SimpleWeightedGraph graph, + int low, int high) { + for(DefaultWeightedEdge e: graph.edgeSet()){ + if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){ + graph.setEdgeWeight(e, 0.0); + } + } + } + private static void filterByOccupancyThreshold(Map wellMap, int low, int high){ + List noise = new ArrayList<>(); + for(Integer k: wellMap.keySet()){ + if((wellMap.get(k) > high) || (wellMap.get(k) < low)){ + noise.add(k); + } + } + for(Integer k: noise) { + wellMap.remove(k); + } + } + + private static Map makePeptideToPeptideMap(List cells, int keyPeptideIndex, + int valuePeptideIndex){ + Map keyPeptideToValuePeptideMap = new HashMap<>(); + for (Integer[] cell : cells) { + keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]); + } + return keyPeptideToValuePeptideMap; + } + + private static Map makeVertexToPeptideMap(Map peptides, Integer startValue) { Map map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry Integer index = startValue; for (Integer k: peptides.keySet()) { @@ -867,44 +611,4 @@ public class Simulator { return inverse; } - /* - * Old methods for graph generation algorithms less efficient than built in matrix generator - * - private static void addEdgeWithWeight(Graph g, Integer v1, Integer v2, double weight) { - g.addEdge(v1, v2); - g.setEdgeWeight(v1, v2, weight); - } - - private static void addEdgeOrWeight(Graph g, Integer v1, Integer v2) { - if (g.containsEdge(v1, v2)) { - g.setEdgeWeight(v1, v2, g.getEdgeWeight(g.getEdge(v1, v2)) + 1.0); - } - else { - g.addEdge(v1, v2); - } - } - */ - - - /* - * Old output method before object-oriented refactor - * - private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target, - Integer targetCount, Map AtoBMap, Map VtoAMap, - Map VtoBMap) { - System.out.println("Alpha peptide " + source + " present in " + sourceCount + " wells"); - System.out.println("Beta peptide " + target + " present in " + targetCount + " wells"); - System.out.println("Both alpha and beta present in " + edgeWeight + " wells"); - if(VtoBMap.get(target).equals(AtoBMap.get(VtoAMap.get(source)))){ - System.out.println("True"); - System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight)); - return true; - } - else { - System.out.println("False"); - System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight)); - System.out.println("The real pair is " + source + " and " + AtoBMap.get(VtoAMap.get(source))); - return false; - } - }*/ } diff --git a/src/main/java/UserInterface.java b/src/main/java/UserInterface.java index 994c0c6..9c4fe87 100644 --- a/src/main/java/UserInterface.java +++ b/src/main/java/UserInterface.java @@ -260,7 +260,7 @@ public class UserInterface { List cells = cellReader.getCells(); MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3); MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1, - highThresholdCDR1, preliminaryResults.getMatchMap(), preliminaryResults.getTime()); + highThresholdCDR1, preliminaryResults); //result writer MatchingFileWriter writer = new MatchingFileWriter(filename + "First", results[0].getComments(),