import org.jgrapht.alg.interfaces.MatchingAlgorithm; import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching; import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator; import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; import org.jheaps.tree.PairingHeap; import java.math.BigDecimal; import java.math.MathContext; import java.text.NumberFormat; import java.time.Instant; import java.time.Duration; import java.util.*; import java.util.stream.IntStream; //NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell public class Simulator { private static final int cdr3AlphaIndex = 0; private static final int cdr3BetaIndex = 1; private static final int cdr1AlphaIndex = 2; private static final int cdr1BetaIndex = 3; public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) { //In real T cells, CDR1s have about one third the diversity of CDR3s List numbersCDR3 = new ArrayList<>(); List numbersCDR1 = new ArrayList<>(); Integer numDistCDR3s = 2 * numDistinctCells + 1; IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i)); IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i)); Collections.shuffle(numbersCDR3); Collections.shuffle(numbersCDR1); //Each cell represented by 4 values //two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta) List distinctCells = new ArrayList<>(); for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ Integer tmpCDR3a = numbersCDR3.get(i); Integer tmpCDR3b = numbersCDR3.get(i+1); Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size()); Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size()); Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b}; distinctCells.add(tmp); } return new CellSample(distinctCells, cdr1Freq); } //Make the graph needed for matching CDR3s public static GraphWithMapData makeGraph(List distinctCells, Plate samplePlate, Integer lowThreshold, Integer highThreshold, boolean verbose) { Instant start = Instant.now(); int[] alphaIndex = {cdr3AlphaIndex}; int[] betaIndex = {cdr3BetaIndex}; int numWells = samplePlate.getSize(); if(verbose){System.out.println("Making cell maps");} //HashMap keyed to Alphas, values Betas Map distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1); if(verbose){System.out.println("Cell maps made");} if(verbose){System.out.println("Making well maps");} Map allAlphas = samplePlate.assayWellsPeptideP(alphaIndex); Map allBetas = samplePlate.assayWellsPeptideP(betaIndex); int alphaCount = allAlphas.size(); if(verbose){System.out.println("All alphas count: " + alphaCount);} int betaCount = allBetas.size(); if(verbose){System.out.println("All betas count: " + betaCount);} if(verbose){System.out.println("Well maps made");} //Remove saturating-occupancy sequences because they have no signal value. //Remove sequences with total occupancy below minimum pair overlap threshold if(verbose){System.out.println("Removing sequences present in all wells.");} if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");} filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1); filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1); if(verbose){System.out.println("Sequences removed");} int pairableAlphaCount = allAlphas.size(); if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);} int pairableBetaCount = allBetas.size(); if(verbose){System.out.println("Remaining betas count: " + pairableBetaCount);} if(verbose){System.out.println("Making vertex maps");} //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have //distinct numbers associated with them. Since I'm using a 2D array, that means //distinct indices between the rows and columns. vertexStartValue lets me track where I switch //from numbering rows to columns, so I can assign unique numbers to every vertex, and then //subtract the vertexStartValue from betas to use their vertex labels as array indices Integer vertexStartValue = 0; //keys are sequential integer vertices, values are alphas Map plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue); //new start value for vertex to beta map should be one more than final vertex value in alpha map vertexStartValue += plateVtoAMap.size(); //keys are sequential integers vertices, values are betas Map plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue); //keys are alphas, values are sequential integer vertices from previous map Map plateAtoVMap = invertVertexMap(plateVtoAMap); //keys are betas, values are sequential integer vertices from previous map Map plateBtoVMap = invertVertexMap(plateVtoBMap); if(verbose){System.out.println("Vertex maps made");} //make adjacency matrix for bipartite graph generator //(technically this is only 1/4 of an adjacency matrix, but that's all you need //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.) if(verbose){System.out.println("Creating adjacency matrix");} //Count how many wells each alpha appears in Map alphaWellCounts = new HashMap<>(); //count how many wells each beta appears in Map betaWellCounts = new HashMap<>(); //the adjacency matrix to be used by the graph generator double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap, plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); if(verbose){System.out.println("Matrix created");} //create bipartite graph if(verbose){System.out.println("Creating graph");} //the graph object SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); //the graph generator SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); //the list of alpha vertices List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry graphGenerator.first(alphaVertices); //the list of beta vertices List betaVertices = new ArrayList<>(plateVtoBMap.keySet()); graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry //use adjacency matrix of weight created previously graphGenerator.weights(weights); graphGenerator.generateGraph(graph); if(verbose){System.out.println("Graph created");} //remove weights outside given overlap thresholds if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} filterByOccupancyThreshold(graph, lowThreshold, highThreshold); if(verbose){System.out.println("Over- and under-weight edges set to 0.0");} Instant stop = Instant.now(); Duration time = Duration.between(start, stop); //return GraphWithMapData object return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount, lowThreshold, highThreshold, distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap, plateBtoVMap, alphaWellCounts, betaWellCounts, time); } //match CDR3s. public static MatchingResult matchCDR3s(GraphWithMapData data, Integer maxOccupancyDifference, Integer minOverlapPercent, boolean verbose) { Instant start = Instant.now(); int numWells = data.getNumWells(); Integer highThreshold = data.getHighThreshold(); Integer lowThreshold = data.getLowThreshold(); Integer alphaCount = data.getAlphaCount(); Integer betaCount = data.getBetaCount(); Map distCellsMapAlphaKey = data.getDistCellsMapAlphaKey(); Map plateVtoAMap = data.getPlateVtoAMap(); Map plateVtoBMap = data.getPlateVtoBMap(); Map alphaWellCounts = data.getAlphaWellCounts(); Map betaWellCounts = data.getBetaWellCounts(); SimpleWeightedGraph graph = data.getGraph(); //Filter by overlap size if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() + " percent of vertex occupancy value.");} filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent); if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");} //Filter by relative occupancy if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > " + maxOccupancyDifference);} filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, maxOccupancyDifference); if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " + "set to 0.0");} //Find Maximum Weighted Matching //using jheaps library class PairingHeap for improved efficiency if(verbose){System.out.println("Finding maximum weighted matching");} //Attempting to use addressable heap to improve performance MaximumWeightBipartiteMatching maxWeightMatching = new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet(), i -> new PairingHeap(Comparator.naturalOrder())); MatchingAlgorithm.Matching graphMatching = maxWeightMatching.getMatching(); if(verbose){System.out.println("Matching completed");} Instant stop = Instant.now(); //Header for CSV file List header = new ArrayList<>(); header.add("Alpha"); header.add("Alpha well count"); header.add("Beta"); header.add("Beta well count"); header.add("Overlap well count"); header.add("Matched correctly?"); header.add("P-value"); //Results for csv file List> allResults = new ArrayList<>(); NumberFormat nf = NumberFormat.getInstance(Locale.US); MathContext mc = new MathContext(3); Iterator weightIter = graphMatching.iterator(); DefaultWeightedEdge e; int trueCount = 0; int falseCount = 0; boolean check; Map matchMap = new HashMap<>(); while(weightIter.hasNext()) { e = weightIter.next(); Integer source = graph.getEdgeSource(e); Integer target = graph.getEdgeTarget(e); //The match map is all matches found, not just true matches! matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target)); check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); if(check) { trueCount++; } else { falseCount++; } List result = new ArrayList<>(); result.add(plateVtoAMap.get(source).toString()); //alpha well count result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString()); result.add(plateVtoBMap.get(target).toString()); //beta well count result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString()); //overlap count result.add(Double.toString(graph.getEdgeWeight(e))); result.add(Boolean.toString(check)); double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)), betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e)); BigDecimal pValueTrunc = new BigDecimal(pValue, mc); result.add(pValueTrunc.toString()); allResults.add(result); } //Metadate comments for CSV file int min = Math.min(alphaCount, betaCount); double attemptRate = (double) (trueCount + falseCount) / min; BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc); double pairingErrorRate = (double) falseCount / (trueCount + falseCount); BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc); //make list of well concentrations List wellConcentrations = Arrays.asList(data.getWellConcentrations()); //make string out of concentrations list StringBuilder concen = new StringBuilder(); for(Integer i: wellConcentrations){ concen.append(i.toString()); concen.append(" "); } String concenString = concen.toString(); List comments = new ArrayList<>(); comments.add("T cell counts in sample plate wells: " + concenString); comments.add("Total alphas found: " + alphaCount); comments.add("Total betas found: " + betaCount); comments.add("High overlap threshold: " + highThreshold); comments.add("Low overlap threshold: " + lowThreshold); comments.add("Minimum overlap percent: " + minOverlapPercent); comments.add("Maximum occupancy difference: " + maxOccupancyDifference); comments.add("Pairing attempt rate: " + attemptRateTrunc); comments.add("Correct pairings: " + trueCount); comments.add("Incorrect pairings: " + falseCount); comments.add("Pairing error rate: " + pairingErrorRateTrunc); Duration time = Duration.between(start, stop); time = time.plus(data.getTime()); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); if(verbose){ for(String s: comments){ System.out.println(s); } } return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time); } //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s. public static MatchingResult[] matchCDR1s(List distinctCells, Plate samplePlate, Integer lowThreshold, Integer highThreshold, MatchingResult priorResult){ Instant start = Instant.now(); Duration previousTime = priorResult.getTime(); Map previousMatches = priorResult.getMatchMap(); int numWells = samplePlate.getSize(); int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex}; int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex}; System.out.println("Making previous match maps"); Map cdr3AtoBMap = previousMatches; Map cdr3BtoAMap = invertVertexMap(cdr3AtoBMap); System.out.println("Previous match maps made"); System.out.println("Making cell maps"); Map alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex); Map betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex); System.out.println("Cell maps made"); System.out.println("Making well maps"); Map allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices); Map allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices); int CDR3Count = allCDR3s.size(); System.out.println("all CDR3s count: " + CDR3Count); int CDR1Count = allCDR1s.size(); System.out.println("all CDR1s count: " + CDR1Count); System.out.println("Well maps made"); System.out.println("Removing unpaired CDR3s from well maps"); List unpairedCDR3s = new ArrayList<>(); for(Integer i: allCDR3s.keySet()){ if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){ unpairedCDR3s.add(i); } } for(Integer i: unpairedCDR3s){ allCDR3s.remove(i); } System.out.println("Unpaired CDR3s removed."); System.out.println("Remaining CDR3 count: " + allCDR3s.size()); System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s"); filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1); System.out.println("CDR1s removed."); System.out.println("Remaining CDR1 count: " + allCDR1s.size()); System.out.println("Making vertex maps"); //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have // distinct numbers associated with them. Since I'm using a 2D array, that means // distinct indices between the rows and columns. vertexStartValue lets me track where I switch // from numbering rows to columns, so I can assign unique numbers to every vertex, and then // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices Integer vertexStartValue = 0; //keys are sequential integer vertices, values are CDR3s Map plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue); //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map vertexStartValue += plateVtoCDR3Map.size(); //keys are sequential integers vertices, values are CDR1s Map plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue); //keys are CDR3s, values are sequential integer vertices from previous map Map plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map); //keys are CDR1s, values are sequential integer vertices from previous map Map plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map); System.out.println("Vertex maps made"); System.out.println("Creating adjacency matrix"); //Count how many wells each CDR3 appears in Map cdr3WellCounts = new HashMap<>(); //count how many wells each CDR1 appears in Map cdr1WellCounts = new HashMap<>(); //add edges, where weights are number of wells the peptides share in common. //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class Map wellNCDR3s = null; Map wellNCDR1s = null; double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap, cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights); System.out.println("Matrix created"); System.out.println("Creating graph"); SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); List cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry graphGenerator.first(cdr3Vertices); List cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet()); graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry graphGenerator.weights(weights); graphGenerator.generateGraph(graph); System.out.println("Graph created"); System.out.println("Removing edges outside of weight thresholds"); filterByOccupancyThreshold(graph, lowThreshold, highThreshold); System.out.println("Over- and under-weight edges set to 0.0"); System.out.println("Finding first maximum weighted matching"); MaximumWeightBipartiteMatching firstMaxWeightMatching = new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); MatchingAlgorithm.Matching graphMatching = firstMaxWeightMatching.getMatching(); System.out.println("First maximum weighted matching found"); //first processing run Map firstMatchCDR3toCDR1Map = new HashMap<>(); Iterator weightIter = graphMatching.iterator(); DefaultWeightedEdge e; while(weightIter.hasNext()){ e = weightIter.next(); // if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { // continue; // } Integer source = graph.getEdgeSource(e); Integer target = graph.getEdgeTarget(e); firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); } System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size()); System.out.println("Removing edges from first maximum weighted matching"); //zero out the edge weights in the matching weightIter = graphMatching.iterator(); while(weightIter.hasNext()){ graph.removeEdge(weightIter.next()); } System.out.println("Edges removed"); //Generate a new matching System.out.println("Finding second maximum weighted matching"); MaximumWeightBipartiteMatching secondMaxWeightMatching = new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); graphMatching = secondMaxWeightMatching.getMatching(); System.out.println("Second maximum weighted matching found"); //second processing run Map secondMatchCDR3toCDR1Map = new HashMap<>(); weightIter = graphMatching.iterator(); while(weightIter.hasNext()){ e = weightIter.next(); // if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { // continue; // } Integer source = graph.getEdgeSource(e); // if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){ // continue; // } Integer target = graph.getEdgeTarget(e); secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); } System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size()); System.out.println("Mapping first pass CDR3 alpha/beta pairs"); //get linked map for first matching attempt Map firstMatchesMap = new LinkedHashMap<>(); for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { continue; } Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) { continue; } firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); } System.out.println("First pass CDR3 alpha/beta pairs mapped"); System.out.println("Mapping second pass CDR3 alpha/beta pairs."); System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass."); //Look for matches that simply swapped already-matched alpha and beta CDR3s Map dualMatchesMap = new LinkedHashMap<>(); for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { continue; } Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) { continue; } if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){ if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){ dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); } } } System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found."); Instant stop = Instant.now(); //results for first map System.out.println("RESULTS FOR FIRST PASS MATCHING"); List> allResults = new ArrayList<>(); Integer trueCount = 0; Iterator iter = firstMatchesMap.keySet().iterator(); while(iter.hasNext()){ Boolean proven = false; List tmp = new ArrayList<>(); tmp.add(iter.next().toString()); tmp.add(iter.next().toString()); tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ proven = true; } } else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ proven = true; } } tmp.add(proven.toString()); allResults.add(tmp); if(proven){ trueCount++; } } List comments = new ArrayList<>(); comments.add("Plate size: " + samplePlate.getSize() + " wells"); comments.add("Previous pairs found: " + previousMatches.size()); comments.add("CDR1 matches attempted: " + allResults.size()); double attemptRate = (double) allResults.size() / previousMatches.size(); comments.add("Matching attempt rate: " + attemptRate); comments.add("Number of correct matches: " + trueCount); double correctRate = (double) trueCount / allResults.size(); comments.add("Correct matching rate: " + correctRate); NumberFormat nf = NumberFormat.getInstance(Locale.US); Duration time = Duration.between(start, stop); time = time.plus(previousTime); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); for(String s: comments){ System.out.println(s); } List headers = new ArrayList<>(); headers.add("CDR3 alpha"); headers.add("CDR3 beta"); headers.add("first matched CDR1"); headers.add("second matched CDR1"); headers.add("Correct match?"); MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers, allResults, dualMatchesMap, time); //results for dual map System.out.println("RESULTS FOR SECOND PASS MATCHING"); allResults = new ArrayList<>(); trueCount = 0; iter = dualMatchesMap.keySet().iterator(); while(iter.hasNext()){ Boolean proven = false; List tmp = new ArrayList<>(); tmp.add(iter.next().toString()); tmp.add(iter.next().toString()); tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ proven = true; } } else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ proven = true; } } tmp.add(proven.toString()); allResults.add(tmp); if(proven){ trueCount++; } } comments = new ArrayList<>(); comments.add("Plate size: " + samplePlate.getSize() + " wells"); comments.add("Previous pairs found: " + previousMatches.size()); comments.add("High overlap threshold: " + highThreshold); comments.add("Low overlap threshold: " + lowThreshold); comments.add("CDR1 matches attempted: " + allResults.size()); attemptRate = (double) allResults.size() / previousMatches.size(); comments.add("Matching attempt rate: " + attemptRate); comments.add("Number of correct matches: " + trueCount); correctRate = (double) trueCount / allResults.size(); comments.add("Correct matching rate: " + correctRate); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); for(String s: comments){ System.out.println(s); } System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers, allResults, dualMatchesMap, time); MatchingResult[] output = {firstTest, dualTest}; return output; } //Counts the well occupancy of the row peptides and column peptides into given maps, and //fills weights in the given 2D array private static void countSequencesAndFillMatrix(Plate samplePlate, Map allRowSequences, Map allColumnSequences, Map rowSequenceToVertexMap, Map columnSequenceToVertexMap, int[] rowSequenceIndices, int[] colSequenceIndices, Map rowSequenceCounts, Map columnSequenceCounts, double[][] weights){ Map wellNRowPeptides = null; Map wellNColumnPeptides = null; int vertexStartValue = rowSequenceToVertexMap.size(); int numWells = samplePlate.getSize(); for (int n = 0; n < numWells; n++) { wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowSequenceIndices); for (Integer a : wellNRowPeptides.keySet()) { if(allRowSequences.containsKey(a)){ rowSequenceCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); } } wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colSequenceIndices); for (Integer b : wellNColumnPeptides.keySet()) { if(allColumnSequences.containsKey(b)){ columnSequenceCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); } } for (Integer i : wellNRowPeptides.keySet()) { if(allRowSequences.containsKey(i)){ for (Integer j : wellNColumnPeptides.keySet()) { if(allColumnSequences.containsKey(j)){ weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0; } } } } } } private static void filterByOccupancyThreshold(SimpleWeightedGraph graph, int low, int high) { for(DefaultWeightedEdge e: graph.edgeSet()){ if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){ graph.setEdgeWeight(e, 0.0); } } } private static void filterByOccupancyThreshold(Map wellMap, int low, int high){ List noise = new ArrayList<>(); for(Integer k: wellMap.keySet()){ if((wellMap.get(k) > high) || (wellMap.get(k) < low)){ noise.add(k); } } for(Integer k: noise) { wellMap.remove(k); } } //Remove edges for pairs with large occupancy discrepancy private static void filterByRelativeOccupancy(SimpleWeightedGraph graph, Map alphaWellCounts, Map betaWellCounts, Map plateVtoAMap, Map plateVtoBMap, Integer maxOccupancyDifference) { for (DefaultWeightedEdge e : graph.edgeSet()) { Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); //Adjust this to something cleverer later if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) { graph.setEdgeWeight(e, 0.0); } } } //Remove edges for pairs where overlap size is significantly lower than the well occupancy private static void filterByOverlapSize(SimpleWeightedGraph graph, Map alphaWellCounts, Map betaWellCounts, Map plateVtoAMap, Map plateVtoBMap, Integer minOverlapPercent) { for (DefaultWeightedEdge e : graph.edgeSet()) { Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); double weight = graph.getEdgeWeight(e); double min = minOverlapPercent / 100.0; if ((weight / alphaOcc < min) || (weight / betaOcc < min)) { graph.setEdgeWeight(e, 0.0); } } } private static Map makeSequenceToSequenceMap(List cells, int keySequenceIndex, int valueSequenceIndex){ Map keySequenceToValueSequenceMap = new HashMap<>(); for (Integer[] cell : cells) { keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]); } return keySequenceToValueSequenceMap; } private static Map makeVertexToSequenceMap(Map sequences, Integer startValue) { Map map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry Integer index = startValue; for (Integer k: sequences.keySet()) { map.put(index, k); index++; } return map; } private static Map invertVertexMap(Map map) { Map inverse = new HashMap<>(); for (Integer k : map.keySet()) { inverse.put(map.get(k), k); } return inverse; } }