import org.jgrapht.Graphs; import org.jgrapht.alg.interfaces.MatchingAlgorithm; import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching; import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; import org.jheaps.tree.PairingHeap; import java.math.BigDecimal; import java.math.MathContext; import java.text.NumberFormat; import java.time.Instant; import java.time.Duration; import java.util.*; //NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell public class Simulator implements GraphModificationFunctions { public static GraphWithMapData makeCDR3Graph(CellSample cellSample, Plate samplePlate, int readDepth, double readErrorRate, double errorCollisionRate, double realSequenceCollisionRate, boolean verbose) { //start timing Instant start = Instant.now(); int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()}; int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()}; List distinctCells = cellSample.getCells(); int numWells = samplePlate.getSize(); //Make a hashmap keyed to alphas, values are associated betas. if(verbose){System.out.println("Making cell maps");} Map distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, SequenceType.CDR3_ALPHA.ordinal(), SequenceType.CDR3_BETA.ordinal()); if(verbose){System.out.println("Cell maps made");} //Make linkedHashMap keyed to sequences, values are SequenceRecords reflecting plate statistics if(verbose){System.out.println("Making sample plate sequence maps");} Map alphaSequences = samplePlate.countSequences(readDepth, readErrorRate, errorCollisionRate, realSequenceCollisionRate, alphaIndices); int alphaCount = alphaSequences.size(); if(verbose){System.out.println("Alphas sequences read: " + alphaCount);} Map betaSequences = samplePlate.countSequences(readDepth, readErrorRate, errorCollisionRate, realSequenceCollisionRate, betaIndices); int betaCount = betaSequences.size(); if(verbose){System.out.println("Betas sequences read: " + betaCount);} if(verbose){System.out.println("Sample plate sequence maps made");} //pre-filter saturating sequences and sequences likely to be misreads if(verbose){System.out.println("Removing sequences present in all wells.");} filterByOccupancyThresholds(alphaSequences, 1, numWells - 1); filterByOccupancyThresholds(betaSequences, 1, numWells - 1); if(verbose){System.out.println("Sequences removed");} if(verbose){System.out.println("Remaining alpha sequence count: " + alphaSequences.size());} if(verbose){System.out.println("Remaining beta sequence count: " + betaSequences.size());} if (readDepth > 1) { if(verbose){System.out.println("Removing sequences with disparate occupancies and read counts");} filterByOccupancyAndReadCount(alphaSequences, readDepth); filterByOccupancyAndReadCount(betaSequences, readDepth); if(verbose){System.out.println("Sequences removed");} if(verbose){System.out.println("Remaining alpha sequence count: " + alphaSequences.size());} if(verbose){System.out.println("Remaining beta sequence count: " + betaSequences.size());} } if (realSequenceCollisionRate > 0.0) { if(verbose){System.out.println("Removing wells with anomalous read counts from sequence records");} int alphaWellsRemoved = filterWellsByReadCount(alphaSequences); int betaWellsRemoved = filterWellsByReadCount(betaSequences); if(verbose){System.out.println("Wells with anomalous read counts removed from sequence records");} if(verbose){System.out.println("Total alpha sequence wells removed: " + alphaWellsRemoved);} if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);} } /* * The commented out code below works beautifully for small enough graphs. However, after implementing a * Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that * this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix. * So I'm going to attempt to build this graph directly and see if that is less memory intensive */ // //construct the graph. For simplicity, going to make // if(verbose){System.out.println("Making vertex maps");} // //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have // //distinct numbers associated with them. Since I'm using a 2D array, that means // //distinct indices between the rows and columns. vertexStartValue lets me track where I switch // //from numbering rows to columns, so I can assign unique numbers to every vertex, and then // //subtract the vertexStartValue from betas to use their vertex labels as array indices // int vertexStartValue = 0; // //keys are sequential integer vertices, values are alphas // Map plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue); // //new start value for vertex to beta map should be one more than final vertex value in alpha map // vertexStartValue += plateAtoVMap.size(); // //keys are betas, values are sequential integers // Map plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue); // if(verbose){System.out.println("Vertex maps made");} // //make adjacency matrix for bipartite graph generator // //(technically this is only 1/4 of an adjacency matrix, but that's all you need // //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.) // if(verbose){System.out.println("Making adjacency matrix");} // double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()]; // fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap); // if(verbose){System.out.println("Adjacency matrix made");} // //make bipartite graph // if(verbose){System.out.println("Making bipartite weighted graph");} // //the graph object // SimpleWeightedGraph graph = // new SimpleWeightedGraph<>(DefaultWeightedEdge.class); // //the graph generator // SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); // //the list of alpha vertices // List alphaVertices = new ArrayList<>(); // for (String seq : plateAtoVMap.keySet()) { // Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq)); // alphaVertices.add(alphaVertex); // } // //Sort to make sure the order of vertices in list matches the order of the adjacency matrix // Collections.sort(alphaVertices); // //Add ordered list of vertices to the graph // graphGenerator.first(alphaVertices); // //the list of beta vertices // List betaVertices = new ArrayList<>(); // for (String seq : plateBtoVMap.keySet()) { // Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq)); // betaVertices.add(betaVertex); // } // //Sort to make sure the order of vertices in list matches the order of the adjacency matrix // Collections.sort(betaVertices); // //Add ordered list of vertices to the graph // graphGenerator.second(betaVertices); // //use adjacency matrix of weight created previously // graphGenerator.weights(weights); // graphGenerator.generateGraph(graph); //make bipartite graph if(verbose){System.out.println("Making bipartite weighted graph");} //the graph object SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); int vertexLabelValue = 0; //create and add alpha sequence vertices List alphaVertices = new ArrayList<>(); for (Map.Entry entry: alphaSequences.entrySet()) { alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue)); vertexLabelValue++; } alphaVertices.forEach(graph::addVertex); //add beta sequence vertices List betaVertices = new ArrayList<>(); for (Map.Entry entry: betaSequences.entrySet()) { betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue)); vertexLabelValue++; } betaVertices.forEach(graph::addVertex); //add edges (best so far) int edgesAddedCount = 0; for(Vertex a: alphaVertices) { Set a_wells = a.getRecord().getWells(); for(Vertex b: betaVertices) { Set sharedWells = new HashSet<>(a_wells); sharedWells.retainAll(b.getRecord().getWells()); if (!sharedWells.isEmpty()) { Graphs.addEdge(graph, a, b, (double) sharedWells.size()); } edgesAddedCount++; if (edgesAddedCount % 10000000 == 0) { //collect garbage every 10,000,000 edges System.out.println(edgesAddedCount + " edges added"); //request garbage collection System.gc(); System.out.println("Garbage collection requested"); } } } if(verbose){System.out.println("Graph created");} //stop timing Instant stop = Instant.now(); Duration time = Duration.between(start, stop); //create GraphWithMapData object GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, alphaCount, betaCount, samplePlate.getError(), readDepth, readErrorRate, errorCollisionRate, realSequenceCollisionRate, time); //Set cell sample file name in graph to name of cell sample output.setCellFilename(cellSample.getFilename()); //Set cell sample size in graph output.setCellSampleSize(cellSample.getCellCount()); //Set sample plate file name in graph to name of sample plate output.setPlateFilename(samplePlate.getFilename()); //return GraphWithMapData object return output; } //match CDR3s. public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold, Integer highThreshold, Integer maxOccupancyDifference, Integer minOverlapPercent, boolean verbose, boolean calculatePValue) { Instant start = Instant.now(); SimpleWeightedGraph graph = data.getGraph(); Map removedEdges = new HashMap<>(); boolean saveEdges = BiGpairSEQ.cacheGraph(); int numWells = data.getNumWells(); //Integer alphaCount = data.getAlphaCount(); //Integer betaCount = data.getBetaCount(); Map distCellsMapAlphaKey = data.getDistCellsMapAlphaKey(); Set alphas = new HashSet<>(); Set betas = new HashSet<>(); for(Vertex v: graph.vertexSet()) { if (SequenceType.CDR3_ALPHA.equals(v.getType())){ alphas.add(v); } else { betas.add(v); } } Integer graphAlphaCount = alphas.size(); Integer graphBetaCount = betas.size(); Integer graphEdgeCount = graph.edgeSet().size(); //remove edges with weights outside given overlap thresholds, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); if(verbose){System.out.println("Over- and under-weight edges removed");} //remove edges between vertices with too small an overlap size, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() + " percent of vertex occupancy value.");} removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges)); if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");} //Filter by relative occupancy if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > " + maxOccupancyDifference);} removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges)); if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " + "removed");} Integer filteredGraphEdgeCount = graph.edgeSet().size(); //Find Maximum Weight Matching if(verbose){System.out.println("Finding maximum weight matching");} //The matching object MatchingAlgorithm maxWeightMatching; //Determine algorithm type AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType(); switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way case AUCTION -> { //create a new MaximumIntegerWeightBipartiteAuctionMatching maxWeightMatching = new MaximumIntegerWeightBipartiteAuctionMatching<>(graph, alphas, betas); } case INTEGER_WEIGHT_SCALING -> { maxWeightMatching = new MaximumIntegerWeightBipartiteMatching<>(graph, alphas, betas, new BigDecimal(highThreshold)); } default -> { //HUNGARIAN //use selected heap type for priority queue HeapType heap = BiGpairSEQ.getPriorityQueueHeapType(); if(HeapType.PAIRING.equals(heap)) { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, alphas, betas, i -> new PairingHeap(Comparator.naturalOrder())); } else {//Fibonacci is the default, and what's used in the JGraphT implementation maxWeightMatching = new MaximumWeightBipartiteMatching(graph, alphas, betas); } } } MatchingAlgorithm.Matching matching = maxWeightMatching.getMatching(); if(verbose){System.out.println("Matching completed");} Instant stop = Instant.now(); //Header for CSV file List header = new ArrayList<>(); header.add("Alpha"); header.add("Alpha well count"); header.add("Beta"); header.add("Beta well count"); header.add("Overlap well count"); header.add("Matched correctly?"); if(calculatePValue) { header.add("P-value"); } //Results for csv file List> allResults = new ArrayList<>(); NumberFormat nf = NumberFormat.getInstance(Locale.US); MathContext mc = new MathContext(3); Iterator weightIter = matching.iterator(); DefaultWeightedEdge e; int trueCount = 0; int falseCount = 0; boolean check; Map matchMap = new HashMap<>(); while(weightIter.hasNext()) { e = weightIter.next(); Vertex source = graph.getEdgeSource(e); Vertex target = graph.getEdgeTarget(e); //The match map is all matches found, not just true matches! matchMap.put(source.getSequence(), target.getSequence()); check = target.getSequence().equals(distCellsMapAlphaKey.get(source.getSequence())); if(check) { trueCount++; } else { falseCount++; } List result = new ArrayList<>(); //alpha sequence result.add(source.getSequence()); //alpha well count result.add(source.getOccupancy().toString()); //beta sequence result.add(target.getSequence()); //beta well count result.add(target.getOccupancy().toString()); //overlap count result.add(Double.toString(graph.getEdgeWeight(e))); result.add(Boolean.toString(check)); if (calculatePValue) { double pValue = Equations.pValue(numWells, source.getOccupancy(), target.getOccupancy(), graph.getEdgeWeight(e)); BigDecimal pValueTrunc = new BigDecimal(pValue, mc); result.add(pValueTrunc.toString()); } allResults.add(result); } //Metadata comments for CSV file String algoType; switch(algorithm) { case AUCTION -> { algoType = "Auction algorithm"; } case INTEGER_WEIGHT_SCALING -> { algoType = "Integer weight scaling algorithm from Duan and Su (not yet perfectly implemented)"; } default -> { //HUNGARIAN algoType = "Hungarian algorithm with heap: " + BiGpairSEQ.getPriorityQueueHeapType().name(); } } int min = Math.min(graphAlphaCount, graphBetaCount); //matching weight Double matchingWeight = matching.getWeight(); //rate of attempted matching double attemptRate = (double) (trueCount + falseCount) / min; BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc); //rate of pairing error double pairingErrorRate = (double) falseCount / (trueCount + falseCount); BigDecimal pairingErrorRateTrunc; if(Double.isFinite(pairingErrorRate)) { pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc); } else{ pairingErrorRateTrunc = new BigDecimal(-1, mc); } //get list of well populations Integer[] wellPopulations = data.getWellPopulations(); //make string out of populations list StringBuilder populationsStringBuilder = new StringBuilder(); populationsStringBuilder.append(wellPopulations[0].toString()); for(int i = 1; i < wellPopulations.length; i++){ populationsStringBuilder.append(", "); populationsStringBuilder.append(wellPopulations[i].toString()); } String wellPopulationsString = populationsStringBuilder.toString(); //graph generation time Duration graphTime = data.getTime(); //MWM run time Duration pairingTime = Duration.between(start, stop); //total simulation time Duration totalTime = graphTime.plus(pairingTime); Map metadata = new LinkedHashMap<>(); metadata.put("cell sample filename", data.getCellFilename()); metadata.put("cell sample size", data.getCellSampleSize().toString()); metadata.put("sample plate filename", data.getPlateFilename()); metadata.put("sample plate well count", data.getNumWells().toString()); metadata.put("sequence dropout rate", data.getDropoutRate().toString()); metadata.put("graph filename", dataFilename); metadata.put("MWM algorithm type", algoType); metadata.put("matching weight", matchingWeight.toString()); metadata.put("well populations", wellPopulationsString); metadata.put("sequence read depth", data.getReadDepth().toString()); metadata.put("sequence read error rate", data.getReadErrorRate().toString()); metadata.put("read error collision rate", data.getErrorCollisionRate().toString()); metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString()); metadata.put("total alphas read from plate", data.getAlphaCount().toString()); metadata.put("total betas read from plate", data.getBetaCount().toString()); metadata.put("initial edges in graph", graphEdgeCount.toString()); metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString()); metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString()); metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString()); metadata.put("high overlap threshold for pairing", highThreshold.toString()); metadata.put("low overlap threshold for pairing", lowThreshold.toString()); metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString()); metadata.put("maximum occupancy difference for pairing", maxOccupancyDifference.toString()); metadata.put("pairing attempt rate", attemptRateTrunc.toString()); metadata.put("correct pairing count", Integer.toString(trueCount)); metadata.put("incorrect pairing count", Integer.toString(falseCount)); metadata.put("pairing error rate", pairingErrorRateTrunc.toString()); metadata.put("time to generate graph (seconds)", nf.format(graphTime.toSeconds())); metadata.put("time to pair sequences (seconds)",nf.format(pairingTime.toSeconds())); metadata.put("total simulation time (seconds)", nf.format(totalTime.toSeconds())); //create MatchingResult object MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap); if(verbose){ for(String s: output.getComments()){ System.out.println(s); } } if(saveEdges) { //put the removed edges back on the graph System.out.println("Restoring removed edges to graph."); GraphModificationFunctions.addRemovedEdges(graph, removedEdges); } //return MatchingResult object return output; } //Commented out CDR1 matching until it's time to re-implement it // //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s. // public static MatchingResult[] matchCDR1s(List distinctCells, // Plate samplePlate, Integer lowThreshold, // Integer highThreshold, MatchingResult priorResult){ // Instant start = Instant.now(); // Duration previousTime = priorResult.getTime(); // Map previousMatches = priorResult.getMatchMap(); // int numWells = samplePlate.getSize(); // int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex}; // int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex}; // // System.out.println("Making previous match maps"); // Map cdr3AtoBMap = previousMatches; // Map cdr3BtoAMap = invertVertexMap(cdr3AtoBMap); // System.out.println("Previous match maps made"); // // System.out.println("Making cell maps"); // Map alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex); // Map betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex); // System.out.println("Cell maps made"); // // System.out.println("Making well maps"); // Map allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices); // Map allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices); // int CDR3Count = allCDR3s.size(); // System.out.println("all CDR3s count: " + CDR3Count); // int CDR1Count = allCDR1s.size(); // System.out.println("all CDR1s count: " + CDR1Count); // System.out.println("Well maps made"); // // System.out.println("Removing unpaired CDR3s from well maps"); // List unpairedCDR3s = new ArrayList<>(); // for(Integer i: allCDR3s.keySet()){ // if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){ // unpairedCDR3s.add(i); // } // } // for(Integer i: unpairedCDR3s){ // allCDR3s.remove(i); // } // System.out.println("Unpaired CDR3s removed."); // System.out.println("Remaining CDR3 count: " + allCDR3s.size()); // // System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s"); // filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1); // System.out.println("CDR1s removed."); // System.out.println("Remaining CDR1 count: " + allCDR1s.size()); // // System.out.println("Making vertex maps"); // // //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have // // distinct numbers associated with them. Since I'm using a 2D array, that means // // distinct indices between the rows and columns. vertexStartValue lets me track where I switch // // from numbering rows to columns, so I can assign unique numbers to every vertex, and then // // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices // Integer vertexStartValue = 0; // //keys are sequential integer vertices, values are CDR3s // Map plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue); // //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map // vertexStartValue += plateVtoCDR3Map.size(); // //keys are sequential integers vertices, values are CDR1s // Map plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue); // //keys are CDR3s, values are sequential integer vertices from previous map // Map plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map); // //keys are CDR1s, values are sequential integer vertices from previous map // Map plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map); // System.out.println("Vertex maps made"); // // System.out.println("Creating adjacency matrix"); // //Count how many wells each CDR3 appears in // Map cdr3WellCounts = new HashMap<>(); // //count how many wells each CDR1 appears in // Map cdr1WellCounts = new HashMap<>(); // //add edges, where weights are number of wells the peptides share in common. // //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class // Map wellNCDR3s = null; // Map wellNCDR1s = null; // double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; // countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap, // cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights); // System.out.println("Matrix created"); // // System.out.println("Creating graph"); // SimpleWeightedGraph graph = // new SimpleWeightedGraph<>(DefaultWeightedEdge.class); // // SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); // List cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry // graphGenerator.first(cdr3Vertices); // List cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet()); // graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry // graphGenerator.weights(weights); // graphGenerator.generateGraph(graph); // System.out.println("Graph created"); // // System.out.println("Removing edges outside of weight thresholds"); // filterByOccupancyThreshold(graph, lowThreshold, highThreshold); // System.out.println("Over- and under-weight edges set to 0.0"); // // System.out.println("Finding first maximum weighted matching"); // MaximumWeightBipartiteMatching firstMaxWeightMatching = // new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); // MatchingAlgorithm.Matching graphMatching = firstMaxWeightMatching.getMatching(); // System.out.println("First maximum weighted matching found"); // // // //first processing run // Map firstMatchCDR3toCDR1Map = new HashMap<>(); // Iterator weightIter = graphMatching.iterator(); // DefaultWeightedEdge e; // while(weightIter.hasNext()){ // e = weightIter.next(); //// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { //// continue; //// } // Integer source = graph.getEdgeSource(e); // Integer target = graph.getEdgeTarget(e); // firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); // } // System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size()); // // System.out.println("Removing edges from first maximum weighted matching"); // //zero out the edge weights in the matching // weightIter = graphMatching.iterator(); // while(weightIter.hasNext()){ // graph.removeEdge(weightIter.next()); // } // System.out.println("Edges removed"); // // //Generate a new matching // System.out.println("Finding second maximum weighted matching"); // MaximumWeightBipartiteMatching secondMaxWeightMatching = // new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet()); // graphMatching = secondMaxWeightMatching.getMatching(); // System.out.println("Second maximum weighted matching found"); // // // //second processing run // Map secondMatchCDR3toCDR1Map = new HashMap<>(); // weightIter = graphMatching.iterator(); // while(weightIter.hasNext()){ // e = weightIter.next(); //// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { //// continue; //// } // Integer source = graph.getEdgeSource(e); //// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){ //// continue; //// } // Integer target = graph.getEdgeTarget(e); // secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target)); // } // System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size()); // // System.out.println("Mapping first pass CDR3 alpha/beta pairs"); // //get linked map for first matching attempt // Map firstMatchesMap = new LinkedHashMap<>(); // for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { // if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { // continue; // } // Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); // if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) { // continue; // } // firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); // firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); // } // System.out.println("First pass CDR3 alpha/beta pairs mapped"); // // System.out.println("Mapping second pass CDR3 alpha/beta pairs."); // System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass."); // //Look for matches that simply swapped already-matched alpha and beta CDR3s // Map dualMatchesMap = new LinkedHashMap<>(); // for(Integer alphaCDR3: cdr3AtoBMap.keySet()) { // if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) { // continue; // } // Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3); // if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) { // continue; // } // if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){ // if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){ // dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3)); // dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3)); // } // } // } // System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found."); // // Instant stop = Instant.now(); // //results for first map // System.out.println("RESULTS FOR FIRST PASS MATCHING"); // List> allResults = new ArrayList<>(); // Integer trueCount = 0; // Iterator iter = firstMatchesMap.keySet().iterator(); // // while(iter.hasNext()){ // Boolean proven = false; // List tmp = new ArrayList<>(); // tmp.add(iter.next().toString()); // tmp.add(iter.next().toString()); // tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); // tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); // if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ // if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ // proven = true; // } // } // else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ // if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ // proven = true; // } // } // tmp.add(proven.toString()); // allResults.add(tmp); // if(proven){ // trueCount++; // } // } // // List comments = new ArrayList<>(); // comments.add("Plate size: " + samplePlate.getSize() + " wells"); // comments.add("Previous pairs found: " + previousMatches.size()); // comments.add("CDR1 matches attempted: " + allResults.size()); // double attemptRate = (double) allResults.size() / previousMatches.size(); // comments.add("Matching attempt rate: " + attemptRate); // comments.add("Number of correct matches: " + trueCount); // double correctRate = (double) trueCount / allResults.size(); // comments.add("Correct matching rate: " + correctRate); // NumberFormat nf = NumberFormat.getInstance(Locale.US); // Duration time = Duration.between(start, stop); // time = time.plus(previousTime); // comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); // for(String s: comments){ // System.out.println(s); // } // // // // List headers = new ArrayList<>(); // headers.add("CDR3 alpha"); // headers.add("CDR3 beta"); // headers.add("first matched CDR1"); // headers.add("second matched CDR1"); // headers.add("Correct match?"); // // MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(), // comments, headers, allResults, dualMatchesMap, time); // // //results for dual map // System.out.println("RESULTS FOR SECOND PASS MATCHING"); // allResults = new ArrayList<>(); // trueCount = 0; // iter = dualMatchesMap.keySet().iterator(); // while(iter.hasNext()){ // Boolean proven = false; // List tmp = new ArrayList<>(); // tmp.add(iter.next().toString()); // tmp.add(iter.next().toString()); // tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString()); // tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString()); // if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){ // if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){ // proven = true; // } // } // else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){ // if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){ // proven = true; // } // } // tmp.add(proven.toString()); // allResults.add(tmp); // if(proven){ // trueCount++; // } // } // // comments = new ArrayList<>(); // comments.add("Plate size: " + samplePlate.getSize() + " wells"); // comments.add("Previous pairs found: " + previousMatches.size()); // comments.add("High overlap threshold: " + highThreshold); // comments.add("Low overlap threshold: " + lowThreshold); // comments.add("CDR1 matches attempted: " + allResults.size()); // attemptRate = (double) allResults.size() / previousMatches.size(); // comments.add("Matching attempt rate: " + attemptRate); // comments.add("Number of correct matches: " + trueCount); // correctRate = (double) trueCount / allResults.size(); // comments.add("Correct matching rate: " + correctRate); // comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); // // for(String s: comments){ // System.out.println(s); // } // // System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); // MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers, // allResults, dualMatchesMap, time); // MatchingResult[] output = {firstTest, dualTest}; // return output; // } //Remove sequences based on occupancy private static void filterByOccupancyThresholds(Map wellMap, int low, int high){ List noise = new ArrayList<>(); for(String k: wellMap.keySet()){ if((wellMap.get(k).getOccupancy() > high) || (wellMap.get(k).getOccupancy() < low)){ noise.add(k); } } for(String k: noise) { wellMap.remove(k); } } private static void filterByOccupancyAndReadCount(Map sequences, int readDepth) { List noise = new ArrayList<>(); for(String k : sequences.keySet()){ //the sequence read count should be more than half the occupancy times read depth if the read error rate is low Integer threshold = (sequences.get(k).getOccupancy() * readDepth) / 2; if(sequences.get(k).getReadCount() < threshold) { noise.add(k); } } for(String k : noise) { sequences.remove(k); } } private static int filterWellsByReadCount(Map sequences) { int count = 0; for (String k: sequences.keySet()) { //If a sequence has read count R and appears in W wells, then on average its read count in each //well should be R/W. Delete any wells where the read count is less than R/2W. Integer threshold = sequences.get(k).getReadCount() / (2 * sequences.get(k).getOccupancy()); List noise = new ArrayList<>(); for (Integer well: sequences.get(k).getWells()) { if (sequences.get(k).getReadCount(well) < threshold) { noise.add(well); count++; } } for (Integer well: noise) { sequences.get(k).deleteWell(well); } } return count; } private static Map makeSequenceToSequenceMap(List cells, int keySequenceIndex, int valueSequenceIndex){ Map keySequenceToValueSequenceMap = new HashMap<>(); for (String[] cell : cells) { keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]); } return keySequenceToValueSequenceMap; } private static Map makeVertexToSequenceMap(Map sequences, Integer startValue) { Map map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry Integer index = startValue; for (String k: sequences.keySet()) { map.put(index, k); index++; } return map; } private static Map makeSequenceToVertexMap(Map sequences, Integer startValue) { Map map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry Integer index = startValue; for (String k: sequences.keySet()) { map.put(k, index); index++; } return map; } private static void fillAdjacencyMatrix(double[][] weights, Integer vertexOffsetValue, Map rowSequences, Map columnSequences, Map rowToVertexMap, Map columnToVertexMap) { for (String rowSeq: rowSequences.keySet()) { for (Integer well: rowSequences.get(rowSeq).getWells()) { for (String colSeq: columnSequences.keySet()) { if (columnSequences.get(colSeq).isInWell(well)) { weights[rowToVertexMap.get(rowSeq)][columnToVertexMap.get(colSeq) - vertexOffsetValue] += 1.0; } } } } } private static Map invertVertexMap(Map map) { Map inverse = new HashMap<>(); for (Integer k : map.keySet()) { inverse.put(map.get(k), k); } return inverse; } }