// BiGpairSEQ/src/main/java/Simulator.java
// (689 lines, 36 KiB, Java)
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.tree.FibonacciHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
import java.math.MathContext;
import java.text.NumberFormat;
import java.time.Instant;
import java.time.Duration;
import java.util.*;
import java.util.stream.IntStream;
import static java.lang.Float.*;
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
public class Simulator implements GraphModificationFunctions {
//Column indices into each cell's Integer[] record (from CellSample.getCells()):
//the same indices are passed to Plate.assayWellsSequenceS to select which
//sequence type a well assay reports.
private static final int cdr3AlphaIndex = 0;
private static final int cdr3BetaIndex = 1;
//CDR1 indices are only used by the currently commented-out matchCDR1s code below
private static final int cdr1AlphaIndex = 2;
private static final int cdr1BetaIndex = 3;
//Make the graph needed for matching CDR3s
//Build the weighted bipartite CDR3 pairing graph for one sample plate.
//First partition: distinct alpha sequences; second partition: distinct beta
//sequences. An edge's weight is the number of wells its two endpoints share.
//Sequences occupying fewer than 2 wells, or every well, are filtered out first
//since they carry no usable co-occurrence signal.
//Returns the graph bundled with all vertex<->sequence lookup maps, per-sequence
//well counts, and the time spent building, wrapped in a GraphWithMapData.
public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) {
    Instant start = Instant.now();
    List<Integer[]> distinctCells = cellSample.getCells();
    int[] alphaIndex = {cdr3AlphaIndex};
    int[] betaIndex = {cdr3BetaIndex};
    int numWells = samplePlate.getSize();
    if(verbose){System.out.println("Making cell maps");}
    //HashMap keyed to Alphas, values Betas: the ground-truth pairing used later for scoring
    Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr3BetaIndex);
    if(verbose){System.out.println("Cell maps made");}
    if(verbose){System.out.println("Making well maps");}
    Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndex);
    Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndex);
    int alphaCount = allAlphas.size();
    if(verbose){System.out.println("All alphas count: " + alphaCount);}
    int betaCount = allBetas.size();
    if(verbose){System.out.println("All betas count: " + betaCount);}
    if(verbose){System.out.println("Well maps made");}
    if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");}
    filterByOccupancyThresholds(allAlphas, 2, numWells - 1);
    filterByOccupancyThresholds(allBetas, 2, numWells - 1);
    if(verbose){System.out.println("Sequences removed");}
    int pairableAlphaCount = allAlphas.size();
    if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
    int pairableBetaCount = allBetas.size();
    if(verbose){System.out.println("Remaining betas count: " + pairableBetaCount);}
    if(verbose){System.out.println("Making vertex maps");}
    //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
    //distinct numbers associated with them. Since a 2D array is used, that means
    //distinct indices between the rows and columns. vertexStartValue tracks where the
    //numbering switches from rows to columns, so every vertex gets a unique number, and
    //the vertexStartValue is subtracted from betas to use their vertex labels as array indices.
    int vertexStartValue = 0;
    //keys are sequential integer vertices, values are alphas
    Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue);
    //new start value for vertex to beta map should be one more than final vertex value in alpha map
    vertexStartValue += plateVtoAMap.size();
    //keys are sequential integer vertices, values are betas
    Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue);
    //keys are alphas, values are sequential integer vertices from previous map
    Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
    //keys are betas, values are sequential integer vertices from previous map
    Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
    if(verbose){System.out.println("Vertex maps made");}
    //make adjacency matrix for bipartite graph generator
    //(technically this is only 1/4 of an adjacency matrix, but that's all you need
    //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
    if(verbose){System.out.println("Creating adjacency matrix");}
    //Count how many wells each alpha appears in
    Map<Integer, Integer> alphaWellCounts = new HashMap<>();
    //count how many wells each beta appears in
    Map<Integer, Integer> betaWellCounts = new HashMap<>();
    //the adjacency matrix to be used by the graph generator
    double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
    countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
            plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
    if(verbose){System.out.println("Matrix created");}
    //create bipartite graph
    if(verbose){System.out.println("Creating graph");}
    //the graph object
    SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
            new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
    //the graph generator, parameterized to match the graph (the raw type was an erased view)
    SimpleWeightedBipartiteGraphMatrixGenerator<Integer, DefaultWeightedEdge> graphGenerator =
            new SimpleWeightedBipartiteGraphMatrixGenerator<>();
    //the list of alpha vertices
    List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
    graphGenerator.first(alphaVertices);
    //the list of beta vertices
    List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
    graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
    //use adjacency matrix of weights created previously
    graphGenerator.weights(weights);
    graphGenerator.generateGraph(graph);
    if(verbose){System.out.println("Graph created");}
    Instant stop = Instant.now();
    Duration time = Duration.between(start, stop);
    //create GraphWithMapData object
    GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
            distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
            plateBtoVMap, alphaWellCounts, betaWellCounts, time);
    //Set source file name in graph to name of sample plate
    output.setSourceFilename(samplePlate.getFilename());
    //return GraphWithMapData object
    return output;
}
//match CDR3s.
//Find a maximum-weight matching of CDR3 alphas to betas on a previously built
//graph and score it against the simulated ground truth (distCellsMapAlphaKey).
//Edges are pre-filtered three ways: absolute overlap thresholds, minimum overlap
//as a percent of vertex occupancy, and maximum occupancy difference between
//endpoints. When graph caching is enabled, the removed edges are restored
//afterwards so the cached graph can be reused with different parameters.
//Returns a MatchingResult carrying the CSV header/rows, run metadata, the full
//alpha->beta match map, and the cumulative simulation time (including graph build).
public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
                                        Integer highThreshold, Integer maxOccupancyDifference,
                                        Integer minOverlapPercent, boolean verbose) {
    Instant start = Instant.now();
    List<Integer[]> removedEdges = new ArrayList<>();
    boolean saveEdges = BiGpairSEQ.cacheGraph();
    int numWells = data.getNumWells();
    Integer alphaCount = data.getAlphaCount();
    Integer betaCount = data.getBetaCount();
    Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
    Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
    Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
    Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
    Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
    SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
    //remove edges with weights outside given overlap thresholds, add those to removed edge list
    if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
    removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges));
    if(verbose){System.out.println("Over- and under-weight edges removed");}
    //remove edges between vertices with too small an overlap size, add those to removed edge list
    if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
            " percent of vertex occupancy value.");}
    removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts,
            plateVtoAMap, plateVtoBMap, minOverlapPercent, saveEdges));
    if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
    //Filter by relative occupancy
    if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
            + maxOccupancyDifference);}
    removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts,
            plateVtoAMap, plateVtoBMap, maxOccupancyDifference, saveEdges));
    if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
            "removed");}
    //Find Maximum Weighted Matching
    if(verbose){System.out.println("Finding maximum weighted matching");}
    //Parameterized algorithm type: the graph's vertices are Integers, so the
    //matching below is Matching<Integer, ...> (the previous Matching<String, ...>
    //declaration only compiled because the algorithm object was a raw type).
    MaximumWeightBipartiteMatching<Integer, DefaultWeightedEdge> maxWeightMatching;
    //Use correct heap type for priority queue
    String heapType = BiGpairSEQ.getPriorityQueueHeapType();
    switch (heapType) {
        case "PAIRING" -> {
            maxWeightMatching = new MaximumWeightBipartiteMatching<>(graph,
                    plateVtoAMap.keySet(),
                    plateVtoBMap.keySet(),
                    i -> new PairingHeap<>(Comparator.naturalOrder()));
        }
        case "FIBONACCI" -> {
            maxWeightMatching = new MaximumWeightBipartiteMatching<>(graph,
                    plateVtoAMap.keySet(),
                    plateVtoBMap.keySet(),
                    i -> new FibonacciHeap<>(Comparator.naturalOrder()));
        }
        default -> {
            maxWeightMatching = new MaximumWeightBipartiteMatching<>(graph,
                    plateVtoAMap.keySet(),
                    plateVtoBMap.keySet());
        }
    }
    //get the matching
    MatchingAlgorithm.Matching<Integer, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
    if(verbose){System.out.println("Matching completed");}
    Instant stop = Instant.now();
    //Header for CSV file
    List<String> header = new ArrayList<>();
    header.add("Alpha");
    header.add("Alpha well count");
    header.add("Beta");
    header.add("Beta well count");
    header.add("Overlap well count");
    header.add("Matched correctly?");
    header.add("P-value");
    //Results for csv file
    List<List<String>> allResults = new ArrayList<>();
    NumberFormat nf = NumberFormat.getInstance(Locale.US);
    MathContext mc = new MathContext(3);
    Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
    DefaultWeightedEdge e;
    int trueCount = 0;
    int falseCount = 0;
    boolean check;
    Map<Integer, Integer> matchMap = new HashMap<>();
    while(weightIter.hasNext()) {
        e = weightIter.next();
        Integer source = graph.getEdgeSource(e);
        Integer target = graph.getEdgeTarget(e);
        //The match map is all matches found, not just true matches!
        matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target));
        //a match is correct when the matched beta is the ground-truth partner of the alpha
        check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
        if(check) {
            trueCount++;
        }
        else {
            falseCount++;
        }
        List<String> result = new ArrayList<>();
        result.add(plateVtoAMap.get(source).toString());
        //alpha well count
        result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
        result.add(plateVtoBMap.get(target).toString());
        //beta well count
        result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
        //overlap count
        result.add(Double.toString(graph.getEdgeWeight(e)));
        result.add(Boolean.toString(check));
        double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)),
                betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e));
        //truncate to 3 significant digits for the CSV
        BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
        result.add(pValueTrunc.toString());
        allResults.add(result);
    }
    //Metadata comments for CSV file
    String algoType = "LEDA book with heap: " + heapType;
    int min = Math.min(alphaCount, betaCount);
    //rate of attempted matching
    double attemptRate = (double) (trueCount + falseCount) / min;
    BigDecimal attemptRateTrunc;
    if(Double.isFinite(attemptRate)) {
        attemptRateTrunc = new BigDecimal(attemptRate, mc);
    }
    else {
        //min == 0 makes the rate NaN/Infinity, which the BigDecimal(double) constructor
        //rejects; use the same -1 sentinel as the pairing error rate below
        attemptRateTrunc = new BigDecimal(-1, mc);
    }
    //rate of pairing error
    double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
    BigDecimal pairingErrorRateTrunc;
    if(Double.isFinite(pairingErrorRate)) {
        pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
    }
    else{
        //no matches at all: 0/0 is NaN, which BigDecimal cannot represent
        pairingErrorRateTrunc = new BigDecimal(-1, mc);
    }
    //get list of well populations
    Integer[] wellPopulations = data.getWellPopulations();
    //make string out of populations list
    StringBuilder populationsStringBuilder = new StringBuilder();
    populationsStringBuilder.append(wellPopulations[0].toString());
    for(int i = 1; i < wellPopulations.length; i++){
        populationsStringBuilder.append(", ");
        populationsStringBuilder.append(wellPopulations[i].toString());
    }
    String wellPopulationsString = populationsStringBuilder.toString();
    //total simulation time includes the time spent building the graph
    Duration time = Duration.between(start, stop);
    time = time.plus(data.getTime());
    Map<String, String> metadata = new LinkedHashMap<>();
    metadata.put("sample plate filename", data.getSourceFilename());
    metadata.put("graph filename", dataFilename);
    metadata.put("algorithm type", algoType);
    metadata.put("well populations", wellPopulationsString);
    metadata.put("total alphas found", alphaCount.toString());
    metadata.put("total betas found", betaCount.toString());
    metadata.put("high overlap threshold", highThreshold.toString());
    metadata.put("low overlap threshold", lowThreshold.toString());
    metadata.put("minimum overlap percent", minOverlapPercent.toString());
    metadata.put("maximum occupancy difference", maxOccupancyDifference.toString());
    metadata.put("pairing attempt rate", attemptRateTrunc.toString());
    metadata.put("correct pairing count", Integer.toString(trueCount));
    metadata.put("incorrect pairing count", Integer.toString(falseCount));
    metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
    metadata.put("simulation time (seconds)", nf.format(time.toSeconds()));
    //create MatchingResult object
    MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
    if(verbose){
        for(String s: output.getComments()){
            System.out.println(s);
        }
    }
    if(saveEdges) {
        //put the removed edges back on the graph
        System.out.println("Restoring removed edges to graph.");
        GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
    }
    //return MatchingResult object
    return output;
}
//Commented out CDR1 matching until it's time to re-implement it
// //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
// public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
// Plate samplePlate, Integer lowThreshold,
// Integer highThreshold, MatchingResult priorResult){
// Instant start = Instant.now();
// Duration previousTime = priorResult.getTime();
// Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
// int numWells = samplePlate.getSize();
// int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
// int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
//
// System.out.println("Making previous match maps");
// Map<Integer, Integer> cdr3AtoBMap = previousMatches;
// Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
// System.out.println("Previous match maps made");
//
// System.out.println("Making cell maps");
// Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
// Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
// System.out.println("Cell maps made");
//
// System.out.println("Making well maps");
// Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
// Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
// int CDR3Count = allCDR3s.size();
// System.out.println("all CDR3s count: " + CDR3Count);
// int CDR1Count = allCDR1s.size();
// System.out.println("all CDR1s count: " + CDR1Count);
// System.out.println("Well maps made");
//
// System.out.println("Removing unpaired CDR3s from well maps");
// List<Integer> unpairedCDR3s = new ArrayList<>();
// for(Integer i: allCDR3s.keySet()){
// if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
// unpairedCDR3s.add(i);
// }
// }
// for(Integer i: unpairedCDR3s){
// allCDR3s.remove(i);
// }
// System.out.println("Unpaired CDR3s removed.");
// System.out.println("Remaining CDR3 count: " + allCDR3s.size());
//
// System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
// filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
// System.out.println("CDR1s removed.");
// System.out.println("Remaining CDR1 count: " + allCDR1s.size());
//
// System.out.println("Making vertex maps");
//
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// // distinct numbers associated with them. Since I'm using a 2D array, that means
// // distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// // from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
// Integer vertexStartValue = 0;
// //keys are sequential integer vertices, values are CDR3s
// Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
// //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
// vertexStartValue += plateVtoCDR3Map.size();
// //keys are sequential integers vertices, values are CDR1s
// Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
// //keys are CDR3s, values are sequential integer vertices from previous map
// Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
// //keys are CDR1s, values are sequential integer vertices from previous map
// Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
// System.out.println("Vertex maps made");
//
// System.out.println("Creating adjacency matrix");
// //Count how many wells each CDR3 appears in
// Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
// //count how many wells each CDR1 appears in
// Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
// //add edges, where weights are number of wells the peptides share in common.
// //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
// Map<Integer, Integer> wellNCDR3s = null;
// Map<Integer, Integer> wellNCDR1s = null;
// double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
// countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
// cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
// System.out.println("Matrix created");
//
// System.out.println("Creating graph");
// SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
// List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
// graphGenerator.first(cdr3Vertices);
// List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
// graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
// graphGenerator.weights(weights);
// graphGenerator.generateGraph(graph);
// System.out.println("Graph created");
//
// System.out.println("Removing edges outside of weight thresholds");
// filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
// System.out.println("Over- and under-weight edges set to 0.0");
//
// System.out.println("Finding first maximum weighted matching");
// MaximumWeightBipartiteMatching firstMaxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
// System.out.println("First maximum weighted matching found");
//
//
// //first processing run
// Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
// Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
// DefaultWeightedEdge e;
// while(weightIter.hasNext()){
// e = weightIter.next();
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
//// continue;
//// }
// Integer source = graph.getEdgeSource(e);
// Integer target = graph.getEdgeTarget(e);
// firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
// }
// System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
//
// System.out.println("Removing edges from first maximum weighted matching");
// //zero out the edge weights in the matching
// weightIter = graphMatching.iterator();
// while(weightIter.hasNext()){
// graph.removeEdge(weightIter.next());
// }
// System.out.println("Edges removed");
//
// //Generate a new matching
// System.out.println("Finding second maximum weighted matching");
// MaximumWeightBipartiteMatching secondMaxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
// graphMatching = secondMaxWeightMatching.getMatching();
// System.out.println("Second maximum weighted matching found");
//
//
// //second processing run
// Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
// weightIter = graphMatching.iterator();
// while(weightIter.hasNext()){
// e = weightIter.next();
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
//// continue;
//// }
// Integer source = graph.getEdgeSource(e);
//// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
//// continue;
//// }
// Integer target = graph.getEdgeTarget(e);
// secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
// }
// System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
//
// System.out.println("Mapping first pass CDR3 alpha/beta pairs");
// //get linked map for first matching attempt
// Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
// continue;
// }
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
// continue;
// }
// firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
// firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
// }
// System.out.println("First pass CDR3 alpha/beta pairs mapped");
//
// System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
// System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
// //Look for matches that simply swapped already-matched alpha and beta CDR3s
// Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
// continue;
// }
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
// continue;
// }
// if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
// if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
// dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
// dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
// }
// }
// }
// System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
//
// Instant stop = Instant.now();
// //results for first map
// System.out.println("RESULTS FOR FIRST PASS MATCHING");
// List<List<String>> allResults = new ArrayList<>();
// Integer trueCount = 0;
// Iterator iter = firstMatchesMap.keySet().iterator();
//
// while(iter.hasNext()){
// Boolean proven = false;
// List<String> tmp = new ArrayList<>();
// tmp.add(iter.next().toString());
// tmp.add(iter.next().toString());
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
// proven = true;
// }
// }
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
// proven = true;
// }
// }
// tmp.add(proven.toString());
// allResults.add(tmp);
// if(proven){
// trueCount++;
// }
// }
//
// List<String> comments = new ArrayList<>();
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
// comments.add("Previous pairs found: " + previousMatches.size());
// comments.add("CDR1 matches attempted: " + allResults.size());
// double attemptRate = (double) allResults.size() / previousMatches.size();
// comments.add("Matching attempt rate: " + attemptRate);
// comments.add("Number of correct matches: " + trueCount);
// double correctRate = (double) trueCount / allResults.size();
// comments.add("Correct matching rate: " + correctRate);
// NumberFormat nf = NumberFormat.getInstance(Locale.US);
// Duration time = Duration.between(start, stop);
// time = time.plus(previousTime);
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
// for(String s: comments){
// System.out.println(s);
// }
//
//
//
// List<String> headers = new ArrayList<>();
// headers.add("CDR3 alpha");
// headers.add("CDR3 beta");
// headers.add("first matched CDR1");
// headers.add("second matched CDR1");
// headers.add("Correct match?");
//
// MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
// comments, headers, allResults, dualMatchesMap, time);
//
// //results for dual map
// System.out.println("RESULTS FOR SECOND PASS MATCHING");
// allResults = new ArrayList<>();
// trueCount = 0;
// iter = dualMatchesMap.keySet().iterator();
// while(iter.hasNext()){
// Boolean proven = false;
// List<String> tmp = new ArrayList<>();
// tmp.add(iter.next().toString());
// tmp.add(iter.next().toString());
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
// proven = true;
// }
// }
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
// proven = true;
// }
// }
// tmp.add(proven.toString());
// allResults.add(tmp);
// if(proven){
// trueCount++;
// }
// }
//
// comments = new ArrayList<>();
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
// comments.add("Previous pairs found: " + previousMatches.size());
// comments.add("High overlap threshold: " + highThreshold);
// comments.add("Low overlap threshold: " + lowThreshold);
// comments.add("CDR1 matches attempted: " + allResults.size());
// attemptRate = (double) allResults.size() / previousMatches.size();
// comments.add("Matching attempt rate: " + attemptRate);
// comments.add("Number of correct matches: " + trueCount);
// correctRate = (double) trueCount / allResults.size();
// comments.add("Correct matching rate: " + correctRate);
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
//
// for(String s: comments){
// System.out.println(s);
// }
//
// System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
// MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
// allResults, dualMatchesMap, time);
// MatchingResult[] output = {firstTest, dualTest};
// return output;
// }
//Remove sequences based on occupancy
//Remove sequences whose well-occupancy count falls outside [low, high] (inclusive).
//Mutates the given map in place; keys whose values are below low or above high are dropped.
public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
    //values() is a live view of the map: removing a value removes its whole entry,
    //replacing the original collect-keys-then-remove two-pass loop
    wellMap.values().removeIf(count -> count > high || count < low);
}
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
//Counts the well occupancy of the row peptides and column peptides into the given
//count maps, and fills co-occurrence weights in the given 2D array.
//For every well: each surviving row/column sequence present gets its count
//incremented, and each (row, column) pair present together adds 1.0 to its cell.
//Sequences absent from allRowSequences/allColumnSequences (i.e. filtered out
//earlier) are ignored. rowSequenceCounts, columnSequenceCounts, and weights are
//output parameters, mutated in place.
private static void countSequencesAndFillMatrix(Plate samplePlate,
                                                Map<Integer,Integer> allRowSequences,
                                                Map<Integer,Integer> allColumnSequences,
                                                Map<Integer,Integer> rowSequenceToVertexMap,
                                                Map<Integer,Integer> columnSequenceToVertexMap,
                                                int[] rowSequenceIndices,
                                                int[] colSequenceIndices,
                                                Map<Integer, Integer> rowSequenceCounts,
                                                Map<Integer,Integer> columnSequenceCounts,
                                                double[][] weights){
    //column vertex labels start after all row labels, so subtract this offset
    //to convert a column vertex label into a matrix column index
    int vertexStartValue = rowSequenceToVertexMap.size();
    int numWells = samplePlate.getSize();
    for (int n = 0; n < numWells; n++) {
        //loop-scoped (were hoisted null-initialized locals): the assays for well n
        Map<Integer, Integer> wellNRowSequences = samplePlate.assayWellsSequenceS(n, rowSequenceIndices);
        for (Integer a : wellNRowSequences.keySet()) {
            if(allRowSequences.containsKey(a)){
                rowSequenceCounts.merge(a, 1, Integer::sum);
            }
        }
        Map<Integer, Integer> wellNColumnSequences = samplePlate.assayWellsSequenceS(n, colSequenceIndices);
        for (Integer b : wellNColumnSequences.keySet()) {
            if(allColumnSequences.containsKey(b)){
                columnSequenceCounts.merge(b, 1, Integer::sum);
            }
        }
        //every surviving row/column pair sharing this well contributes 1.0 of edge weight
        for (Integer i : wellNRowSequences.keySet()) {
            if(allRowSequences.containsKey(i)){
                for (Integer j : wellNColumnSequences.keySet()) {
                    if(allColumnSequences.containsKey(j)){
                        weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0;
                    }
                }
            }
        }
    }
}
//Maps one sequence column of each cell record to another: for every cell, the
//entry at keySequenceIndex becomes a key and the entry at valueSequenceIndex its
//value. Later cells overwrite earlier ones on duplicate keys.
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
                                                               int valueSequenceIndex){
    Map<Integer, Integer> mapping = new HashMap<>();
    for (int c = 0; c < cells.size(); c++) {
        Integer[] record = cells.get(c);
        mapping.put(record[keySequenceIndex], record[valueSequenceIndex]);
    }
    return mapping;
}
//Assigns sequential integer vertex labels, starting at startValue, to each
//sequence key of the input map. Returns a LinkedHashMap (vertex -> sequence)
//so iteration order matches label order, which the graph generator relies on.
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
    Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
    int index = startValue; //next vertex label to assign; incremented for every sequence
    for (Integer k: sequences.keySet()) {
        map.put(index, k);
        index++;
    }
    return map;
}
//Returns a HashMap mapping each value of the input map back to its key.
//Assumes the input's values are unique (true for the bijective vertex maps built
//here); if a value repeats, the last key iterated wins.
private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
    Map<Integer, Integer> inverse = new HashMap<>();
    //iterate entries directly instead of keySet() + get(k) per key
    for (Map.Entry<Integer, Integer> entry : map.entrySet()) {
        inverse.put(entry.getValue(), entry.getKey());
    }
    return inverse;
}
}