Files
BiGpairSEQ/src/main/java/Simulator.java

736 lines
37 KiB
Java

//import org.jgrapht.Graph;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.AddressableHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.text.NumberFormat;
import java.time.Instant;
import java.time.Duration;
import java.util.*;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;
public class Simulator {
private static int cdr3AlphaIndex = 0;
private static int cdr3BetaIndex = 1;
private static int cdr1AlphaIndex = 2;
private static int cdr1BetaIndex = 3;
//Tested generating the cell sample file itself with a set distribution, but it wasn't working well
//realized I'd have to change matching algos as well, so abandoned it.
// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){
// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s
// //this could be wrong, though. could be less diversity than that.
// //previous sim was only CDR3s
// //Integer numDistinctCells = 1000000;
// //int cdr1Freq = 3;
//
// List<Integer> numbersCDR3 = new ArrayList<>();
// List<Integer> numbersCDR1 = new ArrayList<>();
// Integer numDistCDR3s = 2 * numDistinctCells + 1;
// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
// Collections.shuffle(numbersCDR3);
// Collections.shuffle(numbersCDR1);
//
// //Each cell represented by 4 values
// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
// List<Integer[]> distinctCells = new ArrayList<>();
// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
// Integer tmpCDR3a = numbersCDR3.get(i);
// Integer tmpCDR3b = numbersCDR3.get(i+1);
// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
// distinctCells.add(tmp);
// }
// List<Integer[]>sampleCells = new ArrayList<>();
// int count;
// int lastFactor = 0;
// int factor = 10;
// int index = 0;
// while(numDistinctCells / factor >= 1){
// int i = index;
// while(i < index + factor - lastFactor){
// count = 0;
// while(count < (numDistinctCells/(factor * 10))){
// sampleCells.add(distinctCells.get(i));
// count++;
// }
// i++;
// }
// index = i;
// lastFactor = factor;
// factor *=10;
// }
// System.out.println("Total cells in sample: " + sampleCells.size());
// System.out.println("Distinct cells in sample: " + index);
// return new CellSample(sampleCells, cdr1Freq);
// }
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
//In real T cells, CDR1s have about one third the diversity of CDR3s
//previous sim was only CDR3s
List<Integer> numbersCDR3 = new ArrayList<>();
List<Integer> numbersCDR1 = new ArrayList<>();
Integer numDistCDR3s = 2 * numDistinctCells + 1;
IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
Collections.shuffle(numbersCDR3);
Collections.shuffle(numbersCDR1);
//Each cell represented by 4 values
//two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
Integer tmpCDR3a = numbersCDR3.get(i);
Integer tmpCDR3b = numbersCDR3.get(i+1);
Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
distinctCells.add(tmp);
}
return new CellSample(distinctCells, cdr1Freq);
}
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, boolean verbose){
if(verbose){System.out.println("Cells: " + distinctCells.size());}
Instant start = Instant.now();
int numWells = samplePlate.getSize();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
if(verbose){System.out.println("Making well maps");}
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
int alphaCount = allAlphas.size();
if(verbose){System.out.println("all alphas count: " + alphaCount);}
int betaCount = allBetas.size();
if(verbose){System.out.println("all betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy peptides because they have no signal value.
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
//peptide that's above the threshold.
if(verbose){System.out.println("Removing peptides present in all wells.");}
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");}
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
if(verbose){System.out.println("Peptides removed");}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alpha count: " + pairableAlphaCount);}
int pairableBetaCount = allBetas.size();
if(verbose){System.out.println("Remaining beta count: " + pairableBetaCount);}
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from betas to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
if(verbose){System.out.println("Vertex maps made");}
if(verbose){System.out.println("Creating adjacency matrix");}
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//the adjacency matrix to be used by the graph generator
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("matrix created");}
/**
* This is where the bipartite graph is created
*/
if(verbose){System.out.println("creating graph");}
//the graph object
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
//the list of beta vertices
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts);
if(verbose){System.out.println("Edges with weights much less than occupancy values set to 0.0");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices of massively different occupancy");}
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts);
if(verbose){System.out.println("Edges between vertices of massively different occupancy set to 0.0");}
/**
* This is where the maximum weighted matching is found
*/
// if(verbose){System.out.println("Finding maximum weighted matching");}
// MaximumWeightBipartiteMatching maxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
// if(verbose){System.out.println("Matching completed");}
// Instant stop = Instant.now();
//trying with jheaps now
if(verbose){System.out.println("Finding maximum weighted matching");}
//Attempting to use addressable heap to improve performance
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet(),
(i) -> new PairingHeap(Comparator.naturalOrder()));
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
if(verbose){System.out.println("Matching completed");}
Instant stop = Instant.now();
//Header for CSV file
List<String> header = new ArrayList<>();
header.add("Alpha");
header.add("Alpha well count");
header.add("Beta");
header.add("Beta well count");
header.add("Overlap well count");
header.add("Matched correctly?");
header.add("P-value");
//Results for csv file
List<List<String>> allResults = new ArrayList<>();
NumberFormat nf = NumberFormat.getInstance(Locale.US);
MathContext mc = new MathContext(3);
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
int trueCount = 0;
int falseCount = 0;
boolean check = false;
Map<Integer, Integer> matchMap = new HashMap<>();
while(weightIter.hasNext()) {
e = weightIter.next();
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
//The match map is all matches found, not just true matches!
matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target));
check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
if(check) {
trueCount++;
}
else {
falseCount++;
}
List<String> result = new ArrayList<>();
result.add(plateVtoAMap.get(source).toString());
//alpha well count
result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
result.add(plateVtoBMap.get(target).toString());
//beta well count
result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
//overlap count
result.add(Double.toString(graph.getEdgeWeight(e)));
result.add(Boolean.toString(check));
double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)),
betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e));
BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
result.add(pValueTrunc.toString());
allResults.add(result);
}
//Metadate comments for CSV file
int min = alphaCount > betaCount ? betaCount : alphaCount;
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
List<String> comments = new ArrayList<>();
comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount);
comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold);
comments.add("Pairing attempt rate: " + attemptRateTrunc);
comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount);
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
Duration time = Duration.between(start, stop);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(verbose){
for(String s: comments){
System.out.println(s);
}
}
return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
}
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, MatchingResult priorResult){
Instant start = Instant.now();
Duration previousTime = priorResult.getTime();
Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
int numWells = samplePlate.getSize();
int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
System.out.println("Making previous match maps");
Map<Integer, Integer> cdr3AtoBMap = previousMatches;
Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
System.out.println("Previous match maps made");
System.out.println("Making cell maps");
Map<Integer, Integer> alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
Map<Integer, Integer> betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
System.out.println("Cell maps made");
System.out.println("Making well maps");
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices);
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices);
int CDR3Count = allCDR3s.size();
System.out.println("all CDR3s count: " + CDR3Count);
int CDR1Count = allCDR1s.size();
System.out.println("all CDR1s count: " + CDR1Count);
System.out.println("Well maps made");
System.out.println("Removing unpaired CDR3s from well maps");
List<Integer> unpairedCDR3s = new ArrayList<>();
for(Integer i: allCDR3s.keySet()){
if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
unpairedCDR3s.add(i);
}
}
for(Integer i: unpairedCDR3s){
allCDR3s.remove(i);
}
System.out.println("Unpaired CDR3s removed.");
System.out.println("Remaining CDR3 count: " + allCDR3s.size());
System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
System.out.println("CDR1s removed.");
System.out.println("Remaining CDR1 count: " + allCDR1s.size());
System.out.println("Making vertex maps");
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are CDR3s
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue);
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
vertexStartValue += plateVtoCDR3Map.size();
//keys are sequential integers vertices, values are CDR1s
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue);
//keys are CDR3s, values are sequential integer vertices from previous map
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
//keys are CDR1s, values are sequential integer vertices from previous map
Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
System.out.println("Vertex maps made");
System.out.println("Creating adjacency matrix");
//Count how many wells each CDR3 appears in
Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
//count how many wells each CDR1 appears in
Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNCDR3s = null;
Map<Integer, Integer> wellNCDR1s = null;
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
System.out.println("Matrix created");
System.out.println("Creating graph");
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> cdr3Vertices = new ArrayList<>();
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(cdr3Vertices);
List<Integer> cdr1Vertices = new ArrayList<>();
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
System.out.println("Graph created");
System.out.println("Removing edges outside of weight thresholds");
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
System.out.println("Over- and under-weight edges set to 0.0");
System.out.println("Finding first maximum weighted matching");
MaximumWeightBipartiteMatching firstMaxWeightMatching =
new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
System.out.println("First maximum weighted matching found");
//first processing run
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
while(weightIter.hasNext()){
e = weightIter.next();
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
// continue;
// }
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
}
System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
System.out.println("Removing edges from first maximum weighted matching");
//zero out the edge weights in the matching
weightIter = graphMatching.iterator();
while(weightIter.hasNext()){
graph.removeEdge(weightIter.next());
}
System.out.println("Edges removed");
//Generate a new matching
System.out.println("Finding second maximum weighted matching");
MaximumWeightBipartiteMatching secondMaxWeightMatching =
new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
graphMatching = secondMaxWeightMatching.getMatching();
System.out.println("Second maximum weighted matching found");
//second processing run
Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
weightIter = graphMatching.iterator();
while(weightIter.hasNext()){
e = weightIter.next();
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
// continue;
// }
Integer source = graph.getEdgeSource(e);
// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
// continue;
// }
Integer target = graph.getEdgeTarget(e);
secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
}
System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
System.out.println("Mapping first pass CDR3 alpha/beta pairs");
//get linked map for first matching attempt
Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
continue;
}
Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
continue;
}
firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
}
System.out.println("First pass CDR3 alpha/beta pairs mapped");
System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
//Look for matches that simply swapped already-matched alpha and beta CDR3s
Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
continue;
}
Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
continue;
}
if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
}
}
}
System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
Instant stop = Instant.now();
//results for first map
System.out.println("RESULTS FOR FIRST PASS MATCHING");
List<List<String>> allResults = new ArrayList<>();
Integer trueCount = 0;
Iterator iter = firstMatchesMap.keySet().iterator();
while(iter.hasNext()){
Boolean proven = false;
List<String> tmp = new ArrayList<>();
tmp.add(iter.next().toString());
tmp.add(iter.next().toString());
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
proven = true;
}
}
else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
proven = true;
}
}
tmp.add(proven.toString());
allResults.add(tmp);
if(proven){
trueCount++;
}
}
List<String> comments = new ArrayList<>();
comments.add("Plate size: " + samplePlate.getSize() + " wells");
comments.add("Previous pairs found: " + previousMatches.size());
comments.add("CDR1 matches attempted: " + allResults.size());
double attemptRate = (double) allResults.size() / previousMatches.size();
comments.add("Matching attempt rate: " + attemptRate);
comments.add("Number of correct matches: " + trueCount);
double correctRate = (double) trueCount / allResults.size();
comments.add("Correct matching rate: " + correctRate);
NumberFormat nf = NumberFormat.getInstance(Locale.US);
Duration time = Duration.between(start, stop);
time.plus(previousTime);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
for(String s: comments){
System.out.println(s);
}
List<String> headers = new ArrayList<>();
headers.add("CDR3 alpha");
headers.add("CDR3 beta");
headers.add("first matched CDR1");
headers.add("second matched CDR1");
headers.add("Correct match?");
MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
comments, headers, allResults, dualMatchesMap, time);
//results for dual map
System.out.println("RESULTS FOR SECOND PASS MATCHING");
allResults = new ArrayList<>();
trueCount = 0;
iter = dualMatchesMap.keySet().iterator();
while(iter.hasNext()){
Boolean proven = false;
List<String> tmp = new ArrayList<>();
tmp.add(iter.next().toString());
tmp.add(iter.next().toString());
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
proven = true;
}
}
else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
proven = true;
}
}
tmp.add(proven.toString());
allResults.add(tmp);
if(proven){
trueCount++;
}
}
comments = new ArrayList<>();
comments.add("Plate size: " + samplePlate.getSize() + " wells");
comments.add("Previous pairs found: " + previousMatches.size());
comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold);
comments.add("CDR1 matches attempted: " + allResults.size());
attemptRate = (double) allResults.size() / previousMatches.size();
comments.add("Matching attempt rate: " + attemptRate);
comments.add("Number of correct matches: " + trueCount);
correctRate = (double) trueCount / allResults.size();
comments.add("Correct matching rate: " + correctRate);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
for(String s: comments){
System.out.println(s);
}
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
allResults, dualMatchesMap, time);
MatchingResult[] output = {firstTest, dualTest};
return output;
}
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
private static void countPeptidesAndFillMatrix(Plate samplePlate,
Map<Integer,Integer> allRowPeptides,
Map<Integer,Integer> allColumnPeptides,
Map<Integer,Integer> rowPeptideToVertexMap,
Map<Integer,Integer> columnPeptideToVertexMap,
int[] rowPeptideIndices,
int[] colPeptideIndices,
Map<Integer, Integer> rowPeptideCounts,
Map<Integer,Integer> columnPeptideCounts,
double[][] weights){
Map<Integer, Integer> wellNRowPeptides = null;
Map<Integer, Integer> wellNColumnPeptides = null;
int vertexStartValue = rowPeptideToVertexMap.size();
int numWells = samplePlate.getSize();
for (int n = 0; n < numWells; n++) {
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices);
for (Integer a : wellNRowPeptides.keySet()) {
if(allRowPeptides.containsKey(a)){
rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
}
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices);
for (Integer b : wellNColumnPeptides.keySet()) {
if(allColumnPeptides.containsKey(b)){
columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
}
for (Integer i : wellNRowPeptides.keySet()) {
if(allRowPeptides.containsKey(i)){
for (Integer j : wellNColumnPeptides.keySet()) {
if(allColumnPeptides.containsKey(j)){
weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0;
}
}
}
}
}
}
private static void filterByOccupancyThreshold(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
int low, int high) {
for(DefaultWeightedEdge e: graph.edgeSet()){
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
graph.setEdgeWeight(e, 0.0);
}
}
}
private static void filterByOccupancyThreshold(Map<Integer, Integer> wellMap, int low, int high){
List<Integer> noise = new ArrayList<>();
for(Integer k: wellMap.keySet()){
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
noise.add(k);
}
}
for(Integer k: noise) {
wellMap.remove(k);
}
}
//Remove edges for pairs with large occupancy discrepancy
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(graph.getEdgeSource(e));
Integer betaOcc = betaWellCounts.get(graph.getEdgeTarget(e));
//Adjust this to something cleverer later
if (Math.abs(alphaOcc - betaOcc) >= 20) {
graph.setEdgeWeight(e, 0.0);
}
}
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(graph.getEdgeSource(e));
Integer betaOcc = betaWellCounts.get(graph.getEdgeTarget(e));
//Adjust this to something cleverer later
Integer min = alphaOcc > betaOcc ? betaOcc : alphaOcc;
if (min - graph.getEdgeWeight(e) >= 15) {
graph.setEdgeWeight(e, 0.0);
}
}
}
private static Map<Integer, Integer> makePeptideToPeptideMap(List<Integer[]> cells, int keyPeptideIndex,
int valuePeptideIndex){
Map<Integer, Integer> keyPeptideToValuePeptideMap = new HashMap<>();
for (Integer[] cell : cells) {
keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]);
}
return keyPeptideToValuePeptideMap;
}
private static Map<Integer, Integer> makeVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
Integer index = startValue;
for (Integer k: peptides.keySet()) {
map.put(index, k);
index++;
}
return map;
}
private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
Map<Integer, Integer> inverse = new HashMap<>();
for (Integer k : map.keySet()) {
inverse.put(map.get(k), k);
}
return inverse;
}
}