714 lines
37 KiB
Java
714 lines
37 KiB
Java
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
|
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
|
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
|
import org.jgrapht.graph.DefaultWeightedEdge;
|
|
import org.jgrapht.graph.SimpleWeightedGraph;
|
|
import org.jheaps.tree.FibonacciHeap;
|
|
import org.jheaps.tree.PairingHeap;
|
|
|
|
import java.math.BigDecimal;
|
|
import java.math.MathContext;
|
|
import java.text.NumberFormat;
|
|
import java.time.Instant;
|
|
import java.time.Duration;
|
|
import java.util.*;
|
|
import java.util.stream.IntStream;
|
|
|
|
import static java.lang.Float.*;
|
|
|
|
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
|
|
public class Simulator implements GraphModificationFunctions {
|
|
|
|
|
|
//Make the graph needed for matching sequences.
|
|
//sourceVertexIndices and targetVertexIndices are indices within the cell to use as for the two sets of vertices
|
|
//in the bipartite graph. "Source" and "target" are JGraphT terms for the two vertices an edge touches,
|
|
//even if not directed.
|
|
public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) {
|
|
Instant start = Instant.now();
|
|
List<Integer[]> distinctCells = cellSample.getCells();
|
|
int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()};
|
|
int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()};
|
|
|
|
int numWells = samplePlate.getSize();
|
|
|
|
if(verbose){System.out.println("Making cell maps");}
|
|
//HashMap keyed to Alphas, values Betas
|
|
Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
|
|
if(verbose){System.out.println("Cell maps made");}
|
|
|
|
if(verbose){System.out.println("Making well maps");}
|
|
|
|
Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndices);
|
|
Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndices);
|
|
int alphaCount = allAlphas.size();
|
|
if(verbose){System.out.println("All alphas count: " + alphaCount);}
|
|
int betaCount = allBetas.size();
|
|
if(verbose){System.out.println("All betas count: " + betaCount);}
|
|
if(verbose){System.out.println("Well maps made");}
|
|
|
|
|
|
// if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");}
|
|
// filterByOccupancyThresholds(allAlphas, 2, numWells - 1);
|
|
// filterByOccupancyThresholds(allBetas, 2, numWells - 1);
|
|
// if(verbose){System.out.println("Sequences removed");}
|
|
int pairableAlphaCount = allAlphas.size();
|
|
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
|
int pairableBetaCount = allBetas.size();
|
|
if(verbose){System.out.println("Remaining betas count: " + pairableBetaCount);}
|
|
|
|
if(verbose){System.out.println("Making vertex maps");}
|
|
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
|
//distinct numbers associated with them. Since I'm using a 2D array, that means
|
|
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
|
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
|
//subtract the vertexStartValue from betas to use their vertex labels as array indices
|
|
Integer vertexStartValue = 0;
|
|
//keys are sequential integer vertices, values are alphas
|
|
Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue);
|
|
//new start value for vertex to beta map should be one more than final vertex value in alpha map
|
|
vertexStartValue += plateVtoAMap.size();
|
|
//keys are sequential integers vertices, values are betas
|
|
Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue);
|
|
//keys are alphas, values are sequential integer vertices from previous map
|
|
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
|
|
//keys are betas, values are sequential integer vertices from previous map
|
|
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
|
|
if(verbose){System.out.println("Vertex maps made");}
|
|
|
|
//make adjacency matrix for bipartite graph generator
|
|
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
|
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
|
if(verbose){System.out.println("Creating adjacency matrix");}
|
|
//Count how many wells each alpha sequence appears in
|
|
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
|
//count how many wells each beta sequence appears in
|
|
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
|
//the adjacency matrix to be used by the graph generator
|
|
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
|
countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
|
|
plateBtoVMap, alphaIndices, betaIndices, alphaWellCounts, betaWellCounts, weights);
|
|
if(verbose){System.out.println("Matrix created");}
|
|
|
|
//create bipartite graph
|
|
if(verbose){System.out.println("Creating graph");}
|
|
//the graph object
|
|
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
|
|
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
|
//the graph generator
|
|
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
|
//the list of alpha vertices
|
|
//List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
|
List<Vertex> alphaVertices = new ArrayList<>();
|
|
//start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map
|
|
for (Integer seq : plateAtoVMap.keySet()) {
|
|
Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq));
|
|
alphaVertices.add(alphaVertex);
|
|
}
|
|
graphGenerator.first(alphaVertices);
|
|
//the list of beta vertices
|
|
//List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry
|
|
List<Vertex> betaVertices = new ArrayList<>();
|
|
for (Integer seq : plateBtoVMap.keySet()) {
|
|
Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq));
|
|
betaVertices.add(betaVertex);
|
|
}
|
|
graphGenerator.second(betaVertices);
|
|
//use adjacency matrix of weight created previously
|
|
graphGenerator.weights(weights);
|
|
graphGenerator.generateGraph(graph);
|
|
if(verbose){System.out.println("Graph created");}
|
|
|
|
Instant stop = Instant.now();
|
|
Duration time = Duration.between(start, stop);
|
|
|
|
//create GraphWithMapData object
|
|
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, time);
|
|
//Set source file name in graph to name of sample plate
|
|
output.setSourceFilename(samplePlate.getFilename());
|
|
//return GraphWithMapData object
|
|
return output;
|
|
}
|
|
|
|
//match CDR3s.
|
|
public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
|
|
Integer highThreshold, Integer maxOccupancyDifference,
|
|
Integer minOverlapPercent, boolean verbose) {
|
|
Instant start = Instant.now();
|
|
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
|
|
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
|
boolean saveEdges = BiGpairSEQ.cacheGraph();
|
|
int numWells = data.getNumWells();
|
|
//Integer alphaCount = data.getAlphaCount();
|
|
//Integer betaCount = data.getBetaCount();
|
|
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
|
|
Set<Vertex> alphas = new HashSet<>();
|
|
Set<Vertex> betas = new HashSet<>();
|
|
for(Vertex v: graph.vertexSet()) {
|
|
if (SequenceType.CDR3_ALPHA.equals(v.getType())){
|
|
alphas.add(v);
|
|
}
|
|
else {
|
|
betas.add(v);
|
|
}
|
|
}
|
|
Integer alphaCount = alphas.size();
|
|
Integer betaCount = betas.size();
|
|
|
|
//remove edges with weights outside given overlap thresholds, add those to removed edge list
|
|
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
|
removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges));
|
|
if(verbose){System.out.println("Over- and under-weight edges removed");}
|
|
|
|
//remove edges between vertices with too small an overlap size, add those to removed edge list
|
|
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
|
|
" percent of vertex occupancy value.");}
|
|
removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges));
|
|
if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
|
|
|
|
//Filter by relative occupancy
|
|
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
|
|
+ maxOccupancyDifference);}
|
|
removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges));
|
|
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
|
"removed");}
|
|
|
|
//Find Maximum Weighted Matching
|
|
//using jheaps library class PairingHeap for improved efficiency
|
|
if(verbose){System.out.println("Finding maximum weighted matching");}
|
|
MaximumWeightBipartiteMatching maxWeightMatching;
|
|
//Use correct heap type for priority queue
|
|
String heapType = BiGpairSEQ.getPriorityQueueHeapType();
|
|
switch (heapType) {
|
|
case "PAIRING" -> {
|
|
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
|
|
alphas,
|
|
betas,
|
|
i -> new PairingHeap(Comparator.naturalOrder()));
|
|
}
|
|
case "FIBONACCI" -> {
|
|
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
|
|
alphas,
|
|
betas,
|
|
i -> new FibonacciHeap(Comparator.naturalOrder()));
|
|
}
|
|
default -> {
|
|
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
|
|
alphas,
|
|
betas);
|
|
}
|
|
}
|
|
//get the matching
|
|
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
|
if(verbose){System.out.println("Matching completed");}
|
|
Instant stop = Instant.now();
|
|
|
|
//Header for CSV file
|
|
List<String> header = new ArrayList<>();
|
|
header.add("Alpha");
|
|
header.add("Alpha well count");
|
|
header.add("Beta");
|
|
header.add("Beta well count");
|
|
header.add("Overlap well count");
|
|
header.add("Matched correctly?");
|
|
header.add("P-value");
|
|
|
|
//Results for csv file
|
|
List<List<String>> allResults = new ArrayList<>();
|
|
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
|
MathContext mc = new MathContext(3);
|
|
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
|
DefaultWeightedEdge e;
|
|
int trueCount = 0;
|
|
int falseCount = 0;
|
|
boolean check;
|
|
Map<Integer, Integer> matchMap = new HashMap<>();
|
|
while(weightIter.hasNext()) {
|
|
e = weightIter.next();
|
|
Vertex source = graph.getEdgeSource(e);
|
|
Vertex target = graph.getEdgeTarget(e);
|
|
//Integer source = graph.getEdgeSource(e);
|
|
//Integer target = graph.getEdgeTarget(e);
|
|
//The match map is all matches found, not just true matches!
|
|
matchMap.put(source.getSequence(), target.getSequence());
|
|
check = target.getSequence().equals(distCellsMapAlphaKey.get(source.getSequence()));
|
|
//check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
|
|
if(check) {
|
|
trueCount++;
|
|
}
|
|
else {
|
|
falseCount++;
|
|
}
|
|
List<String> result = new ArrayList<>();
|
|
//alpha sequence
|
|
result.add(source.getSequence().toString());
|
|
//alpha well count
|
|
result.add(source.getOccupancy().toString());
|
|
//beta sequence
|
|
result.add(target.getSequence().toString());
|
|
//beta well count
|
|
result.add(target.getOccupancy().toString());
|
|
//overlap count
|
|
result.add(Double.toString(graph.getEdgeWeight(e)));
|
|
result.add(Boolean.toString(check));
|
|
double pValue = Equations.pValue(numWells, source.getOccupancy(),
|
|
target.getOccupancy(), graph.getEdgeWeight(e));
|
|
BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
|
|
result.add(pValueTrunc.toString());
|
|
allResults.add(result);
|
|
}
|
|
|
|
//Metadata comments for CSV file
|
|
String algoType = "LEDA book with heap: " + heapType;
|
|
int min = Math.min(alphaCount, betaCount);
|
|
//matching weight
|
|
BigDecimal totalMatchingWeight = maxWeightMatching.getMatchingWeight();
|
|
//rate of attempted matching
|
|
double attemptRate = (double) (trueCount + falseCount) / min;
|
|
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
|
//rate of pairing error
|
|
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
|
BigDecimal pairingErrorRateTrunc;
|
|
if(Double.isFinite(pairingErrorRate)) {
|
|
pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
|
}
|
|
else{
|
|
pairingErrorRateTrunc = new BigDecimal(-1, mc);
|
|
}
|
|
//get list of well populations
|
|
Integer[] wellPopulations = data.getWellPopulations();
|
|
//make string out of populations list
|
|
StringBuilder populationsStringBuilder = new StringBuilder();
|
|
populationsStringBuilder.append(wellPopulations[0].toString());
|
|
for(int i = 1; i < wellPopulations.length; i++){
|
|
populationsStringBuilder.append(", ");
|
|
populationsStringBuilder.append(wellPopulations[i].toString());
|
|
}
|
|
String wellPopulationsString = populationsStringBuilder.toString();
|
|
//total simulation time
|
|
Duration time = Duration.between(start, stop);
|
|
time = time.plus(data.getTime());
|
|
|
|
Map<String, String> metadata = new LinkedHashMap<>();
|
|
metadata.put("sample plate filename", data.getSourceFilename());
|
|
metadata.put("graph filename", dataFilename);
|
|
metadata.put("algorithm type", algoType);
|
|
metadata.put("matching weight", totalMatchingWeight.toString());
|
|
metadata.put("well populations", wellPopulationsString);
|
|
metadata.put("total alphas found", alphaCount.toString());
|
|
metadata.put("total betas found", betaCount.toString());
|
|
metadata.put("high overlap threshold", highThreshold.toString());
|
|
metadata.put("low overlap threshold", lowThreshold.toString());
|
|
metadata.put("minimum overlap percent", minOverlapPercent.toString());
|
|
metadata.put("maximum occupancy difference", maxOccupancyDifference.toString());
|
|
metadata.put("pairing attempt rate", attemptRateTrunc.toString());
|
|
metadata.put("correct pairing count", Integer.toString(trueCount));
|
|
metadata.put("incorrect pairing count", Integer.toString(falseCount));
|
|
metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
|
|
metadata.put("simulation time (seconds)", nf.format(time.toSeconds()));
|
|
//create MatchingResult object
|
|
MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
|
|
if(verbose){
|
|
for(String s: output.getComments()){
|
|
System.out.println(s);
|
|
}
|
|
}
|
|
|
|
if(saveEdges) {
|
|
//put the removed edges back on the graph
|
|
System.out.println("Restoring removed edges to graph.");
|
|
GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
|
|
}
|
|
//return MatchingResult object
|
|
return output;
|
|
}
|
|
|
|
//Commented out CDR1 matching until it's time to re-implement it
|
|
// //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
|
|
// public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
|
|
// Plate samplePlate, Integer lowThreshold,
|
|
// Integer highThreshold, MatchingResult priorResult){
|
|
// Instant start = Instant.now();
|
|
// Duration previousTime = priorResult.getTime();
|
|
// Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
|
|
// int numWells = samplePlate.getSize();
|
|
// int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
|
|
// int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
|
|
//
|
|
// System.out.println("Making previous match maps");
|
|
// Map<Integer, Integer> cdr3AtoBMap = previousMatches;
|
|
// Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
|
|
// System.out.println("Previous match maps made");
|
|
//
|
|
// System.out.println("Making cell maps");
|
|
// Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
|
// Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
|
// System.out.println("Cell maps made");
|
|
//
|
|
// System.out.println("Making well maps");
|
|
// Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
|
|
// Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
|
|
// int CDR3Count = allCDR3s.size();
|
|
// System.out.println("all CDR3s count: " + CDR3Count);
|
|
// int CDR1Count = allCDR1s.size();
|
|
// System.out.println("all CDR1s count: " + CDR1Count);
|
|
// System.out.println("Well maps made");
|
|
//
|
|
// System.out.println("Removing unpaired CDR3s from well maps");
|
|
// List<Integer> unpairedCDR3s = new ArrayList<>();
|
|
// for(Integer i: allCDR3s.keySet()){
|
|
// if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
|
|
// unpairedCDR3s.add(i);
|
|
// }
|
|
// }
|
|
// for(Integer i: unpairedCDR3s){
|
|
// allCDR3s.remove(i);
|
|
// }
|
|
// System.out.println("Unpaired CDR3s removed.");
|
|
// System.out.println("Remaining CDR3 count: " + allCDR3s.size());
|
|
//
|
|
// System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
|
|
// filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
|
|
// System.out.println("CDR1s removed.");
|
|
// System.out.println("Remaining CDR1 count: " + allCDR1s.size());
|
|
//
|
|
// System.out.println("Making vertex maps");
|
|
//
|
|
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
|
// // distinct numbers associated with them. Since I'm using a 2D array, that means
|
|
// // distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
|
// // from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
|
// // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
|
|
// Integer vertexStartValue = 0;
|
|
// //keys are sequential integer vertices, values are CDR3s
|
|
// Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
|
|
// //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
|
|
// vertexStartValue += plateVtoCDR3Map.size();
|
|
// //keys are sequential integers vertices, values are CDR1s
|
|
// Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
|
|
// //keys are CDR3s, values are sequential integer vertices from previous map
|
|
// Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
|
|
// //keys are CDR1s, values are sequential integer vertices from previous map
|
|
// Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
|
|
// System.out.println("Vertex maps made");
|
|
//
|
|
// System.out.println("Creating adjacency matrix");
|
|
// //Count how many wells each CDR3 appears in
|
|
// Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
|
|
// //count how many wells each CDR1 appears in
|
|
// Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
|
|
// //add edges, where weights are number of wells the peptides share in common.
|
|
// //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
|
|
// Map<Integer, Integer> wellNCDR3s = null;
|
|
// Map<Integer, Integer> wellNCDR1s = null;
|
|
// double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
|
|
// countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
|
// cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
|
|
// System.out.println("Matrix created");
|
|
//
|
|
// System.out.println("Creating graph");
|
|
// SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
|
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
|
//
|
|
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
|
// List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
|
// graphGenerator.first(cdr3Vertices);
|
|
// List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
|
|
// graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
|
// graphGenerator.weights(weights);
|
|
// graphGenerator.generateGraph(graph);
|
|
// System.out.println("Graph created");
|
|
//
|
|
// System.out.println("Removing edges outside of weight thresholds");
|
|
// filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
|
// System.out.println("Over- and under-weight edges set to 0.0");
|
|
//
|
|
// System.out.println("Finding first maximum weighted matching");
|
|
// MaximumWeightBipartiteMatching firstMaxWeightMatching =
|
|
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
|
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
|
|
// System.out.println("First maximum weighted matching found");
|
|
//
|
|
//
|
|
// //first processing run
|
|
// Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
|
// Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
|
// DefaultWeightedEdge e;
|
|
// while(weightIter.hasNext()){
|
|
// e = weightIter.next();
|
|
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
|
//// continue;
|
|
//// }
|
|
// Integer source = graph.getEdgeSource(e);
|
|
// Integer target = graph.getEdgeTarget(e);
|
|
// firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
|
|
// }
|
|
// System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
|
|
//
|
|
// System.out.println("Removing edges from first maximum weighted matching");
|
|
// //zero out the edge weights in the matching
|
|
// weightIter = graphMatching.iterator();
|
|
// while(weightIter.hasNext()){
|
|
// graph.removeEdge(weightIter.next());
|
|
// }
|
|
// System.out.println("Edges removed");
|
|
//
|
|
// //Generate a new matching
|
|
// System.out.println("Finding second maximum weighted matching");
|
|
// MaximumWeightBipartiteMatching secondMaxWeightMatching =
|
|
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
|
// graphMatching = secondMaxWeightMatching.getMatching();
|
|
// System.out.println("Second maximum weighted matching found");
|
|
//
|
|
//
|
|
// //second processing run
|
|
// Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
|
|
// weightIter = graphMatching.iterator();
|
|
// while(weightIter.hasNext()){
|
|
// e = weightIter.next();
|
|
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
|
//// continue;
|
|
//// }
|
|
// Integer source = graph.getEdgeSource(e);
|
|
//// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
|
|
//// continue;
|
|
//// }
|
|
// Integer target = graph.getEdgeTarget(e);
|
|
// secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
|
|
// }
|
|
// System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
|
|
//
|
|
// System.out.println("Mapping first pass CDR3 alpha/beta pairs");
|
|
// //get linked map for first matching attempt
|
|
// Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
|
|
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
|
|
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
|
|
// continue;
|
|
// }
|
|
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
|
|
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
|
|
// continue;
|
|
// }
|
|
// firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
|
|
// firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
|
|
// }
|
|
// System.out.println("First pass CDR3 alpha/beta pairs mapped");
|
|
//
|
|
// System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
|
|
// System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
|
|
// //Look for matches that simply swapped already-matched alpha and beta CDR3s
|
|
// Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
|
|
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
|
|
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
|
|
// continue;
|
|
// }
|
|
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
|
|
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
|
|
// continue;
|
|
// }
|
|
// if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
|
|
// if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
|
|
// dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
|
|
// dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
|
|
// }
|
|
// }
|
|
// }
|
|
// System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
|
|
//
|
|
// Instant stop = Instant.now();
|
|
// //results for first map
|
|
// System.out.println("RESULTS FOR FIRST PASS MATCHING");
|
|
// List<List<String>> allResults = new ArrayList<>();
|
|
// Integer trueCount = 0;
|
|
// Iterator iter = firstMatchesMap.keySet().iterator();
|
|
//
|
|
// while(iter.hasNext()){
|
|
// Boolean proven = false;
|
|
// List<String> tmp = new ArrayList<>();
|
|
// tmp.add(iter.next().toString());
|
|
// tmp.add(iter.next().toString());
|
|
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
|
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
|
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
|
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
|
// proven = true;
|
|
// }
|
|
// }
|
|
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
|
|
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
|
|
// proven = true;
|
|
// }
|
|
// }
|
|
// tmp.add(proven.toString());
|
|
// allResults.add(tmp);
|
|
// if(proven){
|
|
// trueCount++;
|
|
// }
|
|
// }
|
|
//
|
|
// List<String> comments = new ArrayList<>();
|
|
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
|
// comments.add("Previous pairs found: " + previousMatches.size());
|
|
// comments.add("CDR1 matches attempted: " + allResults.size());
|
|
// double attemptRate = (double) allResults.size() / previousMatches.size();
|
|
// comments.add("Matching attempt rate: " + attemptRate);
|
|
// comments.add("Number of correct matches: " + trueCount);
|
|
// double correctRate = (double) trueCount / allResults.size();
|
|
// comments.add("Correct matching rate: " + correctRate);
|
|
// NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
|
// Duration time = Duration.between(start, stop);
|
|
// time = time.plus(previousTime);
|
|
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
|
// for(String s: comments){
|
|
// System.out.println(s);
|
|
// }
|
|
//
|
|
//
|
|
//
|
|
// List<String> headers = new ArrayList<>();
|
|
// headers.add("CDR3 alpha");
|
|
// headers.add("CDR3 beta");
|
|
// headers.add("first matched CDR1");
|
|
// headers.add("second matched CDR1");
|
|
// headers.add("Correct match?");
|
|
//
|
|
// MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
|
|
// comments, headers, allResults, dualMatchesMap, time);
|
|
//
|
|
// //results for dual map
|
|
// System.out.println("RESULTS FOR SECOND PASS MATCHING");
|
|
// allResults = new ArrayList<>();
|
|
// trueCount = 0;
|
|
// iter = dualMatchesMap.keySet().iterator();
|
|
// while(iter.hasNext()){
|
|
// Boolean proven = false;
|
|
// List<String> tmp = new ArrayList<>();
|
|
// tmp.add(iter.next().toString());
|
|
// tmp.add(iter.next().toString());
|
|
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
|
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
|
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
|
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
|
// proven = true;
|
|
// }
|
|
// }
|
|
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
|
|
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
|
|
// proven = true;
|
|
// }
|
|
// }
|
|
// tmp.add(proven.toString());
|
|
// allResults.add(tmp);
|
|
// if(proven){
|
|
// trueCount++;
|
|
// }
|
|
// }
|
|
//
|
|
// comments = new ArrayList<>();
|
|
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
|
// comments.add("Previous pairs found: " + previousMatches.size());
|
|
// comments.add("High overlap threshold: " + highThreshold);
|
|
// comments.add("Low overlap threshold: " + lowThreshold);
|
|
// comments.add("CDR1 matches attempted: " + allResults.size());
|
|
// attemptRate = (double) allResults.size() / previousMatches.size();
|
|
// comments.add("Matching attempt rate: " + attemptRate);
|
|
// comments.add("Number of correct matches: " + trueCount);
|
|
// correctRate = (double) trueCount / allResults.size();
|
|
// comments.add("Correct matching rate: " + correctRate);
|
|
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
|
//
|
|
// for(String s: comments){
|
|
// System.out.println(s);
|
|
// }
|
|
//
|
|
// System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
|
// MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
|
|
// allResults, dualMatchesMap, time);
|
|
// MatchingResult[] output = {firstTest, dualTest};
|
|
// return output;
|
|
// }
|
|
|
|
//Remove sequences based on occupancy
|
|
public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
|
|
List<Integer> noise = new ArrayList<>();
|
|
for(Integer k: wellMap.keySet()){
|
|
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
|
|
noise.add(k);
|
|
}
|
|
}
|
|
for(Integer k: noise) {
|
|
wellMap.remove(k);
|
|
}
|
|
}
|
|
|
|
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
|
//fills weights in the given 2D array
|
|
private static void countSequencesAndFillMatrix(Plate samplePlate,
|
|
Map<Integer,Integer> allRowSequences,
|
|
Map<Integer,Integer> allColumnSequences,
|
|
Map<Integer,Integer> rowSequenceToVertexMap,
|
|
Map<Integer,Integer> columnSequenceToVertexMap,
|
|
int[] rowSequenceIndices,
|
|
int[] colSequenceIndices,
|
|
Map<Integer, Integer> rowSequenceCounts,
|
|
Map<Integer,Integer> columnSequenceCounts,
|
|
double[][] weights){
|
|
Map<Integer, Integer> wellNRowSequences = null;
|
|
Map<Integer, Integer> wellNColumnSequences = null;
|
|
int vertexStartValue = rowSequenceToVertexMap.size();
|
|
int numWells = samplePlate.getSize();
|
|
for (int n = 0; n < numWells; n++) {
|
|
wellNRowSequences = samplePlate.assayWellsSequenceS(n, rowSequenceIndices);
|
|
for (Integer a : wellNRowSequences.keySet()) {
|
|
if(allRowSequences.containsKey(a)){
|
|
rowSequenceCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
|
}
|
|
}
|
|
wellNColumnSequences = samplePlate.assayWellsSequenceS(n, colSequenceIndices);
|
|
for (Integer b : wellNColumnSequences.keySet()) {
|
|
if(allColumnSequences.containsKey(b)){
|
|
columnSequenceCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
|
}
|
|
}
|
|
for (Integer i : wellNRowSequences.keySet()) {
|
|
if(allRowSequences.containsKey(i)){
|
|
for (Integer j : wellNColumnSequences.keySet()) {
|
|
if(allColumnSequences.containsKey(j)){
|
|
weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
|
|
int valueSequenceIndex){
|
|
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
|
|
for (Integer[] cell : cells) {
|
|
keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]);
|
|
}
|
|
return keySequenceToValueSequenceMap;
|
|
}
|
|
|
|
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
|
|
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
|
|
Integer index = startValue; //is this necessary? I don't think I use this.
|
|
for (Integer k: sequences.keySet()) {
|
|
map.put(index, k);
|
|
index++;
|
|
}
|
|
return map;
|
|
}
|
|
|
|
private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
|
|
Map<Integer, Integer> inverse = new HashMap<>();
|
|
for (Integer k : map.keySet()) {
|
|
inverse.put(map.get(k), k);
|
|
}
|
|
return inverse;
|
|
}
|
|
|
|
}
|