Adding parameters to filter by occupancy difference and percent overlap
This commit is contained in:
@@ -4,78 +4,22 @@ import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jheaps.AddressableHeap;
|
||||
import org.jheaps.tree.PairingHeap;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.math.MathContext;
|
||||
import java.text.NumberFormat;
|
||||
import java.time.Instant;
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class Simulator {
|
||||
private static int cdr3AlphaIndex = 0;
|
||||
private static int cdr3BetaIndex = 1;
|
||||
private static int cdr1AlphaIndex = 2;
|
||||
private static int cdr1BetaIndex = 3;
|
||||
private static final int cdr3AlphaIndex = 0;
|
||||
private static final int cdr3BetaIndex = 1;
|
||||
private static final int cdr1AlphaIndex = 2;
|
||||
private static final int cdr1BetaIndex = 3;
|
||||
|
||||
|
||||
//Tested generating the cell sample file itself with a set distribution, but it wasn't working well
|
||||
//realized I'd have to change matching algos as well, so abandoned it.
|
||||
// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){
|
||||
// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s
|
||||
// //this could be wrong, though. could be less diversity than that.
|
||||
// //previous sim was only CDR3s
|
||||
// //Integer numDistinctCells = 1000000;
|
||||
// //int cdr1Freq = 3;
|
||||
//
|
||||
// List<Integer> numbersCDR3 = new ArrayList<>();
|
||||
// List<Integer> numbersCDR1 = new ArrayList<>();
|
||||
// Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||
// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
|
||||
// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
|
||||
// Collections.shuffle(numbersCDR3);
|
||||
// Collections.shuffle(numbersCDR1);
|
||||
//
|
||||
// //Each cell represented by 4 values
|
||||
// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
|
||||
// List<Integer[]> distinctCells = new ArrayList<>();
|
||||
// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||
// Integer tmpCDR3a = numbersCDR3.get(i);
|
||||
// Integer tmpCDR3b = numbersCDR3.get(i+1);
|
||||
// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
|
||||
// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
|
||||
// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
|
||||
// distinctCells.add(tmp);
|
||||
// }
|
||||
// List<Integer[]>sampleCells = new ArrayList<>();
|
||||
// int count;
|
||||
// int lastFactor = 0;
|
||||
// int factor = 10;
|
||||
// int index = 0;
|
||||
// while(numDistinctCells / factor >= 1){
|
||||
// int i = index;
|
||||
// while(i < index + factor - lastFactor){
|
||||
// count = 0;
|
||||
// while(count < (numDistinctCells/(factor * 10))){
|
||||
// sampleCells.add(distinctCells.get(i));
|
||||
// count++;
|
||||
// }
|
||||
// i++;
|
||||
// }
|
||||
// index = i;
|
||||
// lastFactor = factor;
|
||||
// factor *=10;
|
||||
// }
|
||||
// System.out.println("Total cells in sample: " + sampleCells.size());
|
||||
// System.out.println("Distinct cells in sample: " + index);
|
||||
// return new CellSample(sampleCells, cdr1Freq);
|
||||
// }
|
||||
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
|
||||
//In real T cells, CDR1s have about one third the diversity of CDR3s
|
||||
//previous sim was only CDR3s
|
||||
@@ -103,7 +47,8 @@ public class Simulator {
|
||||
|
||||
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold,
|
||||
Integer highThreshold, boolean verbose){
|
||||
Integer highThreshold, Integer maxOccupancyDifference,
|
||||
Integer minOverlapPercent, boolean verbose){
|
||||
if(verbose){System.out.println("Cells: " + distinctCells.size());}
|
||||
|
||||
Instant start = Instant.now();
|
||||
@@ -111,6 +56,7 @@ public class Simulator {
|
||||
int[] alphaIndex = {cdr3AlphaIndex};
|
||||
int[] betaIndex = {cdr3BetaIndex};
|
||||
|
||||
|
||||
if(verbose){System.out.println("Making cell maps");}
|
||||
//HashMap keyed to Alphas, values Betas
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
|
||||
@@ -169,9 +115,7 @@ public class Simulator {
|
||||
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
|
||||
if(verbose){System.out.println("matrix created");}
|
||||
|
||||
/**
|
||||
* This is where the bipartite graph is created
|
||||
*/
|
||||
//create bipartite graph
|
||||
if(verbose){System.out.println("creating graph");}
|
||||
//the graph object
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
||||
@@ -179,12 +123,10 @@ public class Simulator {
|
||||
//the graph generator
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
//the list of alpha vertices
|
||||
List<Integer> alphaVertices = new ArrayList<>();
|
||||
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(alphaVertices);
|
||||
//the list of beta vertices
|
||||
List<Integer> betaVertices = new ArrayList<>();
|
||||
betaVertices.addAll(plateVtoBMap.keySet());
|
||||
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
|
||||
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
@@ -196,18 +138,17 @@ public class Simulator {
|
||||
|
||||
//Filter by overlap size
|
||||
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
|
||||
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap);
|
||||
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
|
||||
if(verbose){System.out.println("Edges with weights much less than occupancy values set to 0.0");}
|
||||
|
||||
//Filter by relative occupancy
|
||||
if(verbose){System.out.println("Eliminating edges between vertices of massively different occupancy");}
|
||||
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap);
|
||||
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
|
||||
maxOccupancyDifference);
|
||||
if(verbose){System.out.println("Edges between vertices of massively different occupancy set to 0.0");}
|
||||
|
||||
//Find Maximum Weighted Matching
|
||||
|
||||
/**
|
||||
* This is where the maximum weighted matching is found
|
||||
*/
|
||||
// if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
// MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
|
||||
@@ -215,14 +156,14 @@ public class Simulator {
|
||||
// if(verbose){System.out.println("Matching completed");}
|
||||
// Instant stop = Instant.now();
|
||||
|
||||
//trying with jheaps now
|
||||
//trying with jheaps addressable now to improve performance
|
||||
if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
//Attempting to use addressable heap to improve performance
|
||||
MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph,
|
||||
plateVtoAMap.keySet(),
|
||||
plateVtoBMap.keySet(),
|
||||
(i) -> new PairingHeap(Comparator.naturalOrder()));
|
||||
i -> new PairingHeap(Comparator.naturalOrder()));
|
||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
if(verbose){System.out.println("Matching completed");}
|
||||
Instant stop = Instant.now();
|
||||
@@ -243,10 +184,10 @@ public class Simulator {
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
MathContext mc = new MathContext(3);
|
||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
DefaultWeightedEdge e = null;
|
||||
DefaultWeightedEdge e;
|
||||
int trueCount = 0;
|
||||
int falseCount = 0;
|
||||
boolean check = false;
|
||||
boolean check;
|
||||
Map<Integer, Integer> matchMap = new HashMap<>();
|
||||
while(weightIter.hasNext()) {
|
||||
e = weightIter.next();
|
||||
@@ -279,7 +220,7 @@ public class Simulator {
|
||||
}
|
||||
|
||||
//Metadate comments for CSV file
|
||||
int min = alphaCount > betaCount ? betaCount : alphaCount;
|
||||
int min = Math.min(alphaCount, betaCount);
|
||||
double attemptRate = (double) (trueCount + falseCount) / min;
|
||||
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||
@@ -290,6 +231,8 @@ public class Simulator {
|
||||
comments.add("Total betas found: " + betaCount);
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("Minimum overlap percent: " + minOverlapPercent);
|
||||
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
|
||||
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
||||
comments.add("Correct pairings: " + trueCount);
|
||||
comments.add("Incorrect pairings: " + falseCount);
|
||||
@@ -393,11 +336,9 @@ public class Simulator {
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
List<Integer> cdr3Vertices = new ArrayList<>();
|
||||
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(cdr3Vertices);
|
||||
List<Integer> cdr1Vertices = new ArrayList<>();
|
||||
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
|
||||
List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
|
||||
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
@@ -417,7 +358,7 @@ public class Simulator {
|
||||
//first processing run
|
||||
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
DefaultWeightedEdge e = null;
|
||||
DefaultWeightedEdge e;
|
||||
while(weightIter.hasNext()){
|
||||
e = weightIter.next();
|
||||
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
@@ -541,7 +482,7 @@ public class Simulator {
|
||||
comments.add("Correct matching rate: " + correctRate);
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
Duration time = Duration.between(start, stop);
|
||||
time.plus(previousTime);
|
||||
time = time.plus(previousTime);
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
@@ -681,12 +622,13 @@ public class Simulator {
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap) {
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer maxOccupancyDifference) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
//Adjust this to something cleverer later
|
||||
if (Math.abs(alphaOcc - betaOcc) >= 20) {
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
@@ -697,13 +639,14 @@ public class Simulator {
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap) {
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer minOverlapPercent) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
//Adjust this to something cleverer later
|
||||
Integer min = alphaOcc > betaOcc ? betaOcc : alphaOcc;
|
||||
if (min - graph.getEdgeWeight(e) >= 15) {
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,6 +166,20 @@ public class UserInterface {
|
||||
.desc("Sets the maximum occupancy overlap to attempt matching")
|
||||
.build();
|
||||
mainOptions.addOption(highThresh);
|
||||
Option occDiff = Option.builder("occdiff")
|
||||
.longOpt("occupancy-difference")
|
||||
.hasArg()
|
||||
.argName("Number")
|
||||
.desc("Maximum difference in alpha/beta occupancy to attempt matching")
|
||||
.build();
|
||||
mainOptions.addOption(occDiff);
|
||||
Option overlapPer = Option.builder("ovper")
|
||||
.longOpt("overlap-percent")
|
||||
.hasArg()
|
||||
.argName("Percent")
|
||||
.desc("Minimum overlap percent to attempt matching (0 -100)")
|
||||
.build();
|
||||
mainOptions.addOption(overlapPer);
|
||||
Option inputPlates = Option.builder("p")
|
||||
.longOpt("plate-files")
|
||||
.hasArgs()
|
||||
@@ -183,8 +197,10 @@ public class UserInterface {
|
||||
String cellFile = line.getOptionValue("c");
|
||||
Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
|
||||
Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
|
||||
Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff));
|
||||
Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer));
|
||||
for(String plate: line.getOptionValues("p")) {
|
||||
matchCDR3s(cellFile, plate, lowThreshold, highThreshold);
|
||||
matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
|
||||
}
|
||||
}
|
||||
else if(line.hasOption("cells")){
|
||||
@@ -432,7 +448,8 @@ public class UserInterface {
|
||||
}
|
||||
}
|
||||
|
||||
private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold){
|
||||
private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold,
|
||||
Integer maxOccupancyDifference, Integer minOverlapPercent){
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||
@@ -448,7 +465,7 @@ public class UserInterface {
|
||||
highThreshold = plate.getSize() - 1;
|
||||
}
|
||||
List<Integer[]> cells = cellReader.getCells();
|
||||
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, false);
|
||||
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, maxOccupancyDifference, minOverlapPercent, false);
|
||||
//result writer
|
||||
MatchingFileWriter writer = new MatchingFileWriter("", results);
|
||||
writer.writeErrorRateToTerminal();
|
||||
@@ -462,6 +479,8 @@ public class UserInterface {
|
||||
String plateFile = null;
|
||||
Integer lowThreshold = 0;
|
||||
Integer highThreshold = Integer.MAX_VALUE;
|
||||
Integer maxOccupancyDiff = 96; //no filtering if max difference is all wells by default
|
||||
Integer minOverlapPercent = 0; //no filtering if min percentage is zero by default
|
||||
try {
|
||||
System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||
System.out.print("Please enter name of an existing cell sample file: ");
|
||||
@@ -478,6 +497,13 @@ public class UserInterface {
|
||||
}
|
||||
System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
|
||||
highThreshold = sc.nextInt();
|
||||
System.out.println("What is the maximum difference in alpha/beta occupancy to attempt matching?");
|
||||
maxOccupancyDiff = sc.nextInt();
|
||||
System.out.println("What is the minimum overlap percentage to attempt matching? (0 - 100)");
|
||||
minOverlapPercent = sc.nextInt();
|
||||
if (minOverlapPercent < 0 || minOverlapPercent > 100) {
|
||||
throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
@@ -499,7 +525,7 @@ public class UserInterface {
|
||||
highThreshold = plate.getSize() - 1;
|
||||
}
|
||||
List<Integer[]> cells = cellReader.getCells();
|
||||
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, true);
|
||||
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, maxOccupancyDiff, minOverlapPercent, true);
|
||||
//result writer
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
|
||||
writer.writeResultsToFile();
|
||||
@@ -518,6 +544,8 @@ public class UserInterface {
|
||||
String plateFile = null;
|
||||
Integer lowThresholdCDR3 = 0;
|
||||
Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
||||
Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
|
||||
Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
|
||||
Integer lowThresholdCDR1 = 0;
|
||||
Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
||||
boolean outputCDR3Matches = false;
|
||||
@@ -537,6 +565,13 @@ public class UserInterface {
|
||||
}
|
||||
System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
highThresholdCDR3 = sc.nextInt();
|
||||
System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
|
||||
maxOccupancyDiffCDR3 = sc.nextInt();
|
||||
System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
|
||||
minOverlapPercentCDR3 = sc.nextInt();
|
||||
if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
|
||||
throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||
}
|
||||
System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
lowThresholdCDR1 = sc.nextInt();
|
||||
if(lowThresholdCDR1 < 1){
|
||||
@@ -583,7 +618,8 @@ public class UserInterface {
|
||||
highThresholdCDR1 = plate.getSize() - 1;
|
||||
}
|
||||
List<Integer[]> cells = cellReader.getCells();
|
||||
MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3, true);
|
||||
MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
|
||||
maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
|
||||
MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
||||
highThresholdCDR1, preliminaryResults);
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
|
||||
|
||||
Reference in New Issue
Block a user