Adding parameters to filter by occupancy difference and percent overlap

This commit is contained in:
2022-02-19 14:05:26 -06:00
parent ce88e170c1
commit 6faacd9a82
2 changed files with 74 additions and 95 deletions

View File

@@ -4,78 +4,22 @@ import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.AddressableHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.text.NumberFormat;
import java.time.Instant;
import java.time.Duration;
import java.util.*;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream;
public class Simulator {
private static int cdr3AlphaIndex = 0;
private static int cdr3BetaIndex = 1;
private static int cdr1AlphaIndex = 2;
private static int cdr1BetaIndex = 3;
private static final int cdr3AlphaIndex = 0;
private static final int cdr3BetaIndex = 1;
private static final int cdr1AlphaIndex = 2;
private static final int cdr1BetaIndex = 3;
//Tested generating the cell sample file itself with a set distribution, but it wasn't working well
//realized I'd have to change matching algos as well, so abandoned it.
// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){
// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s
// //this could be wrong, though. could be less diversity than that.
// //previous sim was only CDR3s
// //Integer numDistinctCells = 1000000;
// //int cdr1Freq = 3;
//
// List<Integer> numbersCDR3 = new ArrayList<>();
// List<Integer> numbersCDR1 = new ArrayList<>();
// Integer numDistCDR3s = 2 * numDistinctCells + 1;
// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
// Collections.shuffle(numbersCDR3);
// Collections.shuffle(numbersCDR1);
//
// //Each cell represented by 4 values
// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
// List<Integer[]> distinctCells = new ArrayList<>();
// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
// Integer tmpCDR3a = numbersCDR3.get(i);
// Integer tmpCDR3b = numbersCDR3.get(i+1);
// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
// distinctCells.add(tmp);
// }
// List<Integer[]>sampleCells = new ArrayList<>();
// int count;
// int lastFactor = 0;
// int factor = 10;
// int index = 0;
// while(numDistinctCells / factor >= 1){
// int i = index;
// while(i < index + factor - lastFactor){
// count = 0;
// while(count < (numDistinctCells/(factor * 10))){
// sampleCells.add(distinctCells.get(i));
// count++;
// }
// i++;
// }
// index = i;
// lastFactor = factor;
// factor *=10;
// }
// System.out.println("Total cells in sample: " + sampleCells.size());
// System.out.println("Distinct cells in sample: " + index);
// return new CellSample(sampleCells, cdr1Freq);
// }
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
//In real T cells, CDR1s have about one third the diversity of CDR3s
//previous sim was only CDR3s
@@ -103,7 +47,8 @@ public class Simulator {
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, boolean verbose){
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose){
if(verbose){System.out.println("Cells: " + distinctCells.size());}
Instant start = Instant.now();
@@ -111,6 +56,7 @@ public class Simulator {
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
@@ -169,9 +115,7 @@ public class Simulator {
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("matrix created");}
/**
* This is where the bipartite graph is created
*/
//create bipartite graph
if(verbose){System.out.println("creating graph");}
//the graph object
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
@@ -179,12 +123,10 @@ public class Simulator {
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
//the list of beta vertices
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
@@ -196,18 +138,17 @@ public class Simulator {
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap);
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
if(verbose){System.out.println("Edges with weights much less than occupancy values set to 0.0");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices of massively different occupancy");}
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap);
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
maxOccupancyDifference);
if(verbose){System.out.println("Edges between vertices of massively different occupancy set to 0.0");}
//Find Maximum Weighted Matching
/**
* This is where the maximum weighted matching is found
*/
// if(verbose){System.out.println("Finding maximum weighted matching");}
// MaximumWeightBipartiteMatching maxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
@@ -215,14 +156,14 @@ public class Simulator {
// if(verbose){System.out.println("Matching completed");}
// Instant stop = Instant.now();
//trying with jheaps now
//trying with jheaps addressable now to improve performance
if(verbose){System.out.println("Finding maximum weighted matching");}
//Attempting to use addressable heap to improve performance
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet(),
(i) -> new PairingHeap(Comparator.naturalOrder()));
i -> new PairingHeap(Comparator.naturalOrder()));
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
if(verbose){System.out.println("Matching completed");}
Instant stop = Instant.now();
@@ -243,10 +184,10 @@ public class Simulator {
NumberFormat nf = NumberFormat.getInstance(Locale.US);
MathContext mc = new MathContext(3);
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
DefaultWeightedEdge e;
int trueCount = 0;
int falseCount = 0;
boolean check = false;
boolean check;
Map<Integer, Integer> matchMap = new HashMap<>();
while(weightIter.hasNext()) {
e = weightIter.next();
@@ -279,7 +220,7 @@ public class Simulator {
}
//Metadate comments for CSV file
int min = alphaCount > betaCount ? betaCount : alphaCount;
int min = Math.min(alphaCount, betaCount);
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
@@ -290,6 +231,8 @@ public class Simulator {
comments.add("Total betas found: " + betaCount);
comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold);
comments.add("Minimum overlap percent: " + minOverlapPercent);
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
comments.add("Pairing attempt rate: " + attemptRateTrunc);
comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount);
@@ -393,11 +336,9 @@ public class Simulator {
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> cdr3Vertices = new ArrayList<>();
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(cdr3Vertices);
List<Integer> cdr1Vertices = new ArrayList<>();
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
@@ -417,7 +358,7 @@ public class Simulator {
//first processing run
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
DefaultWeightedEdge e;
while(weightIter.hasNext()){
e = weightIter.next();
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
@@ -541,7 +482,7 @@ public class Simulator {
comments.add("Correct matching rate: " + correctRate);
NumberFormat nf = NumberFormat.getInstance(Locale.US);
Duration time = Duration.between(start, stop);
time.plus(previousTime);
time = time.plus(previousTime);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
for(String s: comments){
System.out.println(s);
@@ -681,12 +622,13 @@ public class Simulator {
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap) {
Map<Integer, Integer> plateVtoBMap,
Integer maxOccupancyDifference) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
//Adjust this to something cleverer later
if (Math.abs(alphaOcc - betaOcc) >= 20) {
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
graph.setEdgeWeight(e, 0.0);
}
}
@@ -697,13 +639,14 @@ public class Simulator {
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap) {
Map<Integer, Integer> plateVtoBMap,
Integer minOverlapPercent) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
//Adjust this to something cleverer later
Integer min = alphaOcc > betaOcc ? betaOcc : alphaOcc;
if (min - graph.getEdgeWeight(e) >= 15) {
double weight = graph.getEdgeWeight(e);
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
graph.setEdgeWeight(e, 0.0);
}
}