Three ways of making graph, but [...]MatrixGenerator is fastest

This commit is contained in:
2021-11-10 11:18:31 -06:00
parent 93cac1d38a
commit d39fdbee3b
3 changed files with 100 additions and 50 deletions

View File

@@ -1,13 +1,29 @@
import java.math.BigDecimal;
import java.math.BigInteger; import java.math.BigInteger;
import java.math.MathContext;
public abstract class Equations { public abstract class Equations {
public static double pValue(Integer w, Integer w_a, Integer w_b, Integer w_ab) { public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
return 1.0; int w_ab = (int) w_ab_d;
double pv = 0.0;
Integer maxOverlap = w_a >= w_b ? w_b : w_a;
for(int i = w_ab; i <= maxOverlap; i++){
pv += probPairedByChance(w, i, w_a, w_b);
}
return pv;
} }
private static double probPairedByChanc(Integer w, Integer w_a, Integer w_b, Integer w_ab){ private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
return 1.0; BigInteger numer1 = choose(w, w_ab);
BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
BigInteger numer3 = choose(w - w_a, w_b - w_ab);
BigInteger numer = numer1.multiply(numer2.multiply(numer3));
BigInteger denom = choose(w, w_a).multiply(choose(w, w_b));
BigDecimal numer_d = new BigDecimal(numer);
BigDecimal denom_d = new BigDecimal(denom);
BigDecimal prob = numer_d.divide(denom_d, MathContext.DECIMAL64);
return prob.doubleValue();
} }
/* /*

View File

@@ -1,6 +1,8 @@
import java.sql.Array; import java.sql.Array;
import java.util.*; import java.util.*;
//Need to write function to output plate to a file that I can read in.
public class Plate { public class Plate {
private List<List<Integer[]>> wells; private List<List<Integer[]>> wells;
private Random rand = new Random(); private Random rand = new Random();

View File

@@ -14,15 +14,16 @@ import java.util.*;
import java.util.stream.IntStream; import java.util.stream.IntStream;
public class Simulation { public class Simulation {
private static Integer numDistinctCells = 4_000_000; private static Integer numDistinctCells = 15_000_000;
private static double stdDeviation = 400; private static double stdDeviation = 1000; //square root of numDistCells would approximate poisson dist, supposedly
private static int numWells = 96; private static int numWells = 96;
private static int numConcentrations = 1; private static int numConcentrations = 1;
private static double errorRate = 0.1; private static double errorRate = 0.1;
private static int[] concentrations = {5000}; private static int[] concentrations = {500};
private static int lowThreshold = 2; //min number of shared wells to attempt pairing private static int lowThreshold = 2; //min number of shared wells to attempt pairing
private static int highThreshold = numWells - 6; //max number of shared wells to attempt pairing private static int highThreshold = numWells - 3; //max number of shared wells to attempt pairing
private static boolean autogenerateGraphFromArray = false; private static boolean use2DArrayForGraph = true; //Doing this is much faster for larger graphs
private static boolean useJGraphTGraphMatrixGenerator = true; //fastest option
public static void main(String[] args) { public static void main(String[] args) {
Instant start = Instant.now(); Instant start = Instant.now();
@@ -32,6 +33,9 @@ public class Simulation {
//2. implement p-values and just check exhaustively for strictly-bounded weights //2. implement p-values and just check exhaustively for strictly-bounded weights
// If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe // If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe
//3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient //3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient
// 3.5 Could also do a version where I make the graph one edge at a time, but from a 2D array so I have the
// weights already
// Tried it. It was way better than doing the graph directly, but slightly slower than the built-in
//4. Implement a discrete distribution function (along the lines of the alias method) //4. Implement a discrete distribution function (along the lines of the alias method)
// //
//With array method, could also try simply zeroing high-well-count peptides before graph creation //With array method, could also try simply zeroing high-well-count peptides before graph creation
@@ -87,6 +91,7 @@ public class Simulation {
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to //Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
// when I refactor to use a 2d array to make the graph // when I refactor to use a 2d array to make the graph
//For the autogenerator, all vertices must have distinct numbers associated with them
Integer vertexStartValue = 0; Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas //keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue); Map<Integer, Integer> plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue);
@@ -111,11 +116,50 @@ public class Simulation {
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNAlphas = null; Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null; Map<Integer, Integer> wellNBetas = null;
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class); SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
//Flag in global variables determines if edges are added to graph one at a time directly, or if a graph //Flag in global variables determines if edges are added to graph one at a time directly, or if a graph
//is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class //is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class
if(!autogenerateGraphFromArray) { if(use2DArrayForGraph) {
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
if(useJGraphTGraphMatrixGenerator) {
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
} else {
for(int i = 0; i < alphaCount; i++) {
for (int j = vertexStartValue; j < betaCount + vertexStartValue; j++){
graph.addVertex(i);
graph.addVertex(j);
addEdgeWithWeight(graph, i, (j), weights[i][j - vertexStartValue]);
}
}
}
}
else {
//construct graph //construct graph
//add vertices //add vertices
for(Integer v: plateVtoAMap.keySet()) { for(Integer v: plateVtoAMap.keySet()) {
@@ -142,44 +186,14 @@ public class Simulation {
} }
} }
} }
else {
//The above code works, but is too memory intensive. Going to try with a 2D array of doubles.
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
}
//OUTPUT //OUTPUT
System.out.println("Graph created"); System.out.println("Graph created");
System.out.println("Finding maximum weighted matching"); System.out.println("Finding maximum weighted matching");
MaximumWeightBipartiteMatching maxWeightMatching = new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet()); MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching(); MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
Instant stop = Instant.now(); Instant stop = Instant.now();
@@ -195,7 +209,7 @@ public class Simulation {
boolean check = false; boolean check = false;
while(weightIter.hasNext()) { while(weightIter.hasNext()) {
e = weightIter.next(); e = weightIter.next();
if(graph.getEdgeWeight(e) <= lowThreshold || graph.getEdgeWeight(e) >= highThreshold) { if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
continue; continue;
} }
Integer source = graph.getEdgeSource(e); Integer source = graph.getEdgeSource(e);
@@ -234,6 +248,19 @@ public class Simulation {
System.out.println("Pairing error rate: " + expErr); System.out.println("Pairing error rate: " + expErr);
Duration time = Duration.between(start, stop); Duration time = Duration.between(start, stop);
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(use2DArrayForGraph) {
System.out.print("Graph generated from 2D array of weights");
if(useJGraphTGraphMatrixGenerator){
System.out.print(" using JGraphT's SimpleWeightedBipartiteGraphMatrixGenerator");
}
else {
System.out.print(" directly.");
}
System.out.println("");
}
else {
System.out.println("Graph generated directly from individual well maps");
}
System.out.println(""); System.out.println("");
@@ -257,6 +284,11 @@ public class Simulation {
return inverse; return inverse;
} }
private static void addEdgeWithWeight(Graph g, Integer v1, Integer v2, double weight) {
g.addEdge(v1, v2);
g.setEdgeWeight(v1, v2, weight);
}
private static void addEdgeOrWeight(Graph g, Integer v1, Integer v2) { private static void addEdgeOrWeight(Graph g, Integer v1, Integer v2) {
if (g.containsEdge(v1, v2)) { if (g.containsEdge(v1, v2)) {
g.setEdgeWeight(v1, v2, g.getEdgeWeight(g.getEdge(v1, v2)) + 1.0); g.setEdgeWeight(v1, v2, g.getEdgeWeight(g.getEdge(v1, v2)) + 1.0);
@@ -264,22 +296,22 @@ public class Simulation {
else { else {
g.addEdge(v1, v2); g.addEdge(v1, v2);
} }
//System.out.println("added!");
} }
private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target, Integer targetCount, Map<Integer,Integer> AtoBMap, Map<Integer,Integer> VtoAMap, Map<Integer, Integer> VtoBMap) { private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target,
//placeholder for real calculations Integer targetCount, Map<Integer,Integer> AtoBMap, Map<Integer,Integer> VtoAMap,
int trueCount = 0; Map<Integer, Integer> VtoBMap) {
int falseCount = 0;
System.out.println("Alpha peptide " + source + " present in " + sourceCount + " wells"); System.out.println("Alpha peptide " + source + " present in " + sourceCount + " wells");
System.out.println("Beta peptide " + target + " present in " + targetCount + " wells"); System.out.println("Beta peptide " + target + " present in " + targetCount + " wells");
System.out.println("Both alpha and beta present in " + edgeWeight + " wells"); System.out.println("Both alpha and beta present in " + edgeWeight + " wells");
if(VtoBMap.get(target).equals(AtoBMap.get(VtoAMap.get(source)))){ if(VtoBMap.get(target).equals(AtoBMap.get(VtoAMap.get(source)))){
System.out.println("True"); System.out.println("True");
System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight));
return true; return true;
} }
else { else {
System.out.println("False"); System.out.println("False");
System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight));
System.out.println("The real pair is " + source + " and " + AtoBMap.get(VtoAMap.get(source))); System.out.println("The real pair is " + source + " and " + AtoBMap.get(VtoAMap.get(source)));
return false; return false;
} }