From d39fdbee3b709fb99283f01e398ab2c8340b0408 Mon Sep 17 00:00:00 2001 From: efischer Date: Wed, 10 Nov 2021 11:18:31 -0600 Subject: [PATCH] Three ways of making graph, but [...]MatrixGenerator is fastest --- src/main/java/Equations.java | 24 +++++-- src/main/java/Plate.java | 2 + src/main/java/Simulation.java | 124 +++++++++++++++++++++------------- 3 files changed, 100 insertions(+), 50 deletions(-) diff --git a/src/main/java/Equations.java b/src/main/java/Equations.java index 3aa6b6f..471656d 100644 --- a/src/main/java/Equations.java +++ b/src/main/java/Equations.java @@ -1,13 +1,29 @@ +import java.math.BigDecimal; import java.math.BigInteger; +import java.math.MathContext; public abstract class Equations { - public static double pValue(Integer w, Integer w_a, Integer w_b, Integer w_ab) { - return 1.0; + public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) { + int w_ab = (int) w_ab_d; + double pv = 0.0; + Integer maxOverlap = w_a >= w_b ? w_b : w_a; + for(int i = w_ab; i <= maxOverlap; i++){ + pv += probPairedByChance(w, i, w_a, w_b); + } + return pv; } - private static double probPairedByChanc(Integer w, Integer w_a, Integer w_b, Integer w_ab){ - return 1.0; + private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){ + BigInteger numer1 = choose(w, w_ab); + BigInteger numer2 = choose(w - w_ab, w_a - w_ab); + BigInteger numer3 = choose(w - w_a, w_b - w_ab); + BigInteger numer = numer1.multiply(numer2.multiply(numer3)); + BigInteger denom = choose(w, w_a).multiply(choose(w, w_b)); + BigDecimal numer_d = new BigDecimal(numer); + BigDecimal denom_d = new BigDecimal(denom); + BigDecimal prob = numer_d.divide(denom_d, MathContext.DECIMAL64); + return prob.doubleValue(); } /* diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 5b5d81a..c3f7a91 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -1,6 +1,8 @@ import java.sql.Array; import java.util.*; +//Need to write function to output plate to a file that I can read in. + public class Plate { private List> wells; private Random rand = new Random(); diff --git a/src/main/java/Simulation.java b/src/main/java/Simulation.java index 7300a00..6aeff78 100644 --- a/src/main/java/Simulation.java +++ b/src/main/java/Simulation.java @@ -14,15 +14,16 @@ import java.util.*; import java.util.stream.IntStream; public class Simulation { - private static Integer numDistinctCells = 4_000_000; - private static double stdDeviation = 400; + private static Integer numDistinctCells = 15_000_000; + private static double stdDeviation = 1000; //square root of numDistCells would approximate poisson dist, supposedly private static int numWells = 96; private static int numConcentrations = 1; private static double errorRate = 0.1; - private static int[] concentrations = {5000}; + private static int[] concentrations = {500}; private static int lowThreshold = 2; //min number of shared wells to attempt pairing - private static int highThreshold = numWells - 6; //max number of shared wells to attempt pairing - private static boolean autogenerateGraphFromArray = false; + private static int highThreshold = numWells - 3; //max number of shared wells to attempt pairing + private static boolean use2DArrayForGraph = true; //Doing this is much faster for larger graphs + private static boolean useJGraphTGraphMatrixGenerator = true; //fastest option public static void main(String[] args) { Instant start = Instant.now(); @@ -32,6 +33,9 @@ public class Simulation { //2. implement p-values and just check exhaustively for strictly-bounded weights // If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe //3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient + // 3.5 Could also do a version where I make the graph one edge at a time, but from a 2D array so I have the + // weights already + // Tried it. It was way better than doing the graph directly, but slightly slower than the built-in //4. Implement a discrete distribution function (along the lines of the alias method) // //With array method, could also try simply zeroing high-well-count peptides before graph creation @@ -87,6 +91,7 @@ public class Simulation { //Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to // when I refactor to use a 2d array to make the graph + //For the autogenerator, all vertices must have distinct numbers associated with them Integer vertexStartValue = 0; //keys are sequential integer vertices, values are alphas Map plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue); @@ -111,11 +116,50 @@ public class Simulation { //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class Map wellNAlphas = null; Map wellNBetas = null; - SimpleWeightedGraph graph = new SimpleWeightedGraph(DefaultWeightedEdge.class); + SimpleWeightedGraph graph = + new SimpleWeightedGraph(DefaultWeightedEdge.class); //Flag in global variables determines if edges are added to graph one at a time directly, or if a graph //is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class - if(!autogenerateGraphFromArray) { + if(use2DArrayForGraph) { + double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; + for (int n = 0; n < numWells; n++) { + wellNAlphas = samplePlate.assayWellsAlpha(n); + for(Integer a: wellNAlphas.keySet()){ + alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); + } + wellNBetas = samplePlate.assayWellsBeta(n); + for(Integer b: wellNBetas.keySet()){ + betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); + } + + for(Integer i: wellNAlphas.keySet()) { + for(Integer j: wellNBetas.keySet()){ + weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0; + } + } + } + if(useJGraphTGraphMatrixGenerator) { + SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); + List alphaVertices = new ArrayList<>(); + alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + graphGenerator.first(alphaVertices); + List betaVertices = new ArrayList<>(); + betaVertices.addAll(plateVtoBMap.keySet()); + graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry + graphGenerator.weights(weights); + graphGenerator.generateGraph(graph); + } else { + for(int i = 0; i < alphaCount; i++) { + for (int j = vertexStartValue; j < betaCount + vertexStartValue; j++){ + graph.addVertex(i); + graph.addVertex(j); + addEdgeWithWeight(graph, i, (j), weights[i][j - vertexStartValue]); + } + } + } + } + else { //construct graph //add vertices for(Integer v: plateVtoAMap.keySet()) { @@ -142,44 +186,14 @@ public class Simulation { } } } - else { - //The above code works, but is too memory intensive. Going to try with a 2D array of doubles. - double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; - for (int n = 0; n < numWells; n++) { - wellNAlphas = samplePlate.assayWellsAlpha(n); - for(Integer a: wellNAlphas.keySet()){ - alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); - } - wellNBetas = samplePlate.assayWellsBeta(n); - for(Integer b: wellNBetas.keySet()){ - betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); - } - - for(Integer i: wellNAlphas.keySet()) { - for(Integer j: wellNBetas.keySet()){ - weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0; - } - } - } - - SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); - List alphaVertices = new ArrayList<>(); - alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry - graphGenerator.first(alphaVertices); - List betaVertices = new ArrayList<>(); - betaVertices.addAll(plateVtoBMap.keySet()); - graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry - graphGenerator.weights(weights); - graphGenerator.generateGraph(graph); - - - } //OUTPUT System.out.println("Graph created"); System.out.println("Finding maximum weighted matching"); - MaximumWeightBipartiteMatching maxWeightMatching = new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet()); + MaximumWeightBipartiteMatching maxWeightMatching = + new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet()); + MatchingAlgorithm.Matching graphMatching = maxWeightMatching.getMatching(); Instant stop = Instant.now(); @@ -195,7 +209,7 @@ public class Simulation { boolean check = false; while(weightIter.hasNext()) { e = weightIter.next(); - if(graph.getEdgeWeight(e) <= lowThreshold || graph.getEdgeWeight(e) >= highThreshold) { + if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { continue; } Integer source = graph.getEdgeSource(e); @@ -234,6 +248,19 @@ public class Simulation { System.out.println("Pairing error rate: " + expErr); Duration time = Duration.between(start, stop); System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); + if(use2DArrayForGraph) { + System.out.print("Graph generated from 2D array of weights"); + if(useJGraphTGraphMatrixGenerator){ + System.out.print(" using JGraphT's SimpleWeightedBipartiteGraphMatrixGenerator"); + } + else { + System.out.print(" directly."); + } + System.out.println(""); + } + else { + System.out.println("Graph generated directly from individual well maps"); + } System.out.println(""); @@ -257,6 +284,11 @@ public class Simulation { return inverse; } + private static void addEdgeWithWeight(Graph g, Integer v1, Integer v2, double weight) { + g.addEdge(v1, v2); + g.setEdgeWeight(v1, v2, weight); + } + private static void addEdgeOrWeight(Graph g, Integer v1, Integer v2) { if (g.containsEdge(v1, v2)) { g.setEdgeWeight(v1, v2, g.getEdgeWeight(g.getEdge(v1, v2)) + 1.0); @@ -264,22 +296,22 @@ public class Simulation { else { g.addEdge(v1, v2); } - //System.out.println("added!"); } - private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target, Integer targetCount, Map AtoBMap, Map VtoAMap, Map VtoBMap) { - //placeholder for real calculations - int trueCount = 0; - int falseCount = 0; + private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target, + Integer targetCount, Map AtoBMap, Map VtoAMap, + Map VtoBMap) { System.out.println("Alpha peptide " + source + " present in " + sourceCount + " wells"); System.out.println("Beta peptide " + target + " present in " + targetCount + " wells"); System.out.println("Both alpha and beta present in " + edgeWeight + " wells"); if(VtoBMap.get(target).equals(AtoBMap.get(VtoAMap.get(source)))){ System.out.println("True"); + System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight)); return true; } else { System.out.println("False"); + System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight)); System.out.println("The real pair is " + source + " and " + AtoBMap.get(VtoAMap.get(source))); return false; }