Two ways of making graph, fixed bug with thresholds

This commit is contained in:
2021-11-10 09:15:03 -06:00
parent 8280382ca7
commit 93cac1d38a
2 changed files with 100 additions and 75 deletions

View File

@@ -1,6 +1,25 @@
import java.math.BigInteger;
public abstract class Equations {
public static double pValue(Integer w, Integer w_a, Integer w_b, Integer w_ab) {
return 1.0;
}
private static double probPairedByChanc(Integer w, Integer w_a, Integer w_b, Integer w_ab){
return 1.0;
}
/*
* This works because nC(k+1) = nCk * (n-k)/(k+1)
* Since nC0 = 1, can start there and generate all the rest.
*/
public static BigInteger choose(final int N, final int K) {
BigInteger nCk = BigInteger.ONE;
for (int k = 0; k < K; k++) {
nCk = nCk.multiply(BigInteger.valueOf(N-k))
.divide(BigInteger.valueOf(k+1));
}
return nCk;
}
}

View File

@@ -15,18 +15,28 @@ import java.util.stream.IntStream;
public class Simulation {
private static Integer numDistinctCells = 4_000_000;
private static double stdDeviation;
private static double stdDeviation = 400;
private static int numWells = 96;
private static int numConcentrations = 1;
private static double errorRate = 0.1;
private static int[] concentrations = {50, 500, 5000};
private static int[] concentrations = {5000};
private static int lowThreshold = 2; //min number of shared wells to attempt pairing
private static int highThreshold = 96; //max number of shared wells to attempt pairing
private static int highThreshold = numWells - 6; //max number of shared wells to attempt pairing
private static boolean autogenerateGraphFromArray = false;
public static void main(String[] args) {
Instant start = Instant.now();
stdDeviation = Math.sqrt(numDistinctCells.doubleValue());
//Four things to try to improve this
//1. Run it on hardware with more memory
//2. implement p-values and just check exhaustively for strictly-bounded weights
// If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe
//3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient
//4. Implement a discrete distribution function (along the lines of the alias method)
//
//With array method, could also try simply zeroing high-well-count peptides before graph creation
//stdDeviation = Math.sqrt(numDistinctCells.doubleValue());
//concentrations = new int[numConcentrations];
//OUTPUT
@@ -93,82 +103,77 @@ public class Simulation {
System.out.println("Maps made");
System.out.println("Creating Graph");
//construct graph
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
//add vertices
for(Integer v: plateVtoAMap.keySet()) {
graph.addVertex(v);
}
for(Integer v: plateVtoBMap.keySet()) {
graph.addVertex(v);
}
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null;
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j));
}
}
}
/*
//The above code works, but is too memory intensive. Going to try with a 2D array of doubles.
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null;
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
*/
//Flag in global variables determines if edges are added to graph one at a time directly, or if a graph
//is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class
if(!autogenerateGraphFromArray) {
//construct graph
//add vertices
for(Integer v: plateVtoAMap.keySet()) {
graph.addVertex(v);
}
for(Integer v: plateVtoBMap.keySet()) {
graph.addVertex(v);
}
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for (Integer a : wellNAlphas.keySet()) {
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for (Integer b : wellNBetas.keySet()) {
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for (Integer i : wellNAlphas.keySet()) {
for (Integer j : wellNBetas.keySet()) {
addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j));
}
}
}
}
else {
//The above code works, but is too memory intensive. Going to try with a 2D array of doubles.
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
}
//OUTPUT
System.out.println("Graph created");
@@ -190,13 +195,13 @@ public class Simulation {
boolean check = false;
while(weightIter.hasNext()) {
e = weightIter.next();
if(graph.getEdgeWeight(e) <= 2.0) {
if(graph.getEdgeWeight(e) <= lowThreshold || graph.getEdgeWeight(e) >= highThreshold) {
continue;
}
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
//Need map of each peptide and number of wells it appears in.
check =printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap);
check = printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap);
if(check) {
trueCount++;
}
@@ -209,6 +214,7 @@ public class Simulation {
NumberFormat nf = NumberFormat.getInstance(Locale.US);
System.out.println("");
System.out.println("Distinct cell population: " + nf.format(numDistinctCells));
System.out.println("1 standard deviation of distribution: " + stdDeviation + " most common cells");
System.out.print("Sample plate: " + numWells +" wells, ");
System.out.print("divided into " + concentrations.length + " sections of ");
System.out.println(Arrays.toString(concentrations) + " cells per well.");