Two ways of making graph, fixed bug with thresholds
This commit is contained in:
@@ -1,6 +1,25 @@
|
||||
import java.math.BigInteger;
|
||||
|
||||
public abstract class Equations {
|
||||
|
||||
public static double pValue(Integer w, Integer w_a, Integer w_b, Integer w_ab) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
private static double probPairedByChanc(Integer w, Integer w_a, Integer w_b, Integer w_ab){
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This works because nC(k+1) = nCk * (n-k)/(k+1)
|
||||
* Since nC0 = 1, can start there and generate all the rest.
|
||||
*/
|
||||
public static BigInteger choose(final int N, final int K) {
|
||||
BigInteger nCk = BigInteger.ONE;
|
||||
for (int k = 0; k < K; k++) {
|
||||
nCk = nCk.multiply(BigInteger.valueOf(N-k))
|
||||
.divide(BigInteger.valueOf(k+1));
|
||||
}
|
||||
return nCk;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,18 +15,28 @@ import java.util.stream.IntStream;
|
||||
|
||||
public class Simulation {
|
||||
private static Integer numDistinctCells = 4_000_000;
|
||||
private static double stdDeviation;
|
||||
private static double stdDeviation = 400;
|
||||
private static int numWells = 96;
|
||||
private static int numConcentrations = 1;
|
||||
private static double errorRate = 0.1;
|
||||
private static int[] concentrations = {50, 500, 5000};
|
||||
private static int[] concentrations = {5000};
|
||||
private static int lowThreshold = 2; //min number of shared wells to attempt pairing
|
||||
private static int highThreshold = 96; //max number of shared wells to attempt pairing
|
||||
private static int highThreshold = numWells - 6; //max number of shared wells to attempt pairing
|
||||
private static boolean autogenerateGraphFromArray = false;
|
||||
|
||||
public static void main(String[] args) {
|
||||
Instant start = Instant.now();
|
||||
|
||||
stdDeviation = Math.sqrt(numDistinctCells.doubleValue());
|
||||
//Four things to try to improve this
|
||||
//1. Run it on hardware with more memory
|
||||
//2. implement p-values and just check exhaustively for strictly-bounded weights
|
||||
// If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe
|
||||
//3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient
|
||||
//4. Implement a discrete distribution function (along the lines of the alias method)
|
||||
//
|
||||
//With array method, could also try simply zeroing high-well-count peptides before graph creation
|
||||
|
||||
//stdDeviation = Math.sqrt(numDistinctCells.doubleValue());
|
||||
//concentrations = new int[numConcentrations];
|
||||
|
||||
//OUTPUT
|
||||
@@ -93,82 +103,77 @@ public class Simulation {
|
||||
System.out.println("Maps made");
|
||||
System.out.println("Creating Graph");
|
||||
|
||||
|
||||
//construct graph
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
|
||||
//add vertices
|
||||
for(Integer v: plateVtoAMap.keySet()) {
|
||||
graph.addVertex(v);
|
||||
}
|
||||
for(Integer v: plateVtoBMap.keySet()) {
|
||||
graph.addVertex(v);
|
||||
}
|
||||
|
||||
|
||||
//Count how many wells each alpha appears in
|
||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||
//count how many wells each beta appears in
|
||||
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
||||
//add edges, where weights are number of wells the peptides share in common.
|
||||
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
|
||||
Map<Integer, Integer> wellNAlphas = null;
|
||||
Map<Integer, Integer> wellNBetas = null;
|
||||
//Count how many wells each alpha appears in
|
||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||
//count how many wells each beta appears in
|
||||
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
||||
for (int n = 0; n < numWells; n++) {
|
||||
wellNAlphas = samplePlate.assayWellsAlpha(n);
|
||||
for(Integer a: wellNAlphas.keySet()){
|
||||
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
wellNBetas = samplePlate.assayWellsBeta(n);
|
||||
for(Integer b: wellNBetas.keySet()){
|
||||
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
|
||||
for(Integer i: wellNAlphas.keySet()) {
|
||||
for(Integer j: wellNBetas.keySet()){
|
||||
addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
//The above code works, but is too memory intensive. Going to try with a 2D array of doubles.
|
||||
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
||||
Map<Integer, Integer> wellNAlphas = null;
|
||||
Map<Integer, Integer> wellNBetas = null;
|
||||
//Count how many wells each alpha appears in
|
||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||
//count how many wells each beta appears in
|
||||
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
||||
for (int n = 0; n < numWells; n++) {
|
||||
wellNAlphas = samplePlate.assayWellsAlpha(n);
|
||||
for(Integer a: wellNAlphas.keySet()){
|
||||
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
wellNBetas = samplePlate.assayWellsBeta(n);
|
||||
for(Integer b: wellNBetas.keySet()){
|
||||
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
|
||||
for(Integer i: wellNAlphas.keySet()) {
|
||||
for(Integer j: wellNBetas.keySet()){
|
||||
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
|
||||
List<Integer> alphaVertices = new ArrayList<>();
|
||||
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(alphaVertices);
|
||||
List<Integer> betaVertices = new ArrayList<>();
|
||||
betaVertices.addAll(plateVtoBMap.keySet());
|
||||
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
|
||||
*/
|
||||
//Flag in global variables determines if edges are added to graph one at a time directly, or if a graph
|
||||
//is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class
|
||||
if(!autogenerateGraphFromArray) {
|
||||
//construct graph
|
||||
//add vertices
|
||||
for(Integer v: plateVtoAMap.keySet()) {
|
||||
graph.addVertex(v);
|
||||
}
|
||||
for(Integer v: plateVtoBMap.keySet()) {
|
||||
graph.addVertex(v);
|
||||
}
|
||||
|
||||
for (int n = 0; n < numWells; n++) {
|
||||
wellNAlphas = samplePlate.assayWellsAlpha(n);
|
||||
for (Integer a : wellNAlphas.keySet()) {
|
||||
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
wellNBetas = samplePlate.assayWellsBeta(n);
|
||||
for (Integer b : wellNBetas.keySet()) {
|
||||
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
|
||||
for (Integer i : wellNAlphas.keySet()) {
|
||||
for (Integer j : wellNBetas.keySet()) {
|
||||
addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
//The above code works, but is too memory intensive. Going to try with a 2D array of doubles.
|
||||
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
||||
for (int n = 0; n < numWells; n++) {
|
||||
wellNAlphas = samplePlate.assayWellsAlpha(n);
|
||||
for(Integer a: wellNAlphas.keySet()){
|
||||
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
wellNBetas = samplePlate.assayWellsBeta(n);
|
||||
for(Integer b: wellNBetas.keySet()){
|
||||
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
|
||||
for(Integer i: wellNAlphas.keySet()) {
|
||||
for(Integer j: wellNBetas.keySet()){
|
||||
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
List<Integer> alphaVertices = new ArrayList<>();
|
||||
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(alphaVertices);
|
||||
List<Integer> betaVertices = new ArrayList<>();
|
||||
betaVertices.addAll(plateVtoBMap.keySet());
|
||||
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
|
||||
|
||||
}
|
||||
|
||||
//OUTPUT
|
||||
System.out.println("Graph created");
|
||||
@@ -190,13 +195,13 @@ public class Simulation {
|
||||
boolean check = false;
|
||||
while(weightIter.hasNext()) {
|
||||
e = weightIter.next();
|
||||
if(graph.getEdgeWeight(e) <= 2.0) {
|
||||
if(graph.getEdgeWeight(e) <= lowThreshold || graph.getEdgeWeight(e) >= highThreshold) {
|
||||
continue;
|
||||
}
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
//Need map of each peptide and number of wells it appears in.
|
||||
check =printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap);
|
||||
check = printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap);
|
||||
if(check) {
|
||||
trueCount++;
|
||||
}
|
||||
@@ -209,6 +214,7 @@ public class Simulation {
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
System.out.println("");
|
||||
System.out.println("Distinct cell population: " + nf.format(numDistinctCells));
|
||||
System.out.println("1 standard deviation of distribution: " + stdDeviation + " most common cells");
|
||||
System.out.print("Sample plate: " + numWells +" wells, ");
|
||||
System.out.print("divided into " + concentrations.length + " sections of ");
|
||||
System.out.println(Arrays.toString(concentrations) + " cells per well.");
|
||||
|
||||
Reference in New Issue
Block a user