All working, able to be built to .jar

This commit is contained in:
2021-11-12 10:41:44 -06:00
parent d39fdbee3b
commit e15cbc6672
21 changed files with 1003 additions and 12 deletions

View File

@@ -0,0 +1,494 @@
import org.jgrapht.Graph;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.math.BigDecimal;
import java.math.MathContext;
import java.text.NumberFormat;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.stream.IntStream;
public class Simulator {
private static Integer numDistinctCells = 2_000_000;
private static double stdDeviation = 200; //square root of numDistCells would approximate poisson dist
private static int numWells = 96;
private static int numConcentrations = 1;
private static double errorRate = 0.1;
private static Integer[] concentrations = {500};
private static int lowThreshold = 2; //min number of shared wells to attempt pairing
private static int highThreshold = numWells - 3; //max number of shared wells to attempt pairing
private static boolean use2DArrayForGraph = true; //Doing this is much faster for larger graphs
private static boolean useJGraphTGraphMatrixGenerator = true; //fastest option
public static CellSample generateCellSample(Integer numDistinctCells) {
List<Integer> numbers = new ArrayList<>();
IntStream.range(1, (2 * numDistinctCells) + 1).forEach(i -> numbers.add(i));
Collections.shuffle(numbers);
//Each cell represented by two numbers from the random permutation
//These represent unique alpha and beta peptides
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbers.size() - 1; i = i + 2) {
Integer tmp1 = numbers.get(i);
Integer tmp2 = numbers.get(i+1);
Integer[] tmp = {tmp1, tmp2};
distinctCells.add(tmp);
}
return new CellSample(distinctCells);
}
public static void matchCells(String filename, List<Integer[]> distinctCells, Plate samplePlate, Integer lowThreshold, Integer highThreshold){
System.out.println("Cells: " + distinctCells.size());
System.out.println("Making cell maps");
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapAlphaKey.put(cell[0], cell[1]);
}
//HashMap keyed to Betas, values Alphas
Map<Integer, Integer> distCellsMapBetaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapBetaKey.put(cell[1], cell[0]);
}
System.out.println("Cell maps made");
System.out.println("Making well maps");
Map<Integer, Integer> allAlphas = samplePlate.assayWellsAlpha();
Map<Integer, Integer> allBetas = samplePlate.assayWellsBeta();
int alphaCount = allAlphas.size();
System.out.println("all alphas count: " + alphaCount);
int betaCount = allBetas.size();
System.out.println("all betas count: " + betaCount);
System.out.println("Well maps made");
System.out.println("Making vertex maps");
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
// when I refactor to use a 2d array to make the graph
//For the autogenerator, all vertices must have distinct numbers associated with them
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = getVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
System.out.println(plateAtoVMap.size());
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
System.out.println(plateAtoVMap.size());
System.out.println("Vertex maps made");
System.out.println("Creating Graph");
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null;
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for (Integer a : wellNAlphas.keySet()) {
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for (Integer b : wellNBetas.keySet()) {
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for (Integer i : wellNAlphas.keySet()) {
for (Integer j : wellNBetas.keySet()) {
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
System.out.println("Graph created");
System.out.println("Finding maximum weighted matching");
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
System.out.println("Matching completed");
//Header for CSV file
List<String> header = new ArrayList<>();
header.add("Alpha");
header.add("Alpha well count");
header.add("Beta");
header.add("Beta well count");
header.add("Overlap well count");
header.add("Matched correctly?");
header.add("P-value");
//Results for csv file
List<List<String>> allResults = new ArrayList<>();
int size = samplePlate.getSize();
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
int trueCount = 0;
int falseCount = 0;
boolean check = false;
while(weightIter.hasNext()) {
e = weightIter.next();
if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
continue;
}
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
if(check) {
trueCount++;
}
else {
falseCount++;
}
List<String> result = new ArrayList<>();
result.add(plateVtoAMap.get(source).toString());
//alpha well count
result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
result.add(plateVtoBMap.get(target).toString());
//beta well count
result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
//overlap count
result.add(Double.toString(graph.getEdgeWeight(e)));
result.add(Boolean.toString(check));
result.add(Double.toString(Equations.pValue(size, alphaWellCounts.get(plateVtoAMap.get(source)),
betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e))));
allResults.add(result);
}
//Metadate comments for CSV file
int min = alphaCount > betaCount ? betaCount : alphaCount;
double attemptRate = (double) (trueCount + falseCount) / min;
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
List<String> comments = new ArrayList<>();
comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount);
comments.add("Pairing attempt rate: " + attemptRate);
comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount);
comments.add("Pairing error rate: " + pairingErrorRate);
//result writer
MatchingFileWriter writer = new MatchingFileWriter(filename, comments, header, allResults);
writer.writeResultsToFile();
}
public static void Simulate() {
Instant start = Instant.now();
//Four things to try to improve this
//1. Run it on hardware with more memory
//2. implement p-values and just check exhaustively for strictly-bounded weights
// If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe
//3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient
// 3.5 Could also do a version where I make the graph one edge at a time, but from a 2D array so I have the
// weights already
// Tried it. It was way better than doing the graph directly, but slightly slower than the built-in
//4. Implement a discrete distribution function (along the lines of the alias method)
//
//With array method, could also try simply zeroing high-well-count peptides before graph creation
//stdDeviation = Math.sqrt(numDistinctCells.doubleValue());
//concentrations = new int[numConcentrations];
//OUTPUT
System.out.println("Generating cells");
List<Integer> numbers = new ArrayList<>();
IntStream.range(1, (2 * numDistinctCells) + 1).forEach(i -> numbers.add(i));
Collections.shuffle(numbers);
//Each cell represented by two numbers from the random permutation
//These represent unique alpha and beta peptides
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbers.size() - 1; i = i + 2) {
Integer tmp1 = numbers.get(i);
Integer tmp2 = numbers.get(i+1);
Integer[] tmp = {tmp1, tmp2};
distinctCells.add(tmp);
}
//OUTPUT
System.out.println("Cells generated");
System.out.println("Filling wells");
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapAlphaKey.put(cell[0], cell[1]);
}
//HashMap keyed to Betas, values Alphas
Map<Integer, Integer> distCellsMapBetaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapBetaKey.put(cell[1], cell[0]);
}
Plate samplePlate = new Plate(numWells, errorRate, concentrations, stdDeviation);
samplePlate.fillWells(distinctCells);
//OUTPUT
System.out.println("Wells filled");
System.out.println("Making maps");
//All alphas on plate, with
Map<Integer, Integer> allAlphas = samplePlate.assayWellsAlpha();
Map<Integer, Integer> allBetas = samplePlate.assayWellsBeta();
int alphaCount = allAlphas.size();
int betaCount = allBetas.size();
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
// when I refactor to use a 2d array to make the graph
//For the autogenerator, all vertices must have distinct numbers associated with them
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = getVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
//OUTPUT
System.out.println("Maps made");
System.out.println("Creating Graph");
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null;
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
//Flag in global variables determines if edges are added to graph one at a time directly, or if a graph
//is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class
if(use2DArrayForGraph) {
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
if(useJGraphTGraphMatrixGenerator) {
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
} else {
for(int i = 0; i < alphaCount; i++) {
for (int j = vertexStartValue; j < betaCount + vertexStartValue; j++){
graph.addVertex(i);
graph.addVertex(j);
addEdgeWithWeight(graph, i, (j), weights[i][j - vertexStartValue]);
}
}
}
}
else {
//construct graph
//add vertices
for(Integer v: plateVtoAMap.keySet()) {
graph.addVertex(v);
}
for(Integer v: plateVtoBMap.keySet()) {
graph.addVertex(v);
}
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for (Integer a : wellNAlphas.keySet()) {
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for (Integer b : wellNBetas.keySet()) {
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for (Integer i : wellNAlphas.keySet()) {
for (Integer j : wellNBetas.keySet()) {
addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j));
}
}
}
}
//OUTPUT
System.out.println("Graph created");
System.out.println("Finding maximum weighted matching");
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
Instant stop = Instant.now();
//OUTPUT
System.out.println("Matching completed");
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
int trueCount = 0;
int falseCount = 0;
boolean check = false;
while(weightIter.hasNext()) {
e = weightIter.next();
if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
continue;
}
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
//Need map of each peptide and number of wells it appears in.
check = printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap);
if(check) {
trueCount++;
}
else {
falseCount++;
}
}
//RESULTS SUMMARY
NumberFormat nf = NumberFormat.getInstance(Locale.US);
System.out.println("");
System.out.println("Distinct cell population: " + nf.format(numDistinctCells));
System.out.println("1 standard deviation of distribution: " + stdDeviation + " most common cells");
System.out.print("Sample plate: " + numWells +" wells, ");
System.out.print("divided into " + concentrations.length + " sections of ");
System.out.println(Arrays.toString(concentrations) + " cells per well.");
System.out.print("Attempted to match alpha/beta overlaps of at least " + lowThreshold + " wells ");
System.out.println("and no more than " + highThreshold + " wells.");
System.out.println(nf.format(alphaCount) + " alpha peptides found");
System.out.println(nf.format(betaCount) + " beta peptides found");
MathContext mc = new MathContext(3);
int min = alphaCount > betaCount ? betaCount : alphaCount;
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attR = new BigDecimal(attemptRate, mc);
System.out.println("Pairing attempt rate: " + attR);
System.out.println("Correct pairings: " + nf.format(trueCount));
System.out.println("Incorrect pairings: " + nf.format(falseCount));
double experimentalError = (double) falseCount / (trueCount + falseCount);
BigDecimal expErr = new BigDecimal(experimentalError, mc);
System.out.println("Pairing error rate: " + expErr);
Duration time = Duration.between(start, stop);
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(use2DArrayForGraph) {
System.out.print("Graph generated from 2D array of weights");
if(useJGraphTGraphMatrixGenerator){
System.out.print(" using JGraphT's SimpleWeightedBipartiteGraphMatrixGenerator");
}
else {
System.out.print(" directly.");
}
System.out.println("");
}
else {
System.out.println("Graph generated directly from individual well maps");
}
System.out.println("");
}
private static Map<Integer, Integer> getVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
Integer index = startValue;
for (Integer k: peptides.keySet()) {
map.put(index, k);
index++;
}
return map;
}
private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
Map<Integer, Integer> inverse = new HashMap<>();
for(Integer k: map.keySet()) {
inverse.put(map.get(k), k);
}
return inverse;
}
private static void addEdgeWithWeight(Graph g, Integer v1, Integer v2, double weight) {
g.addEdge(v1, v2);
g.setEdgeWeight(v1, v2, weight);
}
private static void addEdgeOrWeight(Graph g, Integer v1, Integer v2) {
if (g.containsEdge(v1, v2)) {
g.setEdgeWeight(v1, v2, g.getEdgeWeight(g.getEdge(v1, v2)) + 1.0);
}
else {
g.addEdge(v1, v2);
}
}
private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target,
Integer targetCount, Map<Integer,Integer> AtoBMap, Map<Integer,Integer> VtoAMap,
Map<Integer, Integer> VtoBMap) {
System.out.println("Alpha peptide " + source + " present in " + sourceCount + " wells");
System.out.println("Beta peptide " + target + " present in " + targetCount + " wells");
System.out.println("Both alpha and beta present in " + edgeWeight + " wells");
if(VtoBMap.get(target).equals(AtoBMap.get(VtoAMap.get(source)))){
System.out.println("True");
System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight));
return true;
}
else {
System.out.println("False");
System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight));
System.out.println("The real pair is " + source + " and " + AtoBMap.get(VtoAMap.get(source)));
return false;
}
}
}