Implemented storing graphs in memory for multiple pairing experiments

This commit is contained in:
2022-02-22 21:30:00 -06:00
parent fd2ec76b71
commit 68ee9e4bb6
6 changed files with 208 additions and 99 deletions

View File

@@ -1,14 +1,42 @@
//main class. Only job is to choose which interface to use.
//main class. Only job is to choose which interface to use, and hold graph data in memory
public class BiGpairSEQ {
private static void main(String[] args) {
private static GraphWithMapData graphInMemory = null;
private static String graphFilename = null;
public static void main(String[] args) {
if (args.length == 0) {
InteractiveInterface.startInteractive();
}
else {
//This will be uncommented when command line arguments are fixed.
//This will be uncommented when command line arguments are re-implemented.
//CommandLineInterface.startCLI(args);
System.out.println("Command line arguments are still being re-implemented.");
}
}
public static GraphWithMapData getGraph() {
return graphInMemory;
}
public static void setGraph(GraphWithMapData g) {
if (graphInMemory != null) {
clearGraph();
}
graphInMemory = g;
}
public static void clearGraph() {
graphInMemory = null;
System.gc();
}
public static String getGraphFilename() {
return graphFilename;
}
public static void setGraphFilename(String filename) {
graphFilename = filename;
}
}

View File

@@ -0,0 +1,83 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public abstract class GraphModificationFunctions {
//remove over- and under-weight edges
public static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
int low, int high) {
List<Integer[]> removedEdges = new ArrayList<>();
for(DefaultWeightedEdge e: graph.edgeSet()){
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Integer[] edge = {source, target, weight};
removedEdges.add(edge);
graph.removeEdge(e);
}
}
return removedEdges;
}
//Remove edges for pairs with large occupancy discrepancy
public static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
Integer maxOccupancyDifference) {
List<Integer[]> removedEdges = new ArrayList<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Integer[] edge = {source, target, weight};
removedEdges.add(edge);
graph.removeEdge(e);
}
}
return removedEdges;
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
public static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
Integer minOverlapPercent) {
List<Integer[]> removedEdges = new ArrayList<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
double weight = graph.getEdgeWeight(e);
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Integer intWeight = (int) graph.getEdgeWeight(e);
Integer[] edge = {source, target, intWeight};
removedEdges.add(edge);
graph.removeEdge(e);
}
}
return removedEdges;
}
public static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
List<Integer[]> removedEdges) {
for (Integer[] edge : removedEdges) {
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
graph.setEdgeWeight(e, (double) edge[2]);
}
}
}

View File

@@ -243,7 +243,7 @@ public class InteractiveInterface {
//Simulate matching and output CSV file of results
private static void matchCDR3s() throws IOException {
String filename = null;
String dataFilename = null;
String graphFilename = null;
Integer lowThreshold = 0;
Integer highThreshold = Integer.MAX_VALUE;
Integer maxOccupancyDiff = Integer.MAX_VALUE;
@@ -251,7 +251,7 @@ public class InteractiveInterface {
try {
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
System.out.println("Please enter name of an existing graph and occupancy data file: ");
dataFilename = sc.next();
graphFilename = sc.next();
System.out.println("The matching results will be written to a file.");
System.out.print("Please enter a name for the output file: ");
filename = sc.next();
@@ -274,16 +274,25 @@ public class InteractiveInterface {
System.out.println(ex);
sc.next();
}
//read object data from file
System.out.println("Reading graph data from file. This may take some time");
System.out.println("File I/O time is not included in results");
assert dataFilename != null;
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFilename);
GraphWithMapData data = dataReader.getData();
//set source file name
data.setSourceFilename(dataFilename);
assert graphFilename != null;
//check if this is the same graph we already have in memory.
GraphWithMapData data;
if(!(graphFilename.equals(BiGpairSEQ.getGraphFilename()) || BiGpairSEQ.getGraph() == null)) {
BiGpairSEQ.clearGraph();
//read object data from file
System.out.println("Reading graph data from file. This may take some time");
System.out.println("File I/O time is not included in results");
GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename);
data = dataReader.getData();
//set new graph in memory and new filename
BiGpairSEQ.setGraph(data);
BiGpairSEQ.setGraphFilename(graphFilename);
}
else {
data = BiGpairSEQ.getGraph();
}
//simulate matching
MatchingResult results = Simulator.matchCDR3s(data, dataFilename, lowThreshold, highThreshold, maxOccupancyDiff,
MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
minOverlapPercent, true);
//write results to file
assert filename != null;

View File

@@ -11,7 +11,6 @@ import java.util.List;
public class MatchingFileWriter {
private String filename;
private String sourceFileName;
private List<String> comments;
private List<String> headers;
private List<List<String>> allResults;
@@ -21,7 +20,6 @@ public class MatchingFileWriter {
filename = filename + ".csv";
}
this.filename = filename;
this.sourceFileName = result.getSourceFileName();
this.comments = result.getComments();
this.headers = result.getHeaders();
this.allResults = result.getAllResults();

View File

@@ -4,7 +4,7 @@ import java.util.List;
import java.util.Map;
public class MatchingResult {
private final String sourceFile;
private final Map<String, String> metadata;
private final List<String> comments;
private final List<String> headers;
@@ -12,16 +12,15 @@ public class MatchingResult {
private final Map<Integer, Integer> matchMap;
private final Duration time;
public MatchingResult(String sourceFileName, Map<String, String> metadata, List<String> headers,
public MatchingResult(Map<String, String> metadata, List<String> headers,
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
this.sourceFile = sourceFileName;
/*
* POSSIBLE KEYS FOR METADATA MAP ARE:
* sample plate filename
* graph filename
* well populations
* total alphas found
* total betas found
* sample plate filename *
* graph filename *
* well populations *
* total alphas found *
* total betas found *
* high overlap threshold
* low overlap threshold
* maximum occupancy difference
@@ -66,7 +65,32 @@ public class MatchingResult {
return time;
}
public String getSourceFileName() {
return sourceFile;
public String getPlateFilename() {
return metadata.get("sample plate filename");
}
public String getGraphFilename() {
return metadata.get("graph filename");
}
public Integer[] getWellPopulations() {
List<Integer> wellPopulations = new ArrayList<>();
String popString = metadata.get("well populations");
for (String p : popString.split(", ")) {
wellPopulations.add(Integer.parseInt(p));
}
Integer[] popArray = new Integer[wellPopulations.size()];
return wellPopulations.toArray(popArray);
}
public Integer getAlphaCount() {
return Integer.parseInt(metadata.get("total alpha count"));
}
public Integer getBetaCount() {
return Integer.parseInt(metadata.get("total beta count"));
}
//put in the rest of these methods following the same pattern
}

View File

@@ -1,3 +1,4 @@
import org.jgrapht.Graph;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
@@ -49,6 +50,7 @@ public class Simulator {
Instant start = Instant.now();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
int numWells = samplePlate.getSize();
if(verbose){System.out.println("Making cell maps");}
@@ -63,15 +65,11 @@ public class Simulator {
if(verbose){System.out.println("All alphas count: " + alphaCount);}
int betaCount = allBetas.size();
if(verbose){System.out.println("All betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy sequences because they have no signal value.
//Remove sequences with total occupancy below minimum pair overlap threshold
if(verbose){System.out.println("Removing sequences present in all wells.");}
//if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
filterByOccupancyThreshold(allBetas, 1, numWells - 1);
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
if(verbose){System.out.println("Sequences removed");}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
@@ -136,6 +134,7 @@ public class Simulator {
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
//Set source file name in graph to name of sample plate
output.setSourceFilename(samplePlate.getSourceFileName());
//return GraphWithMapData object
return output;
@@ -146,6 +145,8 @@ public class Simulator {
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
//Integer arrays will contain TO VERTEX, FROM VERTEX, and WEIGHT (which I'll need to cast to double)
List<Integer[]> removedEdges = new ArrayList<>();
int numWells = data.getNumWells();
Integer alphaCount = data.getAlphaCount();
Integer betaCount = data.getBetaCount();
@@ -156,24 +157,26 @@ public class Simulator {
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
//remove weights outside given overlap thresholds
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold));
if(verbose){System.out.println("Over- and under-weight edges removed");}
//Filter by overlap size
//remove edges between vertices with too small an overlap size, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
" percent of vertex occupancy value.");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts,
plateVtoAMap, plateVtoBMap, minOverlapPercent));
if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
+ maxOccupancyDifference);}
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
maxOccupancyDifference);
removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts,
plateVtoAMap, plateVtoBMap, maxOccupancyDifference));
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"set to 0.0");}
"removed");}
//Find Maximum Weighted Matching
//using jheaps library class PairingHeap for improved efficiency
if(verbose){System.out.println("Finding maximum weighted matching");}
@@ -239,8 +242,10 @@ public class Simulator {
//Metadata comments for CSV file
int min = Math.min(alphaCount, betaCount);
//rate of attempted matching
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
//rate of pairing error
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
//get list of well concentrations
@@ -272,13 +277,19 @@ public class Simulator {
metadata.put("incorrect pairing count", Integer.toString(falseCount));
metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
metadata.put("simulation time", nf.format(time.toSeconds()));
MatchingResult output = new MatchingResult(data.getSourceFilename(), metadata, header, allResults, matchMap, time);
//create MatchingResult object
MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
if(verbose){
for(String s: output.getComments()){
System.out.println(s);
}
}
//put the removed edges back on the graph
System.out.println("Restoring removed edges to graph.");
GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
//return MatchingResult object
return output;
}
@@ -587,6 +598,18 @@ public class Simulator {
// return output;
// }
//Remove sequences based on occupancy
public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
List<Integer> noise = new ArrayList<>();
for(Integer k: wellMap.keySet()){
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
noise.add(k);
}
}
for(Integer k: noise) {
wellMap.remove(k);
}
}
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
@@ -630,62 +653,6 @@ public class Simulator {
}
}
private static void filterByOccupancyThreshold(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
int low, int high) {
for(DefaultWeightedEdge e: graph.edgeSet()){
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
graph.setEdgeWeight(e, 0.0);
}
}
}
private static void filterByOccupancyThreshold(Map<Integer, Integer> wellMap, int low, int high){
List<Integer> noise = new ArrayList<>();
for(Integer k: wellMap.keySet()){
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
noise.add(k);
}
}
for(Integer k: noise) {
wellMap.remove(k);
}
}
//Remove edges for pairs with large occupancy discrepancy
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
Integer maxOccupancyDifference) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
//Adjust this to something cleverer later
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
graph.setEdgeWeight(e, 0.0);
}
}
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
Integer minOverlapPercent) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
double weight = graph.getEdgeWeight(e);
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
graph.setEdgeWeight(e, 0.0);
}
}
}
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
int valueSequenceIndex){
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();