Implemented storing graphs in memory for multiple pairing experiments
This commit is contained in:
@@ -1,14 +1,42 @@
|
||||
//main class. Only job is to choose which interface to use.
|
||||
//main class. Only job is to choose which interface to use, and hold graph data in memory
|
||||
public class BiGpairSEQ {
|
||||
|
||||
private static void main(String[] args) {
|
||||
private static GraphWithMapData graphInMemory = null;
|
||||
private static String graphFilename = null;
|
||||
|
||||
public static void main(String[] args) {
|
||||
if (args.length == 0) {
|
||||
InteractiveInterface.startInteractive();
|
||||
}
|
||||
else {
|
||||
//This will be uncommented when command line arguments are fixed.
|
||||
//This will be uncommented when command line arguments are re-implemented.
|
||||
//CommandLineInterface.startCLI(args);
|
||||
System.out.println("Command line arguments are still being re-implemented.");
|
||||
}
|
||||
}
|
||||
|
||||
public static GraphWithMapData getGraph() {
|
||||
return graphInMemory;
|
||||
}
|
||||
|
||||
public static void setGraph(GraphWithMapData g) {
|
||||
if (graphInMemory != null) {
|
||||
clearGraph();
|
||||
}
|
||||
graphInMemory = g;
|
||||
}
|
||||
|
||||
public static void clearGraph() {
|
||||
graphInMemory = null;
|
||||
System.gc();
|
||||
}
|
||||
|
||||
public static String getGraphFilename() {
|
||||
return graphFilename;
|
||||
}
|
||||
|
||||
public static void setGraphFilename(String filename) {
|
||||
graphFilename = filename;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
83
src/main/java/GraphModificationFunctions.java
Normal file
83
src/main/java/GraphModificationFunctions.java
Normal file
@@ -0,0 +1,83 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public abstract class GraphModificationFunctions {
|
||||
|
||||
//remove over- and under-weight edges
|
||||
public static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
int low, int high) {
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for(DefaultWeightedEdge e: graph.edgeSet()){
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Integer[] edge = {source, target, weight};
|
||||
removedEdges.add(edge);
|
||||
graph.removeEdge(e);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy
|
||||
public static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer maxOccupancyDifference) {
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Integer[] edge = {source, target, weight};
|
||||
removedEdges.add(edge);
|
||||
graph.removeEdge(e);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||
public static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer minOverlapPercent) {
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
Integer intWeight = (int) graph.getEdgeWeight(e);
|
||||
Integer[] edge = {source, target, intWeight};
|
||||
removedEdges.add(edge);
|
||||
graph.removeEdge(e);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
public static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
List<Integer[]> removedEdges) {
|
||||
for (Integer[] edge : removedEdges) {
|
||||
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
|
||||
graph.setEdgeWeight(e, (double) edge[2]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -243,7 +243,7 @@ public class InteractiveInterface {
|
||||
//Simulate matching and output CSV file of results
|
||||
private static void matchCDR3s() throws IOException {
|
||||
String filename = null;
|
||||
String dataFilename = null;
|
||||
String graphFilename = null;
|
||||
Integer lowThreshold = 0;
|
||||
Integer highThreshold = Integer.MAX_VALUE;
|
||||
Integer maxOccupancyDiff = Integer.MAX_VALUE;
|
||||
@@ -251,7 +251,7 @@ public class InteractiveInterface {
|
||||
try {
|
||||
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
|
||||
System.out.println("Please enter name of an existing graph and occupancy data file: ");
|
||||
dataFilename = sc.next();
|
||||
graphFilename = sc.next();
|
||||
System.out.println("The matching results will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
@@ -274,16 +274,25 @@ public class InteractiveInterface {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
//read object data from file
|
||||
System.out.println("Reading graph data from file. This may take some time");
|
||||
System.out.println("File I/O time is not included in results");
|
||||
assert dataFilename != null;
|
||||
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFilename);
|
||||
GraphWithMapData data = dataReader.getData();
|
||||
//set source file name
|
||||
data.setSourceFilename(dataFilename);
|
||||
assert graphFilename != null;
|
||||
//check if this is the same graph we already have in memory.
|
||||
GraphWithMapData data;
|
||||
if(!(graphFilename.equals(BiGpairSEQ.getGraphFilename()) || BiGpairSEQ.getGraph() == null)) {
|
||||
BiGpairSEQ.clearGraph();
|
||||
//read object data from file
|
||||
System.out.println("Reading graph data from file. This may take some time");
|
||||
System.out.println("File I/O time is not included in results");
|
||||
GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename);
|
||||
data = dataReader.getData();
|
||||
//set new graph in memory and new filename
|
||||
BiGpairSEQ.setGraph(data);
|
||||
BiGpairSEQ.setGraphFilename(graphFilename);
|
||||
}
|
||||
else {
|
||||
data = BiGpairSEQ.getGraph();
|
||||
}
|
||||
//simulate matching
|
||||
MatchingResult results = Simulator.matchCDR3s(data, dataFilename, lowThreshold, highThreshold, maxOccupancyDiff,
|
||||
MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
|
||||
minOverlapPercent, true);
|
||||
//write results to file
|
||||
assert filename != null;
|
||||
|
||||
@@ -11,7 +11,6 @@ import java.util.List;
|
||||
public class MatchingFileWriter {
|
||||
|
||||
private String filename;
|
||||
private String sourceFileName;
|
||||
private List<String> comments;
|
||||
private List<String> headers;
|
||||
private List<List<String>> allResults;
|
||||
@@ -21,7 +20,6 @@ public class MatchingFileWriter {
|
||||
filename = filename + ".csv";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.sourceFileName = result.getSourceFileName();
|
||||
this.comments = result.getComments();
|
||||
this.headers = result.getHeaders();
|
||||
this.allResults = result.getAllResults();
|
||||
|
||||
@@ -4,7 +4,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class MatchingResult {
|
||||
private final String sourceFile;
|
||||
|
||||
private final Map<String, String> metadata;
|
||||
private final List<String> comments;
|
||||
private final List<String> headers;
|
||||
@@ -12,16 +12,15 @@ public class MatchingResult {
|
||||
private final Map<Integer, Integer> matchMap;
|
||||
private final Duration time;
|
||||
|
||||
public MatchingResult(String sourceFileName, Map<String, String> metadata, List<String> headers,
|
||||
public MatchingResult(Map<String, String> metadata, List<String> headers,
|
||||
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
||||
this.sourceFile = sourceFileName;
|
||||
/*
|
||||
* POSSIBLE KEYS FOR METADATA MAP ARE:
|
||||
* sample plate filename
|
||||
* graph filename
|
||||
* well populations
|
||||
* total alphas found
|
||||
* total betas found
|
||||
* sample plate filename *
|
||||
* graph filename *
|
||||
* well populations *
|
||||
* total alphas found *
|
||||
* total betas found *
|
||||
* high overlap threshold
|
||||
* low overlap threshold
|
||||
* maximum occupancy difference
|
||||
@@ -66,7 +65,32 @@ public class MatchingResult {
|
||||
return time;
|
||||
}
|
||||
|
||||
public String getSourceFileName() {
|
||||
return sourceFile;
|
||||
public String getPlateFilename() {
|
||||
return metadata.get("sample plate filename");
|
||||
}
|
||||
|
||||
public String getGraphFilename() {
|
||||
return metadata.get("graph filename");
|
||||
}
|
||||
|
||||
public Integer[] getWellPopulations() {
|
||||
List<Integer> wellPopulations = new ArrayList<>();
|
||||
String popString = metadata.get("well populations");
|
||||
for (String p : popString.split(", ")) {
|
||||
wellPopulations.add(Integer.parseInt(p));
|
||||
}
|
||||
Integer[] popArray = new Integer[wellPopulations.size()];
|
||||
return wellPopulations.toArray(popArray);
|
||||
}
|
||||
|
||||
public Integer getAlphaCount() {
|
||||
return Integer.parseInt(metadata.get("total alpha count"));
|
||||
}
|
||||
|
||||
public Integer getBetaCount() {
|
||||
return Integer.parseInt(metadata.get("total beta count"));
|
||||
}
|
||||
|
||||
//put in the rest of these methods following the same pattern
|
||||
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import org.jgrapht.Graph;
|
||||
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
||||
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
@@ -49,6 +50,7 @@ public class Simulator {
|
||||
Instant start = Instant.now();
|
||||
int[] alphaIndex = {cdr3AlphaIndex};
|
||||
int[] betaIndex = {cdr3BetaIndex};
|
||||
|
||||
int numWells = samplePlate.getSize();
|
||||
|
||||
if(verbose){System.out.println("Making cell maps");}
|
||||
@@ -63,15 +65,11 @@ public class Simulator {
|
||||
if(verbose){System.out.println("All alphas count: " + alphaCount);}
|
||||
int betaCount = allBetas.size();
|
||||
if(verbose){System.out.println("All betas count: " + betaCount);}
|
||||
|
||||
if(verbose){System.out.println("Well maps made");}
|
||||
|
||||
//Remove saturating-occupancy sequences because they have no signal value.
|
||||
//Remove sequences with total occupancy below minimum pair overlap threshold
|
||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||
//if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
|
||||
filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThreshold(allBetas, 1, numWells - 1);
|
||||
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
int pairableAlphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
||||
@@ -136,6 +134,7 @@ public class Simulator {
|
||||
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
|
||||
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
|
||||
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
|
||||
//Set source file name in graph to name of sample plate
|
||||
output.setSourceFilename(samplePlate.getSourceFileName());
|
||||
//return GraphWithMapData object
|
||||
return output;
|
||||
@@ -146,6 +145,8 @@ public class Simulator {
|
||||
Integer highThreshold, Integer maxOccupancyDifference,
|
||||
Integer minOverlapPercent, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
//Integer arrays will contain TO VERTEX, FROM VERTEX, and WEIGHT (which I'll need to cast to double)
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
int numWells = data.getNumWells();
|
||||
Integer alphaCount = data.getAlphaCount();
|
||||
Integer betaCount = data.getBetaCount();
|
||||
@@ -156,24 +157,26 @@ public class Simulator {
|
||||
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
|
||||
|
||||
//remove weights outside given overlap thresholds
|
||||
//remove edges with weights outside given overlap thresholds, add those to removed edge list
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
|
||||
removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold));
|
||||
if(verbose){System.out.println("Over- and under-weight edges removed");}
|
||||
|
||||
//Filter by overlap size
|
||||
//remove edges between vertices with too small an overlap size, add those to removed edge list
|
||||
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
|
||||
" percent of vertex occupancy value.");}
|
||||
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
|
||||
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
|
||||
removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts,
|
||||
plateVtoAMap, plateVtoBMap, minOverlapPercent));
|
||||
if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
|
||||
|
||||
//Filter by relative occupancy
|
||||
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
|
||||
+ maxOccupancyDifference);}
|
||||
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
|
||||
maxOccupancyDifference);
|
||||
removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts,
|
||||
plateVtoAMap, plateVtoBMap, maxOccupancyDifference));
|
||||
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
||||
"set to 0.0");}
|
||||
"removed");}
|
||||
|
||||
//Find Maximum Weighted Matching
|
||||
//using jheaps library class PairingHeap for improved efficiency
|
||||
if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
@@ -239,8 +242,10 @@ public class Simulator {
|
||||
|
||||
//Metadata comments for CSV file
|
||||
int min = Math.min(alphaCount, betaCount);
|
||||
//rate of attempted matching
|
||||
double attemptRate = (double) (trueCount + falseCount) / min;
|
||||
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
||||
//rate of pairing error
|
||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
//get list of well concentrations
|
||||
@@ -272,13 +277,19 @@ public class Simulator {
|
||||
metadata.put("incorrect pairing count", Integer.toString(falseCount));
|
||||
metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
|
||||
metadata.put("simulation time", nf.format(time.toSeconds()));
|
||||
|
||||
MatchingResult output = new MatchingResult(data.getSourceFilename(), metadata, header, allResults, matchMap, time);
|
||||
//create MatchingResult object
|
||||
MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
|
||||
if(verbose){
|
||||
for(String s: output.getComments()){
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
|
||||
//put the removed edges back on the graph
|
||||
System.out.println("Restoring removed edges to graph.");
|
||||
GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
|
||||
|
||||
//return MatchingResult object
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -587,6 +598,18 @@ public class Simulator {
|
||||
// return output;
|
||||
// }
|
||||
|
||||
//Remove sequences based on occupancy
|
||||
public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
|
||||
List<Integer> noise = new ArrayList<>();
|
||||
for(Integer k: wellMap.keySet()){
|
||||
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
|
||||
noise.add(k);
|
||||
}
|
||||
}
|
||||
for(Integer k: noise) {
|
||||
wellMap.remove(k);
|
||||
}
|
||||
}
|
||||
|
||||
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
||||
//fills weights in the given 2D array
|
||||
@@ -630,62 +653,6 @@ public class Simulator {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void filterByOccupancyThreshold(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
int low, int high) {
|
||||
for(DefaultWeightedEdge e: graph.edgeSet()){
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
private static void filterByOccupancyThreshold(Map<Integer, Integer> wellMap, int low, int high){
|
||||
List<Integer> noise = new ArrayList<>();
|
||||
for(Integer k: wellMap.keySet()){
|
||||
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
|
||||
noise.add(k);
|
||||
}
|
||||
}
|
||||
for(Integer k: noise) {
|
||||
wellMap.remove(k);
|
||||
}
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy
|
||||
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer maxOccupancyDifference) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
//Adjust this to something cleverer later
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer minOverlapPercent) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
|
||||
int valueSequenceIndex){
|
||||
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
|
||||
|
||||
Reference in New Issue
Block a user