diff --git a/readme.md b/readme.md index 4908318..cd42fc9 100644 --- a/readme.md +++ b/readme.md @@ -281,7 +281,7 @@ with different filtering options), the actual elapsed time was greater. File I/O slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes. As mentioned in the theory section, performance could be improved by implementing a more efficient algorithm for finding -the maximum weighted matching. +the maximum weight matching. ## BEHAVIOR WITH RANDOMIZED WELL POPULATIONS @@ -351,6 +351,8 @@ roughly as though it had a constant well population equal to the plate's average * Advantage: would eliminate the need to use maps to associate vertices with sequences, which would make the code easier to understand. * Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix. * ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE +* ~~Implement custom Vertex class to simplify code and make it easier to implement different MWM algorithms~~ DONE + * This also seems to be faster when using the same algorithm than the version with lots of maps, which is a nice bonus! * Re-implement CDR1 matching method * Implement Duan and Su's maximum weight matching algorithm * Add controllable algorithm-type parameter? @@ -361,7 +363,7 @@ roughly as though it had a constant well population equal to the plate's average * Implement Vose's alias method for arbitrary statistical distributions of cells * Should probably refactor to use apache commons rng for this * Use commons JCS for caching -* Enable post-filtering instead of pre-filtering. Pre-filtering of things like singleton sequences or saturating-occupancy sequences reduces graph size, but could conceivably reduce pairing accuracy by throwing away data. While these sequences have very little signal, it would be interesting to compare unfiltered results to filtered results. This would require a much, much faster MWM algorithm, though, to handle the much larger graphs. Possible one of the linear-time approximation algorithms. +* Parameterize pre-filtering. Currently, sequences present in all wells are filtered out before constructing the graph, which massively reduces graph size. But, ideally, no pre-filtering would be necessary. ## CITATIONS diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java index 7b3cb95..1f9483a 100644 --- a/src/main/java/BiGpairSEQ.java +++ b/src/main/java/BiGpairSEQ.java @@ -13,7 +13,7 @@ public class BiGpairSEQ { private static boolean cacheCells = false; private static boolean cachePlate = false; private static boolean cacheGraph = false; - private static String priorityQueueHeapType = "FIBONACCI"; + private static HeapType priorityQueueHeapType = HeapType.FIBONACCI; private static boolean outputBinary = true; private static boolean outputGraphML = false; private static final String version = "version 2.0"; @@ -157,15 +157,15 @@ public class BiGpairSEQ { } public static String getPriorityQueueHeapType() { - return priorityQueueHeapType; + return priorityQueueHeapType.name(); } public static void setPairingHeap() { - priorityQueueHeapType = "PAIRING"; + priorityQueueHeapType = HeapType.PAIRING; } public static void setFibonacciHeap() { - priorityQueueHeapType = "FIBONACCI"; + priorityQueueHeapType = HeapType.FIBONACCI; } public static boolean outputBinary() {return outputBinary;} diff --git a/src/main/java/CellSample.java b/src/main/java/CellSample.java index cfb8c0b..95b6e39 100644 --- a/src/main/java/CellSample.java +++ b/src/main/java/CellSample.java @@ -13,8 +13,12 @@ public class CellSample { List numbersCDR3 = new ArrayList<>(); List numbersCDR1 = new ArrayList<>(); Integer numDistCDR3s = 2 * numDistinctCells + 1; + //Assign consecutive integers for each CDR3. This ensures they are all unique. IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i)); + //After all CDR3s are assigned, start assigning consecutive integers to CDR1s + //There will usually be fewer integers in the CDR1 list, which will allow repeats below IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i)); + //randomize the order of the numbers in the lists Collections.shuffle(numbersCDR3); Collections.shuffle(numbersCDR1); @@ -22,11 +26,15 @@ public class CellSample { //two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta) List distinctCells = new ArrayList<>(); for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ + //Go through entire CDR3 list once, make pairs of alphas and betas Integer tmpCDR3a = numbersCDR3.get(i); Integer tmpCDR3b = numbersCDR3.get(i+1); + //Go through (likely shorter) CDR1 list as many times as necessary, make pairs of alphas and betas Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size()); Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size()); + //Make the array representing the cell Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b}; + //Add the cell to the list of distinct cells distinctCells.add(tmp); } this.cells = distinctCells; diff --git a/src/main/java/GraphModificationFunctions.java b/src/main/java/GraphModificationFunctions.java index a4ab1a0..4b5591a 100644 --- a/src/main/java/GraphModificationFunctions.java +++ b/src/main/java/GraphModificationFunctions.java @@ -2,23 +2,25 @@ import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; public interface GraphModificationFunctions { - //remove over- and under-weight edges - static List filterByOverlapThresholds(SimpleWeightedGraph graph, + //remove over- and under-weight edges, return removed edges + static Map filterByOverlapThresholds(SimpleWeightedGraph graph, int low, int high, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); + //List removedEdges = new ArrayList<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) { if(saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer weight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, weight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, weight); } else { graph.setEdgeWeight(e, 0.0); @@ -26,31 +28,27 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } return removedEdges; } - //Remove edges for pairs with large occupancy discrepancy - static List filterByRelativeOccupancy(SimpleWeightedGraph graph, - Map alphaWellCounts, - Map betaWellCounts, - Map plateVtoAMap, - Map plateVtoBMap, + //Remove edges for pairs with large occupancy discrepancy, return removed edges + static Map filterByRelativeOccupancy(SimpleWeightedGraph graph, Integer maxOccupancyDifference, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { - Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); - Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); + Integer alphaOcc = graph.getEdgeSource(e).getOccupancy(); + Integer betaOcc = graph.getEdgeTarget(e).getOccupancy(); if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) { if (saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer weight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, weight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, weight); } else { graph.setEdgeWeight(e, 0.0); @@ -58,34 +56,30 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } return removedEdges; } - //Remove edges for pairs where overlap size is significantly lower than the well occupancy - static List filterByOverlapPercent(SimpleWeightedGraph graph, - Map alphaWellCounts, - Map betaWellCounts, - Map plateVtoAMap, - Map plateVtoBMap, + //Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges + static Map filterByOverlapPercent(SimpleWeightedGraph graph, Integer minOverlapPercent, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { - Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); - Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); + Integer alphaOcc = graph.getEdgeSource(e).getOccupancy(); + Integer betaOcc = graph.getEdgeTarget(e).getOccupancy(); double weight = graph.getEdgeWeight(e); double min = minOverlapPercent / 100.0; if ((weight / alphaOcc < min) || (weight / betaOcc < min)) { - if(saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + if (saveEdges) { + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer intWeight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, intWeight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, intWeight); } else { graph.setEdgeWeight(e, 0.0); @@ -93,18 +87,18 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } return removedEdges; } - static void addRemovedEdges(SimpleWeightedGraph graph, - List removedEdges) { - for (Integer[] edge : removedEdges) { + static void addRemovedEdges(SimpleWeightedGraph graph, + Map removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]); - graph.setEdgeWeight(e, (double) edge[2]); + graph.setEdgeWeight(e, removedEdges.get(edge)); } } diff --git a/src/main/java/GraphWithMapData.java b/src/main/java/GraphWithMapData.java index 3795190..aad6c2f 100644 --- a/src/main/java/GraphWithMapData.java +++ b/src/main/java/GraphWithMapData.java @@ -6,6 +6,7 @@ import java.util.Map; //Can't just write the graph, because I need the occupancy data too. //Makes most sense to serialize object and write that to a file. //Which means there's no reason to split map data and graph data up. +//Custom vertex class means a lot of the map data can now be encoded in the graph itself public class GraphWithMapData implements java.io.Serializable { private String sourceFilename; @@ -15,32 +16,33 @@ public class GraphWithMapData implements java.io.Serializable { private Integer alphaCount; private Integer betaCount; private final Map distCellsMapAlphaKey; - private final Map plateVtoAMap; - private final Map plateVtoBMap; - private final Map plateAtoVMap; - private final Map plateBtoVMap; - private final Map alphaWellCounts; - private final Map betaWellCounts; +// private final Map plateVtoAMap; +// private final Map plateVtoBMap; +// private final Map plateAtoVMap; +// private final Map plateBtoVMap; +// private final Map alphaWellCounts; +// private final Map betaWellCounts; private final Duration time; public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations, - Integer alphaCount, Integer betaCount, - Map distCellsMapAlphaKey, Map plateVtoAMap, - Map plateVtoBMap, Map plateAtoVMap, - Map plateBtoVMap, Map alphaWellCounts, - Map betaWellCounts, Duration time) { + Map distCellsMapAlphaKey, Integer alphaCount, Integer betaCount, Duration time){ + +// Map plateVtoAMap, +// Map plateVtoBMap, Map plateAtoVMap, +// Map plateBtoVMap, Map alphaWellCounts, +// Map betaWellCounts,) { this.graph = graph; this.numWells = numWells; this.wellPopulations = wellConcentrations; this.alphaCount = alphaCount; this.betaCount = betaCount; this.distCellsMapAlphaKey = distCellsMapAlphaKey; - this.plateVtoAMap = plateVtoAMap; - this.plateVtoBMap = plateVtoBMap; - this.plateAtoVMap = plateAtoVMap; - this.plateBtoVMap = plateBtoVMap; - this.alphaWellCounts = alphaWellCounts; - this.betaWellCounts = betaWellCounts; +// this.plateVtoAMap = plateVtoAMap; +// this.plateVtoBMap = plateVtoBMap; +// this.plateAtoVMap = plateAtoVMap; +// this.plateBtoVMap = plateBtoVMap; +// this.alphaWellCounts = alphaWellCounts; +// this.betaWellCounts = betaWellCounts; this.time = time; } @@ -68,29 +70,29 @@ public class GraphWithMapData implements java.io.Serializable { return distCellsMapAlphaKey; } - public Map getPlateVtoAMap() { - return plateVtoAMap; - } - - public Map getPlateVtoBMap() { - return plateVtoBMap; - } - - public Map getPlateAtoVMap() { - return plateAtoVMap; - } - - public Map getPlateBtoVMap() { - return plateBtoVMap; - } - - public Map getAlphaWellCounts() { - return alphaWellCounts; - } - - public Map getBetaWellCounts() { - return betaWellCounts; - } +// public Map getPlateVtoAMap() { +// return plateVtoAMap; +// } +// +// public Map getPlateVtoBMap() { +// return plateVtoBMap; +// } +// +// public Map getPlateAtoVMap() { +// return plateAtoVMap; +// } +// +// public Map getPlateBtoVMap() { +// return plateBtoVMap; +// } +// +// public Map getAlphaWellCounts() { +// return alphaWellCounts; +// } +// +// public Map getBetaWellCounts() { +// return betaWellCounts; +// } public Duration getTime() { return time; diff --git a/src/main/java/HeapType.java b/src/main/java/HeapType.java new file mode 100644 index 0000000..a37c219 --- /dev/null +++ b/src/main/java/HeapType.java @@ -0,0 +1,4 @@ +public enum HeapType { + FIBONACCI, + PAIRING +} diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 3bdc713..83d1c3b 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -158,9 +158,9 @@ public class Plate { //returns a map of the counts of the sequence at cell index sIndex, in a range of wells public Map assayWellsSequenceS(int start, int end, int... sIndices) { Map assay = new HashMap<>(); - for(int pIndex: sIndices){ + for(int sIndex: sIndices){ for(int i = start; i < end; i++){ - countSequences(assay, wells.get(i), pIndex); + countSequences(assay, wells.get(i), sIndex); } } return assay; @@ -169,6 +169,7 @@ public class Plate { private void countSequences(Map wellMap, List well, int... sIndices) { for(Integer[] cell : well) { for(int sIndex: sIndices){ + //skip dropout sequences, which have value -1 if(cell[sIndex] != -1){ wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); } diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 03bb9e2..ede14e8 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -18,36 +18,36 @@ import static java.lang.Float.*; //NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell public class Simulator implements GraphModificationFunctions { - private static final int cdr3AlphaIndex = 0; - private static final int cdr3BetaIndex = 1; - private static final int cdr1AlphaIndex = 2; - private static final int cdr1BetaIndex = 3; - //Make the graph needed for matching CDR3s + //Make the graph needed for matching sequences. + //sourceVertexIndices and targetVertexIndices are indices within the cell to use as for the two sets of vertices + //in the bipartite graph. "Source" and "target" are JGraphT terms for the two vertices an edge touches, + //even if not directed. public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) { Instant start = Instant.now(); List distinctCells = cellSample.getCells(); - int[] alphaIndex = {cdr3AlphaIndex}; - int[] betaIndex = {cdr3BetaIndex}; + int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()}; + int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()}; int numWells = samplePlate.getSize(); if(verbose){System.out.println("Making cell maps");} //HashMap keyed to Alphas, values Betas - Map distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr3BetaIndex); + Map distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1); if(verbose){System.out.println("Cell maps made");} if(verbose){System.out.println("Making well maps");} - Map allAlphas = samplePlate.assayWellsSequenceS(alphaIndex); - Map allBetas = samplePlate.assayWellsSequenceS(betaIndex); + + Map allAlphas = samplePlate.assayWellsSequenceS(alphaIndices); + Map allBetas = samplePlate.assayWellsSequenceS(betaIndices); int alphaCount = allAlphas.size(); if(verbose){System.out.println("All alphas count: " + alphaCount);} int betaCount = allBetas.size(); if(verbose){System.out.println("All betas count: " + betaCount);} if(verbose){System.out.println("Well maps made");} - + //ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value if(verbose){System.out.println("Removing sequences present in all wells.");} filterByOccupancyThresholds(allAlphas, 1, numWells - 1); filterByOccupancyThresholds(allBetas, 1, numWells - 1); @@ -80,29 +80,46 @@ public class Simulator implements GraphModificationFunctions { //(technically this is only 1/4 of an adjacency matrix, but that's all you need //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.) if(verbose){System.out.println("Creating adjacency matrix");} - //Count how many wells each alpha appears in + //Count how many wells each alpha sequence appears in Map alphaWellCounts = new HashMap<>(); - //count how many wells each beta appears in + //count how many wells each beta sequence appears in Map betaWellCounts = new HashMap<>(); //the adjacency matrix to be used by the graph generator double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap, - plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); + plateBtoVMap, alphaIndices, betaIndices, alphaWellCounts, betaWellCounts, weights); if(verbose){System.out.println("Matrix created");} //create bipartite graph if(verbose){System.out.println("Creating graph");} //the graph object - SimpleWeightedGraph graph = + SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); //the graph generator SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); //the list of alpha vertices - List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + //List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + List alphaVertices = new ArrayList<>(); + //start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map + for (Integer seq : plateAtoVMap.keySet()) { + Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); + alphaVertices.add(alphaVertex); + } + //Sort to make sure the order of vertices in list matches the order of the adjacency matrix + Collections.sort(alphaVertices); + //Add ordered list of vertices to the graph graphGenerator.first(alphaVertices); //the list of beta vertices - List betaVertices = new ArrayList<>(plateVtoBMap.keySet()); - graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry + //List betaVertices = new ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry + List betaVertices = new ArrayList<>(); + for (Integer seq : plateBtoVMap.keySet()) { + Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq)); + betaVertices.add(betaVertex); + } + //Sort to make sure the order of vertices in list matches the order of the adjacency matrix + Collections.sort(betaVertices); + //Add ordered list of vertices to the graph + graphGenerator.second(betaVertices); //use adjacency matrix of weight created previously graphGenerator.weights(weights); graphGenerator.generateGraph(graph); @@ -112,9 +129,7 @@ public class Simulator implements GraphModificationFunctions { Duration time = Duration.between(start, stop); //create GraphWithMapData object - GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount, - distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap, - plateBtoVMap, alphaWellCounts, betaWellCounts, time); + GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, alphaCount, betaCount, time); //Set source file name in graph to name of sample plate output.setSourceFilename(samplePlate.getFilename()); //return GraphWithMapData object @@ -126,60 +141,67 @@ public class Simulator implements GraphModificationFunctions { Integer highThreshold, Integer maxOccupancyDifference, Integer minOverlapPercent, boolean verbose) { Instant start = Instant.now(); - List removedEdges = new ArrayList<>(); + SimpleWeightedGraph graph = data.getGraph(); + Map removedEdges = new HashMap<>(); boolean saveEdges = BiGpairSEQ.cacheGraph(); int numWells = data.getNumWells(); - Integer alphaCount = data.getAlphaCount(); - Integer betaCount = data.getBetaCount(); + //Integer alphaCount = data.getAlphaCount(); + //Integer betaCount = data.getBetaCount(); Map distCellsMapAlphaKey = data.getDistCellsMapAlphaKey(); - Map plateVtoAMap = data.getPlateVtoAMap(); - Map plateVtoBMap = data.getPlateVtoBMap(); - Map alphaWellCounts = data.getAlphaWellCounts(); - Map betaWellCounts = data.getBetaWellCounts(); - SimpleWeightedGraph graph = data.getGraph(); + Set alphas = new HashSet<>(); + Set betas = new HashSet<>(); + for(Vertex v: graph.vertexSet()) { + if (SequenceType.CDR3_ALPHA.equals(v.getType())){ + alphas.add(v); + } + else { + betas.add(v); + } + } + Integer graphAlphaCount = alphas.size(); + Integer graphBetaCount = betas.size(); //remove edges with weights outside given overlap thresholds, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} - removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); if(verbose){System.out.println("Over- and under-weight edges removed");} //remove edges between vertices with too small an overlap size, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() + " percent of vertex occupancy value.");} - removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts, - plateVtoAMap, plateVtoBMap, minOverlapPercent, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges)); if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");} //Filter by relative occupancy if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > " + maxOccupancyDifference);} - removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, - plateVtoAMap, plateVtoBMap, maxOccupancyDifference, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges)); if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " + "removed");} - //Find Maximum Weighted Matching - if(verbose){System.out.println("Finding maximum weighted matching");} + //Find Maximum Weight Matching + //using jheaps library class PairingHeap for improved efficiency + if(verbose){System.out.println("Finding maximum weight matching");} MaximumWeightBipartiteMatching maxWeightMatching; //Use correct heap type for priority queue String heapType = BiGpairSEQ.getPriorityQueueHeapType(); switch (heapType) { case "PAIRING" -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet(), + alphas, + betas, i -> new PairingHeap(Comparator.naturalOrder())); } case "FIBONACCI" -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet(), + alphas, + betas, i -> new FibonacciHeap(Comparator.naturalOrder())); } default -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet()); + alphas, + betas); } } //get the matching @@ -209,11 +231,14 @@ public class Simulator implements GraphModificationFunctions { Map matchMap = new HashMap<>(); while(weightIter.hasNext()) { e = weightIter.next(); - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); + //Integer source = graph.getEdgeSource(e); + //Integer target = graph.getEdgeTarget(e); //The match map is all matches found, not just true matches! - matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target)); - check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); + matchMap.put(source.getSequence(), target.getSequence()); + check = target.getSequence().equals(distCellsMapAlphaKey.get(source.getSequence())); + //check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); if(check) { trueCount++; } @@ -221,17 +246,19 @@ public class Simulator implements GraphModificationFunctions { falseCount++; } List result = new ArrayList<>(); - result.add(plateVtoAMap.get(source).toString()); + //alpha sequence + result.add(source.getSequence().toString()); //alpha well count - result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString()); - result.add(plateVtoBMap.get(target).toString()); + result.add(source.getOccupancy().toString()); + //beta sequence + result.add(target.getSequence().toString()); //beta well count - result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString()); + result.add(target.getOccupancy().toString()); //overlap count result.add(Double.toString(graph.getEdgeWeight(e))); result.add(Boolean.toString(check)); - double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)), - betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e)); + double pValue = Equations.pValue(numWells, source.getOccupancy(), + target.getOccupancy(), graph.getEdgeWeight(e)); BigDecimal pValueTrunc = new BigDecimal(pValue, mc); result.add(pValueTrunc.toString()); allResults.add(result); @@ -239,7 +266,7 @@ public class Simulator implements GraphModificationFunctions { //Metadata comments for CSV file String algoType = "LEDA book with heap: " + heapType; - int min = Math.min(alphaCount, betaCount); + int min = Math.min(graphAlphaCount, graphBetaCount); //matching weight BigDecimal totalMatchingWeight = maxWeightMatching.getMatchingWeight(); //rate of attempted matching @@ -274,8 +301,10 @@ public class Simulator implements GraphModificationFunctions { metadata.put("algorithm type", algoType); metadata.put("matching weight", totalMatchingWeight.toString()); metadata.put("well populations", wellPopulationsString); - metadata.put("total alphas found", alphaCount.toString()); - metadata.put("total betas found", betaCount.toString()); + metadata.put("total alphas on plate", data.getAlphaCount().toString()); + metadata.put("total betas on plate", data.getBetaCount().toString()); + metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString()); + metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString()); metadata.put("high overlap threshold", highThreshold.toString()); metadata.put("low overlap threshold", lowThreshold.toString()); metadata.put("minimum overlap percent", minOverlapPercent.toString()); diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index ef962ae..35719b8 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -1,23 +1,97 @@ +import java.io.Serializable; +public class Vertex implements Serializable, Comparable { + private SequenceType type; + private Integer vertexLabel; + private Integer sequence; + private Integer occupancy; -public class Vertex { - private final Integer vertexLabel; - private final Integer sequence; - private final Integer occupancy; + public Vertex(Integer vertexLabel) { + this.vertexLabel = vertexLabel; + } + public Vertex(String vertexLabel) { + this.vertexLabel = Integer.parseInt((vertexLabel)); + } - public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) { + public Vertex(SequenceType type, Integer sequence, Integer occupancy, Integer vertexLabel) { + this.type = type; this.vertexLabel = vertexLabel; this.sequence = sequence; this.occupancy = occupancy; } - public Integer getVertexLabel() { return vertexLabel; } + + public SequenceType getType() { + return type; + } + + public void setType(String type) { + this.type = SequenceType.valueOf(type); + } + + public Integer getVertexLabel() { + return vertexLabel; + } + + public void setVertexLabel(String label) { + this.vertexLabel = Integer.parseInt(label); + } public Integer getSequence() { + return sequence; } + public void setSequence(String sequence) { + this.sequence = Integer.parseInt(sequence); + } + public Integer getOccupancy() { return occupancy; } + + public void setOccupancy(String occupancy) { + this.occupancy = Integer.parseInt(occupancy); + } + + @Override //adapted from JGraphT example code + public int hashCode() + { + return (sequence == null) ? 0 : sequence.hashCode(); + } + + @Override //adapted from JGraphT example code + public boolean equals(Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Vertex other = (Vertex) obj; + if (sequence == null) { + return other.sequence == null; + } else { + return sequence.equals(other.sequence); + } + } + + + @Override //adapted from JGraphT example code + public String toString() + { + StringBuilder sb = new StringBuilder(); + sb.append("(").append(vertexLabel) + .append(", Type: ").append(type.name()) + .append(", Sequence: ").append(sequence) + .append(", Occupancy: ").append(occupancy).append(")"); + return sb.toString(); + } + + @Override + public int compareTo(Vertex other) { + return this.vertexLabel - other.getVertexLabel(); + } + }