From b604b1d3cd6a7a5f7e13dc88366c081de331ac8b Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 06:19:08 -0600 Subject: [PATCH 01/18] Changing graph to use Vertex class --- src/main/java/CellSample.java | 8 +++++ src/main/java/InteractiveInterface.java | 2 +- src/main/java/Plate.java | 5 +-- src/main/java/SequenceType.java | 8 +++++ src/main/java/Simulator.java | 47 +++++++++++++++++-------- src/main/java/Vertex.java | 6 +++- 6 files changed, 57 insertions(+), 19 deletions(-) create mode 100644 src/main/java/SequenceType.java diff --git a/src/main/java/CellSample.java b/src/main/java/CellSample.java index cfb8c0b..95b6e39 100644 --- a/src/main/java/CellSample.java +++ b/src/main/java/CellSample.java @@ -13,8 +13,12 @@ public class CellSample { List numbersCDR3 = new ArrayList<>(); List numbersCDR1 = new ArrayList<>(); Integer numDistCDR3s = 2 * numDistinctCells + 1; + //Assign consecutive integers for each CDR3. This ensures they are all unique. IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i)); + //After all CDR3s are assigned, start assigning consecutive integers to CDR1s + //There will usually be fewer integers in the CDR1 list, which will allow repeats below IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i)); + //randomize the order of the numbers in the lists Collections.shuffle(numbersCDR3); Collections.shuffle(numbersCDR1); @@ -22,11 +26,15 @@ public class CellSample { //two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta) List distinctCells = new ArrayList<>(); for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ + //Go through entire CDR3 list once, make pairs of alphas and betas Integer tmpCDR3a = numbersCDR3.get(i); Integer tmpCDR3b = numbersCDR3.get(i+1); + //Go through (likely shorter) CDR1 list as many times as necessary, make pairs of alphas and betas Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size()); Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size()); + //Make the array representing the cell Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b}; + //Add the cell to the list of distinct cells distinctCells.add(tmp); } this.cells = distinctCells; diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java index 044e03b..0ab30b2 100644 --- a/src/main/java/InteractiveInterface.java +++ b/src/main/java/InteractiveInterface.java @@ -307,7 +307,7 @@ public class InteractiveInterface { } else{ List cells = cellSample.getCells(); - GraphWithMapData data = Simulator.makeGraph(cells, plate, true); + GraphWithMapData data = Simulator.makeCDR3Graph(cells, plate, true); assert filename != null; if(BiGpairSEQ.outputBinary()) { GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data); diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 5a13eff..7f70c43 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -138,9 +138,9 @@ public class Plate { //returns a map of the counts of the sequence at cell index sIndex, in a range of wells public Map assayWellsSequenceS(int start, int end, int... sIndices) { Map assay = new HashMap<>(); - for(int pIndex: sIndices){ + for(int sIndex: sIndices){ for(int i = start; i < end; i++){ - countSequences(assay, wells.get(i), pIndex); + countSequences(assay, wells.get(i), sIndex); } } return assay; @@ -149,6 +149,7 @@ public class Plate { private void countSequences(Map wellMap, List well, int... sIndices) { for(Integer[] cell : well) { for(int sIndex: sIndices){ + //skip dropout sequences, which have value -1 if(cell[sIndex] != -1){ wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); } diff --git a/src/main/java/SequenceType.java b/src/main/java/SequenceType.java new file mode 100644 index 0000000..54de083 --- /dev/null +++ b/src/main/java/SequenceType.java @@ -0,0 +1,8 @@ +//enum for tagging types of sequences +//Listed in order that they appear in a cell array, so ordinal() method will return correct index +public enum SequenceType { + CDR3_ALPHA, + CDR3_BETA, + CDR1_ALPHA, + CDR1_BETA +} diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 6d3f6eb..c3d0397 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -18,18 +18,24 @@ import static java.lang.Float.*; //NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell public class Simulator implements GraphModificationFunctions { + + //These are the indices of the different sequences within a cell array private static final int cdr3AlphaIndex = 0; private static final int cdr3BetaIndex = 1; private static final int cdr1AlphaIndex = 2; private static final int cdr1BetaIndex = 3; - //Make the graph needed for matching CDR3s - public static GraphWithMapData makeGraph(List distinctCells, Plate samplePlate, boolean verbose) { + //Make the graph needed for matching sequences. + //sourceVertexIndices and targetVertexIndices are indices within the cell to use as for the two sets of vertices + //in the bipartite graph. "Source" and "target" are JGraphT terms for the two vertices an edge touches, + //even if not directed. + public static GraphWithMapData makeCDR3Graph(List distinctCells, Plate samplePlate, boolean verbose) { Instant start = Instant.now(); - int[] alphaIndex = {cdr3AlphaIndex}; - int[] betaIndex = {cdr3BetaIndex}; int numWells = samplePlate.getSize(); + //The ordinal value of the sequence type enum is also that sequence's index in a cell array + int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()}; + int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()}; if(verbose){System.out.println("Making cell maps");} //HashMap keyed to Alphas, values Betas @@ -37,8 +43,9 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("Cell maps made");} if(verbose){System.out.println("Making well maps");} - Map allAlphas = samplePlate.assayWellsSequenceS(alphaIndex); - Map allBetas = samplePlate.assayWellsSequenceS(betaIndex); + + Map allAlphas = samplePlate.assayWellsSequenceS(alphaIndices); + Map allBetas = samplePlate.assayWellsSequenceS(betaIndices); int alphaCount = allAlphas.size(); if(verbose){System.out.println("All alphas count: " + alphaCount);} int betaCount = allBetas.size(); @@ -77,29 +84,40 @@ public class Simulator implements GraphModificationFunctions { //(technically this is only 1/4 of an adjacency matrix, but that's all you need //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.) if(verbose){System.out.println("Creating adjacency matrix");} - //Count how many wells each alpha appears in + //Count how many wells each alpha sequence appears in Map alphaWellCounts = new HashMap<>(); - //count how many wells each beta appears in + //count how many wells each beta sequence appears in Map betaWellCounts = new HashMap<>(); //the adjacency matrix to be used by the graph generator double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap, - plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); + plateBtoVMap, alphaIndices, betaIndices, alphaWellCounts, betaWellCounts, weights); if(verbose){System.out.println("Matrix created");} //create bipartite graph if(verbose){System.out.println("Creating graph");} //the graph object - SimpleWeightedGraph graph = + SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); //the graph generator SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); //the list of alpha vertices - List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + //List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + List alphaVertices = new ArrayList<>(); + //start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map + for (Integer seq: plateAtoVMap.keySet()) { + Vertex alphaVertex = new Vertex(SequenceType.CDR1_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); + alphaVertices.add(alphaVertex); + } graphGenerator.first(alphaVertices); //the list of beta vertices - List betaVertices = new ArrayList<>(plateVtoBMap.keySet()); - graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry + //List betaVertices = new ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry + List betaVertices = new ArrayList<>(); + for (Integer seq : plateBtoVMap.keySet()) { + Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq)); + betaVertices.add(betaVertex); + } + graphGenerator.second(betaVertices); //use adjacency matrix of weight created previously graphGenerator.weights(weights); graphGenerator.generateGraph(graph); @@ -653,7 +671,6 @@ public class Simulator implements GraphModificationFunctions { } } } - } } @@ -668,7 +685,7 @@ public class Simulator implements GraphModificationFunctions { private static Map makeVertexToSequenceMap(Map sequences, Integer startValue) { Map map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry - Integer index = startValue; //is this necessary? I don't think I use this. + Integer index = startValue; for (Integer k: sequences.keySet()) { map.put(index, k); index++; diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index ef962ae..32627ae 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -1,16 +1,20 @@ public class Vertex { + private final SequenceType type; private final Integer vertexLabel; private final Integer sequence; private final Integer occupancy; - public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) { + public Vertex(SequenceType type, Integer sequence, Integer occupancy, Integer vertexLabel) { + this.type = type; this.vertexLabel = vertexLabel; this.sequence = sequence; this.occupancy = occupancy; } + public SequenceType getType() { return type; } + public Integer getVertexLabel() { return vertexLabel; } public Integer getSequence() { From f032d3e852506fb80568242a088d4d29dc39538d Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 07:34:07 -0600 Subject: [PATCH 02/18] rewrite GraphML importer/exporter --- src/main/java/GraphMLFileReader.java | 8 ++- src/main/java/GraphMLFileWriter.java | 28 +++++++++- src/main/java/Vertex.java | 84 ++++++++++++++++++++++++---- 3 files changed, 105 insertions(+), 15 deletions(-) diff --git a/src/main/java/GraphMLFileReader.java b/src/main/java/GraphMLFileReader.java index 151c078..fc75f3b 100644 --- a/src/main/java/GraphMLFileReader.java +++ b/src/main/java/GraphMLFileReader.java @@ -1,3 +1,4 @@ +import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; import org.jgrapht.nio.graphml.GraphMLImporter; @@ -21,7 +22,12 @@ public class GraphMLFileReader { try(//don't need to close reader bc of try-with-resources auto-closing BufferedReader reader = Files.newBufferedReader(Path.of(filename)); ){ - GraphMLImporter importer = new GraphMLImporter<>(); + GraphMLImporter importer = new GraphMLImporter<>(); + importer.addVertexWithAttributesConsumer((vertex, attributes) -> { + vertex.setType(attributes.get("type").getValue()); + vertex.setSequence(attributes.get("sequence").getValue()); + vertex.setOccupancy((attributes.get("occupancy").getValue())); + }); importer.importGraph(graph, reader); } catch (IOException ex) { diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index ca5e89d..a2f0a5a 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -1,12 +1,18 @@ +import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; -import org.jgrapht.nio.dot.DOTExporter; +import org.jgrapht.nio.Attribute; +import org.jgrapht.nio.AttributeType; +import org.jgrapht.nio.DefaultAttribute; import org.jgrapht.nio.graphml.GraphMLExporter; +import org.jgrapht.nio.graphml.GraphMLExporter.AttributeCategory; import java.io.BufferedWriter; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.HashMap; +import java.util.Map; public class GraphMLFileWriter { @@ -14,7 +20,7 @@ public class GraphMLFileWriter { SimpleWeightedGraph graph; - public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) { + public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) { if(!filename.matches(".*\\.graphml")){ filename = filename + ".graphml"; } @@ -25,7 +31,23 @@ public class GraphMLFileWriter { public void writeGraphToFile() { try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); ){ - GraphMLExporter exporter = new GraphMLExporter<>(); + //create exporter. Let the vertex labels be the unique ids for the vertices + GraphMLExporter> exporter = new GraphMLExporter<>(Vertex::getVertexLabel); + //set to export weights + exporter.setExportEdgeWeights(true); + //set type, sequnce, and occupancy attributes for each vertex + exporter.setVertexAttributeProvider( v -> { + Map attributes = new HashMap<>(); + attributes.put("type", DefaultAttribute.createAttribute(v.getType())); + attributes.put("sequence", DefaultAttribute.createAttribute(v.getSequence())); + attributes.put("occupancy", DefaultAttribute.createAttribute(v.getOccupancy())); + return attributes; + }); + //register the attributes + exporter.registerAttribute("type", AttributeCategory.NODE, AttributeType.STRING); + exporter.registerAttribute("sequence", AttributeCategory.NODE, AttributeType.STRING); + exporter.registerAttribute("occupancy", AttributeCategory.NODE, AttributeType.STRING); + //export the graph exporter.exportGraph(graph, writer); } catch(IOException ex){ System.out.println("Could not make new file named "+filename); diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index 32627ae..74b2cac 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -1,10 +1,14 @@ public class Vertex { - private final SequenceType type; - private final Integer vertexLabel; - private final Integer sequence; - private final Integer occupancy; + private SequenceType type; + private Integer vertexLabel; + private Integer sequence; + private Integer occupancy; + + public Vertex(String vertexLabel) { + this.vertexLabel = Integer.parseInt((vertexLabel)); + } public Vertex(SequenceType type, Integer sequence, Integer occupancy, Integer vertexLabel) { this.type = type; @@ -13,15 +17,73 @@ public class Vertex { this.occupancy = occupancy; } - public SequenceType getType() { return type; } - public Integer getVertexLabel() { return vertexLabel; } - - public Integer getSequence() { - return sequence; + public String getType() { + return type.name(); } - public Integer getOccupancy() { - return occupancy; + public void setType(String type) { + this.type = SequenceType.valueOf(type); } + + public String getVertexLabel() { + return vertexLabel.toString(); + } + + public void setVertexLabel(String label) { + this.vertexLabel = Integer.parseInt(label); + } + + public String getSequence() { + + return sequence.toString(); + } + + public void setSequence(String sequence) { + this.sequence = Integer.parseInt(sequence); + } + + public String getOccupancy() { + return occupancy.toString(); + } + + public void setOccupancy(String occupancy) { + this.occupancy = Integer.parseInt(occupancy); + } + + @Override //adapted from JGraphT example code + public int hashCode() + { + return (sequence == null) ? 0 : sequence.hashCode(); + } + + @Override //adapted from JGraphT example code + public boolean equals(Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Vertex other = (Vertex) obj; + if (sequence == null) { + return other.sequence == null; + } else { + return sequence.equals(other.sequence); + } + } + + + @Override //adapted from JGraphT example code + public String toString() + { + StringBuilder sb = new StringBuilder(); + sb.append("(").append(vertexLabel) + .append(", Type: ").append(type.name()) + .append(", Sequence: ").append(sequence) + .append(", Occupancy: ").append(occupancy).append(")"); + return sb.toString(); + } + } From 7b03a3cce8324e373d0d041a55883b79cb001f29 Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 07:35:34 -0600 Subject: [PATCH 03/18] bugfix --- src/main/java/Simulator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index c3d0397..f63b5b8 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -106,7 +106,7 @@ public class Simulator implements GraphModificationFunctions { List alphaVertices = new ArrayList<>(); //start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map for (Integer seq: plateAtoVMap.keySet()) { - Vertex alphaVertex = new Vertex(SequenceType.CDR1_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); + Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); alphaVertices.add(alphaVertex); } graphGenerator.first(alphaVertices); From ab437512e9460421b5d27fda184744fc34815d50 Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 07:45:36 -0600 Subject: [PATCH 04/18] make Vertex serializable --- src/main/java/GraphMLFileWriter.java | 2 +- src/main/java/Vertex.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index a2f0a5a..b767a8f 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -35,7 +35,7 @@ public class GraphMLFileWriter { GraphMLExporter> exporter = new GraphMLExporter<>(Vertex::getVertexLabel); //set to export weights exporter.setExportEdgeWeights(true); - //set type, sequnce, and occupancy attributes for each vertex + //set type, sequence, and occupancy attributes for each vertex exporter.setVertexAttributeProvider( v -> { Map attributes = new HashMap<>(); attributes.put("type", DefaultAttribute.createAttribute(v.getType())); diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index 74b2cac..a801b87 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -1,6 +1,6 @@ +import java.io.Serializable; - -public class Vertex { +public class Vertex implements Serializable { private SequenceType type; private Integer vertexLabel; private Integer sequence; From fb8d8d87855ada81d95b23732795a1f9ea21811e Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 08:15:31 -0600 Subject: [PATCH 05/18] make heap type an enum --- src/main/java/BiGpairSEQ.java | 12 ++++++------ src/main/java/HeapType.java | 4 ++++ 2 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 src/main/java/HeapType.java diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java index 935888c..5a894ed 100644 --- a/src/main/java/BiGpairSEQ.java +++ b/src/main/java/BiGpairSEQ.java @@ -13,9 +13,9 @@ public class BiGpairSEQ { private static boolean cacheCells = false; private static boolean cachePlate = false; private static boolean cacheGraph = false; - private static String priorityQueueHeapType = "FIBONACCI"; - private static boolean outputBinary = true; - private static boolean outputGraphML = false; + private static HeapType priorityQueueHeapType = HeapType.FIBONACCI; + private static boolean outputBinary = false; + private static boolean outputGraphML = true; public static void main(String[] args) { if (args.length == 0) { @@ -156,15 +156,15 @@ public class BiGpairSEQ { } public static String getPriorityQueueHeapType() { - return priorityQueueHeapType; + return priorityQueueHeapType.name(); } public static void setPairingHeap() { - priorityQueueHeapType = "PAIRING"; + priorityQueueHeapType = HeapType.PAIRING; } public static void setFibonacciHeap() { - priorityQueueHeapType = "FIBONACCI"; + priorityQueueHeapType = HeapType.FIBONACCI; } public static boolean outputBinary() {return outputBinary;} diff --git a/src/main/java/HeapType.java b/src/main/java/HeapType.java new file mode 100644 index 0000000..a37c219 --- /dev/null +++ b/src/main/java/HeapType.java @@ -0,0 +1,4 @@ +public enum HeapType { + FIBONACCI, + PAIRING +} From b3dc10f2870aafed048cdb5bb83f99a8a3e877d4 Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 08:15:48 -0600 Subject: [PATCH 06/18] add graph attributes to graphml writer --- src/main/java/GraphMLFileWriter.java | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index b767a8f..f357828 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -18,7 +18,18 @@ public class GraphMLFileWriter { String filename; SimpleWeightedGraph graph; + GraphWithMapData data; + Map graphAttributes = new HashMap<>(); + public GraphMLFileWriter(String filename, GraphWithMapData data) { + if(!filename.matches(".*\\.graphml")){ + filename = filename + ".graphml"; + } + this.filename = filename; + this.data = data; + this.graph = data.getGraph(); + createGraphAttributes(); + } public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) { if(!filename.matches(".*\\.graphml")){ @@ -28,6 +39,23 @@ public class GraphMLFileWriter { this.graph = graph; } + private void createGraphAttributes(){ + //Sample plate filename + graphAttributes.put("sample plate filename", DefaultAttribute.createAttribute(data.getSourceFilename())); + // Number of wells + graphAttributes.put("well count", DefaultAttribute.createAttribute(data.getNumWells().toString())); + //Well populations + Integer[] wellPopulations = data.getWellPopulations(); + StringBuilder populationsStringBuilder = new StringBuilder(); + populationsStringBuilder.append(wellPopulations[0].toString()); + for(int i = 1; i < wellPopulations.length; i++){ + populationsStringBuilder.append(", "); + populationsStringBuilder.append(wellPopulations[i].toString()); + } + String wellPopulationsString = populationsStringBuilder.toString(); + graphAttributes.put("well populations", DefaultAttribute.createAttribute(wellPopulationsString)); + } + public void writeGraphToFile() { try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); ){ @@ -35,6 +63,8 @@ public class GraphMLFileWriter { GraphMLExporter> exporter = new GraphMLExporter<>(Vertex::getVertexLabel); //set to export weights exporter.setExportEdgeWeights(true); + //Set graph attributes + exporter.setGraphAttributeProvider( () -> graphAttributes); //set type, sequence, and occupancy attributes for each vertex exporter.setVertexAttributeProvider( v -> { Map attributes = new HashMap<>(); From 75b2aa955301a585c52609ada8d907de0cfbc874 Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 08:58:52 -0600 Subject: [PATCH 07/18] testing graph attributes --- src/main/java/BiGpairSEQ.java | 4 ++-- src/main/java/GraphMLFileReader.java | 14 ++++++++++++-- src/main/java/GraphMLFileWriter.java | 18 ++++++++++++------ src/main/java/InteractiveInterface.java | 2 +- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java index 5a894ed..23798b7 100644 --- a/src/main/java/BiGpairSEQ.java +++ b/src/main/java/BiGpairSEQ.java @@ -14,8 +14,8 @@ public class BiGpairSEQ { private static boolean cachePlate = false; private static boolean cacheGraph = false; private static HeapType priorityQueueHeapType = HeapType.FIBONACCI; - private static boolean outputBinary = false; - private static boolean outputGraphML = true; + private static boolean outputBinary = true; + private static boolean outputGraphML = false; public static void main(String[] args) { if (args.length == 0) { diff --git a/src/main/java/GraphMLFileReader.java b/src/main/java/GraphMLFileReader.java index fc75f3b..9b9d550 100644 --- a/src/main/java/GraphMLFileReader.java +++ b/src/main/java/GraphMLFileReader.java @@ -6,11 +6,14 @@ import java.io.BufferedReader; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; public class GraphMLFileReader { - private String filename; - private SimpleWeightedGraph graph; + private final String filename; + private final SimpleWeightedGraph graph; + private final Map graphAttributes = new HashMap<>(); public GraphMLFileReader(String filename, SimpleWeightedGraph graph) { if(!filename.matches(".*\\.graphml")){ @@ -23,6 +26,9 @@ public class GraphMLFileReader { BufferedReader reader = Files.newBufferedReader(Path.of(filename)); ){ GraphMLImporter importer = new GraphMLImporter<>(); + importer.addGraphAttributeConsumer((str, attribute) -> { + graphAttributes.put(str, attribute.getValue()); + }); importer.addVertexWithAttributesConsumer((vertex, attributes) -> { vertex.setType(attributes.get("type").getValue()); vertex.setSequence(attributes.get("sequence").getValue()); @@ -38,4 +44,8 @@ public class GraphMLFileReader { public SimpleWeightedGraph getGraph() { return graph; } + public Map getGraphAttributes() { return graphAttributes; } + + public String getFilename() {return filename;} + } diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index f357828..3d6c8fb 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -5,6 +5,7 @@ import org.jgrapht.nio.AttributeType; import org.jgrapht.nio.DefaultAttribute; import org.jgrapht.nio.graphml.GraphMLExporter; import org.jgrapht.nio.graphml.GraphMLExporter.AttributeCategory; +import org.w3c.dom.Attr; import java.io.BufferedWriter; import java.io.IOException; @@ -19,7 +20,7 @@ public class GraphMLFileWriter { String filename; SimpleWeightedGraph graph; GraphWithMapData data; - Map graphAttributes = new HashMap<>(); + Map graphAttributes; public GraphMLFileWriter(String filename, GraphWithMapData data) { if(!filename.matches(".*\\.graphml")){ @@ -28,7 +29,7 @@ public class GraphMLFileWriter { this.filename = filename; this.data = data; this.graph = data.getGraph(); - createGraphAttributes(); + graphAttributes = createGraphAttributes(); } public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) { @@ -39,11 +40,12 @@ public class GraphMLFileWriter { this.graph = graph; } - private void createGraphAttributes(){ + private Map createGraphAttributes(){ + Map ga = new HashMap<>(); //Sample plate filename - graphAttributes.put("sample plate filename", DefaultAttribute.createAttribute(data.getSourceFilename())); + ga.put("sample plate filename", DefaultAttribute.createAttribute(data.getSourceFilename())); // Number of wells - graphAttributes.put("well count", DefaultAttribute.createAttribute(data.getNumWells().toString())); + ga.put("well count", DefaultAttribute.createAttribute(data.getNumWells().toString())); //Well populations Integer[] wellPopulations = data.getWellPopulations(); StringBuilder populationsStringBuilder = new StringBuilder(); @@ -53,7 +55,8 @@ public class GraphMLFileWriter { populationsStringBuilder.append(wellPopulations[i].toString()); } String wellPopulationsString = populationsStringBuilder.toString(); - graphAttributes.put("well populations", DefaultAttribute.createAttribute(wellPopulationsString)); + ga.put("well populations", DefaultAttribute.createAttribute(wellPopulationsString)); + return ga; } public void writeGraphToFile() { @@ -74,6 +77,9 @@ public class GraphMLFileWriter { return attributes; }); //register the attributes + for(String s : graphAttributes.keySet()) { + exporter.registerAttribute(s, AttributeCategory.GRAPH, AttributeType.STRING); + } exporter.registerAttribute("type", AttributeCategory.NODE, AttributeType.STRING); exporter.registerAttribute("sequence", AttributeCategory.NODE, AttributeType.STRING); exporter.registerAttribute("occupancy", AttributeCategory.NODE, AttributeType.STRING); diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java index 0ab30b2..776f433 100644 --- a/src/main/java/InteractiveInterface.java +++ b/src/main/java/InteractiveInterface.java @@ -315,7 +315,7 @@ public class InteractiveInterface { System.out.println("Serialized binary graph/data file written to: " + filename); } if(BiGpairSEQ.outputGraphML()) { - GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data.getGraph()); + GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data); graphMLWriter.writeGraphToFile(); System.out.println("GraphML file written to: " + filename); } From 1ea68045ce3fd06a4de296970c72c28aae45358b Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 09:49:16 -0600 Subject: [PATCH 08/18] Refactor cdr3 matching to use new Vertex class --- src/main/java/GraphMLFileWriter.java | 4 +- src/main/java/GraphModificationFunctions.java | 70 +++++++++---------- src/main/java/Simulator.java | 63 ++++++++++------- src/main/java/Vertex.java | 19 ++--- 4 files changed, 81 insertions(+), 75 deletions(-) diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index 3d6c8fb..8411502 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -63,7 +63,7 @@ public class GraphMLFileWriter { try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); ){ //create exporter. Let the vertex labels be the unique ids for the vertices - GraphMLExporter> exporter = new GraphMLExporter<>(Vertex::getVertexLabel); + GraphMLExporter> exporter = new GraphMLExporter<>(v -> v.getVertexLabel().toString()); //set to export weights exporter.setExportEdgeWeights(true); //Set graph attributes @@ -71,7 +71,7 @@ public class GraphMLFileWriter { //set type, sequence, and occupancy attributes for each vertex exporter.setVertexAttributeProvider( v -> { Map attributes = new HashMap<>(); - attributes.put("type", DefaultAttribute.createAttribute(v.getType())); + attributes.put("type", DefaultAttribute.createAttribute(v.getType().name())); attributes.put("sequence", DefaultAttribute.createAttribute(v.getSequence())); attributes.put("occupancy", DefaultAttribute.createAttribute(v.getOccupancy())); return attributes; diff --git a/src/main/java/GraphModificationFunctions.java b/src/main/java/GraphModificationFunctions.java index a4ab1a0..68cc9d2 100644 --- a/src/main/java/GraphModificationFunctions.java +++ b/src/main/java/GraphModificationFunctions.java @@ -2,23 +2,25 @@ import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.SimpleWeightedGraph; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; public interface GraphModificationFunctions { //remove over- and under-weight edges - static List filterByOverlapThresholds(SimpleWeightedGraph graph, + static Map filterByOverlapThresholds(SimpleWeightedGraph graph, int low, int high, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); + //List removedEdges = new ArrayList<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) { if(saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer weight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, weight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, weight); } else { graph.setEdgeWeight(e, 0.0); @@ -26,7 +28,7 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } @@ -34,23 +36,19 @@ public interface GraphModificationFunctions { } //Remove edges for pairs with large occupancy discrepancy - static List filterByRelativeOccupancy(SimpleWeightedGraph graph, - Map alphaWellCounts, - Map betaWellCounts, - Map plateVtoAMap, - Map plateVtoBMap, + static Map filterByRelativeOccupancy(SimpleWeightedGraph graph, Integer maxOccupancyDifference, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { - Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); - Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); + Integer alphaOcc = graph.getEdgeSource(e).getOccupancy(); + Integer betaOcc = graph.getEdgeTarget(e).getOccupancy(); if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) { if (saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer weight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, weight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, weight); } else { graph.setEdgeWeight(e, 0.0); @@ -58,7 +56,7 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } @@ -66,26 +64,22 @@ public interface GraphModificationFunctions { } //Remove edges for pairs where overlap size is significantly lower than the well occupancy - static List filterByOverlapPercent(SimpleWeightedGraph graph, - Map alphaWellCounts, - Map betaWellCounts, - Map plateVtoAMap, - Map plateVtoBMap, + static Map filterByOverlapPercent(SimpleWeightedGraph graph, Integer minOverlapPercent, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { - Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); - Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); + Integer alphaOcc = graph.getEdgeSource(e).getOccupancy(); + Integer betaOcc = graph.getEdgeTarget(e).getOccupancy(); double weight = graph.getEdgeWeight(e); double min = minOverlapPercent / 100.0; if ((weight / alphaOcc < min) || (weight / betaOcc < min)) { - if(saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + if (saveEdges) { + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer intWeight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, intWeight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, intWeight); } else { graph.setEdgeWeight(e, 0.0); @@ -93,18 +87,18 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } return removedEdges; } - static void addRemovedEdges(SimpleWeightedGraph graph, - List removedEdges) { - for (Integer[] edge : removedEdges) { + static void addRemovedEdges(SimpleWeightedGraph graph, + Map removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]); - graph.setEdgeWeight(e, (double) edge[2]); + graph.setEdgeWeight(e, removedEdges.get(edge)); } } diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index f63b5b8..2d635d7 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -141,35 +141,39 @@ public class Simulator implements GraphModificationFunctions { Integer highThreshold, Integer maxOccupancyDifference, Integer minOverlapPercent, boolean verbose) { Instant start = Instant.now(); - List removedEdges = new ArrayList<>(); + SimpleWeightedGraph graph = data.getGraph(); + Map removedEdges = new HashMap<>(); boolean saveEdges = BiGpairSEQ.cacheGraph(); int numWells = data.getNumWells(); Integer alphaCount = data.getAlphaCount(); Integer betaCount = data.getBetaCount(); Map distCellsMapAlphaKey = data.getDistCellsMapAlphaKey(); - Map plateVtoAMap = data.getPlateVtoAMap(); - Map plateVtoBMap = data.getPlateVtoBMap(); - Map alphaWellCounts = data.getAlphaWellCounts(); - Map betaWellCounts = data.getBetaWellCounts(); - SimpleWeightedGraph graph = data.getGraph(); + Set alphas = new HashSet<>(); + Set betas = new HashSet<>(); + for(Vertex v: graph.vertexSet()) { + if (SequenceType.CDR3_ALPHA.equals(v.getType())){ + alphas.add(v); + } + else { + betas.add(v); + } + } //remove edges with weights outside given overlap thresholds, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} - removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); if(verbose){System.out.println("Over- and under-weight edges removed");} //remove edges between vertices with too small an overlap size, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() + " percent of vertex occupancy value.");} - removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts, - plateVtoAMap, plateVtoBMap, minOverlapPercent, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges)); if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");} //Filter by relative occupancy if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > " + maxOccupancyDifference);} - removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, - plateVtoAMap, plateVtoBMap, maxOccupancyDifference, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges)); if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " + "removed");} @@ -182,20 +186,20 @@ public class Simulator implements GraphModificationFunctions { switch (heapType) { case "PAIRING" -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet(), + alphas, + betas, i -> new PairingHeap(Comparator.naturalOrder())); } case "FIBONACCI" -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet(), + alphas, + betas, i -> new FibonacciHeap(Comparator.naturalOrder())); } default -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet()); + alphas, + betas); } } //get the matching @@ -225,11 +229,14 @@ public class Simulator implements GraphModificationFunctions { Map matchMap = new HashMap<>(); while(weightIter.hasNext()) { e = weightIter.next(); - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); + //Integer source = graph.getEdgeSource(e); + //Integer target = graph.getEdgeTarget(e); //The match map is all matches found, not just true matches! - matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target)); - check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); + matchMap.put(source.getSequence(), target.getSequence()); + check = target.getOccupancy().equals(distCellsMapAlphaKey.get(source.getSequence())); + //check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); if(check) { trueCount++; } @@ -237,17 +244,19 @@ public class Simulator implements GraphModificationFunctions { falseCount++; } List result = new ArrayList<>(); - result.add(plateVtoAMap.get(source).toString()); + //alpha sequence + result.add(source.getSequence().toString()); //alpha well count - result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString()); - result.add(plateVtoBMap.get(target).toString()); + result.add(source.getOccupancy().toString()); + //beta sequence + result.add(target.getSequence().toString()); //beta well count - result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString()); + result.add(target.getOccupancy().toString()); //overlap count result.add(Double.toString(graph.getEdgeWeight(e))); result.add(Boolean.toString(check)); - double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)), - betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e)); + double pValue = Equations.pValue(numWells, source.getOccupancy(), + target.getOccupancy(), graph.getEdgeWeight(e)); BigDecimal pValueTrunc = new BigDecimal(pValue, mc); result.add(pValueTrunc.toString()); allResults.add(result); diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index a801b87..a7ddda9 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -6,6 +6,9 @@ public class Vertex implements Serializable { private Integer sequence; private Integer occupancy; + public Vertex(Integer vertexLabel) { + this.vertexLabel = vertexLabel; + } public Vertex(String vertexLabel) { this.vertexLabel = Integer.parseInt((vertexLabel)); } @@ -18,33 +21,33 @@ public class Vertex implements Serializable { } - public String getType() { - return type.name(); + public SequenceType getType() { + return type; } public void setType(String type) { this.type = SequenceType.valueOf(type); } - public String getVertexLabel() { - return vertexLabel.toString(); + public Integer getVertexLabel() { + return vertexLabel; } public void setVertexLabel(String label) { this.vertexLabel = Integer.parseInt(label); } - public String getSequence() { + public Integer getSequence() { - return sequence.toString(); + return sequence; } public void setSequence(String sequence) { this.sequence = Integer.parseInt(sequence); } - public String getOccupancy() { - return occupancy.toString(); + public Integer getOccupancy() { + return occupancy; } public void setOccupancy(String occupancy) { From 817fe51708baf3623fcf99bae3d61b67f5694ff8 Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 09:56:46 -0600 Subject: [PATCH 09/18] Code cleanup --- src/main/java/GraphWithMapData.java | 95 +++++++++++++++-------------- src/main/java/Simulator.java | 21 ++++--- 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/src/main/java/GraphWithMapData.java b/src/main/java/GraphWithMapData.java index 3795190..0e4c09b 100644 --- a/src/main/java/GraphWithMapData.java +++ b/src/main/java/GraphWithMapData.java @@ -15,32 +15,33 @@ public class GraphWithMapData implements java.io.Serializable { private Integer alphaCount; private Integer betaCount; private final Map distCellsMapAlphaKey; - private final Map plateVtoAMap; - private final Map plateVtoBMap; - private final Map plateAtoVMap; - private final Map plateBtoVMap; - private final Map alphaWellCounts; - private final Map betaWellCounts; +// private final Map plateVtoAMap; +// private final Map plateVtoBMap; +// private final Map plateAtoVMap; +// private final Map plateBtoVMap; +// private final Map alphaWellCounts; +// private final Map betaWellCounts; private final Duration time; public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations, - Integer alphaCount, Integer betaCount, - Map distCellsMapAlphaKey, Map plateVtoAMap, - Map plateVtoBMap, Map plateAtoVMap, - Map plateBtoVMap, Map alphaWellCounts, - Map betaWellCounts, Duration time) { + Map distCellsMapAlphaKey, Duration time){ + +// Map plateVtoAMap, Integer alphaCount, Integer betaCount, +// Map plateVtoBMap, Map plateAtoVMap, +// Map plateBtoVMap, Map alphaWellCounts, +// Map betaWellCounts,) { this.graph = graph; this.numWells = numWells; this.wellPopulations = wellConcentrations; this.alphaCount = alphaCount; this.betaCount = betaCount; this.distCellsMapAlphaKey = distCellsMapAlphaKey; - this.plateVtoAMap = plateVtoAMap; - this.plateVtoBMap = plateVtoBMap; - this.plateAtoVMap = plateAtoVMap; - this.plateBtoVMap = plateBtoVMap; - this.alphaWellCounts = alphaWellCounts; - this.betaWellCounts = betaWellCounts; +// this.plateVtoAMap = plateVtoAMap; +// this.plateVtoBMap = plateVtoBMap; +// this.plateAtoVMap = plateAtoVMap; +// this.plateBtoVMap = plateBtoVMap; +// this.alphaWellCounts = alphaWellCounts; +// this.betaWellCounts = betaWellCounts; this.time = time; } @@ -56,41 +57,41 @@ public class GraphWithMapData implements java.io.Serializable { return wellPopulations; } - public Integer getAlphaCount() { - return alphaCount; - } - - public Integer getBetaCount() { - return betaCount; - } +// public Integer getAlphaCount() { +// return alphaCount; +// } +// +// public Integer getBetaCount() { +// return betaCount; +// } public Map getDistCellsMapAlphaKey() { return distCellsMapAlphaKey; } - public Map getPlateVtoAMap() { - return plateVtoAMap; - } - - public Map getPlateVtoBMap() { - return plateVtoBMap; - } - - public Map getPlateAtoVMap() { - return plateAtoVMap; - } - - public Map getPlateBtoVMap() { - return plateBtoVMap; - } - - public Map getAlphaWellCounts() { - return alphaWellCounts; - } - - public Map getBetaWellCounts() { - return betaWellCounts; - } +// public Map getPlateVtoAMap() { +// return plateVtoAMap; +// } +// +// public Map getPlateVtoBMap() { +// return plateVtoBMap; +// } +// +// public Map getPlateAtoVMap() { +// return plateAtoVMap; +// } +// +// public Map getPlateBtoVMap() { +// return plateBtoVMap; +// } +// +// public Map getAlphaWellCounts() { +// return alphaWellCounts; +// } +// +// public Map getBetaWellCounts() { +// return betaWellCounts; +// } public Duration getTime() { return time; diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 2d635d7..5fb414b 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -19,11 +19,12 @@ import static java.lang.Float.*; //NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell public class Simulator implements GraphModificationFunctions { - //These are the indices of the different sequences within a cell array - private static final int cdr3AlphaIndex = 0; - private static final int cdr3BetaIndex = 1; - private static final int cdr1AlphaIndex = 2; - private static final int cdr1BetaIndex = 3; + //Replaced with SequenceType ordinals +// //These are the indices of the different sequences within a cell array +// private static final int cdr3AlphaIndex = 0; +// private static final int cdr3BetaIndex = 1; +// private static final int cdr1AlphaIndex = 2; +// private static final int cdr1BetaIndex = 3; //Make the graph needed for matching sequences. //sourceVertexIndices and targetVertexIndices are indices within the cell to use as for the two sets of vertices @@ -127,9 +128,7 @@ public class Simulator implements GraphModificationFunctions { Duration time = Duration.between(start, stop); //create GraphWithMapData object - GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount, - distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap, - plateBtoVMap, alphaWellCounts, betaWellCounts, time); + GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, time); //Set source file name in graph to name of sample plate output.setSourceFilename(samplePlate.getSourceFileName()); //return GraphWithMapData object @@ -145,8 +144,8 @@ public class Simulator implements GraphModificationFunctions { Map removedEdges = new HashMap<>(); boolean saveEdges = BiGpairSEQ.cacheGraph(); int numWells = data.getNumWells(); - Integer alphaCount = data.getAlphaCount(); - Integer betaCount = data.getBetaCount(); + //Integer alphaCount = data.getAlphaCount(); + //Integer betaCount = data.getBetaCount(); Map distCellsMapAlphaKey = data.getDistCellsMapAlphaKey(); Set alphas = new HashSet<>(); Set betas = new HashSet<>(); @@ -158,6 +157,8 @@ public class Simulator implements GraphModificationFunctions { betas.add(v); } } + Integer alphaCount = alphas.size(); + Integer betaCount = betas.size(); //remove edges with weights outside given overlap thresholds, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} From 9ae38bf2473df36279ad446603b142c0354dff69 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Wed, 21 Sep 2022 15:59:23 -0500 Subject: [PATCH 10/18] Fix bug in correct match counter --- src/main/java/Simulator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 5fb414b..08761c7 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -106,7 +106,7 @@ public class Simulator implements GraphModificationFunctions { //List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry List alphaVertices = new ArrayList<>(); //start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map - for (Integer seq: plateAtoVMap.keySet()) { + for (Integer seq : plateAtoVMap.keySet()) { Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); alphaVertices.add(alphaVertex); } @@ -236,7 +236,7 @@ public class Simulator implements GraphModificationFunctions { //Integer target = graph.getEdgeTarget(e); //The match map is all matches found, not just true matches! matchMap.put(source.getSequence(), target.getSequence()); - check = target.getOccupancy().equals(distCellsMapAlphaKey.get(source.getSequence())); + check = target.getSequence().equals(distCellsMapAlphaKey.get(source.getSequence())); //check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); if(check) { trueCount++; From dea4972927c732613ddb091aaa707348520a7932 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Wed, 21 Sep 2022 16:09:08 -0500 Subject: [PATCH 11/18] remove prefiltering of singletons and saturating sequences --- src/main/java/Simulator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 08761c7..b673ba3 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -53,10 +53,10 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("All betas count: " + betaCount);} if(verbose){System.out.println("Well maps made");} - if(verbose){System.out.println("Removing sequences present in all wells.");} - filterByOccupancyThresholds(allAlphas, 1, numWells - 1); - filterByOccupancyThresholds(allBetas, 1, numWells - 1); - if(verbose){System.out.println("Sequences removed");} +// if(verbose){System.out.println("Removing sequences present in all wells.");} +// filterByOccupancyThresholds(allAlphas, 1, numWells - 1); +// filterByOccupancyThresholds(allBetas, 1, numWells - 1); +// if(verbose){System.out.println("Sequences removed");} int pairableAlphaCount = allAlphas.size(); if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);} int pairableBetaCount = allBetas.size(); From 1971a964674e8685a21d2ad2120e4bb486515750 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 14:55:43 -0500 Subject: [PATCH 12/18] Remove pre-filtering of singleton and saturating sequences --- src/main/java/Simulator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 6ba2cbf..5deac67 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -48,9 +48,9 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("Well maps made");} - if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");} - filterByOccupancyThresholds(allAlphas, 2, numWells - 1); - filterByOccupancyThresholds(allBetas, 2, numWells - 1); + //if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");} + //filterByOccupancyThresholds(allAlphas, 2, numWells - 1); + //filterByOccupancyThresholds(allBetas, 2, numWells - 1); if(verbose){System.out.println("Sequences removed");} int pairableAlphaCount = allAlphas.size(); if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);} From 4f2fa4cbbef35e6623f3f4e00fb3fafdc27e5353 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:19:56 -0500 Subject: [PATCH 13/18] Pre-filter saturating sequences only. Retaining singletons seems to improve matching accuracy in high sample rate test (well populations 10% of total cell sample size) --- src/main/java/Simulator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 5deac67..c844d88 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -48,9 +48,9 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("Well maps made");} - //if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");} - //filterByOccupancyThresholds(allAlphas, 2, numWells - 1); - //filterByOccupancyThresholds(allBetas, 2, numWells - 1); + if(verbose){System.out.println("Removing sequences present in all wells.");} + filterByOccupancyThresholds(allAlphas, 1, numWells - 1); + filterByOccupancyThresholds(allBetas, 1, numWells - 1); if(verbose){System.out.println("Sequences removed");} int pairableAlphaCount = allAlphas.size(); if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);} From 475bbf31073e6a7498ecfc3b6fb462348700e54a Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:54:28 -0500 Subject: [PATCH 14/18] Sort vertex lists by vertex label before making adjacency matrix --- src/main/java/Simulator.java | 2 ++ src/main/java/Vertex.java | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index c844d88..d53ece6 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -105,6 +105,7 @@ public class Simulator implements GraphModificationFunctions { Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); alphaVertices.add(alphaVertex); } + Collections.sort(alphaVertices); graphGenerator.first(alphaVertices); //the list of beta vertices //List betaVertices = new ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry @@ -113,6 +114,7 @@ public class Simulator implements GraphModificationFunctions { Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq)); betaVertices.add(betaVertex); } + Collections.sort(betaVertices); graphGenerator.second(betaVertices); //use adjacency matrix of weight created previously graphGenerator.weights(weights); diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index a7ddda9..35719b8 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -1,6 +1,6 @@ import java.io.Serializable; -public class Vertex implements Serializable { +public class Vertex implements Serializable, Comparable { private SequenceType type; private Integer vertexLabel; private Integer sequence; @@ -89,4 +89,9 @@ public class Vertex implements Serializable { return sb.toString(); } + @Override + public int compareTo(Vertex other) { + return this.vertexLabel - other.getVertexLabel(); + } + } From 58fa140ee5bd62a8ecf9d1a3a26074fd990f4d8d Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 16:10:17 -0500 Subject: [PATCH 15/18] add comments --- src/main/java/GraphModificationFunctions.java | 6 +++--- src/main/java/Simulator.java | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/GraphModificationFunctions.java b/src/main/java/GraphModificationFunctions.java index 68cc9d2..4b5591a 100644 --- a/src/main/java/GraphModificationFunctions.java +++ b/src/main/java/GraphModificationFunctions.java @@ -8,7 +8,7 @@ import java.util.Map; public interface GraphModificationFunctions { - //remove over- and under-weight edges + //remove over- and under-weight edges, return removed edges static Map filterByOverlapThresholds(SimpleWeightedGraph graph, int low, int high, boolean saveEdges) { Map removedEdges = new HashMap<>(); @@ -35,7 +35,7 @@ public interface GraphModificationFunctions { return removedEdges; } - //Remove edges for pairs with large occupancy discrepancy + //Remove edges for pairs with large occupancy discrepancy, return removed edges static Map filterByRelativeOccupancy(SimpleWeightedGraph graph, Integer maxOccupancyDifference, boolean saveEdges) { Map removedEdges = new HashMap<>(); @@ -63,7 +63,7 @@ public interface GraphModificationFunctions { return removedEdges; } - //Remove edges for pairs where overlap size is significantly lower than the well occupancy + //Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges static Map filterByOverlapPercent(SimpleWeightedGraph graph, Integer minOverlapPercent, boolean saveEdges) { diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index d53ece6..0d9e17b 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -47,7 +47,7 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("All betas count: " + betaCount);} if(verbose){System.out.println("Well maps made");} - + //ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value if(verbose){System.out.println("Removing sequences present in all wells.");} filterByOccupancyThresholds(allAlphas, 1, numWells - 1); filterByOccupancyThresholds(allBetas, 1, numWells - 1); @@ -105,7 +105,9 @@ public class Simulator implements GraphModificationFunctions { Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); alphaVertices.add(alphaVertex); } + //Sort to make sure the order of vertices in list matches the order of the adjacency matrix Collections.sort(alphaVertices); + //Add ordered list of vertices to the graph graphGenerator.first(alphaVertices); //the list of beta vertices //List betaVertices = new ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry @@ -114,7 +116,9 @@ public class Simulator implements GraphModificationFunctions { Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq)); betaVertices.add(betaVertex); } + //Sort to make sure the order of vertices in list matches the order of the adjacency matrix Collections.sort(betaVertices); + //Add ordered list of vertices to the graph graphGenerator.second(betaVertices); //use adjacency matrix of weight created previously graphGenerator.weights(weights); From 8a77d53f1f24e5b9c306a86da8d9cf655f8f1684 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 17:20:50 -0500 Subject: [PATCH 16/18] Output sequence counts before and after pre-filtering (currently pre-filtering only sequences present in all wells) --- src/main/java/GraphWithMapData.java | 18 +++++++++--------- src/main/java/Simulator.java | 14 ++++++++------ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/main/java/GraphWithMapData.java b/src/main/java/GraphWithMapData.java index 8f9ab03..aad6c2f 100644 --- a/src/main/java/GraphWithMapData.java +++ b/src/main/java/GraphWithMapData.java @@ -25,9 +25,9 @@ public class GraphWithMapData implements java.io.Serializable { private final Duration time; public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations, - Map distCellsMapAlphaKey, Duration time){ + Map distCellsMapAlphaKey, Integer alphaCount, Integer betaCount, Duration time){ -// Map plateVtoAMap, Integer alphaCount, Integer betaCount, +// Map plateVtoAMap, // Map plateVtoBMap, Map plateAtoVMap, // Map plateBtoVMap, Map alphaWellCounts, // Map betaWellCounts,) { @@ -58,13 +58,13 @@ public class GraphWithMapData implements java.io.Serializable { return wellPopulations; } -// public Integer getAlphaCount() { -// return alphaCount; -// } -// -// public Integer getBetaCount() { -// return betaCount; -// } + public Integer getAlphaCount() { + return alphaCount; + } + + public Integer getBetaCount() { + return betaCount; + } public Map getDistCellsMapAlphaKey() { return distCellsMapAlphaKey; diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 0d9e17b..28dd8ff 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -129,7 +129,7 @@ public class Simulator implements GraphModificationFunctions { Duration time = Duration.between(start, stop); //create GraphWithMapData object - GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, time); + GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, alphaCount, betaCount, time); //Set source file name in graph to name of sample plate output.setSourceFilename(samplePlate.getFilename()); //return GraphWithMapData object @@ -158,8 +158,8 @@ public class Simulator implements GraphModificationFunctions { betas.add(v); } } - Integer alphaCount = alphas.size(); - Integer betaCount = betas.size(); + Integer graphAlphaCount = alphas.size(); + Integer graphBetaCount = betas.size(); //remove edges with weights outside given overlap thresholds, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} @@ -266,7 +266,7 @@ public class Simulator implements GraphModificationFunctions { //Metadata comments for CSV file String algoType = "LEDA book with heap: " + heapType; - int min = Math.min(alphaCount, betaCount); + int min = Math.min(graphAlphaCount, graphBetaCount); //matching weight BigDecimal totalMatchingWeight = maxWeightMatching.getMatchingWeight(); //rate of attempted matching @@ -301,8 +301,10 @@ public class Simulator implements GraphModificationFunctions { metadata.put("algorithm type", algoType); metadata.put("matching weight", totalMatchingWeight.toString()); metadata.put("well populations", wellPopulationsString); - metadata.put("total alphas found", alphaCount.toString()); - metadata.put("total betas found", betaCount.toString()); + metadata.put("total alphas on plate", data.getAlphaCount().toString()); + metadata.put("total betas on plate", data.getBetaCount().toString()); + metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString()); + metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString()); metadata.put("high overlap threshold", highThreshold.toString()); metadata.put("low overlap threshold", lowThreshold.toString()); metadata.put("minimum overlap percent", minOverlapPercent.toString()); From 740835f8142e71380fe87477eed57c0cad597cd6 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 17:47:07 -0500 Subject: [PATCH 17/18] fix typo --- readme.md | 2 +- src/main/java/Simulator.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index b6f2251..73ab977 100644 --- a/readme.md +++ b/readme.md @@ -281,7 +281,7 @@ with different filtering options), the actual elapsed time was greater. File I/O slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes. As mentioned in the theory section, performance could be improved by implementing a more efficient algorithm for finding -the maximum weighted matching. +the maximum weight matching. ## BEHAVIOR WITH RANDOMIZED WELL POPULATIONS diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 28dd8ff..ede14e8 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -179,9 +179,9 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " + "removed");} - //Find Maximum Weighted Matching + //Find Maximum Weight Matching //using jheaps library class PairingHeap for improved efficiency - if(verbose){System.out.println("Finding maximum weighted matching");} + if(verbose){System.out.println("Finding maximum weight matching");} MaximumWeightBipartiteMatching maxWeightMatching; //Use correct heap type for priority queue String heapType = BiGpairSEQ.getPriorityQueueHeapType(); From 04a077da2ea1084bba431e97ff836e2d519d68a4 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Sun, 25 Sep 2022 18:24:12 -0500 Subject: [PATCH 18/18] update Readme --- readme.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 73ab977..6ee5bea 100644 --- a/readme.md +++ b/readme.md @@ -351,6 +351,8 @@ roughly as though it had a constant well population equal to the plate's average * Advantage: would eliminate the need to use maps to associate vertices with sequences, which would make the code easier to understand. * ~~Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.~~ * ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE +* ~~Implement custom Vertex class to simplify code and make it easier to implement different MWM algorithms~~ DONE + * This also seems to be faster when using the same algorithm than the version with lots of maps, which is a nice bonus! * Re-implement CDR1 matching method * Implement Duan and Su's maximum weight matching algorithm * Add controllable algorithm-type parameter? @@ -361,7 +363,7 @@ roughly as though it had a constant well population equal to the plate's average * Implement Vose's alias method for arbitrary statistical distributions of cells * Should probably refactor to use apache commons rng for this * Use commons JCS for caching -* Enable post-filtering instead of pre-filtering. Pre-filtering of things like singleton sequences or saturating-occupancy sequences reduces graph size, but could conceivably reduce pairing accuracy by throwing away data. While these sequences have very little signal, it would be interesting to compare unfiltered results to filtered results. This would require a much, much faster MWM algorithm, though, to handle the much larger graphs. Possible one of the linear-time approximation algorithms. +* Parameterize pre-filtering. Currently, sequences present in all wells are filtered out before constructing the graph, which massively reduces graph size. But, ideally, no pre-filtering would be necessary. ## CITATIONS