diff --git a/readme.md b/readme.md index cd42fc9..6ee5bea 100644 --- a/readme.md +++ b/readme.md @@ -347,9 +347,9 @@ roughly as though it had a constant well population equal to the plate's average * ~~Apache Commons CSV library writes entries a row at a time~~ * _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Thingiverse transposes CSV matrices in R, might not even be worth fixing. * ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE - * ~~Custom vertex type with attribute for sequence occupancy?~~ ABANDONED + * ~~Custom vertex type with attribute for sequence occupancy?~~ DONE * Advantage: would eliminate the need to use maps to associate vertices with sequences, which would make the code easier to understand. - * Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix. + * ~~Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.~~ * ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE * ~~Implement custom Vertex class to simplify code and make it easier to implement different MWM algorithms~~ DONE * This also seems to be faster when using the same algorithm than the version with lots of maps, which is a nice bonus! diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java index 1f9483a..360e1f5 100644 --- a/src/main/java/BiGpairSEQ.java +++ b/src/main/java/BiGpairSEQ.java @@ -16,7 +16,7 @@ public class BiGpairSEQ { private static HeapType priorityQueueHeapType = HeapType.FIBONACCI; private static boolean outputBinary = true; private static boolean outputGraphML = false; - private static final String version = "version 2.0"; + private static final String version = "version 3.0"; public static void main(String[] args) { if (args.length == 0) { diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index 96ad358..8411502 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -3,8 +3,9 @@ import org.jgrapht.graph.SimpleWeightedGraph; import org.jgrapht.nio.Attribute; import org.jgrapht.nio.AttributeType; import org.jgrapht.nio.DefaultAttribute; -import org.jgrapht.nio.dot.DOTExporter; import org.jgrapht.nio.graphml.GraphMLExporter; +import org.jgrapht.nio.graphml.GraphMLExporter.AttributeCategory; +import org.w3c.dom.Attr; import java.io.BufferedWriter; import java.io.IOException; @@ -12,14 +13,14 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.Map; public class GraphMLFileWriter { String filename; + SimpleWeightedGraph graph; GraphWithMapData data; - + Map graphAttributes; public GraphMLFileWriter(String filename, GraphWithMapData data) { if(!filename.matches(".*\\.graphml")){ @@ -27,52 +28,61 @@ public class GraphMLFileWriter { } this.filename = filename; this.data = data; + this.graph = data.getGraph(); + graphAttributes = createGraphAttributes(); } -// public void writeGraphToFile() { -// try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); -// ){ -// GraphMLExporter exporter = new GraphMLExporter<>(); -// exporter.exportGraph(graph, writer); -// } catch(IOException ex){ -// System.out.println("Could not make new file named "+filename); -// System.err.println(ex); -// } -// } + public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) { + if(!filename.matches(".*\\.graphml")){ + filename = filename + ".graphml"; + } + this.filename = filename; + this.graph = graph; + } + + private Map createGraphAttributes(){ + Map ga = new HashMap<>(); + //Sample plate filename + ga.put("sample plate filename", DefaultAttribute.createAttribute(data.getSourceFilename())); + // Number of wells + ga.put("well count", DefaultAttribute.createAttribute(data.getNumWells().toString())); + //Well populations + Integer[] wellPopulations = data.getWellPopulations(); + StringBuilder populationsStringBuilder = new StringBuilder(); + populationsStringBuilder.append(wellPopulations[0].toString()); + for(int i = 1; i < wellPopulations.length; i++){ + populationsStringBuilder.append(", "); + populationsStringBuilder.append(wellPopulations[i].toString()); + } + String wellPopulationsString = populationsStringBuilder.toString(); + ga.put("well populations", DefaultAttribute.createAttribute(wellPopulationsString)); + return ga; + } public void writeGraphToFile() { - SimpleWeightedGraph graph = data.getGraph(); - Map vertexToAlphaMap = data.getPlateVtoAMap(); - Map vertexToBetaMap = data.getPlateVtoBMap(); - Map alphaOccs = data.getAlphaWellCounts(); - Map betaOccs = data.getBetaWellCounts(); try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); ){ //create exporter. Let the vertex labels be the unique ids for the vertices - GraphMLExporter> exporter = new GraphMLExporter<>(v -> v.toString()); + GraphMLExporter> exporter = new GraphMLExporter<>(v -> v.getVertexLabel().toString()); //set to export weights exporter.setExportEdgeWeights(true); + //Set graph attributes + exporter.setGraphAttributeProvider( () -> graphAttributes); //set type, sequence, and occupancy attributes for each vertex exporter.setVertexAttributeProvider( v -> { Map attributes = new HashMap<>(); - if(vertexToAlphaMap.containsKey(v)) { - attributes.put("type", DefaultAttribute.createAttribute("CDR3 Alpha")); - attributes.put("sequence", DefaultAttribute.createAttribute(vertexToAlphaMap.get(v))); - attributes.put("occupancy", DefaultAttribute.createAttribute( - alphaOccs.get(vertexToAlphaMap.get(v)))); - } - else if(vertexToBetaMap.containsKey(v)) { - attributes.put("type", DefaultAttribute.createAttribute("CDR3 Beta")); - attributes.put("sequence", DefaultAttribute.createAttribute(vertexToBetaMap.get(v))); - attributes.put("occupancy", DefaultAttribute.createAttribute( - betaOccs.get(vertexToBetaMap.get(v)))); - } + attributes.put("type", DefaultAttribute.createAttribute(v.getType().name())); + attributes.put("sequence", DefaultAttribute.createAttribute(v.getSequence())); + attributes.put("occupancy", DefaultAttribute.createAttribute(v.getOccupancy())); return attributes; }); //register the attributes - exporter.registerAttribute("type", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING); - exporter.registerAttribute("sequence", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING); - exporter.registerAttribute("occupancy", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING); + for(String s : graphAttributes.keySet()) { + exporter.registerAttribute(s, AttributeCategory.GRAPH, AttributeType.STRING); + } + exporter.registerAttribute("type", AttributeCategory.NODE, AttributeType.STRING); + exporter.registerAttribute("sequence", AttributeCategory.NODE, AttributeType.STRING); + exporter.registerAttribute("occupancy", AttributeCategory.NODE, AttributeType.STRING); //export the graph exporter.exportGraph(graph, writer); } catch(IOException ex){ @@ -81,4 +91,3 @@ public class GraphMLFileWriter { } } } - diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java index 7080807..5055e00 100644 --- a/src/main/java/InteractiveInterface.java +++ b/src/main/java/InteractiveInterface.java @@ -258,7 +258,7 @@ public class InteractiveInterface { cellFile = sc.next(); System.out.print("\nPlease enter name of an existing sample plate file: "); plateFile = sc.next(); - System.out.println("\nThe graph and occupancy data will be written to a serialized binary file."); + System.out.println("\nThe graph and occupancy data will be written to a file."); System.out.print("Please enter a name for the output file: "); filename = sc.next(); } catch (InputMismatchException ex) { @@ -504,7 +504,7 @@ public class InteractiveInterface { System.out.println("2) Turn " + getOnOff(!BiGpairSEQ.cachePlate()) + " plate file caching"); System.out.println("3) Turn " + getOnOff(!BiGpairSEQ.cacheGraph()) + " graph/data file caching"); System.out.println("4) Turn " + getOnOff(!BiGpairSEQ.outputBinary()) + " serialized binary graph output"); - System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output"); + System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output (for data portability to other programs)"); System.out.println("6) Maximum weight matching algorithm options"); System.out.println("0) Return to main menu"); try { diff --git a/src/main/java/SequenceType.java b/src/main/java/SequenceType.java new file mode 100644 index 0000000..54de083 --- /dev/null +++ b/src/main/java/SequenceType.java @@ -0,0 +1,8 @@ +//enum for tagging types of sequences +//Listed in order that they appear in a cell array, so ordinal() method will return correct index +public enum SequenceType { + CDR3_ALPHA, + CDR3_BETA, + CDR1_ALPHA, + CDR1_BETA +}