From 1ea68045ce3fd06a4de296970c72c28aae45358b Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 09:49:16 -0600 Subject: [PATCH] Refactor cdr3 matching to use new Vertex class --- src/main/java/GraphMLFileWriter.java | 4 +- src/main/java/GraphModificationFunctions.java | 70 +++++++++---------- src/main/java/Simulator.java | 63 ++++++++++------- src/main/java/Vertex.java | 19 ++--- 4 files changed, 81 insertions(+), 75 deletions(-) diff --git a/src/main/java/GraphMLFileWriter.java b/src/main/java/GraphMLFileWriter.java index 3d6c8fb..8411502 100644 --- a/src/main/java/GraphMLFileWriter.java +++ b/src/main/java/GraphMLFileWriter.java @@ -63,7 +63,7 @@ public class GraphMLFileWriter { try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); ){ //create exporter. Let the vertex labels be the unique ids for the vertices - GraphMLExporter> exporter = new GraphMLExporter<>(Vertex::getVertexLabel); + GraphMLExporter> exporter = new GraphMLExporter<>(v -> v.getVertexLabel().toString()); //set to export weights exporter.setExportEdgeWeights(true); //Set graph attributes @@ -71,7 +71,7 @@ public class GraphMLFileWriter { //set type, sequence, and occupancy attributes for each vertex exporter.setVertexAttributeProvider( v -> { Map attributes = new HashMap<>(); - attributes.put("type", DefaultAttribute.createAttribute(v.getType())); + attributes.put("type", DefaultAttribute.createAttribute(v.getType().name())); attributes.put("sequence", DefaultAttribute.createAttribute(v.getSequence())); attributes.put("occupancy", DefaultAttribute.createAttribute(v.getOccupancy())); return attributes; diff --git a/src/main/java/GraphModificationFunctions.java b/src/main/java/GraphModificationFunctions.java index a4ab1a0..68cc9d2 100644 --- a/src/main/java/GraphModificationFunctions.java +++ b/src/main/java/GraphModificationFunctions.java @@ -2,23 +2,25 @@ import org.jgrapht.graph.DefaultWeightedEdge; import 
org.jgrapht.graph.SimpleWeightedGraph; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; public interface GraphModificationFunctions { //remove over- and under-weight edges - static List filterByOverlapThresholds(SimpleWeightedGraph graph, + static Map filterByOverlapThresholds(SimpleWeightedGraph graph, int low, int high, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); + //List removedEdges = new ArrayList<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) { if(saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer weight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, weight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, weight); } else { graph.setEdgeWeight(e, 0.0); @@ -26,7 +28,7 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } @@ -34,23 +36,19 @@ public interface GraphModificationFunctions { } //Remove edges for pairs with large occupancy discrepancy - static List filterByRelativeOccupancy(SimpleWeightedGraph graph, - Map alphaWellCounts, - Map betaWellCounts, - Map plateVtoAMap, - Map plateVtoBMap, + static Map filterByRelativeOccupancy(SimpleWeightedGraph graph, Integer maxOccupancyDifference, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { - Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); - Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); + Integer alphaOcc = 
graph.getEdgeSource(e).getOccupancy(); + Integer betaOcc = graph.getEdgeTarget(e).getOccupancy(); if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) { if (saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer weight = (int) graph.getEdgeWeight(e); - Integer[] edge = {source, target, weight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, weight); } else { graph.setEdgeWeight(e, 0.0); @@ -58,7 +56,7 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } @@ -66,26 +64,22 @@ public interface GraphModificationFunctions { } //Remove edges for pairs where overlap size is significantly lower than the well occupancy - static List filterByOverlapPercent(SimpleWeightedGraph graph, - Map alphaWellCounts, - Map betaWellCounts, - Map plateVtoAMap, - Map plateVtoBMap, + static Map filterByOverlapPercent(SimpleWeightedGraph graph, Integer minOverlapPercent, boolean saveEdges) { - List removedEdges = new ArrayList<>(); + Map removedEdges = new HashMap<>(); for (DefaultWeightedEdge e : graph.edgeSet()) { - Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e))); - Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e))); + Integer alphaOcc = graph.getEdgeSource(e).getOccupancy(); + Integer betaOcc = graph.getEdgeTarget(e).getOccupancy(); double weight = graph.getEdgeWeight(e); double min = minOverlapPercent / 100.0; if ((weight / alphaOcc < min) || (weight / betaOcc < min)) { - if(saveEdges) { - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + if (saveEdges) { + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); Integer intWeight = (int) 
graph.getEdgeWeight(e); - Integer[] edge = {source, target, intWeight}; - removedEdges.add(edge); + Vertex[] edge = {source, target}; + removedEdges.put(edge, intWeight); } else { graph.setEdgeWeight(e, 0.0); @@ -93,18 +87,18 @@ public interface GraphModificationFunctions { } } if(saveEdges) { - for (Integer[] edge : removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { graph.removeEdge(edge[0], edge[1]); } } return removedEdges; } - static void addRemovedEdges(SimpleWeightedGraph graph, - List removedEdges) { - for (Integer[] edge : removedEdges) { + static void addRemovedEdges(SimpleWeightedGraph graph, + Map removedEdges) { + for (Vertex[] edge : removedEdges.keySet()) { DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]); - graph.setEdgeWeight(e, (double) edge[2]); + graph.setEdgeWeight(e, removedEdges.get(edge)); } } diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index f63b5b8..2d635d7 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -141,35 +141,39 @@ public class Simulator implements GraphModificationFunctions { Integer highThreshold, Integer maxOccupancyDifference, Integer minOverlapPercent, boolean verbose) { Instant start = Instant.now(); - List removedEdges = new ArrayList<>(); + SimpleWeightedGraph graph = data.getGraph(); + Map removedEdges = new HashMap<>(); boolean saveEdges = BiGpairSEQ.cacheGraph(); int numWells = data.getNumWells(); Integer alphaCount = data.getAlphaCount(); Integer betaCount = data.getBetaCount(); Map distCellsMapAlphaKey = data.getDistCellsMapAlphaKey(); - Map plateVtoAMap = data.getPlateVtoAMap(); - Map plateVtoBMap = data.getPlateVtoBMap(); - Map alphaWellCounts = data.getAlphaWellCounts(); - Map betaWellCounts = data.getBetaWellCounts(); - SimpleWeightedGraph graph = data.getGraph(); + Set alphas = new HashSet<>(); + Set betas = new HashSet<>(); + for(Vertex v: graph.vertexSet()) { + if (SequenceType.CDR3_ALPHA.equals(v.getType())){ + alphas.add(v); + } + 
else { + betas.add(v); + } + } //remove edges with weights outside given overlap thresholds, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");} - removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges)); if(verbose){System.out.println("Over- and under-weight edges removed");} //remove edges between vertices with too small an overlap size, add those to removed edge list if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() + " percent of vertex occupancy value.");} - removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts, - plateVtoAMap, plateVtoBMap, minOverlapPercent, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges)); if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");} //Filter by relative occupancy if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > " + maxOccupancyDifference);} - removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, - plateVtoAMap, plateVtoBMap, maxOccupancyDifference, saveEdges)); + removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges)); if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " + "removed");} @@ -182,20 +186,20 @@ public class Simulator implements GraphModificationFunctions { switch (heapType) { case "PAIRING" -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet(), + alphas, 
+ betas, i -> new PairingHeap(Comparator.naturalOrder())); } case "FIBONACCI" -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet(), + alphas, + betas, i -> new FibonacciHeap(Comparator.naturalOrder())); } default -> { maxWeightMatching = new MaximumWeightBipartiteMatching(graph, - plateVtoAMap.keySet(), - plateVtoBMap.keySet()); + alphas, + betas); } } //get the matching @@ -225,11 +229,14 @@ public class Simulator implements GraphModificationFunctions { Map matchMap = new HashMap<>(); while(weightIter.hasNext()) { e = weightIter.next(); - Integer source = graph.getEdgeSource(e); - Integer target = graph.getEdgeTarget(e); + Vertex source = graph.getEdgeSource(e); + Vertex target = graph.getEdgeTarget(e); + //Integer source = graph.getEdgeSource(e); + //Integer target = graph.getEdgeTarget(e); //The match map is all matches found, not just true matches! - matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target)); - check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source))); + matchMap.put(source.getSequence(), target.getSequence()); + check = target.getSequence().equals(distCellsMapAlphaKey.get(source.getSequence())); + //a match is true when the matched beta sequence equals the beta sequence distCellsMapAlphaKey holds for this alpha sequence if(check) { trueCount++; } @@ -237,17 +244,19 @@ public class Simulator implements GraphModificationFunctions { falseCount++; } List result = new ArrayList<>(); - result.add(plateVtoAMap.get(source).toString()); + //alpha sequence + result.add(source.getSequence().toString()); //alpha well count - result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString()); - result.add(plateVtoBMap.get(target).toString()); + result.add(source.getOccupancy().toString()); + //beta sequence + result.add(target.getSequence().toString()); //beta well count - result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString()); +
result.add(target.getOccupancy().toString()); //overlap count result.add(Double.toString(graph.getEdgeWeight(e))); result.add(Boolean.toString(check)); - double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)), - betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e)); + double pValue = Equations.pValue(numWells, source.getOccupancy(), + target.getOccupancy(), graph.getEdgeWeight(e)); BigDecimal pValueTrunc = new BigDecimal(pValue, mc); result.add(pValueTrunc.toString()); allResults.add(result); diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index a801b87..a7ddda9 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -6,6 +6,9 @@ public class Vertex implements Serializable { private Integer sequence; private Integer occupancy; + public Vertex(Integer vertexLabel) { + this.vertexLabel = vertexLabel; + } public Vertex(String vertexLabel) { this.vertexLabel = Integer.parseInt((vertexLabel)); } @@ -18,33 +21,33 @@ public class Vertex implements Serializable { } - public String getType() { - return type.name(); + public SequenceType getType() { + return type; } public void setType(String type) { this.type = SequenceType.valueOf(type); } - public String getVertexLabel() { - return vertexLabel.toString(); + public Integer getVertexLabel() { + return vertexLabel; } public void setVertexLabel(String label) { this.vertexLabel = Integer.parseInt(label); } - public String getSequence() { + public Integer getSequence() { - return sequence.toString(); + return sequence; } public void setSequence(String sequence) { this.sequence = Integer.parseInt(sequence); } - public String getOccupancy() { - return occupancy.toString(); + public Integer getOccupancy() { + return occupancy; } public void setOccupancy(String occupancy) {