Merge branch 'Dev_Vertex'

# Conflicts:
#	src/main/java/GraphModificationFunctions.java
#	src/main/java/GraphWithMapData.java
#	src/main/java/Simulator.java
#	src/main/java/Vertex.java
This commit is contained in:
eugenefischer
2022-09-25 18:33:26 -05:00
9 changed files with 264 additions and 150 deletions

View File

@@ -281,7 +281,7 @@ with different filtering options), the actual elapsed time was greater. File I/O
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
As mentioned in the theory section, performance could be improved by implementing a more efficient algorithm for finding
the maximum weighted matching.
the maximum weight matching.
## BEHAVIOR WITH RANDOMIZED WELL POPULATIONS
@@ -351,6 +351,8 @@ roughly as though it had a constant well population equal to the plate's average
* Advantage: would eliminate the need to use maps to associate vertices with sequences, which would make the code easier to understand.
* Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.
* ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE
* ~~Implement custom Vertex class to simplify code and make it easier to implement different MWM algorithms~~ DONE
* This also seems to be faster when using the same algorithm than the version with lots of maps, which is a nice bonus!
* Re-implement CDR1 matching method
* Implement Duan and Su's maximum weight matching algorithm
* Add controllable algorithm-type parameter?
@@ -361,7 +363,7 @@ roughly as though it had a constant well population equal to the plate's average
* Implement Vose's alias method for arbitrary statistical distributions of cells
* Should probably refactor to use apache commons rng for this
* Use commons JCS for caching
* Enable post-filtering instead of pre-filtering. Pre-filtering of things like singleton sequences or saturating-occupancy sequences reduces graph size, but could conceivably reduce pairing accuracy by throwing away data. While these sequences have very little signal, it would be interesting to compare unfiltered results to filtered results. This would require a much, much faster MWM algorithm, though, to handle the much larger graphs. Possible one of the linear-time approximation algorithms.
* Parameterize pre-filtering. Currently, sequences present in all wells are filtered out before constructing the graph, which massively reduces graph size. But, ideally, no pre-filtering would be necessary.
## CITATIONS

View File

@@ -13,7 +13,7 @@ public class BiGpairSEQ {
private static boolean cacheCells = false;
private static boolean cachePlate = false;
private static boolean cacheGraph = false;
private static String priorityQueueHeapType = "FIBONACCI";
private static HeapType priorityQueueHeapType = HeapType.FIBONACCI;
private static boolean outputBinary = true;
private static boolean outputGraphML = false;
private static final String version = "version 2.0";
@@ -157,15 +157,15 @@ public class BiGpairSEQ {
}
public static String getPriorityQueueHeapType() {
return priorityQueueHeapType;
return priorityQueueHeapType.name();
}
public static void setPairingHeap() {
priorityQueueHeapType = "PAIRING";
priorityQueueHeapType = HeapType.PAIRING;
}
public static void setFibonacciHeap() {
priorityQueueHeapType = "FIBONACCI";
priorityQueueHeapType = HeapType.FIBONACCI;
}
public static boolean outputBinary() {return outputBinary;}

View File

@@ -13,8 +13,12 @@ public class CellSample {
List<Integer> numbersCDR3 = new ArrayList<>();
List<Integer> numbersCDR1 = new ArrayList<>();
Integer numDistCDR3s = 2 * numDistinctCells + 1;
//Assign consecutive integers for each CDR3. This ensures they are all unique.
IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
//After all CDR3s are assigned, start assigning consecutive integers to CDR1s
//There will usually be fewer integers in the CDR1 list, which will allow repeats below
IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
//randomize the order of the numbers in the lists
Collections.shuffle(numbersCDR3);
Collections.shuffle(numbersCDR1);
@@ -22,11 +26,15 @@ public class CellSample {
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
//Go through entire CDR3 list once, make pairs of alphas and betas
Integer tmpCDR3a = numbersCDR3.get(i);
Integer tmpCDR3b = numbersCDR3.get(i+1);
//Go through (likely shorter) CDR1 list as many times as necessary, make pairs of alphas and betas
Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
//Make the array representing the cell
Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
//Add the cell to the list of distinct cells
distinctCells.add(tmp);
}
this.cells = distinctCells;

View File

@@ -2,23 +2,25 @@ import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public interface GraphModificationFunctions {
//remove over- and under-weight edges
static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
//remove over- and under-weight edges, return removed edges
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
int low, int high, boolean saveEdges) {
List<Integer[]> removedEdges = new ArrayList<>();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
//List<Integer[]> removedEdges = new ArrayList<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
if(saveEdges) {
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Integer[] edge = {source, target, weight};
removedEdges.add(edge);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
}
else {
graph.setEdgeWeight(e, 0.0);
@@ -26,31 +28,27 @@ public interface GraphModificationFunctions {
}
}
if(saveEdges) {
for (Integer[] edge : removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
}
}
return removedEdges;
}
//Remove edges for pairs with large occupancy discrepancy
static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
//Remove edges for pairs with large occupancy discrepancy, return removed edges
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer maxOccupancyDifference, boolean saveEdges) {
List<Integer[]> removedEdges = new ArrayList<>();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
if (saveEdges) {
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Integer[] edge = {source, target, weight};
removedEdges.add(edge);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
}
else {
graph.setEdgeWeight(e, 0.0);
@@ -58,34 +56,30 @@ public interface GraphModificationFunctions {
}
}
if(saveEdges) {
for (Integer[] edge : removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
}
}
return removedEdges;
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer minOverlapPercent,
boolean saveEdges) {
List<Integer[]> removedEdges = new ArrayList<>();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
double weight = graph.getEdgeWeight(e);
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
if (saveEdges) {
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer intWeight = (int) graph.getEdgeWeight(e);
Integer[] edge = {source, target, intWeight};
removedEdges.add(edge);
Vertex[] edge = {source, target};
removedEdges.put(edge, intWeight);
}
else {
graph.setEdgeWeight(e, 0.0);
@@ -93,18 +87,18 @@ public interface GraphModificationFunctions {
}
}
if(saveEdges) {
for (Integer[] edge : removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
}
}
return removedEdges;
}
static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
List<Integer[]> removedEdges) {
for (Integer[] edge : removedEdges) {
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Map<Vertex[], Integer> removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
graph.setEdgeWeight(e, (double) edge[2]);
graph.setEdgeWeight(e, removedEdges.get(edge));
}
}

View File

@@ -6,6 +6,7 @@ import java.util.Map;
//Can't just write the graph, because I need the occupancy data too.
//Makes most sense to serialize object and write that to a file.
//Which means there's no reason to split map data and graph data up.
//Custom vertex class means a lot of the map data can now be encoded in the graph itself
public class GraphWithMapData implements java.io.Serializable {
private String sourceFilename;
@@ -15,32 +16,33 @@ public class GraphWithMapData implements java.io.Serializable {
private Integer alphaCount;
private Integer betaCount;
private final Map<Integer, Integer> distCellsMapAlphaKey;
private final Map<Integer, Integer> plateVtoAMap;
private final Map<Integer, Integer> plateVtoBMap;
private final Map<Integer, Integer> plateAtoVMap;
private final Map<Integer, Integer> plateBtoVMap;
private final Map<Integer, Integer> alphaWellCounts;
private final Map<Integer, Integer> betaWellCounts;
// private final Map<Integer, Integer> plateVtoAMap;
// private final Map<Integer, Integer> plateVtoBMap;
// private final Map<Integer, Integer> plateAtoVMap;
// private final Map<Integer, Integer> plateBtoVMap;
// private final Map<Integer, Integer> alphaWellCounts;
// private final Map<Integer, Integer> betaWellCounts;
private final Duration time;
public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
Integer alphaCount, Integer betaCount,
Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts, Duration time) {
Map<Integer, Integer> distCellsMapAlphaKey, Integer alphaCount, Integer betaCount, Duration time){
// Map<Integer, Integer> plateVtoAMap,
// Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
// Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
// Map<Integer, Integer> betaWellCounts,) {
this.graph = graph;
this.numWells = numWells;
this.wellPopulations = wellConcentrations;
this.alphaCount = alphaCount;
this.betaCount = betaCount;
this.distCellsMapAlphaKey = distCellsMapAlphaKey;
this.plateVtoAMap = plateVtoAMap;
this.plateVtoBMap = plateVtoBMap;
this.plateAtoVMap = plateAtoVMap;
this.plateBtoVMap = plateBtoVMap;
this.alphaWellCounts = alphaWellCounts;
this.betaWellCounts = betaWellCounts;
// this.plateVtoAMap = plateVtoAMap;
// this.plateVtoBMap = plateVtoBMap;
// this.plateAtoVMap = plateAtoVMap;
// this.plateBtoVMap = plateBtoVMap;
// this.alphaWellCounts = alphaWellCounts;
// this.betaWellCounts = betaWellCounts;
this.time = time;
}
@@ -68,29 +70,29 @@ public class GraphWithMapData implements java.io.Serializable {
return distCellsMapAlphaKey;
}
public Map<Integer, Integer> getPlateVtoAMap() {
return plateVtoAMap;
}
public Map<Integer, Integer> getPlateVtoBMap() {
return plateVtoBMap;
}
public Map<Integer, Integer> getPlateAtoVMap() {
return plateAtoVMap;
}
public Map<Integer, Integer> getPlateBtoVMap() {
return plateBtoVMap;
}
public Map<Integer, Integer> getAlphaWellCounts() {
return alphaWellCounts;
}
public Map<Integer, Integer> getBetaWellCounts() {
return betaWellCounts;
}
// public Map<Integer, Integer> getPlateVtoAMap() {
// return plateVtoAMap;
// }
//
// public Map<Integer, Integer> getPlateVtoBMap() {
// return plateVtoBMap;
// }
//
// public Map<Integer, Integer> getPlateAtoVMap() {
// return plateAtoVMap;
// }
//
// public Map<Integer, Integer> getPlateBtoVMap() {
// return plateBtoVMap;
// }
//
// public Map<Integer, Integer> getAlphaWellCounts() {
// return alphaWellCounts;
// }
//
// public Map<Integer, Integer> getBetaWellCounts() {
// return betaWellCounts;
// }
public Duration getTime() {
return time;

View File

@@ -0,0 +1,4 @@
public enum HeapType {
FIBONACCI,
PAIRING
}

View File

@@ -158,9 +158,9 @@ public class Plate {
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
Map<Integer,Integer> assay = new HashMap<>();
for(int pIndex: sIndices){
for(int sIndex: sIndices){
for(int i = start; i < end; i++){
countSequences(assay, wells.get(i), pIndex);
countSequences(assay, wells.get(i), sIndex);
}
}
return assay;
@@ -169,6 +169,7 @@ public class Plate {
private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
for(Integer[] cell : well) {
for(int sIndex: sIndices){
//skip dropout sequences, which have value -1
if(cell[sIndex] != -1){
wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
}

View File

@@ -18,36 +18,36 @@ import static java.lang.Float.*;
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
public class Simulator implements GraphModificationFunctions {
private static final int cdr3AlphaIndex = 0;
private static final int cdr3BetaIndex = 1;
private static final int cdr1AlphaIndex = 2;
private static final int cdr1BetaIndex = 3;
//Make the graph needed for matching CDR3s
//Make the graph needed for matching sequences.
//sourceVertexIndices and targetVertexIndices are indices within the cell to use as for the two sets of vertices
//in the bipartite graph. "Source" and "target" are JGraphT terms for the two vertices an edge touches,
//even if not directed.
public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) {
Instant start = Instant.now();
List<Integer[]> distinctCells = cellSample.getCells();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()};
int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()};
int numWells = samplePlate.getSize();
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr3BetaIndex);
Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
if(verbose){System.out.println("Making well maps");}
Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndex);
Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndex);
Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndices);
Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndices);
int alphaCount = allAlphas.size();
if(verbose){System.out.println("All alphas count: " + alphaCount);}
int betaCount = allBetas.size();
if(verbose){System.out.println("All betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");}
//ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value
if(verbose){System.out.println("Removing sequences present in all wells.");}
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
@@ -80,29 +80,46 @@ public class Simulator implements GraphModificationFunctions {
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
if(verbose){System.out.println("Creating adjacency matrix");}
//Count how many wells each alpha appears in
//Count how many wells each alpha sequence appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
//count how many wells each beta sequence appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//the adjacency matrix to be used by the graph generator
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
plateBtoVMap, alphaIndices, betaIndices, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("Matrix created");}
//create bipartite graph
if(verbose){System.out.println("Creating graph");}
//the graph object
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
//List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
List<Vertex> alphaVertices = new ArrayList<>();
//start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map
for (Integer seq : plateAtoVMap.keySet()) {
Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq));
alphaVertices.add(alphaVertex);
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(alphaVertices);
//Add ordered list of vertices to the graph
graphGenerator.first(alphaVertices);
//the list of beta vertices
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
//List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry
List<Vertex> betaVertices = new ArrayList<>();
for (Integer seq : plateBtoVMap.keySet()) {
Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq));
betaVertices.add(betaVertex);
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(betaVertices);
//Add ordered list of vertices to the graph
graphGenerator.second(betaVertices);
//use adjacency matrix of weight created previously
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
@@ -112,9 +129,7 @@ public class Simulator implements GraphModificationFunctions {
Duration time = Duration.between(start, stop);
//create GraphWithMapData object
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, alphaCount, betaCount, time);
//Set source file name in graph to name of sample plate
output.setSourceFilename(samplePlate.getFilename());
//return GraphWithMapData object
@@ -126,60 +141,67 @@ public class Simulator implements GraphModificationFunctions {
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
List<Integer[]> removedEdges = new ArrayList<>();
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
boolean saveEdges = BiGpairSEQ.cacheGraph();
int numWells = data.getNumWells();
Integer alphaCount = data.getAlphaCount();
Integer betaCount = data.getBetaCount();
//Integer alphaCount = data.getAlphaCount();
//Integer betaCount = data.getBetaCount();
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
Set<Vertex> alphas = new HashSet<>();
Set<Vertex> betas = new HashSet<>();
for(Vertex v: graph.vertexSet()) {
if (SequenceType.CDR3_ALPHA.equals(v.getType())){
alphas.add(v);
}
else {
betas.add(v);
}
}
Integer graphAlphaCount = alphas.size();
Integer graphBetaCount = betas.size();
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges));
removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges));
if(verbose){System.out.println("Over- and under-weight edges removed");}
//remove edges between vertices with too small an overlap size, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
" percent of vertex occupancy value.");}
removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts,
plateVtoAMap, plateVtoBMap, minOverlapPercent, saveEdges));
removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges));
if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
+ maxOccupancyDifference);}
removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts,
plateVtoAMap, plateVtoBMap, maxOccupancyDifference, saveEdges));
removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges));
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"removed");}
//Find Maximum Weighted Matching
if(verbose){System.out.println("Finding maximum weighted matching");}
//Find Maximum Weight Matching
//using jheaps library class PairingHeap for improved efficiency
if(verbose){System.out.println("Finding maximum weight matching");}
MaximumWeightBipartiteMatching maxWeightMatching;
//Use correct heap type for priority queue
String heapType = BiGpairSEQ.getPriorityQueueHeapType();
switch (heapType) {
case "PAIRING" -> {
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet(),
alphas,
betas,
i -> new PairingHeap(Comparator.naturalOrder()));
}
case "FIBONACCI" -> {
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet(),
alphas,
betas,
i -> new FibonacciHeap(Comparator.naturalOrder()));
}
default -> {
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet());
alphas,
betas);
}
}
//get the matching
@@ -209,11 +231,14 @@ public class Simulator implements GraphModificationFunctions {
Map<Integer, Integer> matchMap = new HashMap<>();
while(weightIter.hasNext()) {
e = weightIter.next();
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
//Integer source = graph.getEdgeSource(e);
//Integer target = graph.getEdgeTarget(e);
//The match map is all matches found, not just true matches!
matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target));
check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
matchMap.put(source.getSequence(), target.getSequence());
check = target.getSequence().equals(distCellsMapAlphaKey.get(source.getSequence()));
//check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
if(check) {
trueCount++;
}
@@ -221,17 +246,19 @@ public class Simulator implements GraphModificationFunctions {
falseCount++;
}
List<String> result = new ArrayList<>();
result.add(plateVtoAMap.get(source).toString());
//alpha sequence
result.add(source.getSequence().toString());
//alpha well count
result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
result.add(plateVtoBMap.get(target).toString());
result.add(source.getOccupancy().toString());
//beta sequence
result.add(target.getSequence().toString());
//beta well count
result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
result.add(target.getOccupancy().toString());
//overlap count
result.add(Double.toString(graph.getEdgeWeight(e)));
result.add(Boolean.toString(check));
double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)),
betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e));
double pValue = Equations.pValue(numWells, source.getOccupancy(),
target.getOccupancy(), graph.getEdgeWeight(e));
BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
result.add(pValueTrunc.toString());
allResults.add(result);
@@ -239,7 +266,7 @@ public class Simulator implements GraphModificationFunctions {
//Metadata comments for CSV file
String algoType = "LEDA book with heap: " + heapType;
int min = Math.min(alphaCount, betaCount);
int min = Math.min(graphAlphaCount, graphBetaCount);
//matching weight
BigDecimal totalMatchingWeight = maxWeightMatching.getMatchingWeight();
//rate of attempted matching
@@ -274,8 +301,10 @@ public class Simulator implements GraphModificationFunctions {
metadata.put("algorithm type", algoType);
metadata.put("matching weight", totalMatchingWeight.toString());
metadata.put("well populations", wellPopulationsString);
metadata.put("total alphas found", alphaCount.toString());
metadata.put("total betas found", betaCount.toString());
metadata.put("total alphas on plate", data.getAlphaCount().toString());
metadata.put("total betas on plate", data.getBetaCount().toString());
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
metadata.put("high overlap threshold", highThreshold.toString());
metadata.put("low overlap threshold", lowThreshold.toString());
metadata.put("minimum overlap percent", minOverlapPercent.toString());

View File

@@ -1,23 +1,97 @@
import java.io.Serializable;
public class Vertex implements Serializable, Comparable<Vertex> {
private SequenceType type;
private Integer vertexLabel;
private Integer sequence;
private Integer occupancy;
public class Vertex {
private final Integer vertexLabel;
private final Integer sequence;
private final Integer occupancy;
public Vertex(Integer vertexLabel) {
this.vertexLabel = vertexLabel;
}
public Vertex(String vertexLabel) {
this.vertexLabel = Integer.parseInt((vertexLabel));
}
public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) {
public Vertex(SequenceType type, Integer sequence, Integer occupancy, Integer vertexLabel) {
this.type = type;
this.vertexLabel = vertexLabel;
this.sequence = sequence;
this.occupancy = occupancy;
}
public Integer getVertexLabel() { return vertexLabel; }
public SequenceType getType() {
return type;
}
public void setType(String type) {
this.type = SequenceType.valueOf(type);
}
public Integer getVertexLabel() {
return vertexLabel;
}
public void setVertexLabel(String label) {
this.vertexLabel = Integer.parseInt(label);
}
public Integer getSequence() {
return sequence;
}
public void setSequence(String sequence) {
this.sequence = Integer.parseInt(sequence);
}
public Integer getOccupancy() {
return occupancy;
}
public void setOccupancy(String occupancy) {
this.occupancy = Integer.parseInt(occupancy);
}
@Override //adapted from JGraphT example code
public int hashCode()
{
return (sequence == null) ? 0 : sequence.hashCode();
}
@Override //adapted from JGraphT example code
public boolean equals(Object obj)
{
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Vertex other = (Vertex) obj;
if (sequence == null) {
return other.sequence == null;
} else {
return sequence.equals(other.sequence);
}
}
@Override //adapted from JGraphT example code
public String toString()
{
StringBuilder sb = new StringBuilder();
sb.append("(").append(vertexLabel)
.append(", Type: ").append(type.name())
.append(", Sequence: ").append(sequence)
.append(", Occupancy: ").append(occupancy).append(")");
return sb.toString();
}
@Override
public int compareTo(Vertex other) {
return this.vertexLabel - other.getVertexLabel();
}
}