From b604b1d3cd6a7a5f7e13dc88366c081de331ac8b Mon Sep 17 00:00:00 2001 From: efischer Date: Sat, 26 Feb 2022 06:19:08 -0600 Subject: [PATCH] Changing graph to use Vertex class --- src/main/java/CellSample.java | 8 +++++ src/main/java/InteractiveInterface.java | 2 +- src/main/java/Plate.java | 5 +-- src/main/java/SequenceType.java | 8 +++++ src/main/java/Simulator.java | 47 +++++++++++++++++-------- src/main/java/Vertex.java | 6 +++- 6 files changed, 57 insertions(+), 19 deletions(-) create mode 100644 src/main/java/SequenceType.java diff --git a/src/main/java/CellSample.java b/src/main/java/CellSample.java index cfb8c0b..95b6e39 100644 --- a/src/main/java/CellSample.java +++ b/src/main/java/CellSample.java @@ -13,8 +13,12 @@ public class CellSample { List numbersCDR3 = new ArrayList<>(); List numbersCDR1 = new ArrayList<>(); Integer numDistCDR3s = 2 * numDistinctCells + 1; + //Assign consecutive integers for each CDR3. This ensures they are all unique. IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i)); + //After all CDR3s are assigned, start assigning consecutive integers to CDR1s + //There will usually be fewer integers in the CDR1 list, which will allow repeats below IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i)); + //randomize the order of the numbers in the lists Collections.shuffle(numbersCDR3); Collections.shuffle(numbersCDR1); @@ -22,11 +26,15 @@ public class CellSample { //two CDR3s, and two CDR1s. 
First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta) List distinctCells = new ArrayList<>(); for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ + //Go through entire CDR3 list once, make pairs of alphas and betas Integer tmpCDR3a = numbersCDR3.get(i); Integer tmpCDR3b = numbersCDR3.get(i+1); + //Go through (likely shorter) CDR1 list as many times as necessary, make pairs of alphas and betas Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size()); Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size()); + //Make the array representing the cell Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b}; + //Add the cell to the list of distinct cells distinctCells.add(tmp); } this.cells = distinctCells; diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java index 044e03b..0ab30b2 100644 --- a/src/main/java/InteractiveInterface.java +++ b/src/main/java/InteractiveInterface.java @@ -307,7 +307,7 @@ public class InteractiveInterface { } else{ List cells = cellSample.getCells(); - GraphWithMapData data = Simulator.makeGraph(cells, plate, true); + GraphWithMapData data = Simulator.makeCDR3Graph(cells, plate, true); assert filename != null; if(BiGpairSEQ.outputBinary()) { GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data); diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 5a13eff..7f70c43 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -138,9 +138,9 @@ public class Plate { //returns a map of the counts of the sequence at cell index sIndex, in a range of wells public Map assayWellsSequenceS(int start, int end, int... 
sIndices) { Map assay = new HashMap<>(); - for(int pIndex: sIndices){ + for(int sIndex: sIndices){ for(int i = start; i < end; i++){ - countSequences(assay, wells.get(i), pIndex); + countSequences(assay, wells.get(i), sIndex); } } return assay; @@ -149,6 +149,7 @@ public class Plate { private void countSequences(Map wellMap, List well, int... sIndices) { for(Integer[] cell : well) { for(int sIndex: sIndices){ + //skip dropout sequences, which have value -1 if(cell[sIndex] != -1){ wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); } diff --git a/src/main/java/SequenceType.java b/src/main/java/SequenceType.java new file mode 100644 index 0000000..54de083 --- /dev/null +++ b/src/main/java/SequenceType.java @@ -0,0 +1,8 @@ +//enum for tagging types of sequences +//Listed in order that they appear in a cell array, so ordinal() method will return correct index +public enum SequenceType { + CDR3_ALPHA, + CDR3_BETA, + CDR1_ALPHA, + CDR1_BETA +} diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 6d3f6eb..c3d0397 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -18,18 +18,24 @@ import static java.lang.Float.*; //NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell public class Simulator implements GraphModificationFunctions { + + //These are the indices of the different sequences within a cell array private static final int cdr3AlphaIndex = 0; private static final int cdr3BetaIndex = 1; private static final int cdr1AlphaIndex = 2; private static final int cdr1BetaIndex = 3; - //Make the graph needed for matching CDR3s - public static GraphWithMapData makeGraph(List distinctCells, Plate samplePlate, boolean verbose) { + //Make the graph needed for matching sequences. + //sourceVertexIndices and targetVertexIndices are indices within the cell to use as for the two sets of vertices + //in the bipartite graph. 
"Source" and "target" are JGraphT terms for the two vertices an edge touches, + //even if not directed. + public static GraphWithMapData makeCDR3Graph(List distinctCells, Plate samplePlate, boolean verbose) { Instant start = Instant.now(); - int[] alphaIndex = {cdr3AlphaIndex}; - int[] betaIndex = {cdr3BetaIndex}; int numWells = samplePlate.getSize(); + //The ordinal value of the sequence type enum is also that sequence's index in a cell array + int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()}; + int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()}; if(verbose){System.out.println("Making cell maps");} //HashMap keyed to Alphas, values Betas @@ -37,8 +43,9 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("Cell maps made");} if(verbose){System.out.println("Making well maps");} - Map allAlphas = samplePlate.assayWellsSequenceS(alphaIndex); - Map allBetas = samplePlate.assayWellsSequenceS(betaIndex); + + Map allAlphas = samplePlate.assayWellsSequenceS(alphaIndices); + Map allBetas = samplePlate.assayWellsSequenceS(betaIndices); int alphaCount = allAlphas.size(); if(verbose){System.out.println("All alphas count: " + alphaCount);} int betaCount = allBetas.size(); @@ -77,29 +84,40 @@ public class Simulator implements GraphModificationFunctions { //(technically this is only 1/4 of an adjacency matrix, but that's all you need //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.) 
if(verbose){System.out.println("Creating adjacency matrix");} - //Count how many wells each alpha appears in + //Count how many wells each alpha sequence appears in Map alphaWellCounts = new HashMap<>(); - //count how many wells each beta appears in + //count how many wells each beta sequence appears in Map betaWellCounts = new HashMap<>(); //the adjacency matrix to be used by the graph generator double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap, - plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); + plateBtoVMap, alphaIndices, betaIndices, alphaWellCounts, betaWellCounts, weights); if(verbose){System.out.println("Matrix created");} //create bipartite graph if(verbose){System.out.println("Creating graph");} //the graph object - SimpleWeightedGraph graph = + SimpleWeightedGraph graph = new SimpleWeightedGraph<>(DefaultWeightedEdge.class); //the graph generator SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); //the list of alpha vertices - List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + //List alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry + List alphaVertices = new ArrayList<>(); + //start with map of all alphas mapped to vertex values, get occupancy from the alphaWellCounts map + for (Integer seq: plateAtoVMap.keySet()) { + Vertex alphaVertex = new Vertex(SequenceType.CDR3_ALPHA, seq, alphaWellCounts.get(seq), plateAtoVMap.get(seq)); + alphaVertices.add(alphaVertex); + } graphGenerator.first(alphaVertices); //the list of beta vertices - List betaVertices = new ArrayList<>(plateVtoBMap.keySet()); - graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry + //List betaVertices = new 
ArrayList<>(plateVtoBMap.keySet());//This will work because LinkedHashMap preserves order of entry + List betaVertices = new ArrayList<>(); + for (Integer seq : plateBtoVMap.keySet()) { + Vertex betaVertex = new Vertex(SequenceType.CDR3_BETA, seq, betaWellCounts.get(seq), plateBtoVMap.get(seq)); + betaVertices.add(betaVertex); + } + graphGenerator.second(betaVertices); //use adjacency matrix of weight created previously graphGenerator.weights(weights); graphGenerator.generateGraph(graph); @@ -653,7 +671,6 @@ public class Simulator implements GraphModificationFunctions { } } } - } } @@ -668,7 +685,7 @@ public class Simulator implements GraphModificationFunctions { private static Map makeVertexToSequenceMap(Map sequences, Integer startValue) { Map map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry - Integer index = startValue; //is this necessary? I don't think I use this. + Integer index = startValue; for (Integer k: sequences.keySet()) { map.put(index, k); index++; diff --git a/src/main/java/Vertex.java b/src/main/java/Vertex.java index ef962ae..32627ae 100644 --- a/src/main/java/Vertex.java +++ b/src/main/java/Vertex.java @@ -1,16 +1,20 @@ public class Vertex { + private final SequenceType type; private final Integer vertexLabel; private final Integer sequence; private final Integer occupancy; - public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) { + public Vertex(SequenceType type, Integer sequence, Integer occupancy, Integer vertexLabel) { + this.type = type; this.vertexLabel = vertexLabel; this.sequence = sequence; this.occupancy = occupancy; } + public SequenceType getType() { return type; } + public Integer getVertexLabel() { return vertexLabel; } public Integer getSequence() {