Refactor to reduce code repetition

This commit is contained in:
2021-11-18 14:11:04 -06:00
parent 2064d7e9fc
commit 34e96d3b3d
3 changed files with 179 additions and 590 deletions

View File

@@ -1,6 +1,8 @@
import java.util.*;
//Need to write function to output plate to a file that I can read in.
/*
TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
public class Plate {
private List<List<Integer[]>> wells;
@@ -28,8 +30,6 @@ public class Plate {
int section = 0;
double m;
int n;
//testing
//System.out.println("Cell size: " + cells.get(0).length);
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>();
@@ -52,11 +52,6 @@ public class Plate {
}
}
public void writePlateToFile(String filename) {
}
public Integer[] getConcentrations(){
return concentrations;
}
@@ -77,142 +72,32 @@ public class Plate {
return wells;
}
//returns a map of counts of all the CDR3s (alphas and betas) in all wells
public Map<Integer, Integer>assayWellsCDR3(){
return this.assayWellsCDR3(0, size);
}
//returns a map of counts of all the CDR3 alphas in all wells
public Map<Integer, Integer> assayWellsCDR3Alpha() {
return this.assayWellsCDR3Alpha(0, size);
}
//returns a map of counts of all the CDR3 betas in all wells
public Map<Integer, Integer> assayWellsCDR3Beta() {
return this.assayWellsCDR3Beta(0, size);
}
//returns a map of counts of all CDR1s (alphas and betas) in all wells
public Map<Integer, Integer> assayWellsCDR1(){
return this.assayWellsCDR1(0, size);
}
//returns a map of counts of all the CDR1 alphas in all wells
public Map<Integer, Integer> assayWellsCDR1Alpha() {
return this.assayWellsCDR1Alpha(0, size);
}
//returns a map of counts of all the CDR1 betas in all wells
public Map<Integer, Integer> assayWellsCDR1Beta() {
return this.assayWellsCDR1Beta(0, size);
//returns a map of the counts of the peptide at cell index pIndex, in all wells
public Map<Integer, Integer> assayWellsPeptideP(int... pIndices){
return this.assayWellsPeptideP(0, size, pIndices);
}
//returns a map of counts of the CDR3s (alphas and betas) in a specific well
public Map<Integer, Integer>assayWellsCDR3(int n){
return this.assayWellsCDR3(n, n+1);
}
//returns a map of counts of the CDR1s (alphas and betas) in a specific well
public Map<Integer, Integer> assayWellsCDR1(int n){
return this.assayWellsCDR1(n, n+1);
}
//returns a map of counts of the CDR3 alphas in a specific well
public Map<Integer, Integer> assayWellsCDR3Alpha(int n) {
return this.assayWellsCDR3Alpha(n, n+1);
}
//returns a map of counts of the CDR3 betas in a specific well
public Map<Integer, Integer> assayWellsCDR3Beta(int n) {
return this.assayWellsCDR3Beta(n, n+1);
}
//returns a map of counts of the CDR1 alphas in a specific well
public Map<Integer, Integer> assayWellsCDR1Alpha(int n) {
return this.assayWellsCDR1Alpha(n, n+1);
}
//returns a map of counts of the CDR1 betas in a specific well
public Map<Integer, Integer> assayWellsCDR1Beta(int n) {
return this.assayWellsCDR1Beta(n, n+1);
}
//returns a map of the counts of the peptide at cell index pIndex, in a specific well
public Map<Integer, Integer> assayWellsPeptideP(int n, int... pIndices) { return this.assayWellsPeptideP(n, n+1, pIndices);}
//returns a map of the counts of the CDR3s (alphas and betas) in a range of wells
public Map<Integer, Integer>assayWellsCDR3(int start, int end){
//returns a map of the counts of the peptide at cell index pIndex, in a range of wells
public Map<Integer, Integer> assayWellsPeptideP(int start, int end, int... pIndices) {
Map<Integer,Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR3Alphas(assay, wells.get(i));
countCDR3Betas(assay,wells.get(i));
for(int pIndex: pIndices){
for(int i = start; i < end; i++){
countPeptides(assay, wells.get(i), pIndex);
}
}
return assay;
}
//returns a map of the counts of the CDR1s (alphas and betas) in a range of wells
public Map<Integer, Integer>assayWellsCDR1(int start, int end){
Map<Integer,Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR1Alphas(assay, wells.get(i));
countCDR1Betas(assay,wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR3 alphas in a range of wells
public Map<Integer, Integer> assayWellsCDR3Alpha(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR3Alphas(assay, wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR3 betas in a range of wells
public Map<Integer, Integer> assayWellsCDR3Beta(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR3Betas(assay, wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR1 alphas in a range of wells
public Map<Integer, Integer> assayWellsCDR1Alpha(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR1Alphas(assay, wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR1 betas in a range of wells
public Map<Integer, Integer> assayWellsCDR1Beta(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR1Betas(assay, wells.get(i));
}
return assay;
}
//given a map, counts distinct CDR3 alphas in a well
private void countCDR3Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
//For the peptides at cell indices pIndices, counts number of unique peptides in the given well into the given map
private void countPeptides(Map<Integer, Integer> wellMap, List<Integer[]> well, int... pIndices) {
for(Integer[] cell : well) {
if(cell[0] != -1){
//keys are alphas, value is how many of them have been assayed
wellMap.merge(cell[0], 1, (oldValue, newValue) -> oldValue + newValue);
for(int pIndex: pIndices){
if(cell[pIndex] != -1){
wellMap.merge(cell[pIndex], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}
}
//given a map, counts distinct CDR3 betas in a well
private void countCDR3Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell : well) {
if(cell[1] != -1){
wellMap.merge(cell[1], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}
//given a map, counts distinct CDR1 alphas in a well
private void countCDR1Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell: well){
if(cell[2] != -1){
wellMap.merge(cell[2], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}
//given a map, counts distinct CDR1 betas in a well
private void countCDR1Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell: well){
if(cell[3] != -1){
wellMap.merge(cell[3], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}
}

View File

@@ -14,20 +14,13 @@ import java.util.*;
import java.util.stream.IntStream;
public class Simulator {
/*
* Old instance fields from before object-oriented refactor
*
private static Integer numDistinctCells = 2_000_000;
private static double stdDeviation = 200; //square root of numDistCells would approximate poisson dist
private static int numWells;
private static int numConcentrations = 1;
private static double errorRate = 0.1;
private static Integer[] concentrations = {500};
private static int lowThreshold = 2; //min number of shared wells to attempt pairing
private static int highThreshold = numWells - 3; //max number of shared wells to attempt pairing
private static boolean use2DArrayForGraph = true; //Doing this is much faster for larger graphs
private static boolean useJGraphTGraphMatrixGenerator = true; //fastest option
*/
private static int cdr3AlphaIndex = 0;
private static int cdr3BetaIndex = 1;
private static int cdr1AlphaIndex = 2;
private static int cdr1BetaIndex = 3;
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
//In real T cells, CDR1s have about one third the diversity of CDR3s
@@ -57,25 +50,20 @@ public class Simulator {
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold, Integer highThreshold){
System.out.println("Cells: " + distinctCells.size());
Instant start = Instant.now();
Instant start = Instant.now();
int numWells = samplePlate.getSize();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
System.out.println("Making cell maps");
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapAlphaKey.put(cell[0], cell[1]);
}
//HashMap keyed to Betas, values Alphas
Map<Integer, Integer> distCellsMapBetaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapBetaKey.put(cell[1], cell[0]);
}
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
System.out.println("Cell maps made");
System.out.println("Making well maps");
Map<Integer, Integer> allAlphas = samplePlate.assayWellsCDR3Alpha();
Map<Integer, Integer> allBetas = samplePlate.assayWellsCDR3Beta();
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
int alphaCount = allAlphas.size();
System.out.println("all alphas count: " + alphaCount);
int betaCount = allBetas.size();
@@ -86,25 +74,10 @@ public class Simulator {
//Remove saturating-occupancy peptides because they have no signal value.
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
//peptide that's above the threshold.
System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy peptides");
List<Integer> noiseAlphas = new ArrayList<>();
List<Integer> noiseBetas = new ArrayList<>();
for(Integer k: allAlphas.keySet()){
if((allAlphas.get(k)>numWells - 1) || (allAlphas.get(k) < lowThreshold)){
noiseAlphas.add(k);
}
}
for(Integer k: allBetas.keySet()){
if((allBetas.get(k)> numWells - 1 ) || (allBetas.get(k) < lowThreshold)){
noiseBetas.add(k);
}
}
for(Integer p: noiseAlphas){
allAlphas.remove(p);
}
for(Integer p: noiseBetas){
allBetas.remove(p);
}
System.out.println("Removing peptides present in all wells.");
System.out.println("Removing peptides with occupancy below the minimum overlap threshold");
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
System.out.println("Peptides removed");
int pairableAlphaCount = allAlphas.size();
System.out.println("Remaining alpha count: " + pairableAlphaCount);
@@ -112,16 +85,18 @@ public class Simulator {
System.out.println("Remaining beta count: " + pairableBetaCount);
System.out.println("Making vertex maps");
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
// when I refactor to use a 2d array to make the graph
//For the autogenerator, all vertices must have distinct numbers associated with them
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from betas to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue);
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = getVertexToPeptideMap(allBetas, vertexStartValue);
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
@@ -133,40 +108,15 @@ public class Simulator {
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null;
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsCDR3Alpha(n);
for (Integer a : wellNAlphas.keySet()) {
if(allAlphas.containsKey(a)){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
}
wellNBetas = samplePlate.assayWellsCDR3Beta(n);
for (Integer b : wellNBetas.keySet()) {
if(allBetas.containsKey(b)){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
}
for (Integer i : wellNAlphas.keySet()) {
if(allAlphas.containsKey(i)){
for (Integer j : wellNBetas.keySet()) {
if(allBetas.containsKey(j)){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
}
}
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
System.out.println("matrix created");
System.out.println("creating graph");
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
@@ -179,11 +129,7 @@ public class Simulator {
System.out.println("Graph created");
System.out.println("Eliminating edges with weights outside threshold values");
for(DefaultWeightedEdge e: graph.edgeSet()){
if ((graph.getEdgeWeight(e) > highThreshold) || (graph.getEdgeWeight(e) < lowThreshold)){
graph.setEdgeWeight(e, 0.0);
}
}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
System.out.println("Over- and under-weight edges set to 0.0");
@@ -263,48 +209,47 @@ public class Simulator {
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
Duration time = Duration.between(start, stop);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
for(String s: comments){
System.out.println(s);
}
return new MatchingResult(comments, header, allResults, matchMap, time);
}
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, Map<Integer, Integer> previousMatches, Duration previousTime){
Integer highThreshold, MatchingResult priorResult){
Instant start = Instant.now();
Duration previousTime = priorResult.getTime();
Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
int numWells = samplePlate.getSize();
int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
System.out.println("Making previous match maps");
Map<Integer, Integer> CDR3AtoBMap = previousMatches;
Map<Integer, Integer> CDR3BtoAMap = invertVertexMap(CDR3AtoBMap);
Map<Integer, Integer> cdr3AtoBMap = previousMatches;
Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
System.out.println("Previous match maps made");
System.out.println("Making cell maps");
Map<Integer, Integer> alphaCDR3toCDR1Map = new HashMap<>();
for (Integer[] cell : distinctCells) {
alphaCDR3toCDR1Map.put(cell[0], cell[2]);
}
//HashMap keyed to Betas, values Alphas
Map<Integer, Integer> betaCDR3toCDR1Map = new HashMap<>();
for (Integer[] cell : distinctCells) {
betaCDR3toCDR1Map.put(cell[1], cell[3]);
}
Map<Integer, Integer> alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
Map<Integer, Integer> betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
System.out.println("Cell maps made");
System.out.println("Making well maps");
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsCDR3();
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsCDR1();
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices);
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices);
int CDR3Count = allCDR3s.size();
System.out.println("all CDR3s count: " + CDR3Count);
int CDR1Count = allCDR1s.size();
System.out.println("all CDR1s count: " + CDR1Count);
System.out.println("Well maps made");
//NEW FILTERING CODE TO TEST
System.out.println("Removing unpaired CDR3s from well maps");
List<Integer> unpairedCDR3s = new ArrayList<>();
for(Integer i: allCDR3s.keySet()){
if(!(CDR3AtoBMap.containsKey(i) || CDR3BtoAMap.containsKey(i))){
if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
unpairedCDR3s.add(i);
}
}
@@ -315,85 +260,61 @@ public class Simulator {
System.out.println("Remaining CDR3 count: " + allCDR3s.size());
System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
List<Integer> noiseCDR1s = new ArrayList<>();
for(Integer i: allCDR1s.keySet()){
if((allCDR1s.get(i) < lowThreshold) || (allCDR1s.get(i)) > numWells - 1){
noiseCDR1s.add(i);
}
}
for(Integer i: noiseCDR1s){
allCDR1s.remove(i);
}
filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
System.out.println("CDR1s removed.");
System.out.println("Remaining CDR1 count: " + allCDR1s.size());
//END NEW CODE
System.out.println("Making vertex maps");
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
// when I refactor to use a 2d array to make the graph
//For the autogenerator, all vertices must have distinct numbers associated with them
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are CDR3s
Map<Integer, Integer> plateVtoCDR3Map = getVertexToPeptideMap(allCDR3s, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue);
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
vertexStartValue += plateVtoCDR3Map.size();
//keys are sequential integers vertices, values are CDR1s
Map<Integer, Integer> plateVtoCDR1Map = getVertexToPeptideMap(allCDR1s, vertexStartValue);
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue);
//keys are CDR3s, values are sequential integer vertices from previous map
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
//keys are CDR1s, values are sequential integer vertices from previous map
Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
System.out.println("Vertex maps made");
System.out.println("Creating Graph");
System.out.println("Creating adjacency matrix");
//Count how many wells each CDR3 appears in
Map<Integer, Integer> CDR3WellCounts = new HashMap<>();
Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
//count how many wells each CDR1 appears in
Map<Integer, Integer> CDR1WellCounts = new HashMap<>();
Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNCDR3s = null;
Map<Integer, Integer> wellNCDR1s = null;
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
System.out.println("Matrix created");
System.out.println("Creating graph");
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
for (int n = 0; n < numWells; n++) {
wellNCDR3s = samplePlate.assayWellsCDR3(n);
for (Integer a : wellNCDR3s.keySet()) {
CDR3WellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNCDR1s = samplePlate.assayWellsCDR1(n);
for (Integer b : wellNCDR1s.keySet()) {
CDR1WellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for (Integer i : wellNCDR3s.keySet()) {
if(allCDR3s.containsKey(i)){//only consider CDR3s we have matches for - rest filtered out
for (Integer j : wellNCDR1s.keySet()) {
if(allCDR1s.containsKey(j)){ //only those CDR1s we didn't filter out
weights[plateCDR3toVMap.get(i)][plateCDR1toVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
}
}
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> CDR3Vertices = new ArrayList<>();
CDR3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(CDR3Vertices);
List<Integer> CDR1Vertices = new ArrayList<>();
CDR1Vertices.addAll(plateVtoCDR1Map.keySet());
graphGenerator.second(CDR1Vertices); //This will work because LinkedHashMap preserves order of entry
List<Integer> cdr3Vertices = new ArrayList<>();
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(cdr3Vertices);
List<Integer> cdr1Vertices = new ArrayList<>();
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
System.out.println("Graph created");
System.out.println("Number of edges: " + graph.edgeSet().size());
System.out.println("Removing edges outside of weight thresholds");
for(DefaultWeightedEdge e: graph.edgeSet()){
if ((graph.getEdgeWeight(e) > highThreshold) || (graph.getEdgeWeight(e) < lowThreshold)){
graph.setEdgeWeight(e, 0.0);
}
}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
System.out.println("Over- and under-weight edges set to 0.0");
System.out.println("Finding first maximum weighted matching");
@@ -409,24 +330,22 @@ public class Simulator {
DefaultWeightedEdge e = null;
while(weightIter.hasNext()){
e = weightIter.next();
if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
continue;
}
Integer source = graph.getEdgeSource(e);
// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
// continue;
// }
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
}
System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
System.out.println("Removing edges from first maximum weighted matching");
//zero out the edge weights in the matching
weightIter = graphMatching.iterator();
while(weightIter.hasNext()){
graph.removeEdge(weightIter.next());
}
System.out.println("Edges removed");
//Generate a new matching
System.out.println("Finding second maximum weighted matching");
@@ -441,9 +360,9 @@ public class Simulator {
weightIter = graphMatching.iterator();
while(weightIter.hasNext()){
e = weightIter.next();
if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
continue;
}
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
// continue;
// }
Integer source = graph.getEdgeSource(e);
// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
// continue;
@@ -451,32 +370,33 @@ public class Simulator {
Integer target = graph.getEdgeTarget(e);
secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
}
System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
System.out.println("Mapping first pass CDR3 alpha/beta pairs");
//get linked map for first matching attempt
Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
for(Integer alphaCDR3: CDR3AtoBMap.keySet()) {
for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
continue;
}
Integer betaCDR3 = CDR3AtoBMap.get(alphaCDR3);
Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
continue;
}
firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
}
System.out.println("First pass CDR3 alpha/beta pairs mapped");
System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
//Look for matches that simply swapped already-matched alpha and beta CDR3s
Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
for(Integer alphaCDR3: CDR3AtoBMap.keySet()) {
for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
continue;
}
Integer betaCDR3 = CDR3AtoBMap.get(alphaCDR3);
Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
continue;
}
@@ -487,12 +407,11 @@ public class Simulator {
}
}
}
System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
Instant stop = Instant.now();
//results for first map
System.out.println("Results for first pass");
System.out.println("RESULTS FOR FIRST PASS MATCHING");
List<List<String>> allResults = new ArrayList<>();
Integer trueCount = 0;
Iterator iter = firstMatchesMap.keySet().iterator();
@@ -550,7 +469,7 @@ public class Simulator {
MatchingResult firstTest = new MatchingResult(comments, headers, allResults, dualMatchesMap, time);
//results for dual map
System.out.println("Results for second pass");
System.out.println("RESULTS FOR SECOND PASS MATCHING");
allResults = new ArrayList<>();
trueCount = 0;
iter = dualMatchesMap.keySet().iterator();
@@ -596,260 +515,85 @@ public class Simulator {
}
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
MatchingResult dualTest = new MatchingResult(comments, headers, allResults, dualMatchesMap, time);
MatchingResult[] output = {firstTest, dualTest};
return output;
}
/*
** Old main method before object-oriented refactor
*
public static void Simulate() {
Instant start = Instant.now();
//Four things to try to improve this
//1. Run it on hardware with more memory
//2. implement p-values and just check exhaustively for strictly-bounded weights
// If I do this, can start with highest weights and zero out columns when I find a p-value I like maybe
//3. hand-implement the maximum weighted matching algorithm to use arrays and be efficient
// 3.5 Could also do a version where I make the graph one edge at a time, but from a 2D array so I have the
// weights already
// Tried it. It was way better than doing the graph directly, but slightly slower than the built-in
//4. Implement a discrete distribution function (along the lines of the alias method)
//
//With array method, could also try simply zeroing high-well-count peptides before graph creation
//stdDeviation = Math.sqrt(numDistinctCells.doubleValue());
//concentrations = new int[numConcentrations];
//OUTPUT
System.out.println("Generating cells");
List<Integer> numbers = new ArrayList<>();
IntStream.range(1, (2 * numDistinctCells) + 1).forEach(i -> numbers.add(i));
Collections.shuffle(numbers);
//Each cell represented by two numbers from the random permutation
//These represent unique alpha and beta peptides
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbers.size() - 1; i = i + 2) {
Integer tmp1 = numbers.get(i);
Integer tmp2 = numbers.get(i+1);
Integer[] tmp = {tmp1, tmp2};
distinctCells.add(tmp);
}
//OUTPUT
System.out.println("Cells generated");
System.out.println("Filling wells");
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapAlphaKey.put(cell[0], cell[1]);
}
//HashMap keyed to Betas, values Alphas
Map<Integer, Integer> distCellsMapBetaKey = new HashMap<>();
for (Integer[] cell : distinctCells) {
distCellsMapBetaKey.put(cell[1], cell[0]);
}
Plate samplePlate = new Plate(numWells, errorRate, concentrations, stdDeviation);
samplePlate.fillWells(distinctCells);
//OUTPUT
System.out.println("Wells filled");
System.out.println("Making maps");
//All alphas on plate, with
Map<Integer, Integer> allAlphas = samplePlate.assayWellsAlpha();
Map<Integer, Integer> allBetas = samplePlate.assayWellsBeta();
int alphaCount = allAlphas.size();
int betaCount = allBetas.size();
//Using Integers instead of Strings to label vertices so I can do clever stuff with indices if I need to
// when I refactor to use a 2d array to make the graph
//For the autogenerator, all vertices must have distinct numbers associated with them
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = getVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = getVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
//OUTPUT
System.out.println("Maps made");
System.out.println("Creating Graph");
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//add edges, where weights are number of wells the peptides share in common.
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
Map<Integer, Integer> wellNAlphas = null;
Map<Integer, Integer> wellNBetas = null;
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<Integer, DefaultWeightedEdge>(DefaultWeightedEdge.class);
//Flag in global variables determines if edges are added to graph one at a time directly, or if a graph
//is made all at once from a 2D array using the SimpleWeightedBipartiteGraphMatrixGenerator class
if(use2DArrayForGraph) {
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for(Integer a: wellNAlphas.keySet()){
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
private static void countPeptidesAndFillMatrix(Plate samplePlate,
Map<Integer,Integer> allRowPeptides,
Map<Integer,Integer> allColumnPeptides,
Map<Integer,Integer> rowPeptideToVertexMap,
Map<Integer,Integer> columnPeptideToVertexMap,
int[] rowPeptideIndices,
int[] colPeptideIndices,
Map<Integer, Integer> rowPeptideCounts,
Map<Integer,Integer> columnPeptideCounts,
double[][] weights){
Map<Integer, Integer> wellNRowPeptides = null;
Map<Integer, Integer> wellNColumnPeptides = null;
int vertexStartValue = rowPeptideToVertexMap.size();
int numWells = samplePlate.getSize();
for (int n = 0; n < numWells; n++) {
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices);
for (Integer a : wellNRowPeptides.keySet()) {
if(allRowPeptides.containsKey(a)){
rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for(Integer b: wellNBetas.keySet()){
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices);
for (Integer b : wellNColumnPeptides.keySet()) {
if(allColumnPeptides.containsKey(b)){
columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for(Integer i: wellNAlphas.keySet()) {
for(Integer j: wellNBetas.keySet()){
weights[plateAtoVMap.get(i)][plateBtoVMap.get(j) - vertexStartValue] += 1.0;
}
}
}
if(useJGraphTGraphMatrixGenerator) {
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> alphaVertices = new ArrayList<>();
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
List<Integer> betaVertices = new ArrayList<>();
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
} else {
for(int i = 0; i < alphaCount; i++) {
for (int j = vertexStartValue; j < betaCount + vertexStartValue; j++){
graph.addVertex(i);
graph.addVertex(j);
addEdgeWithWeight(graph, i, (j), weights[i][j - vertexStartValue]);
}
for (Integer i : wellNRowPeptides.keySet()) {
if(allRowPeptides.containsKey(i)){
for (Integer j : wellNColumnPeptides.keySet()) {
if(allColumnPeptides.containsKey(j)){
weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0;
}
}
}
}
}
else {
//construct graph
//add vertices
for(Integer v: plateVtoAMap.keySet()) {
graph.addVertex(v);
}
for(Integer v: plateVtoBMap.keySet()) {
graph.addVertex(v);
}
for (int n = 0; n < numWells; n++) {
wellNAlphas = samplePlate.assayWellsAlpha(n);
for (Integer a : wellNAlphas.keySet()) {
alphaWellCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
wellNBetas = samplePlate.assayWellsBeta(n);
for (Integer b : wellNBetas.keySet()) {
betaWellCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
for (Integer i : wellNAlphas.keySet()) {
for (Integer j : wellNBetas.keySet()) {
addEdgeOrWeight(graph, plateAtoVMap.get(i), plateBtoVMap.get(j));
}
}
}
}
//OUTPUT
System.out.println("Graph created");
System.out.println("Finding maximum weighted matching");
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
Instant stop = Instant.now();
//OUTPUT
System.out.println("Matching completed");
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null;
int trueCount = 0;
int falseCount = 0;
boolean check = false;
while(weightIter.hasNext()) {
e = weightIter.next();
if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
continue;
}
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
//Need map of each peptide and number of wells it appears in.
check = printData(graph.getEdgeWeight(e), source, alphaWellCounts.get(plateVtoAMap.get(source)), target, betaWellCounts.get(plateVtoBMap.get(target)), distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap);
if(check) {
trueCount++;
}
else {
falseCount++;
}
}
//RESULTS SUMMARY
NumberFormat nf = NumberFormat.getInstance(Locale.US);
System.out.println("");
System.out.println("Distinct cell population: " + nf.format(numDistinctCells));
System.out.println("1 standard deviation of distribution: " + stdDeviation + " most common cells");
System.out.print("Sample plate: " + numWells +" wells, ");
System.out.print("divided into " + concentrations.length + " sections of ");
System.out.println(Arrays.toString(concentrations) + " cells per well.");
System.out.print("Attempted to match alpha/beta overlaps of at least " + lowThreshold + " wells ");
System.out.println("and no more than " + highThreshold + " wells.");
System.out.println(nf.format(alphaCount) + " alpha peptides found");
System.out.println(nf.format(betaCount) + " beta peptides found");
MathContext mc = new MathContext(3);
int min = alphaCount > betaCount ? betaCount : alphaCount;
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attR = new BigDecimal(attemptRate, mc);
System.out.println("Pairing attempt rate: " + attR);
System.out.println("Correct pairings: " + nf.format(trueCount));
System.out.println("Incorrect pairings: " + nf.format(falseCount));
double experimentalError = (double) falseCount / (trueCount + falseCount);
BigDecimal expErr = new BigDecimal(experimentalError, mc);
System.out.println("Pairing error rate: " + expErr);
Duration time = Duration.between(start, stop);
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(use2DArrayForGraph) {
System.out.print("Graph generated from 2D array of weights");
if(useJGraphTGraphMatrixGenerator){
System.out.print(" using JGraphT's SimpleWeightedBipartiteGraphMatrixGenerator");
}
else {
System.out.print(" directly.");
}
System.out.println("");
}
else {
System.out.println("Graph generated directly from individual well maps");
}
System.out.println("");
}
*/
private static Map<Integer, Integer> getVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
private static void filterByOccupancyThreshold(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
int low, int high) {
for(DefaultWeightedEdge e: graph.edgeSet()){
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
graph.setEdgeWeight(e, 0.0);
}
}
}
private static void filterByOccupancyThreshold(Map<Integer, Integer> wellMap, int low, int high){
List<Integer> noise = new ArrayList<>();
for(Integer k: wellMap.keySet()){
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
noise.add(k);
}
}
for(Integer k: noise) {
wellMap.remove(k);
}
}
private static Map<Integer, Integer> makePeptideToPeptideMap(List<Integer[]> cells, int keyPeptideIndex,
int valuePeptideIndex){
Map<Integer, Integer> keyPeptideToValuePeptideMap = new HashMap<>();
for (Integer[] cell : cells) {
keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]);
}
return keyPeptideToValuePeptideMap;
}
private static Map<Integer, Integer> makeVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
Integer index = startValue;
for (Integer k: peptides.keySet()) {
@@ -867,44 +611,4 @@ public class Simulator {
return inverse;
}
/*
* Old methods for graph generation algorithms less efficient than built in matrix generator
*
private static void addEdgeWithWeight(Graph g, Integer v1, Integer v2, double weight) {
g.addEdge(v1, v2);
g.setEdgeWeight(v1, v2, weight);
}
private static void addEdgeOrWeight(Graph g, Integer v1, Integer v2) {
if (g.containsEdge(v1, v2)) {
g.setEdgeWeight(v1, v2, g.getEdgeWeight(g.getEdge(v1, v2)) + 1.0);
}
else {
g.addEdge(v1, v2);
}
}
*/
/*
* Old output method before object-oriented refactor
*
private static boolean printData (Double edgeWeight, Integer source, Integer sourceCount, Integer target,
Integer targetCount, Map<Integer,Integer> AtoBMap, Map<Integer,Integer> VtoAMap,
Map<Integer, Integer> VtoBMap) {
System.out.println("Alpha peptide " + source + " present in " + sourceCount + " wells");
System.out.println("Beta peptide " + target + " present in " + targetCount + " wells");
System.out.println("Both alpha and beta present in " + edgeWeight + " wells");
if(VtoBMap.get(target).equals(AtoBMap.get(VtoAMap.get(source)))){
System.out.println("True");
System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight));
return true;
}
else {
System.out.println("False");
System.out.println("p-value: " + Equations.pValue(numWells, sourceCount, targetCount, edgeWeight));
System.out.println("The real pair is " + source + " and " + AtoBMap.get(VtoAMap.get(source)));
return false;
}
}*/
}

View File

@@ -260,7 +260,7 @@ public class UserInterface {
List<Integer[]> cells = cellReader.getCells();
MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3);
MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
highThresholdCDR1, preliminaryResults.getMatchMap(), preliminaryResults.getTime());
highThresholdCDR1, preliminaryResults);
//result writer
MatchingFileWriter writer = new MatchingFileWriter(filename + "First", results[0].getComments(),