UI cleanup, some code cleanup

This commit is contained in:
2022-02-20 01:05:28 -06:00
parent 0bebbc7602
commit 837ef7bfe4
5 changed files with 179 additions and 436 deletions

View File

@@ -11,6 +11,7 @@ public class GraphWithMapData implements java.io.Serializable {
private String sourceFilename;
private final SimpleWeightedGraph graph;
private Integer numWells;
private Integer[] wellConcentrations;
private Integer alphaCount;
private Integer betaCount;
private Integer lowThreshold;
@@ -24,14 +25,15 @@ public class GraphWithMapData implements java.io.Serializable {
private final Map<Integer, Integer> betaWellCounts;
private final Duration time;
public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer alphaCount, Integer betaCount,
Integer lowThreshold, Integer highThreshold,
public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
Integer alphaCount, Integer betaCount, Integer lowThreshold, Integer highThreshold,
Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts, Duration time) {
this.graph = graph;
this.numWells = numWells;
this.wellConcentrations = wellConcentrations;
this.alphaCount = alphaCount;
this.betaCount = betaCount;
this.lowThreshold = lowThreshold;
@@ -54,6 +56,10 @@ public class GraphWithMapData implements java.io.Serializable {
return numWells;
}
public Integer[] getWellConcentrations() {
return wellConcentrations;
}
public Integer getAlphaCount() {
return alphaCount;
}

View File

@@ -17,7 +17,7 @@ public class Plate {
boolean exponential = false;
public Plate (int size, double error, Integer[] concentrations) {
public Plate(int size, double error, Integer[] concentrations) {
this.size = size;
this.error = error;
this.concentrations = concentrations;
@@ -25,7 +25,7 @@ public class Plate {
wells = new ArrayList<>();
}
public Plate(String sourceFileName, List<List<Integer[]>> wells){
public Plate(String sourceFileName, List<List<Integer[]>> wells) {
this.sourceFile = sourceFileName;
this.wells = wells;
this.size = wells.size();

View File

@@ -1,4 +1,3 @@
//import org.jgrapht.Graph;
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
@@ -14,6 +13,7 @@ import java.time.Duration;
import java.util.*;
import java.util.stream.IntStream;
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
public class Simulator {
private static final int cdr3AlphaIndex = 0;
private static final int cdr3BetaIndex = 1;
@@ -22,7 +22,6 @@ public class Simulator {
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
//In real T cells, CDR1s have about one third the diversity of CDR3s
//previous sim was only CDR3s
List<Integer> numbersCDR3 = new ArrayList<>();
List<Integer> numbersCDR1 = new ArrayList<>();
Integer numDistCDR3s = 2 * numDistinctCells + 1;
@@ -32,7 +31,7 @@ public class Simulator {
Collections.shuffle(numbersCDR1);
//Each cell represented by 4 values
//two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
Integer tmpCDR3a = numbersCDR3.get(i);
@@ -45,16 +44,6 @@ public class Simulator {
return new CellSample(distinctCells, cdr1Freq);
}
// Version that reads in a graph? Possibly should just separate graph-making into its own function
// public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
// Plate samplePlate, Integer lowThreshold,
// Integer highThreshold, Integer maxOccupancyDifference,
// Integer minOverlapPercent, boolean verbose, boolean importGraph,
// SimpleWeightedGraph graph){
//
// }
//Make the graph needed for matching CDR3s
public static GraphWithMapData makeGraph(List<Integer[]> distinctCells, Plate samplePlate, Integer lowThreshold,
Integer highThreshold, boolean verbose) {
@@ -65,7 +54,7 @@ public class Simulator {
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
if(verbose){System.out.println("Making well maps");}
@@ -78,14 +67,13 @@ public class Simulator {
if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy peptides because they have no signal value.
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
//peptide that's above the threshold.
if(verbose){System.out.println("Removing peptides present in all wells.");}
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");}
//Remove saturating-occupancy sequences because they have no signal value.
//Remove sequences with total occupancy below minimum pair overlap threshold
if(verbose){System.out.println("Removing sequences present in all wells.");}
if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
if(verbose){System.out.println("Peptides removed");}
if(verbose){System.out.println("Sequences removed");}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
int pairableBetaCount = allBetas.size();
@@ -93,23 +81,26 @@ public class Simulator {
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from betas to use their vertex labels as array indices
//distinct numbers associated with them. Since I'm using a 2D array, that means
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
//subtract the vertexStartValue from betas to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue);
//new start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
if(verbose){System.out.println("Vertex maps made");}
//make adjacency matrix for bipartite graph generator
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
if(verbose){System.out.println("Creating adjacency matrix");}
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
@@ -117,7 +108,7 @@ public class Simulator {
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//the adjacency matrix to be used by the graph generator
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("Matrix created");}
@@ -134,20 +125,24 @@ public class Simulator {
//the list of beta vertices
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
//use adjacency matrix of weight created previously
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
//remove weights outside given overlap thresholds
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
Instant stop = Instant.now();
Duration time = Duration.between(start, stop);
return new GraphWithMapData(graph, numWells, alphaCount, betaCount, lowThreshold, highThreshold,
//return GraphWithMapData object
return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount, lowThreshold, highThreshold,
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
}
//match CDR3s
//match CDR3s.
public static MatchingResult matchCDR3s(GraphWithMapData data, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
@@ -164,25 +159,20 @@ public class Simulator {
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
if(verbose){System.out.println("Eliminating edges with weights less than" + minOverlapPercent.toString() +
" percent of vertex occupancy value.");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
if(verbose){System.out.println("Edges with weights much less than occupancy values set to 0.0");}
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices of massively different occupancy");}
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
+ maxOccupancyDifference);}
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
maxOccupancyDifference);
if(verbose){System.out.println("Edges between vertices of massively different occupancy set to 0.0");}
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"set to 0.0");}
//Find Maximum Weighted Matching
// if(verbose){System.out.println("Finding maximum weighted matching");}
// MaximumWeightBipartiteMatching maxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
// if(verbose){System.out.println("Matching completed");}
// Instant stop = Instant.now();
//trying with jheaps addressable now to improve performance
//using jheaps library class PairingHeap for improved efficiency
if(verbose){System.out.println("Finding maximum weighted matching");}
//Attempting to use addressable heap to improve performance
MaximumWeightBipartiteMatching maxWeightMatching =
@@ -204,7 +194,6 @@ public class Simulator {
header.add("Matched correctly?");
header.add("P-value");
//Results for csv file
List<List<String>> allResults = new ArrayList<>();
NumberFormat nf = NumberFormat.getInstance(Locale.US);
@@ -274,215 +263,6 @@ public class Simulator {
return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time);
}
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose){
if(verbose){System.out.println("Cells: " + distinctCells.size());}
Instant start = Instant.now();
int numWells = samplePlate.getSize();
int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex};
if(verbose){System.out.println("Making cell maps");}
//HashMap from cells, keyed to Alphas, values Betas, for checking if matches are correct
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
if(verbose){System.out.println("Making well maps");}
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
int alphaCount = allAlphas.size();
if(verbose){System.out.println("all alphas count: " + alphaCount);}
int betaCount = allBetas.size();
if(verbose){System.out.println("all betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy peptides because they have no signal value.
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
//peptide that's above the threshold.
if(verbose){System.out.println("Removing peptides present in all wells.");}
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");}
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
if(verbose){System.out.println("Peptides removed");}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alpha count: " + pairableAlphaCount);}
int pairableBetaCount = allBetas.size();
if(verbose){System.out.println("Remaining beta count: " + pairableBetaCount);}
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from betas to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
if(verbose){System.out.println("Vertex maps made");}
if(verbose){System.out.println("Creating adjacency matrix");}
//Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
//count how many wells each beta appears in
Map<Integer, Integer> betaWellCounts = new HashMap<>();
//the adjacency matrix to be used by the graph generator
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("matrix created");}
//create bipartite graph
if(verbose){System.out.println("creating graph");}
//the graph object
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices);
//the list of beta vertices
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
//write graph to file
GraphMLFileWriter writer = new GraphMLFileWriter("graph", graph);
writer.writeGraphToFile();
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
if(verbose){System.out.println("Edges with weights much less than occupancy values set to 0.0");}
//Filter by relative occupancy
if(verbose){System.out.println("Eliminating edges between vertices of massively different occupancy");}
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
maxOccupancyDifference);
if(verbose){System.out.println("Edges between vertices of massively different occupancy set to 0.0");}
//Find Maximum Weighted Matching
// if(verbose){System.out.println("Finding maximum weighted matching");}
// MaximumWeightBipartiteMatching maxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
// if(verbose){System.out.println("Matching completed");}
// Instant stop = Instant.now();
//trying with jheaps addressable now to improve performance
if(verbose){System.out.println("Finding maximum weighted matching");}
//Attempting to use addressable heap to improve performance
MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(),
plateVtoBMap.keySet(),
i -> new PairingHeap(Comparator.naturalOrder()));
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
if(verbose){System.out.println("Matching completed");}
Instant stop = Instant.now();
//Header for CSV file
List<String> header = new ArrayList<>();
header.add("Alpha");
header.add("Alpha well count");
header.add("Beta");
header.add("Beta well count");
header.add("Overlap well count");
header.add("Matched correctly?");
header.add("P-value");
//Results for csv file
List<List<String>> allResults = new ArrayList<>();
NumberFormat nf = NumberFormat.getInstance(Locale.US);
MathContext mc = new MathContext(3);
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e;
int trueCount = 0;
int falseCount = 0;
boolean check;
Map<Integer, Integer> matchMap = new HashMap<>();
while(weightIter.hasNext()) {
e = weightIter.next();
Integer source = graph.getEdgeSource(e);
Integer target = graph.getEdgeTarget(e);
//The match map is all matches found, not just true matches!
matchMap.put(plateVtoAMap.get(source), plateVtoBMap.get(target));
check = plateVtoBMap.get(target).equals(distCellsMapAlphaKey.get(plateVtoAMap.get(source)));
if(check) {
trueCount++;
}
else {
falseCount++;
}
List<String> result = new ArrayList<>();
result.add(plateVtoAMap.get(source).toString());
//alpha well count
result.add(alphaWellCounts.get(plateVtoAMap.get(source)).toString());
result.add(plateVtoBMap.get(target).toString());
//beta well count
result.add(betaWellCounts.get(plateVtoBMap.get(target)).toString());
//overlap count
result.add(Double.toString(graph.getEdgeWeight(e)));
result.add(Boolean.toString(check));
double pValue = Equations.pValue(numWells, alphaWellCounts.get(plateVtoAMap.get(source)),
betaWellCounts.get(plateVtoBMap.get(target)), graph.getEdgeWeight(e));
BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
result.add(pValueTrunc.toString());
allResults.add(result);
}
//Metadate comments for CSV file
int min = Math.min(alphaCount, betaCount);
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
List<String> comments = new ArrayList<>();
comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount);
comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold);
comments.add("Minimum overlap percent: " + minOverlapPercent);
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
comments.add("Pairing attempt rate: " + attemptRateTrunc);
comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount);
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
Duration time = Duration.between(start, stop);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(verbose){
for(String s: comments){
System.out.println(s);
}
}
return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
}
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
@@ -500,8 +280,8 @@ public class Simulator {
System.out.println("Previous match maps made");
System.out.println("Making cell maps");
Map<Integer, Integer> alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
Map<Integer, Integer> betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
System.out.println("Cell maps made");
System.out.println("Making well maps");
@@ -540,11 +320,11 @@ public class Simulator {
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are CDR3s
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue);
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
vertexStartValue += plateVtoCDR3Map.size();
//keys are sequential integers vertices, values are CDR1s
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue);
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
//keys are CDR3s, values are sequential integer vertices from previous map
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
//keys are CDR1s, values are sequential integer vertices from previous map
@@ -561,7 +341,7 @@ public class Simulator {
Map<Integer, Integer> wellNCDR3s = null;
Map<Integer, Integer> wellNCDR1s = null;
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
System.out.println("Matrix created");
@@ -790,38 +570,38 @@ public class Simulator {
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
private static void countPeptidesAndFillMatrix(Plate samplePlate,
Map<Integer,Integer> allRowPeptides,
Map<Integer,Integer> allColumnPeptides,
Map<Integer,Integer> rowPeptideToVertexMap,
Map<Integer,Integer> columnPeptideToVertexMap,
int[] rowPeptideIndices,
int[] colPeptideIndices,
Map<Integer, Integer> rowPeptideCounts,
Map<Integer,Integer> columnPeptideCounts,
double[][] weights){
private static void countSequencesAndFillMatrix(Plate samplePlate,
Map<Integer,Integer> allRowSequences,
Map<Integer,Integer> allColumnSequences,
Map<Integer,Integer> rowSequenceToVertexMap,
Map<Integer,Integer> columnSequenceToVertexMap,
int[] rowSequenceIndices,
int[] colSequenceIndices,
Map<Integer, Integer> rowSequenceCounts,
Map<Integer,Integer> columnSequenceCounts,
double[][] weights){
Map<Integer, Integer> wellNRowPeptides = null;
Map<Integer, Integer> wellNColumnPeptides = null;
int vertexStartValue = rowPeptideToVertexMap.size();
int vertexStartValue = rowSequenceToVertexMap.size();
int numWells = samplePlate.getSize();
for (int n = 0; n < numWells; n++) {
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices);
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowSequenceIndices);
for (Integer a : wellNRowPeptides.keySet()) {
if(allRowPeptides.containsKey(a)){
rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
if(allRowSequences.containsKey(a)){
rowSequenceCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
}
}
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices);
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colSequenceIndices);
for (Integer b : wellNColumnPeptides.keySet()) {
if(allColumnPeptides.containsKey(b)){
columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
if(allColumnSequences.containsKey(b)){
columnSequenceCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
}
}
for (Integer i : wellNRowPeptides.keySet()) {
if(allRowPeptides.containsKey(i)){
if(allRowSequences.containsKey(i)){
for (Integer j : wellNColumnPeptides.keySet()) {
if(allColumnPeptides.containsKey(j)){
weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0;
if(allColumnSequences.containsKey(j)){
weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0;
}
}
}
@@ -886,19 +666,19 @@ public class Simulator {
}
}
private static Map<Integer, Integer> makePeptideToPeptideMap(List<Integer[]> cells, int keyPeptideIndex,
int valuePeptideIndex){
Map<Integer, Integer> keyPeptideToValuePeptideMap = new HashMap<>();
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
int valueSequenceIndex){
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
for (Integer[] cell : cells) {
keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]);
keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]);
}
return keyPeptideToValuePeptideMap;
return keySequenceToValueSequenceMap;
}
private static Map<Integer, Integer> makeVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
Integer index = startValue;
for (Integer k: peptides.keySet()) {
for (Integer k: sequences.keySet()) {
map.put(index, k);
index++;
}

View File

@@ -256,15 +256,20 @@ public class UserInterface {
// }
// else {
while (!quit) {
System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
System.out.println("");
System.out.println("--------BiGPairSEQ SIMULATOR--------");
System.out.println("ALPHA/BETA T-CELL RECEPTOR MATCHING");
System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
System.out.println("------------------------------------");
System.out.println("Please select an option:");
System.out.println("1) Generate a population of distinct cells");
System.out.println("2) Generate a sample plate of T cells");
System.out.println("3) Generate CDR3 alpha/beta occupancy graph and data");
System.out.println("4) Simulate CDR3 alpha/beta T cell matching");
System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
//Need to re-do the CDR3/CDR1 matching to correspond to new pattern
//System.out.println("5) Generate CDR3/CDR1 occupancy graph");
System.out.println("6) Simulate CDR3/CDR1 T cell matching");
System.out.println("7) Acknowledgements");
//System.out.println("6) Simulate CDR3/CDR1 T cell matching");
System.out.println("9) Acknowledgements");
System.out.println("0) Exit");
try {
input = sc.nextInt();
@@ -273,8 +278,8 @@ public class UserInterface {
case 2 -> makePlate();
case 3 -> makeCDR3Graph();
case 4 -> matchCDR3s();
case 6 -> matchCellsCDR1();
case 7 -> acknowledge();
//case 6 -> matchCellsCDR1();
case 9 -> acknowledge();
case 0 -> quit = true;
default -> throw new InputMismatchException("Invalid input.");
}
@@ -295,7 +300,7 @@ public class UserInterface {
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
System.out.println("\nThe cells will be written to a file.");
System.out.println("\nThe cells will be written to a CSV file.");
System.out.print("Please enter a file name: ");
filename = sc.next();
System.out.println("CDR3 sequences are more diverse than CDR1 sequences.");
@@ -311,7 +316,6 @@ public class UserInterface {
sc.next();
}
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
//CellSample sample = Simulator.generateCellSampleExp(numCells, cdr1Freq);
CellFileWriter writer = new CellFileWriter(filename, sample);
writer.writeCellsToFile();
}
@@ -364,10 +368,16 @@ public class UserInterface {
boolean exponential = false;
double lambda = 1.5;
try {
System.out.println("\nSimulated sample plates consist of:");
System.out.println("* a number of wells");
System.out.println(" * separated into one or more sections");
System.out.println(" * each of which has a set quantity of cells per well");
System.out.println(" * selected from a statistical distribution of distinct cells");
System.out.println(" * with a set dropout rate for individual sequences within a cell");
System.out.println("\nMaking a sample plate requires a population of distinct cells");
System.out.println("Please enter name of an existing cell sample file: ");
cellFile = sc.next();
System.out.println("\nThe sample plate will be written to file");
System.out.println("\nThe sample plate will be written to a CSV file");
System.out.print("Please enter a name for the output file: ");
filename = sc.next();
System.out.println("Select T-cell frequency distribution function");
@@ -401,7 +411,6 @@ public class UserInterface {
break;
default:
System.out.println("Invalid input. Defaulting to exponential.");
//poisson = true;
exponential = true;
}
System.out.print("Number of wells on plate: ");
@@ -426,7 +435,7 @@ public class UserInterface {
i++;
numSections--;
}
System.out.println("Errors in amplification can induce a well dropout rate for peptides");
System.out.println("Errors in amplification can induce a well dropout rate for sequences");
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
dropOutRate = sc.nextDouble();
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
@@ -462,14 +471,14 @@ public class UserInterface {
Integer lowThreshold = 0;
Integer highThreshold = Integer.MAX_VALUE;
try {
String str = "\nGenerating bipartite weighted graph encoding occupancy data requires ";
str = str.concat("a cell sample file and a sample plate file.");
String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
str = str.concat("\nrequires a cell sample file and a sample plate file.");
System.out.println(str);
System.out.print("Please enter name of an existing cell sample file: ");
cellFile = sc.next();
System.out.print("Please enter name of an existing sample plate file: ");
plateFile = sc.next();
System.out.println("The graph and occupancy data will be written to a file.");
System.out.println("The graph and occupancy data will be written to a serialized binary file.");
System.out.print("Please enter a name for the output file: ");
filename = sc.next();
System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to include in graph?");
@@ -502,7 +511,7 @@ public class UserInterface {
GraphWithMapData data = Simulator.makeGraph(cells, plate, lowThreshold, highThreshold, true);
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
System.out.println("Writing graph and occupancy data to file. This may take some time.");
System.out.println("Time to write file is not included in simulation time data.");
System.out.println("File I/O time is not included in results.");
dataWriter.writeDataToFile();
}
}
@@ -514,14 +523,18 @@ public class UserInterface {
Integer maxOccupancyDiff = Integer.MAX_VALUE;
Integer minOverlapPercent = 0;
try {
System.out.println("\nSimulated matching experiment requires graph and occupancy data file");
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
System.out.println("Please enter name of an existing graph and occupancy data file: ");
dataFile = sc.next();
System.out.println("The matching results will be written to a file.");
System.out.print("Please enter a name for the output file: ");
filename = sc.next();
System.out.println("Bipartite graph can be pre-filtered for relative alpha/beta occupancy.");
System.out.println("(To skip pre-filtering: enter number of wells on the sample plate used to make graph)");
System.out.println("What is the maximum difference in alpha/beta occupancy to attempt matching?");
maxOccupancyDiff = sc.nextInt();
System.out.println("Bipartite graph can be pre-filtered for pair well overlap percentage");
System.out.println("(To skip pre-filtering: enter 0");
System.out.println("What is the minimum overlap percentage to attempt matching? (0 - 100)");
minOverlapPercent = sc.nextInt();
if (minOverlapPercent < 0 || minOverlapPercent > 100) {
@@ -532,8 +545,8 @@ public class UserInterface {
sc.next();
}
//read object data from file
System.out.println("Reading data from file. This may take some time");
System.out.println("Time to read file is not included in simulation time data");
System.out.println("Reading graph data from file. This may take some time");
System.out.println("File I/O time is not included in results");
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFile);
GraphWithMapData data = dataReader.getData();
//set source file name
@@ -546,15 +559,26 @@ public class UserInterface {
writer.writeResultsToFile();
}
//old version before I wrote graph data to a file
// private static void matchCells() {
///////
//Rewrite this to fit new matchCDR3 method with file I/O
///////
// public static void matchCellsCDR1(){
// /*
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
// match
// */
// String filename = null;
// String preliminaryResultsFilename = null;
// String cellFile = null;
// String plateFile = null;
// Integer lowThreshold = 0;
// Integer highThreshold = Integer.MAX_VALUE;
// Integer maxOccupancyDiff = 96; //no filtering if max difference is all wells by default
// Integer minOverlapPercent = 0; //no filtering if min percentage is zero by default
// Integer lowThresholdCDR3 = 0;
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
// Integer lowThresholdCDR1 = 0;
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
// boolean outputCDR3Matches = false;
// try {
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
// System.out.print("Please enter name of an existing cell sample file: ");
@@ -564,20 +588,42 @@ public class UserInterface {
// System.out.println("The matching results will be written to a file.");
// System.out.print("Please enter a name for the output file: ");
// filename = sc.next();
// System.out.println("What is the minimum number of alpha/beta overlap wells to attempt matching?");
// lowThreshold = sc.nextInt();
// if(lowThreshold < 1){
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
// lowThresholdCDR3 = sc.nextInt();
// if(lowThresholdCDR3 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
// highThreshold = sc.nextInt();
// System.out.println("What is the maximum difference in alpha/beta occupancy to attempt matching?");
// maxOccupancyDiff = sc.nextInt();
// System.out.println("What is the minimum overlap percentage to attempt matching? (0 - 100)");
// minOverlapPercent = sc.nextInt();
// if (minOverlapPercent < 0 || minOverlapPercent > 100) {
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
// highThresholdCDR3 = sc.nextInt();
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
// maxOccupancyDiffCDR3 = sc.nextInt();
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
// minOverlapPercentCDR3 = sc.nextInt();
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
// }
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
// lowThresholdCDR1 = sc.nextInt();
// if(lowThresholdCDR1 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
// highThresholdCDR1 = sc.nextInt();
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
// System.out.print("Please enter y/n: ");
// String ans = sc.next();
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
// Matcher matcher = pattern.matcher(ans);
// if(matcher.matches()){
// outputCDR3Matches = true;
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
// preliminaryResultsFilename = sc.next();
// System.out.println("CDR3 alpha/beta matches will be output to file");
// }
// else{
// System.out.println("CDR3 alpha/beta matches will not be output to file");
// }
// } catch (InputMismatchException ex) {
// System.out.println(ex);
// sc.next();
@@ -592,129 +638,40 @@ public class UserInterface {
// else if(plate.getWells().size() == 0){
// System.out.println("No sample plate found.");
// System.out.println("Returning to main menu.");
//
// }
// else{
// if(highThreshold >= plate.getSize()){
// highThreshold = plate.getSize() - 1;
// if(highThresholdCDR3 >= plate.getSize()){
// highThresholdCDR3 = plate.getSize() - 1;
// }
// if(highThresholdCDR1 >= plate.getSize()){
// highThresholdCDR1 = plate.getSize() - 1;
// }
// List<Integer[]> cells = cellReader.getCells();
// GraphWithMapData data = Simulator.makeGraph(cells, plate, lowThreshold, highThreshold, true);
// //write data to a file
//
// MatchingResult results = Simulator.matchCDR3s(data, maxOccupancyDiff,minOverlapPercent, true);
// //result writer
// MatchingFileWriter writer = new MatchingFileWriter(filename, results);
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
// highThresholdCDR1, preliminaryResults);
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
// writer.writeResultsToFile();
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
// writer.writeResultsToFile();
// if(outputCDR3Matches){
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
// writer.writeResultsToFile();
// }
// }
// }
public static void matchCellsCDR1(){
/*
The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
match
*/
String filename = null;
String preliminaryResultsFilename = null;
String cellFile = null;
String plateFile = null;
Integer lowThresholdCDR3 = 0;
Integer highThresholdCDR3 = Integer.MAX_VALUE;
Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
Integer lowThresholdCDR1 = 0;
Integer highThresholdCDR1 = Integer.MAX_VALUE;
boolean outputCDR3Matches = false;
try {
System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
System.out.print("Please enter name of an existing cell sample file: ");
cellFile = sc.next();
System.out.print("Please enter name of an existing sample plate file: ");
plateFile = sc.next();
System.out.println("The matching results will be written to a file.");
System.out.print("Please enter a name for the output file: ");
filename = sc.next();
System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
lowThresholdCDR3 = sc.nextInt();
if(lowThresholdCDR3 < 1){
throw new InputMismatchException("Minimum value for low threshold is 1");
}
System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
highThresholdCDR3 = sc.nextInt();
System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
maxOccupancyDiffCDR3 = sc.nextInt();
System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
minOverlapPercentCDR3 = sc.nextInt();
if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
throw new InputMismatchException("Value outside range. Minimum percent set to 0");
}
System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
lowThresholdCDR1 = sc.nextInt();
if(lowThresholdCDR1 < 1){
throw new InputMismatchException("Minimum value for low threshold is 1");
}
System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
highThresholdCDR1 = sc.nextInt();
System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
System.out.println("Output a file for CDR3 alpha/beta match results as well?");
System.out.print("Please enter y/n: ");
String ans = sc.next();
Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(ans);
if(matcher.matches()){
outputCDR3Matches = true;
System.out.println("Please enter filename for CDR3 alpha/beta match results");
preliminaryResultsFilename = sc.next();
System.out.println("CDR3 alpha/beta matches will be output to file");
}
else{
System.out.println("CDR3 alpha/beta matches will not be output to file");
}
} catch (InputMismatchException ex) {
System.out.println(ex);
sc.next();
}
CellFileReader cellReader = new CellFileReader(cellFile);
PlateFileReader plateReader = new PlateFileReader(plateFile);
Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
if (cellReader.getCells().size() == 0){
System.out.println("No cell sample found.");
System.out.println("Returning to main menu.");
}
else if(plate.getWells().size() == 0){
System.out.println("No sample plate found.");
System.out.println("Returning to main menu.");
}
else{
if(highThresholdCDR3 >= plate.getSize()){
highThresholdCDR3 = plate.getSize() - 1;
}
if(highThresholdCDR1 >= plate.getSize()){
highThresholdCDR1 = plate.getSize() - 1;
}
List<Integer[]> cells = cellReader.getCells();
MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
highThresholdCDR1, preliminaryResults);
MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
writer.writeResultsToFile();
writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
writer.writeResultsToFile();
if(outputCDR3Matches){
writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
writer.writeResultsToFile();
}
}
}
private static void acknowledge(){
System.out.println("Simulation based on:");
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
System.out.println("");
System.out.println("pairSEQ citation:");
System.out.println("Howie, B., Sherwood, A. M., et. al.");
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
System.out.println("");
System.out.println("Simulation by Eugene Fischer, 2021");
System.out.println("Simulation by Eugene Fischer, 2021-2022");
}
}