Refactoring to allow graphs from file

This commit is contained in:
2022-02-19 17:23:55 -06:00
parent cfa473c7ce
commit 568a6be3c7
8 changed files with 730 additions and 244 deletions

15
.idea/libraries/jgrapht_io.xml generated Normal file
View File

@@ -0,0 +1,15 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@@ -0,0 +1,32 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.graphml.GraphMLImporter;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Reads a GraphML file into a caller-supplied graph.
 *
 * <p>The import runs inside the constructor; the graph that was passed in is
 * afterwards available from {@link #getGraph()}. I/O failures are reported on the
 * console and are not rethrown, so after a failure the graph may be unpopulated
 * or only partially populated.
 */
public class GraphFileReader {
    private final String filename;
    private final SimpleWeightedGraph graph;

    /**
     * Opens {@code filename} and imports its GraphML content into {@code graph}.
     *
     * @param filename path of the GraphML file to read
     * @param graph    graph that receives the imported vertices and edges
     */
    public GraphFileReader(String filename, SimpleWeightedGraph graph) {
        this.filename = filename;
        this.graph = graph;
        // try-with-resources closes the reader automatically
        try (BufferedReader reader = Files.newBufferedReader(Path.of(filename))) {
            // NOTE(review): these type arguments look like they should be the graph's
            // vertex and edge types rather than the graph and reader classes - confirm.
            GraphMLImporter<SimpleWeightedGraph, BufferedReader> importer = new GraphMLImporter<>();
            importer.importGraph(graph, reader);
        } catch (java.nio.file.NoSuchFileException ex) {
            // Only a missing file is reported as "not found".
            System.out.println("Graph file " + filename + " not found.");
            System.err.println(ex);
        } catch (IOException ex) {
            // Any other I/O failure (permissions, read error, ...) gets its own message.
            System.out.println("Graph file " + filename + " could not be read.");
            System.err.println(ex);
        }
    }

    /**
     * @return the graph instance that was handed to the constructor
     */
    public SimpleWeightedGraph getGraph() {
        return graph;
    }
}

View File

@@ -0,0 +1,34 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.dot.DOTExporter;
import org.jgrapht.nio.graphml.GraphMLExporter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * Writes a weighted graph to a new file in GraphML format.
 *
 * <p>NOTE(review): the file name is forced to end in {@code .gv}, although the
 * content written is GraphML - confirm whether that extension is intended.
 */
public class GraphFileWriter {
    String filename;
    SimpleWeightedGraph graph;

    /**
     * @param filename target file name; {@code .gv} is appended when it is not
     *                 already the suffix
     * @param graph    the graph to export
     */
    public GraphFileWriter(String filename, SimpleWeightedGraph graph) {
        // A plain suffix test is enough here; no regex needed.
        this.filename = filename.endsWith(".gv") ? filename : filename + ".gv";
        this.graph = graph;
    }

    /**
     * Exports the graph, including its edge weights, to the target file.
     *
     * <p>The file is opened with {@code CREATE_NEW}, so an already existing file is
     * never overwritten; that case, like any other I/O failure, is reported on the
     * console and not rethrown.
     */
    public void writeGraphToFile() {
        try (BufferedWriter writer =
                     Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW)) {
            // NOTE(review): these type arguments look like they should be the graph's
            // vertex and edge types rather than the graph and writer classes - confirm.
            GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
            // The exporter omits edge weights unless asked; without this the
            // weighted graph would be written with its weights dropped.
            exporter.setExportEdgeWeights(true);
            exporter.exportGraph(graph, writer);
        } catch (IOException ex) {
            System.out.println("Could not make new file named " + filename);
            System.err.println(ex);
        }
    }
}

View File

@@ -0,0 +1,65 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import java.time.Duration;
import java.util.Map;
/**
 * Bundles a graph with the {@link MapData} it was built from, the per-peptide
 * well counts, and the accumulated time spent producing them.
 *
 * <p>The map getters simply forward to the wrapped {@link MapData}.
 */
public class GraphWithMapData {

    private final SimpleWeightedGraph graph;
    private final MapData maps;
    private final Map<Integer, Integer> alphaWellCounts;
    private final Map<Integer, Integer> betaWellCounts;
    private final Duration time;

    /**
     * @param graph           the graph to carry
     * @param maps            the lookup maps that accompany the graph
     * @param alphaWellCounts well counts for the alphas
     * @param betaWellCounts  well counts for the betas
     * @param time            a duration; {@code maps.getTime()} is added to it and
     *                        the sum is what {@link #getTime()} returns
     */
    public GraphWithMapData(SimpleWeightedGraph graph, MapData maps, Map<Integer, Integer> alphaWellCounts,
                            Map<Integer, Integer> betaWellCounts, Duration time) {
        this.graph = graph;
        this.maps = maps;
        this.alphaWellCounts = alphaWellCounts;
        this.betaWellCounts = betaWellCounts;
        Duration mapTime = maps.getTime();
        this.time = time.plus(mapTime);
    }

    /** @return the wrapped graph */
    public SimpleWeightedGraph getGraph() {
        return this.graph;
    }

    // ---- accessors forwarded to the wrapped MapData ----

    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return this.maps.getDistCellsMapAlphaKey();
    }

    public Map<Integer, Integer> getAllAlphas() {
        return this.maps.getAllAlphas();
    }

    public Map<Integer, Integer> getAllBetas() {
        return this.maps.getAllBetas();
    }

    public Map<Integer, Integer> getPlateVtoAMap() {
        return this.maps.getPlateVtoAMap();
    }

    public Map<Integer, Integer> getPlateVtoBMap() {
        return this.maps.getPlateVtoBMap();
    }

    public Map<Integer, Integer> getPlateAtoVMap() {
        return this.maps.getPlateAtoVMap();
    }

    public Map<Integer, Integer> getPlateBtoVMap() {
        return this.maps.getPlateBtoVMap();
    }

    // ---- data held directly by this object ----

    /** @return well counts for the alphas, as passed to the constructor */
    public Map<Integer, Integer> getAlphaWellCounts() {
        return this.alphaWellCounts;
    }

    /** @return well counts for the betas, as passed to the constructor */
    public Map<Integer, Integer> getBetaWellCounts() {
        return this.betaWellCounts;
    }

    /** @return the constructor's {@code time} plus the wrapped maps' time */
    public Duration getTime() {
        return this.time;
    }
}

View File

@@ -0,0 +1,64 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import java.time.Duration;
import java.util.Map;
/**
 * Immutable holder for the lookup maps used when matching CDR3s, together with
 * the time it took to build them.
 *
 * <p>The maps are stored and returned by reference; no copies are made.
 */
public class MapData {

    private final Map<Integer, Integer> distCellsMapAlphaKey;
    private final Map<Integer, Integer> allAlphas;
    private final Map<Integer, Integer> allBetas;
    private final Map<Integer, Integer> plateVtoAMap;
    private final Map<Integer, Integer> plateVtoBMap;
    private final Map<Integer, Integer> plateAtoVMap;
    private final Map<Integer, Integer> plateBtoVMap;
    private final Duration time;

    /**
     * Stores every argument as-is.
     *
     * @param distCellsMapAlphaKey cell map keyed by alpha
     * @param allAlphas            alpha map
     * @param allBetas             beta map
     * @param plateVtoAMap         vertex-to-alpha map
     * @param plateVtoBMap         vertex-to-beta map
     * @param plateAtoVMap         alpha-to-vertex map
     * @param plateBtoVMap         beta-to-vertex map
     * @param time                 time spent building the maps
     */
    public MapData(Map<Integer, Integer> distCellsMapAlphaKey,
                   Map<Integer, Integer> allAlphas, Map<Integer, Integer> allBetas,
                   Map<Integer, Integer> plateVtoAMap, Map<Integer, Integer> plateVtoBMap,
                   Map<Integer, Integer> plateAtoVMap, Map<Integer, Integer> plateBtoVMap,
                   Duration time) {
        this.time = time;
        this.distCellsMapAlphaKey = distCellsMapAlphaKey;
        this.allAlphas = allAlphas;
        this.allBetas = allBetas;
        this.plateVtoAMap = plateVtoAMap;
        this.plateAtoVMap = plateAtoVMap;
        this.plateVtoBMap = plateVtoBMap;
        this.plateBtoVMap = plateBtoVMap;
    }

    /** @return the cell map keyed by alpha */
    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return this.distCellsMapAlphaKey;
    }

    /** @return the alpha map */
    public Map<Integer, Integer> getAllAlphas() {
        return this.allAlphas;
    }

    /** @return the beta map */
    public Map<Integer, Integer> getAllBetas() {
        return this.allBetas;
    }

    /** @return the vertex-to-alpha map */
    public Map<Integer, Integer> getPlateVtoAMap() {
        return this.plateVtoAMap;
    }

    /** @return the vertex-to-beta map */
    public Map<Integer, Integer> getPlateVtoBMap() {
        return this.plateVtoBMap;
    }

    /** @return the alpha-to-vertex map */
    public Map<Integer, Integer> getPlateAtoVMap() {
        return this.plateAtoVMap;
    }

    /** @return the beta-to-vertex map */
    public Map<Integer, Integer> getPlateBtoVMap() {
        return this.plateBtoVMap;
    }

    /** @return the time spent building the maps */
    public Duration getTime() {
        return this.time;
    }
}

View File

@@ -45,6 +45,244 @@ public class Simulator {
return new CellSample(distinctCells, cdr1Freq);
}
// Version that reads in a graph? Possibly should just separate graph-making into its own function
// public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
// Plate samplePlate, Integer lowThreshold,
// Integer highThreshold, Integer maxOccupancyDifference,
// Integer minOverlapPercent, boolean verbose, boolean importGraph,
// SimpleWeightedGraph graph){
//
// }
/**
 * Builds the lookup maps needed for matching CDR3s and records how long that took.
 *
 * @param distinctCells the distinct cells, used for the alpha-keyed cell map
 * @param samplePlate   the plate whose wells are assayed for alphas and betas
 * @param lowThreshold  lower bound handed to the occupancy filter
 * @param verbose       when true, progress messages are printed to standard output
 * @return the maps plus the elapsed build time
 */
public static MapData makeMaps(List<Integer[]> distinctCells,
                               Plate samplePlate, Integer lowThreshold, boolean verbose) {
    Instant begin = Instant.now();
    int wellTotal = samplePlate.getSize();
    int[] alphaIndex = {cdr3AlphaIndex};
    int[] betaIndex = {cdr3BetaIndex};

    if (verbose) {
        System.out.println("Making cell maps");
    }
    //Keyed to alphas, values betas
    Map<Integer, Integer> cellsByAlpha = makePeptideToPeptideMap(distinctCells, 0, 1);
    if (verbose) {
        System.out.println("Cell maps made");
        System.out.println("Making well maps");
    }

    Map<Integer, Integer> alphas = samplePlate.assayWellsPeptideP(alphaIndex);
    Map<Integer, Integer> betas = samplePlate.assayWellsPeptideP(betaIndex);
    if (verbose) {
        System.out.println("all alphas count: " + alphas.size());
        System.out.println("all betas count: " + betas.size());
        System.out.println("Well maps made");
        System.out.println("Removing peptides present in all wells.");
        System.out.println("Removing peptides with occupancy below the minimum overlap threshold");
    }

    //Peptides at saturating occupancy carry no signal, and peptides below the minimum
    //overlap threshold cannot overlap another peptide above it, so both are filtered out.
    int occupancyCeiling = wellTotal - 1;
    filterByOccupancyThreshold(alphas, lowThreshold, occupancyCeiling);
    filterByOccupancyThreshold(betas, lowThreshold, occupancyCeiling);
    if (verbose) {
        System.out.println("Peptides removed");
        System.out.println("Remaining alpha count: " + alphas.size());
        System.out.println("Remaining beta count: " + betas.size());
        System.out.println("Making vertex maps");
    }

    //The SimpleWeightedBipartiteGraphMatrixGenerator needs a distinct number for every
    //vertex. Alpha vertices are numbered first; beta vertices continue from where the
    //alphas stop, so subtracting that offset turns a beta vertex label back into an
    //array index.
    Integer alphaVertexStart = 0;
    //keys are sequential integer vertices, values are alphas
    Map<Integer, Integer> vertexToAlpha = makeVertexToPeptideMap(alphas, alphaVertexStart);
    //beta numbering starts one past the last alpha vertex
    Integer betaVertexStart = alphaVertexStart + vertexToAlpha.size();
    //keys are sequential integer vertices, values are betas
    Map<Integer, Integer> vertexToBeta = makeVertexToPeptideMap(betas, betaVertexStart);
    //inverse lookups: peptide -> vertex
    Map<Integer, Integer> alphaToVertex = invertVertexMap(vertexToAlpha);
    Map<Integer, Integer> betaToVertex = invertVertexMap(vertexToBeta);
    if (verbose) {
        System.out.println("Vertex maps made");
    }

    Duration elapsed = Duration.between(begin, Instant.now());
    return new MapData(cellsByAlpha, alphas, betas, vertexToAlpha, vertexToBeta, alphaToVertex,
            betaToVertex, elapsed);
}
/**
 * Builds the weighted bipartite alpha/beta graph needed for matching CDR3s.
 *
 * <p>Fills an adjacency matrix and the per-peptide well counts from the plate,
 * generates the graph from that matrix, then filters the edges by the given
 * occupancy thresholds.
 *
 * @param samplePlate   the plate the well counts and overlaps are taken from
 * @param maps          lookup maps produced by {@code makeMaps}
 * @param lowThreshold  lower bound handed to the edge filter
 * @param highThreshold upper bound handed to the edge filter
 * @param verbose       when true, progress messages are printed to standard output
 * @return the graph bundled with the maps, the well counts and the elapsed time
 */
public static GraphWithMapData makeGraph(Plate samplePlate, MapData maps, Integer lowThreshold,
                                         Integer highThreshold, boolean verbose) {
    int[] alphaIndex = {cdr3AlphaIndex};
    int[] betaIndex = {cdr3BetaIndex};
    Instant start = Instant.now();
    Map<Integer, Integer> plateVtoAMap = maps.getPlateVtoAMap();
    Map<Integer, Integer> plateVtoBMap = maps.getPlateVtoBMap();
    Map<Integer, Integer> plateAtoVMap = maps.getPlateAtoVMap();
    Map<Integer, Integer> plateBtoVMap = maps.getPlateBtoVMap();
    Map<Integer, Integer> allAlphas = maps.getAllAlphas();
    Map<Integer, Integer> allBetas = maps.getAllBetas();

    if (verbose) { System.out.println("Creating adjacency matrix"); }
    //Count how many wells each alpha appears in
    Map<Integer, Integer> alphaWellCounts = new HashMap<>();
    //count how many wells each beta appears in
    Map<Integer, Integer> betaWellCounts = new HashMap<>();
    //the adjacency matrix to be used by the graph generator
    double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
    countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
            plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
    if (verbose) { System.out.println("matrix created"); }

    //create bipartite graph
    if (verbose) { System.out.println("creating graph"); }
    SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
            new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
    SimpleWeightedBipartiteGraphMatrixGenerator<Integer, DefaultWeightedEdge> graphGenerator =
            new SimpleWeightedBipartiteGraphMatrixGenerator<>();
    //Vertex lists must line up with the matrix rows/columns; this relies on the
    //vertex maps iterating in insertion order (presumably LinkedHashMap - verify
    //against makeVertexToPeptideMap).
    List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet());
    graphGenerator.first(alphaVertices);
    List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
    graphGenerator.second(betaVertices);
    graphGenerator.weights(weights);
    graphGenerator.generateGraph(graph);
    if (verbose) { System.out.println("Graph created"); }

    if (verbose) { System.out.println("Eliminating edges with weights outside threshold values"); }
    filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
    if (verbose) { System.out.println("Over- and under-weight edges set to 0.0"); }

    Instant stop = Instant.now();
    //Only this method's own elapsed time is passed on: the GraphWithMapData
    //constructor already adds maps.getTime(), so adding it here as well would
    //count the map-building time twice.
    Duration time = Duration.between(start, stop);
    return new GraphWithMapData(graph, maps, alphaWellCounts, betaWellCounts, time);
}
/**
 * Matches CDR3 alphas to betas by finding a maximum weight matching on a
 * previously built bipartite graph, and packages the outcome for CSV output.
 *
 * <p>Each matched edge yields one result row (alpha, alpha well count, beta, beta
 * well count, overlap, whether the pair agrees with the known cells, p-value).
 * Summary statistics are collected as comment lines and printed when
 * {@code verbose} is set.
 *
 * @param samplePlate            the plate; supplies the well count and source file name
 * @param data                   graph, lookup maps, well counts and time spent so far
 * @param lowThreshold           reported in the result comments
 * @param highThreshold          reported in the result comments
 * @param maxOccupancyDifference reported in the result comments; not applied here
 * @param minOverlapPercent      reported in the result comments; not applied here
 * @param verbose                when true, progress and summary lines are printed
 * @return the matching result with comments, header, rows, match map and total time
 */
public static MatchingResult matchCDR3s(Plate samplePlate, GraphWithMapData data, Integer lowThreshold,
                                        Integer highThreshold, Integer maxOccupancyDifference,
                                        Integer minOverlapPercent, boolean verbose) {
    Instant start = Instant.now();
    int numWells = samplePlate.getSize();
    Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
    Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
    Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
    Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
    Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
    Integer alphaCount = data.getAllAlphas().size();
    Integer betaCount = data.getAllBetas().size();
    SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();

    //Find maximum weighted matching. A jheaps addressable pairing heap is supplied
    //in an attempt to improve performance.
    if (verbose) { System.out.println("Finding maximum weighted matching"); }
    MaximumWeightBipartiteMatching maxWeightMatching =
            new MaximumWeightBipartiteMatching(graph,
                    plateVtoAMap.keySet(),
                    plateVtoBMap.keySet(),
                    i -> new PairingHeap(Comparator.naturalOrder()));
    //Vertices are Integers, matching the graph's vertex type.
    MatchingAlgorithm.Matching<Integer, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
    if (verbose) { System.out.println("Matching completed"); }
    Instant stop = Instant.now();

    //Header for CSV file
    List<String> header = new ArrayList<>(List.of(
            "Alpha",
            "Alpha well count",
            "Beta",
            "Beta well count",
            "Overlap well count",
            "Matched correctly?",
            "P-value"));

    //Results for csv file
    List<List<String>> allResults = new ArrayList<>();
    NumberFormat nf = NumberFormat.getInstance(Locale.US);
    MathContext mc = new MathContext(3);
    int trueCount = 0;
    int falseCount = 0;
    //The match map is all matches found, not just true matches!
    Map<Integer, Integer> matchMap = new HashMap<>();
    for (DefaultWeightedEdge e : graphMatching) {
        Integer alpha = plateVtoAMap.get(graph.getEdgeSource(e));
        Integer beta = plateVtoBMap.get(graph.getEdgeTarget(e));
        matchMap.put(alpha, beta);

        //A match is correct when the known cell map pairs this alpha with this beta
        boolean check = beta.equals(distCellsMapAlphaKey.get(alpha));
        if (check) {
            trueCount++;
        } else {
            falseCount++;
        }

        Integer alphaWells = alphaWellCounts.get(alpha);
        Integer betaWells = betaWellCounts.get(beta);
        //the edge weight is the overlap well count
        double overlap = graph.getEdgeWeight(e);

        List<String> result = new ArrayList<>();
        result.add(alpha.toString());
        result.add(alphaWells.toString());
        result.add(beta.toString());
        result.add(betaWells.toString());
        result.add(Double.toString(overlap));
        result.add(Boolean.toString(check));
        double pValue = Equations.pValue(numWells, alphaWells, betaWells, overlap);
        BigDecimal pValueTrunc = new BigDecimal(pValue, mc);
        result.add(pValueTrunc.toString());
        allResults.add(result);
    }

    //Metadata comments for CSV file
    int attempted = trueCount + falseCount;
    int min = Math.min(alphaCount, betaCount);
    //Guard the divisions: with no alphas/betas or an empty matching the quotient
    //would be NaN, and BigDecimal cannot represent NaN (NumberFormatException).
    double attemptRate = (min == 0) ? 0.0 : (double) attempted / min;
    BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
    double pairingErrorRate = (attempted == 0) ? 0.0 : (double) falseCount / attempted;
    BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);

    List<String> comments = new ArrayList<>();
    comments.add("Total alphas found: " + alphaCount);
    comments.add("Total betas found: " + betaCount);
    comments.add("High overlap threshold: " + highThreshold);
    comments.add("Low overlap threshold: " + lowThreshold);
    comments.add("Minimum overlap percent: " + minOverlapPercent);
    comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
    comments.add("Pairing attempt rate: " + attemptRateTrunc);
    comments.add("Correct pairings: " + trueCount);
    comments.add("Incorrect pairings: " + falseCount);
    comments.add("Pairing error rate: " + pairingErrorRateTrunc);
    //Total time = this matching step plus the time already recorded in data
    Duration time = Duration.between(start, stop);
    time = time.plus(data.getTime());
    comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
    if (verbose) {
        for (String s : comments) {
            System.out.println(s);
        }
    }
    return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
}
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
Plate samplePlate, Integer lowThreshold,
Integer highThreshold, Integer maxOccupancyDifference,
@@ -58,7 +296,7 @@ public class Simulator {
if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas
//HashMap from cells, keyed to Alphas, values Betas, for checking if matches are correct
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");}
@@ -132,10 +370,16 @@ public class Simulator {
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
//write graph to file
GraphFileWriter writer = new GraphFileWriter("graph", graph);
writer.writeGraphToFile();
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights much less than occupancy values");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);

View File

@@ -14,261 +14,266 @@ public class UserInterface {
static boolean quit = false;
public static void main(String[] args) {
//for now, commenting out all the command line argument stuff.
// Refactoring to output files of graphs, so it would all need to change anyway.
if(args.length != 0){
//These command line options are a big mess
//Really, I don't think command line tools are expected to work in this many different modes
//making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
//should be three separate programs.
//There might be a way to do it with option parameters?
Options mainOptions = new Options();
Option makeCells = Option.builder("cells")
.longOpt("make-cells")
.desc("Makes a file of distinct cells")
.build();
Option makePlate = Option.builder("plates")
.longOpt("make-plates")
.desc("Makes a sample plate file")
.build();
Option matchCDR3 = Option.builder("match")
.longOpt("match-cdr3")
.desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
.build();
OptionGroup mainGroup = new OptionGroup();
mainGroup.addOption(makeCells);
mainGroup.addOption(makePlate);
mainGroup.addOption(matchCDR3);
mainGroup.setRequired(true);
mainOptions.addOptionGroup(mainGroup);
//Reuse clones of this for other options groups, rather than making it lots of times
Option outputFile = Option.builder("o")
.longOpt("output-file")
.hasArg()
.argName("filename")
.desc("Name of output file")
.build();
mainOptions.addOption(outputFile);
//Options cellOptions = new Options();
Option numCells = Option.builder("nc")
.longOpt("num-cells")
.desc("The number of distinct cells to generate")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(numCells);
Option cdr1Freq = Option.builder("d")
.longOpt("peptide-diversity-factor")
.hasArg()
.argName("number")
.desc("Number of distinct CDR3s for every CDR1")
.build();
mainOptions.addOption(cdr1Freq);
//Option cellOutput = (Option) outputFile.clone();
//cellOutput.setRequired(true);
//mainOptions.addOption(cellOutput);
//Options plateOptions = new Options();
Option inputCells = Option.builder("c")
.longOpt("cell-file")
.hasArg()
.argName("file")
.desc("The cell sample file used for filling wells")
.build();
mainOptions.addOption(inputCells);
Option numWells = Option.builder("w")
.longOpt("num-wells")
.hasArg()
.argName("number")
.desc("The number of wells on each plate")
.build();
mainOptions.addOption(numWells);
Option numPlates = Option.builder("np")
.longOpt("num-plates")
.hasArg()
.argName("number")
.desc("The number of plate files to output")
.build();
mainOptions.addOption(numPlates);
//Option plateOutput = (Option) outputFile.clone();
//plateOutput.setRequired(true);
//plateOutput.setDescription("Prefix for plate output filenames");
//mainOptions.addOption(plateOutput);
Option plateErr = Option.builder("err")
.longOpt("drop-out-rate")
.hasArg()
.argName("number")
.desc("Well drop-out rate. (Probability between 0 and 1)")
.build();
mainOptions.addOption(plateErr);
Option plateConcentrations = Option.builder("t")
.longOpt("t-cells-per-well")
.hasArgs()
.argName("number 1, number 2, ...")
.desc("Number of T cells per well for each plate section")
.build();
mainOptions.addOption(plateConcentrations);
//different distributions, mutually exclusive
OptionGroup plateDistributions = new OptionGroup();
Option plateExp = Option.builder("exponential")
.desc("Sample from distinct cells with exponential frequency distribution")
.build();
plateDistributions.addOption(plateExp);
Option plateGaussian = Option.builder("gaussian")
.desc("Sample from distinct cells with gaussain frequency distribution")
.build();
plateDistributions.addOption(plateGaussian);
Option platePoisson = Option.builder("poisson")
.desc("Sample from distinct cells with poisson frequency distribution")
.build();
plateDistributions.addOption(platePoisson);
mainOptions.addOptionGroup(plateDistributions);
Option plateStdDev = Option.builder("stddev")
.desc("Standard deviation for gaussian distribution")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(plateStdDev);
Option plateLambda = Option.builder("lambda")
.desc("Lambda for exponential distribution")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(plateLambda);
// if(args.length != 0){
// //These command line options are a big mess
// //Really, I don't think command line tools are expected to work in this many different modes
// //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
// //should be three separate programs.
// //There might be a way to do it with option parameters?
//
// String cellFile, String filename, Double stdDev,
// Integer numWells, Integer numSections,
// Integer[] concentrations, Double dropOutRate
// Options mainOptions = new Options();
// Option makeCells = Option.builder("cells")
// .longOpt("make-cells")
// .desc("Makes a file of distinct cells")
// .build();
// Option makePlate = Option.builder("plates")
// .longOpt("make-plates")
// .desc("Makes a sample plate file")
// .build();
// Option matchCDR3 = Option.builder("match")
// .longOpt("match-cdr3")
// .desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
// .build();
// OptionGroup mainGroup = new OptionGroup();
// mainGroup.addOption(makeCells);
// mainGroup.addOption(makePlate);
// mainGroup.addOption(matchCDR3);
// mainGroup.setRequired(true);
// mainOptions.addOptionGroup(mainGroup);
//
//Options matchOptions = new Options();
inputCells.setDescription("The cell sample file to be used for matching.");
mainOptions.addOption(inputCells);
Option lowThresh = Option.builder("low")
.longOpt("low-threshold")
.hasArg()
.argName("number")
.desc("Sets the minimum occupancy overlap to attempt matching")
.build();
mainOptions.addOption(lowThresh);
Option highThresh = Option.builder("high")
.longOpt("high-threshold")
.hasArg()
.argName("number")
.desc("Sets the maximum occupancy overlap to attempt matching")
.build();
mainOptions.addOption(highThresh);
Option occDiff = Option.builder("occdiff")
.longOpt("occupancy-difference")
.hasArg()
.argName("Number")
.desc("Maximum difference in alpha/beta occupancy to attempt matching")
.build();
mainOptions.addOption(occDiff);
Option overlapPer = Option.builder("ovper")
.longOpt("overlap-percent")
.hasArg()
.argName("Percent")
.desc("Minimum overlap percent to attempt matching (0 -100)")
.build();
mainOptions.addOption(overlapPer);
Option inputPlates = Option.builder("p")
.longOpt("plate-files")
.hasArgs()
.desc("Plate files to match")
.build();
mainOptions.addOption(inputPlates);
CommandLineParser parser = new DefaultParser();
try {
CommandLine line = parser.parse(mainOptions, args);
if(line.hasOption("match")){
//line = parser.parse(mainOptions, args);
String cellFile = line.getOptionValue("c");
Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff));
Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer));
for(String plate: line.getOptionValues("p")) {
matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
}
}
else if(line.hasOption("cells")){
//line = parser.parse(mainOptions, args);
String filename = line.getOptionValue("o");
Integer numDistCells = Integer.valueOf(line.getOptionValue("nc"));
Integer freq = Integer.valueOf(line.getOptionValue("d"));
makeCells(filename, numDistCells, freq);
}
else if(line.hasOption("plates")){
//line = parser.parse(mainOptions, args);
String cellFile = line.getOptionValue("c");
String filenamePrefix = line.getOptionValue("o");
Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
String[] concentrationsToUseString = line.getOptionValues("t");
Integer numSections = concentrationsToUseString.length;
Integer[] concentrationsToUse = new Integer[numSections];
for(int i = 0; i <numSections; i++){
concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]);
}
Double dropOutRate = Double.valueOf(line.getOptionValue("err"));
if(line.hasOption("exponential")){
Double lambda = Double.valueOf(line.getOptionValue("lambda"));
for(int i = 1; i <= numPlatesToMake; i++){
makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
concentrationsToUse,dropOutRate);
}
}
else if(line.hasOption("gaussian")){
Double stdDev = Double.valueOf(line.getOptionValue("std-dev"));
for(int i = 1; i <= numPlatesToMake; i++){
makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
concentrationsToUse,dropOutRate);
}
}
else if(line.hasOption("poisson")){
for(int i = 1; i <= numPlatesToMake; i++){
makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
concentrationsToUse,dropOutRate);
}
}
}
}
catch (ParseException exp) {
System.err.println("Parsing failed. Reason: " + exp.getMessage());
}
}
else {
// //Reuse clones of this for other options groups, rather than making it lots of times
// Option outputFile = Option.builder("o")
// .longOpt("output-file")
// .hasArg()
// .argName("filename")
// .desc("Name of output file")
// .build();
// mainOptions.addOption(outputFile);
//
// //Options cellOptions = new Options();
// Option numCells = Option.builder("nc")
// .longOpt("num-cells")
// .desc("The number of distinct cells to generate")
// .hasArg()
// .argName("number")
// .build();
// mainOptions.addOption(numCells);
// Option cdr1Freq = Option.builder("d")
// .longOpt("peptide-diversity-factor")
// .hasArg()
// .argName("number")
// .desc("Number of distinct CDR3s for every CDR1")
// .build();
// mainOptions.addOption(cdr1Freq);
// //Option cellOutput = (Option) outputFile.clone();
// //cellOutput.setRequired(true);
// //mainOptions.addOption(cellOutput);
//
// //Options plateOptions = new Options();
// Option inputCells = Option.builder("c")
// .longOpt("cell-file")
// .hasArg()
// .argName("file")
// .desc("The cell sample file used for filling wells")
// .build();
// mainOptions.addOption(inputCells);
// Option numWells = Option.builder("w")
// .longOpt("num-wells")
// .hasArg()
// .argName("number")
// .desc("The number of wells on each plate")
// .build();
// mainOptions.addOption(numWells);
// Option numPlates = Option.builder("np")
// .longOpt("num-plates")
// .hasArg()
// .argName("number")
// .desc("The number of plate files to output")
// .build();
// mainOptions.addOption(numPlates);
// //Option plateOutput = (Option) outputFile.clone();
// //plateOutput.setRequired(true);
// //plateOutput.setDescription("Prefix for plate output filenames");
// //mainOptions.addOption(plateOutput);
// Option plateErr = Option.builder("err")
// .longOpt("drop-out-rate")
// .hasArg()
// .argName("number")
// .desc("Well drop-out rate. (Probability between 0 and 1)")
// .build();
// mainOptions.addOption(plateErr);
// Option plateConcentrations = Option.builder("t")
// .longOpt("t-cells-per-well")
// .hasArgs()
// .argName("number 1, number 2, ...")
// .desc("Number of T cells per well for each plate section")
// .build();
// mainOptions.addOption(plateConcentrations);
//
////different distributions, mutually exclusive
// OptionGroup plateDistributions = new OptionGroup();
// Option plateExp = Option.builder("exponential")
// .desc("Sample from distinct cells with exponential frequency distribution")
// .build();
// plateDistributions.addOption(plateExp);
// Option plateGaussian = Option.builder("gaussian")
// .desc("Sample from distinct cells with gaussain frequency distribution")
// .build();
// plateDistributions.addOption(plateGaussian);
// Option platePoisson = Option.builder("poisson")
// .desc("Sample from distinct cells with poisson frequency distribution")
// .build();
// plateDistributions.addOption(platePoisson);
// mainOptions.addOptionGroup(plateDistributions);
//
// Option plateStdDev = Option.builder("stddev")
// .desc("Standard deviation for gaussian distribution")
// .hasArg()
// .argName("number")
// .build();
// mainOptions.addOption(plateStdDev);
//
// Option plateLambda = Option.builder("lambda")
// .desc("Lambda for exponential distribution")
// .hasArg()
// .argName("number")
// .build();
// mainOptions.addOption(plateLambda);
//
//
//
////
//// String cellFile, String filename, Double stdDev,
//// Integer numWells, Integer numSections,
//// Integer[] concentrations, Double dropOutRate
////
//
// //Options matchOptions = new Options();
// inputCells.setDescription("The cell sample file to be used for matching.");
// mainOptions.addOption(inputCells);
// Option lowThresh = Option.builder("low")
// .longOpt("low-threshold")
// .hasArg()
// .argName("number")
// .desc("Sets the minimum occupancy overlap to attempt matching")
// .build();
// mainOptions.addOption(lowThresh);
// Option highThresh = Option.builder("high")
// .longOpt("high-threshold")
// .hasArg()
// .argName("number")
// .desc("Sets the maximum occupancy overlap to attempt matching")
// .build();
// mainOptions.addOption(highThresh);
// Option occDiff = Option.builder("occdiff")
// .longOpt("occupancy-difference")
// .hasArg()
// .argName("Number")
// .desc("Maximum difference in alpha/beta occupancy to attempt matching")
// .build();
// mainOptions.addOption(occDiff);
// Option overlapPer = Option.builder("ovper")
// .longOpt("overlap-percent")
// .hasArg()
// .argName("Percent")
// .desc("Minimum overlap percent to attempt matching (0 -100)")
// .build();
// mainOptions.addOption(overlapPer);
// Option inputPlates = Option.builder("p")
// .longOpt("plate-files")
// .hasArgs()
// .desc("Plate files to match")
// .build();
// mainOptions.addOption(inputPlates);
//
//
//
// CommandLineParser parser = new DefaultParser();
// try {
// CommandLine line = parser.parse(mainOptions, args);
// if(line.hasOption("match")){
// //line = parser.parse(mainOptions, args);
// String cellFile = line.getOptionValue("c");
// Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
// Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
// Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff));
// Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer));
// for(String plate: line.getOptionValues("p")) {
// matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
// }
// }
// else if(line.hasOption("cells")){
// //line = parser.parse(mainOptions, args);
// String filename = line.getOptionValue("o");
// Integer numDistCells = Integer.valueOf(line.getOptionValue("nc"));
// Integer freq = Integer.valueOf(line.getOptionValue("d"));
// makeCells(filename, numDistCells, freq);
// }
// else if(line.hasOption("plates")){
// //line = parser.parse(mainOptions, args);
// String cellFile = line.getOptionValue("c");
// String filenamePrefix = line.getOptionValue("o");
// Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
// Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
// String[] concentrationsToUseString = line.getOptionValues("t");
// Integer numSections = concentrationsToUseString.length;
//
// Integer[] concentrationsToUse = new Integer[numSections];
// for(int i = 0; i <numSections; i++){
// concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]);
// }
// Double dropOutRate = Double.valueOf(line.getOptionValue("err"));
// if(line.hasOption("exponential")){
// Double lambda = Double.valueOf(line.getOptionValue("lambda"));
// for(int i = 1; i <= numPlatesToMake; i++){
// makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
// concentrationsToUse,dropOutRate);
// }
// }
// else if(line.hasOption("gaussian")){
// Double stdDev = Double.valueOf(line.getOptionValue("std-dev"));
// for(int i = 1; i <= numPlatesToMake; i++){
// makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
// concentrationsToUse,dropOutRate);
// }
//
// }
// else if(line.hasOption("poisson")){
// for(int i = 1; i <= numPlatesToMake; i++){
// makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
// concentrationsToUse,dropOutRate);
// }
// }
// }
// }
// catch (ParseException exp) {
// System.err.println("Parsing failed. Reason: " + exp.getMessage());
// }
// }
// else {
while (!quit) {
System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
System.out.println("Please select an option:");
System.out.println("1) Generate a population of distinct cells");
System.out.println("2) Generate a sample plate of T cells");
System.out.println("3) Simulate CDR3 alpha/beta T cell matching");
System.out.println("4) Simulate CDR3/CDR1 T cell matching");
System.out.println("5) Acknowledgements");
System.out.println("3) Generate CDR3 alpha/beta occupancy graph");
//System.out.println("4) Generate CDR3/CDR1 occupancy graph");
System.out.println("5) Simulate CDR3 alpha/beta T cell matching");
System.out.println("6) Simulate CDR3/CDR1 T cell matching");
System.out.println("7) Acknowledgements");
System.out.println("0) Exit");
try {
input = sc.nextInt();
switch (input) {
case 1 -> makeCells();
case 2 -> makePlate();
case 3 -> matchCells();
case 4 -> matchCellsCDR1();
case 5 -> acknowledge();
case 3 -> makeCDR3Graph();
case 5 -> matchCells();
case 6 -> matchCellsCDR1();
case 7 -> acknowledge();
case 0 -> quit = true;
default -> throw new InputMismatchException("Invalid input.");
}
@@ -278,7 +283,7 @@ public class UserInterface {
}
}
sc.close();
}
// }
}
private static void makeCells() {
@@ -448,6 +453,8 @@ public class UserInterface {
}
}
private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold,
Integer maxOccupancyDifference, Integer minOverlapPercent){
CellFileReader cellReader = new CellFileReader(cellFile);
@@ -472,6 +479,11 @@ public class UserInterface {
}
}
/**
 * Handler for the "Generate CDR3 alpha/beta occupancy graph" menu entry.
 *
 * Placeholder: at present this only declares the three local variables
 * below, all initialised to null, and returns without prompting the user,
 * reading any file, or building a graph.
 */
private static void makeCDR3Graph() {
// TODO: not implemented yet; the locals below are never read or assigned.
String filename = null;
String cellFile = null;
String plateFile = null;
}
private static void matchCells() {
String filename = null;
@@ -525,7 +537,10 @@ public class UserInterface {
highThreshold = plate.getSize() - 1;
}
List<Integer[]> cells = cellReader.getCells();
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, maxOccupancyDiff, minOverlapPercent, true);
MapData maps = Simulator.makeMaps(cells, plate, lowThreshold, true);
GraphWithMapData data = Simulator.makeGraph(plate, maps, lowThreshold, highThreshold, true);
MatchingResult results = Simulator.matchCDR3s(plate, data, lowThreshold, highThreshold,
maxOccupancyDiff,minOverlapPercent, true);
//result writer
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
writer.writeResultsToFile();

17
src/main/java/Vertex.java Normal file
View File

@@ -0,0 +1,17 @@
/**
 * Immutable pairing of a peptide identifier with its occupancy count.
 *
 * <p>Both values are stored exactly as supplied (including {@code null});
 * no validation or copying is performed. Equality and hashing are inherited
 * from {@link Object}, so two instances with the same contents are still
 * distinct objects.
 */
public class Vertex {

    private final Integer occupancy;
    private final Integer peptide;

    /**
     * Creates a vertex holding the given values.
     *
     * @param peptide   the peptide value to store
     * @param occupancy the occupancy value to store
     */
    public Vertex(Integer peptide, Integer occupancy) {
        this.occupancy = occupancy;
        this.peptide = peptide;
    }

    /**
     * @return the occupancy value passed to the constructor
     */
    public Integer getOccupancy() {
        return this.occupancy;
    }

    /**
     * @return the peptide value passed to the constructor
     */
    public Integer getPeptide() {
        return this.peptide;
    }
}