17 Commits
v1.5 ... v2.0

10 changed files with 454 additions and 299 deletions

View File

@@ -48,8 +48,13 @@ For example, to run the program with 32 gigabytes of memory, use the command:
`java -Xmx32G -jar BiGpairSEQ_Sim.jar` `java -Xmx32G -jar BiGpairSEQ_Sim.jar`
Once running, BiGpairSEQ_Sim has an interactive, menu-driven CLI for generating files and simulating TCR pairing. The There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
main menu looks like this: use the -help flag:
`java -jar BiGpairSEQ_Sim.jar -help`
If no command line arguments are given, BiGpairSEQ_Sim will launch with an interactive, menu-driven CLI for
generating files and simulating TCR pairing. The main menu looks like this:
``` ```
--------BiGPairSEQ SIMULATOR-------- --------BiGPairSEQ SIMULATOR--------
@@ -78,6 +83,7 @@ By default, the Options menu looks like this:
0) Return to main menu 0) Return to main menu
``` ```
### INPUT/OUTPUT ### INPUT/OUTPUT
To run the simulation, the program reads and writes 4 kinds of files: To run the simulation, the program reads and writes 4 kinds of files:
@@ -290,11 +296,12 @@ slightly less time than the simulation itself. Real elapsed time from start to f
* ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE * ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE
* ~~Custom vertex type with attribute for sequence occupancy?~~ ABANDONED * ~~Custom vertex type with attribute for sequence occupancy?~~ ABANDONED
* Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix. * Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.
* Re-implement command line arguments, to enable scripting and statistical simulation studies * ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE
* Re-implement CDR1 matching method * Re-implement CDR1 matching method
* Implement Duan and Su's maximum weight matching algorithm * Implement Duan and Su's maximum weight matching algorithm
* Add controllable algorithm-type parameter? * Add controllable algorithm-type parameter?
* This would be fun and valuable, but probably take more time than I have for a hobby project. * This would be fun and valuable, but probably take more time than I have for a hobby project.
* Implement Vose's alias method for arbitrary statistical distributions of cells
## CITATIONS ## CITATIONS

View File

@@ -23,8 +23,8 @@ public class BiGpairSEQ {
} }
else { else {
//This will be uncommented when command line arguments are re-implemented. //This will be uncommented when command line arguments are re-implemented.
//CommandLineInterface.startCLI(args); CommandLineInterface.startCLI(args);
System.out.println("Command line arguments are still being re-implemented."); //System.out.println("Command line arguments are still being re-implemented.");
} }
} }

View File

@@ -1,5 +1,9 @@
import org.apache.commons.cli.*; import org.apache.commons.cli.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Stream;
/* /*
* Class for parsing options passed to program from command line * Class for parsing options passed to program from command line
* *
@@ -29,6 +33,8 @@ import org.apache.commons.cli.*;
* cellfile : name of the cell sample file to use as input * cellfile : name of the cell sample file to use as input
* platefile : name of the sample plate file to use as input * platefile : name of the sample plate file to use as input
* output : name of the output file * output : name of the output file
* graphml : output a graphml file
* binary : output a serialized binary object file
* *
* Match flags: * Match flags:
* graphFile : name of graph and data file to use as input * graphFile : name of graph and data file to use as input
@@ -43,286 +49,379 @@ import org.apache.commons.cli.*;
public class CommandLineInterface { public class CommandLineInterface {
public static void startCLI(String[] args) { public static void startCLI(String[] args) {
//These command line options are a big mess //Options sets for the different modes
//Really, I don't think command line tools are expected to work in this many different modes Options mainOptions = buildMainOptions();
//making cells, making plates, and matching are the sort of thing that UNIX philosophy would say Options cellOptions = buildCellOptions();
//should be three separate programs. Options plateOptions = buildPlateOptions();
//There might be a way to do it with option parameters? Options graphOptions = buildGraphOptions();
Options matchOptions = buildMatchCDR3options();
//main options set
Options mainOptions = new Options();
Option makeCells = Option.builder("cells")
.longOpt("make-cells")
.desc("Makes a file of distinct cells")
.build();
Option makePlate = Option.builder("plates")
.longOpt("make-plates")
.desc("Makes a sample plate file")
.build();
Option makeGraph = Option.builder("graph")
.longOpt("make-graph")
.desc("Makes a graph and data file")
.build();
Option matchCDR3 = Option.builder("match")
.longOpt("match-cdr3")
.desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
.build();
OptionGroup mainGroup = new OptionGroup();
mainGroup.addOption(makeCells);
mainGroup.addOption(makePlate);
mainGroup.addOption(makeGraph);
mainGroup.addOption(matchCDR3);
mainGroup.setRequired(true);
mainOptions.addOptionGroup(mainGroup);
//Reuse clones of this for other options groups, rather than making it lots of times
Option outputFile = Option.builder("o")
.longOpt("output-file")
.hasArg()
.argName("filename")
.desc("Name of output file")
.build();
mainOptions.addOption(outputFile);
//Options cellOptions = new Options();
Option numCells = Option.builder("nc")
.longOpt("num-cells")
.desc("The number of distinct cells to generate")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(numCells);
Option cdr1Freq = Option.builder("d")
.longOpt("peptide-diversity-factor")
.hasArg()
.argName("number")
.desc("Number of distinct CDR3s for every CDR1")
.build();
mainOptions.addOption(cdr1Freq);
//Option cellOutput = (Option) outputFile.clone();
//cellOutput.setRequired(true);
//mainOptions.addOption(cellOutput);
//Options plateOptions = new Options();
Option inputCells = Option.builder("c")
.longOpt("cell-file")
.hasArg()
.argName("file")
.desc("The cell sample file used for filling wells")
.build();
mainOptions.addOption(inputCells);
Option numWells = Option.builder("w")
.longOpt("num-wells")
.hasArg()
.argName("number")
.desc("The number of wells on each plate")
.build();
mainOptions.addOption(numWells);
Option numPlates = Option.builder("np")
.longOpt("num-plates")
.hasArg()
.argName("number")
.desc("The number of plate files to output")
.build();
mainOptions.addOption(numPlates);
//Option plateOutput = (Option) outputFile.clone();
//plateOutput.setRequired(true);
//plateOutput.setDescription("Prefix for plate output filenames");
//mainOptions.addOption(plateOutput);
Option plateErr = Option.builder("err")
.longOpt("drop-out-rate")
.hasArg()
.argName("number")
.desc("Well drop-out rate. (Probability between 0 and 1)")
.build();
mainOptions.addOption(plateErr);
Option plateConcentrations = Option.builder("t")
.longOpt("t-cells-per-well")
.hasArgs()
.argName("number 1, number 2, ...")
.desc("Number of T cells per well for each plate section")
.build();
mainOptions.addOption(plateConcentrations);
//different distributions, mutually exclusive
OptionGroup plateDistributions = new OptionGroup();
Option plateExp = Option.builder("exponential")
.desc("Sample from distinct cells with exponential frequency distribution")
.build();
plateDistributions.addOption(plateExp);
Option plateGaussian = Option.builder("gaussian")
.desc("Sample from distinct cells with gaussain frequency distribution")
.build();
plateDistributions.addOption(plateGaussian);
Option platePoisson = Option.builder("poisson")
.desc("Sample from distinct cells with poisson frequency distribution")
.build();
plateDistributions.addOption(platePoisson);
mainOptions.addOptionGroup(plateDistributions);
Option plateStdDev = Option.builder("stddev")
.desc("Standard deviation for gaussian distribution")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(plateStdDev);
Option plateLambda = Option.builder("lambda")
.desc("Lambda for exponential distribution")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(plateLambda);
//
// String cellFile, String filename, Double stdDev,
// Integer numWells, Integer numSections,
// Integer[] concentrations, Double dropOutRate
//
//Options matchOptions = new Options();
inputCells.setDescription("The cell sample file to be used for matching.");
mainOptions.addOption(inputCells);
Option lowThresh = Option.builder("low")
.longOpt("low-threshold")
.hasArg()
.argName("number")
.desc("Sets the minimum occupancy overlap to attempt matching")
.build();
mainOptions.addOption(lowThresh);
Option highThresh = Option.builder("high")
.longOpt("high-threshold")
.hasArg()
.argName("number")
.desc("Sets the maximum occupancy overlap to attempt matching")
.build();
mainOptions.addOption(highThresh);
Option occDiff = Option.builder("occdiff")
.longOpt("occupancy-difference")
.hasArg()
.argName("Number")
.desc("Maximum difference in alpha/beta occupancy to attempt matching")
.build();
mainOptions.addOption(occDiff);
Option overlapPer = Option.builder("ovper")
.longOpt("overlap-percent")
.hasArg()
.argName("Percent")
.desc("Minimum overlap percent to attempt matching (0 -100)")
.build();
mainOptions.addOption(overlapPer);
Option inputPlates = Option.builder("p")
.longOpt("plate-files")
.hasArgs()
.desc("Plate files to match")
.build();
mainOptions.addOption(inputPlates);
CommandLineParser parser = new DefaultParser(); CommandLineParser parser = new DefaultParser();
try { try{
CommandLine line = parser.parse(mainOptions, args); CommandLine line = parser.parse(mainOptions, Arrays.copyOfRange(args, 0, 1));
if(line.hasOption("match")){
//line = parser.parse(mainOptions, args); if (line.hasOption("help")) {
//String cellFile = line.getOptionValue("c"); HelpFormatter formatter = new HelpFormatter();
String graphFile = line.getOptionValue("g"); formatter.printHelp("BiGpairSEQ_Sim", mainOptions);
Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh)); System.out.println();
Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh)); formatter.printHelp("BiGpairSEQ_SIM -cells", cellOptions);
Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff)); System.out.println();
Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer)); formatter.printHelp("BiGpairSEQ_Sim -plate", plateOptions);
for(String plate: line.getOptionValues("p")) { System.out.println();
matchCDR3s(graphFile, lowThreshold, highThreshold, occupancyDifference, overlapPercent); formatter.printHelp("BiGpairSEQ_Sim -graph", graphOptions);
} System.out.println();
formatter.printHelp("BiGpairSEQ_Sim -match", matchOptions);
} }
else if(line.hasOption("cells")){ else if (line.hasOption("cells")) {
//line = parser.parse(mainOptions, args); line = parser.parse(cellOptions, Arrays.copyOfRange(args, 1, args.length));
Integer number = Integer.valueOf(line.getOptionValue("n"));
Integer diversity = Integer.valueOf(line.getOptionValue("d"));
String filename = line.getOptionValue("o"); String filename = line.getOptionValue("o");
Integer numDistCells = Integer.valueOf(line.getOptionValue("nc")); makeCells(filename, number, diversity);
Integer freq = Integer.valueOf(line.getOptionValue("d"));
makeCells(filename, numDistCells, freq);
} }
else if(line.hasOption("plates")){
//line = parser.parse(mainOptions, args);
String cellFile = line.getOptionValue("c");
String filenamePrefix = line.getOptionValue("o");
Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
String[] concentrationsToUseString = line.getOptionValues("t");
Integer numSections = concentrationsToUseString.length;
Integer[] concentrationsToUse = new Integer[numSections]; else if (line.hasOption("plate")) {
for(int i = 0; i <numSections; i++){ line = parser.parse(plateOptions, Arrays.copyOfRange(args, 1, args.length));
concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]); //get the cells
String cellFilename = line.getOptionValue("c");
CellSample cells = getCells(cellFilename);
//get the rest of the parameters
Integer[] populations;
String outputFilename = line.getOptionValue("o");
Integer numWells = Integer.parseInt(line.getOptionValue("w"));
Double dropoutRate = Double.parseDouble(line.getOptionValue("err"));
if (line.hasOption("random")) {
//Array holding values of minimum and maximum populations
Integer[] min_max = Stream.of(line.getOptionValues("random"))
.mapToInt(Integer::parseInt)
.boxed()
.toArray(Integer[]::new);
populations = BiGpairSEQ.getRand().ints(min_max[0], min_max[1] + 1)
.limit(numWells)
.boxed()
.toArray(Integer[]::new);
} }
Double dropOutRate = Double.valueOf(line.getOptionValue("err")); else if (line.hasOption("pop")) {
if(line.hasOption("exponential")){ populations = Stream.of(line.getOptionValues("pop"))
Double lambda = Double.valueOf(line.getOptionValue("lambda")); .mapToInt(Integer::parseInt)
for(int i = 1; i <= numPlatesToMake; i++){ .boxed()
makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate, .toArray(Integer[]::new);
concentrationsToUse,dropOutRate);
}
} }
else if(line.hasOption("gaussian")){ else{
Double stdDev = Double.valueOf(line.getOptionValue("std-dev")); populations = new Integer[1];
for(int i = 1; i <= numPlatesToMake; i++){ populations[0] = 1;
makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate, }
concentrationsToUse,dropOutRate); //make the plate
} Plate plate;
if (line.hasOption("poisson")) {
Double stdDev = Math.sqrt(numWells);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
}
else if (line.hasOption("gaussian")) {
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
}
else {
assert line.hasOption("exponential");
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
}
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
writer.writePlateFile();
}
else if (line.hasOption("graph")) { //Making a graph
line = parser.parse(graphOptions, Arrays.copyOfRange(args, 1, args.length));
String cellFilename = line.getOptionValue("c");
String plateFilename = line.getOptionValue("p");
String outputFilename = line.getOptionValue("o");
//get cells
CellSample cells = getCells(cellFilename);
//get plate
Plate plate = getPlate(plateFilename);
GraphWithMapData graph = Simulator.makeGraph(cells, plate, false);
if (!line.hasOption("no-binary")) { //output binary file unless told not to
GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph, false);
writer.writeDataToFile();
} }
else if(line.hasOption("poisson")){ if (line.hasOption("graphml")) { //if told to, output graphml file
for(int i = 1; i <= numPlatesToMake; i++){ GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph);
makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate, gmlwriter.writeGraphToFile();
concentrationsToUse,dropOutRate);
}
} }
} }
else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two
line = parser.parse(matchOptions, Arrays.copyOfRange(args, 1, args.length));
String graphFilename = line.getOptionValue("g");
String outputFilename = line.getOptionValue("o");
Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
Integer minOverlapPct;
if (line.hasOption("minpct")) { //see if this filter is being used
minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
}
else {
minOverlapPct = 0;
}
Integer maxOccupancyDiff;
if (line.hasOption("maxdiff")) { //see if this filter is being used
maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
}
else {
maxOccupancyDiff = Integer.MAX_VALUE;
}
GraphWithMapData graph = getGraph(graphFilename);
MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
maxOccupancyDiff, minOverlapPct, false);
MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
writer.writeResultsToFile();
//can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
//after I put those flags in the matchOptions
}
} }
catch (ParseException exp) { catch (ParseException exp) {
System.err.println("Parsing failed. Reason: " + exp.getMessage()); System.err.println("Parsing failed. Reason: " + exp.getMessage());
} }
} }
/**
 * Builds a fresh, required {@code -o} / {@code --output-file} option taking one
 * filename argument. A new instance is created on every call, so each mode's
 * option set gets its own copy.
 *
 * @return the output-file option
 */
private static Option outputFileOption() {
    return Option.builder("o")
            .required()
            .hasArg()
            .argName("filename")
            .longOpt("output-file")
            .desc("Name of output file")
            .build();
}
/**
 * Builds the top-level option set: a single required, mutually exclusive group
 * that selects the program mode (help, cells, plate, graph or match).
 *
 * @return the options used to parse the mode flag
 */
private static Options buildMainOptions() {
    OptionGroup modes = new OptionGroup();
    modes.addOption(Option.builder("help")
            .desc("Displays this help menu")
            .build());
    modes.addOption(Option.builder("cells")
            .longOpt("make-cells")
            .desc("Makes a cell sample file of distinct T cells")
            .build());
    modes.addOption(Option.builder("plate")
            .longOpt("make-plate")
            .desc("Makes a sample plate file. Requires a cell sample file.")
            .build());
    modes.addOption(Option.builder("graph")
            .longOpt("make-graph")
            .desc("Makes a graph/data file. Requires a cell sample file and a sample plate file")
            .build());
    modes.addOption(Option.builder("match")
            .longOpt("match-cdr3")
            .desc("Matches CDR3s. Requires a graph/data file.")
            .build());
    //exactly one mode flag has to be supplied
    modes.setRequired(true);

    Options options = new Options();
    options.addOptionGroup(modes);
    return options;
}
/**
 * Builds the option set for the cell-sample mode: the number of distinct cells,
 * the CDR3/CDR1 diversity factor, and the output file. All three are required.
 *
 * @return the options for making a cell sample file
 */
private static Options buildCellOptions() {
    Option cellCount = Option.builder("n")
            .longOpt("num-cells")
            .hasArg()
            .argName("number")
            .desc("The number of distinct cells to generate")
            .required()
            .build();
    Option diversityFactor = Option.builder("d")
            .longOpt("diversity-factor")
            .hasArg()
            .argName("factor")
            .desc("The factor by which unique CDR3s outnumber unique CDR1s")
            .required()
            .build();
    return new Options()
            .addOption(cellCount)
            .addOption(diversityFactor)
            .addOption(outputFileOption());
}
/**
 * Builds the option set for the sample-plate mode.
 *
 * Required: the cell sample file (-c), the number of wells (-w), the dropout
 * rate (-err), the output file (-o), exactly one distribution flag
 * (-poisson, -gaussian or -exponential) and exactly one well-population
 * option (-random or -pop).
 *
 * Optional: at most one distribution parameter (-stddev or -lambda). Nothing in
 * this option set ties the parameter to the chosen distribution flag.
 *
 * @return the options for making a sample plate file
 */
private static Options buildPlateOptions() {
    Options plateOptions = new Options();

    Option cellFile = Option.builder("c")
            .longOpt("cell-file")
            .desc("The cell sample file to use")
            .hasArg()
            .argName("filename")
            .required().build();
    Option numWells = Option.builder("w")
            .longOpt("wells")
            .desc("The number of wells on the sample plate")
            .hasArg()
            .argName("number")
            .required().build();
    Option dropoutRate = Option.builder("err")
            .hasArg()
            .desc("The sequence dropout rate due to amplification error. (0.0 - 1.0)")
            .argName("rate")
            .required()
            .build();

    //option group for choosing which distribution to use
    OptionGroup distributions = new OptionGroup();
    distributions.setRequired(true);
    Option poisson = Option.builder("poisson")
            .desc("Use a Poisson distribution for cell sample")
            .build();
    Option gaussian = Option.builder("gaussian")
            .desc("Use a Gaussian distribution for cell sample")
            .build();
    Option exponential = Option.builder("exponential")
            .desc("Use an exponential distribution for cell sample")
            .build();
    distributions.addOption(poisson);
    distributions.addOption(gaussian);
    distributions.addOption(exponential);

    //option group for statistical distribution parameters
    OptionGroup statParams = new OptionGroup();
    Option stdDev = Option.builder("stddev")
            .desc("If using -gaussian flag, standard deviation for distribution")
            .hasArg()
            .argName("value")
            .build();
    Option lambda = Option.builder("lambda")
            .desc("If using -exponential flag, lambda value for distribution")
            .hasArg()
            .argName("value")
            .build();
    statParams.addOption(stdDev);
    statParams.addOption(lambda);

    //option group for random well populations or explicitly listed populations
    OptionGroup wellPopOptions = new OptionGroup();
    wellPopOptions.setRequired(true);
    Option randomWellPopulations = Option.builder("random")
            .desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
            .numberOfArgs(2) //exactly two values: minimum and maximum
            .argName("minimum maximum")
            .build();
    Option specificWellPopulations = Option.builder("pop")
            .desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
            .hasArgs()
            .argName("number [number]...")
            .build();
    wellPopOptions.addOption(randomWellPopulations);
    wellPopOptions.addOption(specificWellPopulations);

    plateOptions.addOption(cellFile);
    plateOptions.addOption(numWells);
    plateOptions.addOptionGroup(distributions);
    plateOptions.addOptionGroup(statParams);
    plateOptions.addOptionGroup(wellPopOptions);
    plateOptions.addOption(dropoutRate);
    plateOptions.addOption(outputFileOption());
    return plateOptions;
}
/**
 * Builds the option set for the graph mode: required cell sample file, required
 * sample plate file and required output file, plus two optional output flags
 * (-graphml and -nb / --no-binary).
 *
 * @return the options for making a graph/data file
 */
private static Options buildGraphOptions() {
    Option cellInput = Option.builder("c")
            .longOpt("cell-file")
            .hasArg()
            .argName("filename")
            .desc("Cell sample file to use for checking accuracy")
            .required()
            .build();
    Option plateInput = Option.builder("p")
            .longOpt("plate-filename")
            .hasArg()
            .argName("filename")
            .desc("Sample plate file (made from given cell sample file) to construct graph from")
            .required()
            .build();
    Option graphMlFlag = Option.builder("graphml")
            .desc("Output GraphML file")
            .build();
    //presence of this flag suppresses the serialized binary output
    Option noBinaryFlag = Option.builder("nb")
            .longOpt("no-binary")
            .desc("Don't output serialized binary file")
            .build();
    return new Options()
            .addOption(cellInput)
            .addOption(plateInput)
            .addOption(outputFileOption())
            .addOption(graphMlFlag)
            .addOption(noBinaryFlag);
}
/**
 * Builds the option set for the CDR3 matching mode. The graph/data file (-g),
 * the minimum and maximum overlap thresholds (-min, -max) and the output file
 * (-o) are required; the overlap-percent (-minpct) and occupancy-difference
 * (-maxdiff) filters are optional.
 *
 * @return the options for matching CDR3s
 */
private static Options buildMatchCDR3options() {
    Options options = new Options();
    options.addOption(Option.builder("g")
            .longOpt("graph-file")
            .hasArg()
            .argName("filename")
            .desc("The graph/data file to use")
            .required()
            .build());
    options.addOption(Option.builder("min")
            .hasArg()
            .argName("number")
            .desc("The minimum number of shared wells to attempt to match a sequence pair")
            .required()
            .build());
    options.addOption(Option.builder("max")
            .hasArg()
            .argName("number")
            .desc("The maximum number of shared wells to attempt to match a sequence pair")
            .required()
            .build());
    options.addOption(Option.builder("minpct")
            .hasArg()
            .argName("percent")
            .desc("(Optional) The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching. (0 - 100) ")
            .build());
    options.addOption(Option.builder("maxdiff")
            .hasArg()
            .argName("number")
            .desc("(Optional) The maximum difference in total occupancy between two sequences to attempt matching.")
            .build());
    options.addOption(outputFileOption());
    //options for output to System.out
    //Option printPairingErrorRate = Option.builder()
    return options;
}
/**
 * Loads the cell sample stored in the named file via a CellFileReader.
 *
 * @param cellFilename name of the cell sample file; must not be null
 * @return the cell sample produced by the reader
 */
private static CellSample getCells(String cellFilename) {
    assert cellFilename != null;
    return new CellFileReader(cellFilename).getCellSample();
}
/**
 * Loads the sample plate stored in the named file via a PlateFileReader.
 *
 * @param plateFilename name of the sample plate file; must not be null
 * @return the sample plate produced by the reader
 */
private static Plate getPlate(String plateFilename) {
    assert plateFilename != null;
    return new PlateFileReader(plateFilename).getSamplePlate();
}
/**
 * Loads graph and map data from the named file using a non-verbose
 * GraphDataObjectReader.
 *
 * @param graphFilename name of the graph/data file; must not be null
 * @return the data read from the file, or null if an IOException occurred
 *         (the stack trace is printed in that case)
 */
private static GraphWithMapData getGraph(String graphFilename) {
    assert graphFilename != null;
    GraphWithMapData loaded = null;
    try {
        loaded = new GraphDataObjectReader(graphFilename, false).getData();
    }
    catch (IOException ioFailure) {
        //NOTE(review): the failure is only reported here; callers receive null
        ioFailure.printStackTrace();
    }
    return loaded;
}
//for calling from command line //for calling from command line
public static void makeCells(String filename, Integer numCells, Integer cdr1Freq){ public static void makeCells(String filename, Integer numCells, Integer cdr1Freq) {
CellSample sample = new CellSample(numCells, cdr1Freq); CellSample sample = new CellSample(numCells, cdr1Freq);
CellFileWriter writer = new CellFileWriter(filename, sample); CellFileWriter writer = new CellFileWriter(filename, sample);
writer.writeCellsToFile(); writer.writeCellsToFile();
} }
/**
 * Makes a sample plate filled with the exponential fill method from the cells
 * in the given cell file, then writes the plate to a file.
 *
 * @param cellFile       name of the cell sample file to read
 * @param filename       name of the plate file to write
 * @param lambda         lambda value passed to the exponential fill
 * @param numWells       number of wells on the plate
 * @param concentrations per-section values passed to the Plate constructor
 * @param dropOutRate    dropout rate passed to the Plate constructor
 */
public static void makePlateExp(String cellFile, String filename, Double lambda,
        Integer numWells, Integer[] concentrations, Double dropOutRate) {
    CellFileReader source = new CellFileReader(cellFile);
    Plate filledPlate = new Plate(numWells, dropOutRate, concentrations);
    filledPlate.fillWellsExponential(source.getFilename(),
            source.getListOfDistinctCellsDEPRECATED(), lambda);
    new PlateFileWriter(filename, filledPlate).writePlateFile();
}
/**
 * Makes a sample plate using fillWells with a standard deviation equal to the
 * square root of the cell count reported by the reader, then writes the plate
 * to a file.
 *
 * @param cellFile       name of the cell sample file to read
 * @param filename       name of the plate file to write
 * @param numWells       number of wells on the plate
 * @param concentrations per-section values passed to the Plate constructor
 * @param dropOutRate    dropout rate passed to the Plate constructor
 */
private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
        Integer[] concentrations, Double dropOutRate) {
    CellFileReader source = new CellFileReader(cellFile);
    //standard deviation is derived from the number of cells in the file
    Double rootOfCellCount = Math.sqrt(source.getCellCountDEPRECATED());
    Plate filledPlate = new Plate(numWells, dropOutRate, concentrations);
    filledPlate.fillWells(source.getFilename(),
            source.getListOfDistinctCellsDEPRECATED(), rootOfCellCount);
    new PlateFileWriter(filename, filledPlate).writePlateFile();
}
/**
 * Makes a sample plate using fillWells with the given standard deviation, then
 * writes the plate to a file.
 *
 * @param cellFile       name of the cell sample file to read
 * @param filename       name of the plate file to write
 * @param stdDev         standard deviation passed to fillWells
 * @param numWells       number of wells on the plate
 * @param concentrations per-section values passed to the Plate constructor
 * @param dropOutRate    dropout rate passed to the Plate constructor
 */
private static void makePlate(String cellFile, String filename, Double stdDev,
        Integer numWells, Integer[] concentrations, Double dropOutRate) {
    CellFileReader source = new CellFileReader(cellFile);
    Plate filledPlate = new Plate(numWells, dropOutRate, concentrations);
    filledPlate.fillWells(source.getFilename(),
            source.getListOfDistinctCellsDEPRECATED(), stdDev);
    new PlateFileWriter(filename, filledPlate).writePlateFile();
}
/*
 * Placeholder for command-line CDR3 matching. The body is empty, so calling
 * this method does nothing with any of its arguments.
 */
private static void matchCDR3s(String graphFile, Integer lowThreshold, Integer highThreshold,
Integer occupancyDifference, Integer overlapPercent) {
//intentionally empty: not implemented
}
} }

View File

@@ -1,10 +1,12 @@
import java.io.*; import java.io.*;
public class GraphDataObjectReader { public class GraphDataObjectReader {
private GraphWithMapData data; private GraphWithMapData data;
private String filename; private String filename;
private boolean verbose = true;
public GraphDataObjectReader(String filename) throws IOException { public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
if(!filename.matches(".*\\.ser")){ if(!filename.matches(".*\\.ser")){
filename = filename + ".ser"; filename = filename + ".ser";
} }

View File

@@ -1,3 +1,5 @@
import org.jgrapht.Graph;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
@@ -7,6 +9,7 @@ public class GraphDataObjectWriter {
private GraphWithMapData data; private GraphWithMapData data;
private String filename; private String filename;
private boolean verbose = true;
public GraphDataObjectWriter(String filename, GraphWithMapData data) { public GraphDataObjectWriter(String filename, GraphWithMapData data) {
if(!filename.matches(".*\\.ser")){ if(!filename.matches(".*\\.ser")){
@@ -16,13 +19,24 @@ public class GraphDataObjectWriter {
this.data = data; this.data = data;
} }
/**
 * Creates a writer for the given data, appending ".ser" to the filename when
 * it does not already end with that extension.
 *
 * @param filename name of the output file
 * @param data     the graph and map data to write
 * @param verbose  whether writeDataToFile prints progress messages
 */
public GraphDataObjectWriter(String filename, GraphWithMapData data, boolean verbose) {
    this.verbose = verbose;
    this.filename = filename.matches(".*\\.ser") ? filename : filename + ".ser";
    this.data = data;
}
public void writeDataToFile() { public void writeDataToFile() {
try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename)); try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename));
ObjectOutputStream out = new ObjectOutputStream(bufferedOut); ObjectOutputStream out = new ObjectOutputStream(bufferedOut);
){ ){
System.out.println("Writing graph and occupancy data to file. This may take some time."); if(verbose) {
System.out.println("File I/O time is not included in results."); System.out.println("Writing graph and occupancy data to file. This may take some time.");
System.out.println("File I/O time is not included in results.");
}
out.writeObject(data); out.writeObject(data);
} catch (IOException ex) { } catch (IOException ex) {
ex.printStackTrace(); ex.printStackTrace();

View File

@@ -227,16 +227,14 @@ public class InteractiveInterface {
Plate samplePlate; Plate samplePlate;
PlateFileWriter writer; PlateFileWriter writer;
if(exponential){ if(exponential){
samplePlate = new Plate(numWells, dropOutRate, populations); samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
samplePlate.fillWellsExponential(cellFile, cells.getCells(), lambda);
writer = new PlateFileWriter(filename, samplePlate); writer = new PlateFileWriter(filename, samplePlate);
} }
else { else {
if (poisson) { if (poisson) {
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
} }
samplePlate = new Plate(numWells, dropOutRate, populations); samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
samplePlate.fillWells(cellFile, cells.getCells(), stdDev);
writer = new PlateFileWriter(filename, samplePlate); writer = new PlateFileWriter(filename, samplePlate);
} }
System.out.println("Writing Sample Plate to file"); System.out.println("Writing Sample Plate to file");
@@ -292,7 +290,7 @@ public class InteractiveInterface {
else { else {
System.out.println("Reading Sample Plate file: " + plateFile); System.out.println("Reading Sample Plate file: " + plateFile);
PlateFileReader plateReader = new PlateFileReader(plateFile); PlateFileReader plateReader = new PlateFileReader(plateFile);
plate = new Plate(plateReader.getFilename(), plateReader.getWells()); plate = plateReader.getSamplePlate();
if(BiGpairSEQ.cachePlate()) { if(BiGpairSEQ.cachePlate()) {
BiGpairSEQ.setPlateInMemory(plate, plateFile); BiGpairSEQ.setPlateInMemory(plate, plateFile);
} }
@@ -306,8 +304,7 @@ public class InteractiveInterface {
System.out.println("Returning to main menu."); System.out.println("Returning to main menu.");
} }
else{ else{
List<Integer[]> cells = cellSample.getCells(); GraphWithMapData data = Simulator.makeGraph(cellSample, plate, true);
GraphWithMapData data = Simulator.makeGraph(cells, plate, true);
assert filename != null; assert filename != null;
if(BiGpairSEQ.outputBinary()) { if(BiGpairSEQ.outputBinary()) {
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data); GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
@@ -378,7 +375,7 @@ public class InteractiveInterface {
data = BiGpairSEQ.getGraphInMemory(); data = BiGpairSEQ.getGraphInMemory();
} }
else { else {
GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename); GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename, true);
data = dataReader.getData(); data = dataReader.getData();
if(BiGpairSEQ.cacheGraph()) { if(BiGpairSEQ.cacheGraph()) {
BiGpairSEQ.setGraphInMemory(data, graphFilename); BiGpairSEQ.setGraphInMemory(data, graphFilename);

View File

@@ -21,15 +21,15 @@ public class MatchingResult {
* well populations * * well populations *
* total alphas found * * total alphas found *
* total betas found * * total betas found *
* high overlap threshold * high overlap threshold *
* low overlap threshold * low overlap threshold *
* maximum occupancy difference * maximum occupancy difference *
* minimum overlap percent * minimum overlap percent *
* pairing attempt rate * pairing attempt rate *
* correct pairing count * correct pairing count *
* incorrect pairing count * incorrect pairing count *
* pairing error rate * pairing error rate *
* simulation time * simulation time (seconds)
*/ */
this.metadata = metadata; this.metadata = metadata;
this.comments = new ArrayList<>(); this.comments = new ArrayList<>();
@@ -91,6 +91,22 @@ public class MatchingResult {
return Integer.parseInt(metadata.get("total beta count")); return Integer.parseInt(metadata.get("total beta count"));
} }
//put in the rest of these methods following the same pattern public Integer getHighOverlapThreshold() { return Integer.parseInt(metadata.get("high overlap threshold"));}
/**
 * Looks up the "low overlap threshold" metadata entry and parses it as an integer.
 *
 * @return the parsed value of the "low overlap threshold" entry
 * @throws NumberFormatException if the stored text is not a parsable integer
 */
public Integer getLowOverlapThreshold() {
    String rawValue = metadata.get("low overlap threshold");
    return Integer.valueOf(rawValue);
}
/**
 * Looks up the "maximum occupancy difference" metadata entry and parses it as an integer.
 *
 * @return the parsed value of the "maximum occupancy difference" entry
 * @throws NumberFormatException if the stored text is not a parsable integer
 */
public Integer getMaxOccupancyDifference() {
    String rawValue = metadata.get("maximum occupancy difference");
    return Integer.valueOf(rawValue);
}
/**
 * Looks up the "minimum overlap percent" metadata entry and parses it as an integer.
 *
 * @return the parsed value of the "minimum overlap percent" entry
 * @throws NumberFormatException if the stored text is not a parsable integer
 */
public Integer getMinOverlapPercent() {
    String rawValue = metadata.get("minimum overlap percent");
    return Integer.valueOf(rawValue);
}
/**
 * Looks up the "pairing attempt rate" metadata entry and parses it as a double.
 *
 * @return the parsed value of the "pairing attempt rate" entry
 * @throws NumberFormatException if the stored text is not a parsable number
 */
public Double getPairingAttemptRate() {
    String rawValue = metadata.get("pairing attempt rate");
    return Double.valueOf(rawValue);
}
/**
 * Looks up the "correct pairing count" metadata entry and parses it as an integer.
 *
 * @return the parsed value of the "correct pairing count" entry
 * @throws NumberFormatException if the stored text is not a parsable integer
 */
public Integer getCorrectPairingCount() {
    String rawValue = metadata.get("correct pairing count");
    return Integer.valueOf(rawValue);
}
/**
 * Looks up the "incorrect pairing count" metadata entry and parses it as an integer.
 *
 * @return the parsed value of the "incorrect pairing count" entry
 * @throws NumberFormatException if the stored text is not a parsable integer
 */
public Integer getIncorrectPairingCount() {
    String rawValue = metadata.get("incorrect pairing count");
    return Integer.valueOf(rawValue);
}
/**
 * Looks up the "pairing error rate" metadata entry and parses it as a double.
 *
 * @return the parsed value of the "pairing error rate" entry
 * @throws NumberFormatException if the stored text is not a parsable number
 */
public Double getPairingErrorRate() {
    String rawValue = metadata.get("pairing error rate");
    return Double.valueOf(rawValue);
}
/**
 * Returns the "simulation time (seconds)" metadata entry exactly as stored,
 * without any numeric parsing.
 *
 * @return the stored text for "simulation time (seconds)"
 */
public String getSimulationTime() {
    String storedTime = metadata.get("simulation time (seconds)");
    return storedTime;
}
} }

View File

@@ -8,7 +8,9 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
import java.util.*; import java.util.*;
public class Plate { public class Plate {
private CellSample cells;
private String sourceFile; private String sourceFile;
private String filename;
private List<List<Integer[]>> wells; private List<List<Integer[]>> wells;
private final Random rand = BiGpairSEQ.getRand(); private final Random rand = BiGpairSEQ.getRand();
private int size; private int size;
@@ -18,6 +20,25 @@ public class Plate {
private double lambda; private double lambda;
boolean exponential = false; boolean exponential = false;
/**
 * Creates a plate from a cell sample and fills its wells straight away.
 *
 * @param cells            cell sample whose {@code getCells()} list is distributed into the wells
 * @param cellFilename     stored as this plate's source file name
 * @param numWells         stored as the plate size
 * @param populations      stored as the per-section population array read by the fill routines
 * @param dropoutRate      stored as the plate's error value
 * @param stdDev_or_lambda taken as lambda when {@code exponential} is true, otherwise as the
 *                         standard deviation
 * @param exponential      selects {@code fillWellsExponential} (true) or {@code fillWells} (false)
 */
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
             double dropoutRate, double stdDev_or_lambda, boolean exponential) {
    // Field setup has to come first: the fill routines read state such as populations.
    this.cells = cells;
    this.sourceFile = cellFilename;
    this.size = numWells;
    this.wells = new ArrayList<>();
    this.error = dropoutRate;
    this.populations = populations;
    this.exponential = exponential;
    // NOTE(review): the filename field is not assigned in this constructor, so getFilename()
    // looks like it returns null for plates built this way -- confirm callers expect that.
    if (!this.exponential) {
        this.stdDev = stdDev_or_lambda;
        fillWells(cells.getCells(), this.stdDev);
        return;
    }
    this.lambda = stdDev_or_lambda;
    fillWellsExponential(cells.getCells(), this.lambda);
}
public Plate(int size, double error, Integer[] populations) { public Plate(int size, double error, Integer[] populations) {
this.size = size; this.size = size;
@@ -26,8 +47,9 @@ public class Plate {
wells = new ArrayList<>(); wells = new ArrayList<>();
} }
public Plate(String sourceFileName, List<List<Integer[]>> wells) { //constructor for returning a Plate from a PlateFileReader
this.sourceFile = sourceFileName; public Plate(String filename, List<List<Integer[]>> wells) {
this.filename = filename;
this.wells = wells; this.wells = wells;
this.size = wells.size(); this.size = wells.size();
@@ -43,10 +65,9 @@ public class Plate {
} }
} }
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){ private void fillWellsExponential(List<Integer[]> cells, double lambda){
this.lambda = lambda; this.lambda = lambda;
exponential = true; exponential = true;
sourceFile = sourceFileName;
int numSections = populations.length; int numSections = populations.length;
int section = 0; int section = 0;
double m; double m;
@@ -74,9 +95,8 @@ public class Plate {
} }
} }
public void fillWells(String sourceFileName, List<Integer[]> cells, double stdDev) { private void fillWells( List<Integer[]> cells, double stdDev) {
this.stdDev = stdDev; this.stdDev = stdDev;
sourceFile = sourceFileName;
int numSections = populations.length; int numSections = populations.length;
int section = 0; int section = 0;
double m; double m;
@@ -159,4 +179,6 @@ public class Plate {
public String getSourceFileName() { public String getSourceFileName() {
return sourceFile; return sourceFile;
} }
/**
 * Returns the value of this plate's {@code filename} field.
 *
 * @return the stored filename
 */
public String getFilename() {
    return this.filename;
}
} }

View File

@@ -56,11 +56,8 @@ public class PlateFileReader {
} }
public List<List<Integer[]>> getWells() { public Plate getSamplePlate() {
return wells; return new Plate(filename, wells);
} }
public String getFilename() {
return filename;
}
} }

View File

@@ -24,8 +24,9 @@ public class Simulator implements GraphModificationFunctions {
private static final int cdr1BetaIndex = 3; private static final int cdr1BetaIndex = 3;
//Make the graph needed for matching CDR3s //Make the graph needed for matching CDR3s
public static GraphWithMapData makeGraph(List<Integer[]> distinctCells, Plate samplePlate, boolean verbose) { public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) {
Instant start = Instant.now(); Instant start = Instant.now();
List<Integer[]> distinctCells = cellSample.getCells();
int[] alphaIndex = {cdr3AlphaIndex}; int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex}; int[] betaIndex = {cdr3BetaIndex};
@@ -113,7 +114,7 @@ public class Simulator implements GraphModificationFunctions {
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap, distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time); plateBtoVMap, alphaWellCounts, betaWellCounts, time);
//Set source file name in graph to name of sample plate //Set source file name in graph to name of sample plate
output.setSourceFilename(samplePlate.getSourceFileName()); output.setSourceFilename(samplePlate.getFilename());
//return GraphWithMapData object //return GraphWithMapData object
return output; return output;
} }
@@ -279,7 +280,7 @@ public class Simulator implements GraphModificationFunctions {
metadata.put("correct pairing count", Integer.toString(trueCount)); metadata.put("correct pairing count", Integer.toString(trueCount));
metadata.put("incorrect pairing count", Integer.toString(falseCount)); metadata.put("incorrect pairing count", Integer.toString(falseCount));
metadata.put("pairing error rate", pairingErrorRateTrunc.toString()); metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
metadata.put("simulation time", nf.format(time.toSeconds())); metadata.put("simulation time (seconds)", nf.format(time.toSeconds()));
//create MatchingResult object //create MatchingResult object
MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time); MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
if(verbose){ if(verbose){