Files
BiGpairSEQ/src/main/java/CommandLineInterface.java
2022-03-01 08:40:34 -06:00

469 lines
21 KiB
Java

import org.apache.commons.cli.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Stream;
/*
* Class for parsing options passed to program from command line
*
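* Top-level flags (exactly one is required, and it must be the first argument):
* help : to print the help menu for every mode
* version : to print the program version number
* cells : to make a cell sample file
* plate : to make a sample plate file
* graph : to make a graph and data file
* match : to do a cdr3 matching (WITH OR WITHOUT MAKING A RESULTS FILE. May just want to print summary for piping.)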
*
* Cell flags:
* n (num-cells) : number of distinct cells to generate
* d (diversity-factor) : factor by which unique CDR3s outnumber unique CDR1s
* o (output-file) : name of the output file
*
* Plate flags:
* c (cell-file) : name of the cell sample file to use as input
* w (wells) : the number of wells on the plate
* poisson | gaussian | exponential : the statistical distribution to use (exactly one is required)
* (if gaussian) stddev : the standard deviation of the gaussian distribution
* (if exponential) lambda : the lambda value of the exponential distribution
* random : randomize well populations, takes a minimum argument and a maximum argument (use either random or pop)
* pop : number of t cells per well per section (number of arguments determines number of sections)
* err : plate dropout rate, double from 0.0 to 1.0
* o (output-file) : name of the output file
*
* Graph flags:
* c (cell-file) : name of the cell sample file to use as input
* p (plate-filename) : name of the sample plate file to use as input
* o (output-file) : name of the output file
* graphml : (optional) also output a graphml file
* nb (no-binary) : (optional) do not output the serialized binary object file, which is written by default
*
* Match flags:
* g (graph-file) : name of graph and data file to use as input
* min : minimum number of overlap wells to attempt a matching
* max : the maximum number of overlap wells to attempt a matching
* maxdiff : (optional) the maximum difference in occupancy to attempt a matching
* minpct : (optional) the minimum percent overlap to attempt a matching.
* o (output-file) : (optional) the filename to write results to; if absent, no results file is written
* print-metadata : (optional) print all result metadata to System.out for piping
*
*/
/**
 * Command-line front end. The first argument selects a mode (help, version, cells, plate,
 * graph or match); the remaining arguments are parsed against that mode's own option set.
 */
public class CommandLineInterface {

    /**
     * Parses the command line and runs the selected mode.
     *
     * <p>Malformed command lines (unknown or missing options, non-numeric values where a
     * number is expected, inconsistent option combinations) are reported on System.err as
     * "Parsing failed. Reason: ..." and the method returns without running anything.
     *
     * @param args the raw program arguments; the mode flag must be the first element.
     *             A null or empty array is reported as a parse failure.
     */
    public static void startCLI(String[] args) {
        // Arrays.copyOfRange(args, 0, 1) on an empty array would pad the result with a null
        // token, so clamp the split point to the number of arguments actually present.
        String[] allArgs = (args == null) ? new String[0] : args;
        String[] modeArgs = Arrays.copyOfRange(allArgs, 0, Math.min(1, allArgs.length));
        String[] modeOptionArgs = Arrays.copyOfRange(allArgs, modeArgs.length, allArgs.length);

        Options mainOptions = buildMainOptions();
        CommandLineParser parser = new DefaultParser();
        try {
            CommandLine line = parser.parse(mainOptions, modeArgs);
            if (line.hasOption("help")) {
                printHelp(mainOptions);
            }
            else if (line.hasOption("version")) {
                System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
            }
            else if (line.hasOption("cells")) {
                runCellsMode(parser, modeOptionArgs);
            }
            else if (line.hasOption("plate")) {
                runPlateMode(parser, modeOptionArgs);
            }
            else if (line.hasOption("graph")) {
                runGraphMode(parser, modeOptionArgs);
            }
            else if (line.hasOption("match")) { //can add a flag for which match type in future, split this in two
                runMatchMode(parser, modeOptionArgs);
            }
        }
        catch (ParseException exp) {
            System.err.println("Parsing failed. Reason: " + exp.getMessage());
        }
    }

    /** Prints the usage text for the top-level flags followed by the usage text of each mode. */
    private static void printHelp(Options mainOptions) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("BiGpairSEQ_Sim.jar", mainOptions);
        System.out.println();
        formatter.printHelp("BiGpairSEQ_Sim.jar -cells", buildCellOptions());
        System.out.println();
        formatter.printHelp("BiGpairSEQ_Sim.jar -plate", buildPlateOptions());
        System.out.println();
        formatter.printHelp("BiGpairSEQ_Sim.jar -graph", buildGraphOptions());
        System.out.println();
        formatter.printHelp("BiGpairSEQ_Sim.jar -match", buildMatchCDR3options());
    }

    /** Handles -cells: parses the cell options and writes a cell sample file. */
    private static void runCellsMode(CommandLineParser parser, String[] optionArgs) throws ParseException {
        CommandLine line = parser.parse(buildCellOptions(), optionArgs);
        Integer number = parseIntOption(line, "n");
        Integer diversity = parseIntOption(line, "d");
        String filename = line.getOptionValue("o");
        makeCells(filename, number, diversity);
    }

    /** Handles -plate: parses the plate options, builds the plate and writes it to a file. */
    private static void runPlateMode(CommandLineParser parser, String[] optionArgs) throws ParseException {
        CommandLine line = parser.parse(buildPlateOptions(), optionArgs);
        String cellFilename = line.getOptionValue("c");
        String outputFilename = line.getOptionValue("o");
        Integer numWells = parseIntOption(line, "w");
        if (numWells < 1) {
            throw new ParseException("Option -w expects a positive number of wells but got " + numWells);
        }
        Double dropoutRate = parseDoubleOption(line, "err");

        // Work out the distribution parameter before touching any file, so that a missing
        // -stddev / -lambda is reported as a usage error rather than surfacing later.
        Double distributionParameter;
        boolean exponential;
        if (line.hasOption("poisson")) {
            // NOTE(review): the value handed to Plate here is sqrt(number of wells); confirm
            // that the well count (rather than e.g. the cell count) is the intended quantity.
            distributionParameter = Math.sqrt(numWells);
            exponential = false;
        }
        else if (line.hasOption("gaussian")) {
            distributionParameter = parseDependentDoubleOption(line, "stddev", "gaussian");
            exponential = false;
        }
        else {
            // the distribution group is required, so this is the only remaining choice
            assert line.hasOption("exponential");
            distributionParameter = parseDependentDoubleOption(line, "lambda", "exponential");
            exponential = true;
        }

        //get the cells
        CellSample cells = getCells(cellFilename);
        //get the well populations
        Integer[] populations = wellPopulations(line, numWells);
        //make the plate
        Plate plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate,
                distributionParameter, exponential);
        PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
        writer.writePlateFile();
    }

    /**
     * Determines the well populations for plate mode.
     *
     * @return with -random, one value per well drawn from [min, max] using BiGpairSEQ.getRand();
     *         with -pop, the values given on the command line; otherwise a single population of 1
     * @throws ParseException if a value is not an integer or the -random bounds are unusable
     */
    private static Integer[] wellPopulations(CommandLine line, Integer numWells) throws ParseException {
        if (line.hasOption("random")) {
            //Array holding values of minimum and maximum populations
            Integer[] minMax = parseIntValues(line, "random");
            if (minMax.length != 2) {
                throw new ParseException("Option -random expects exactly two values: <min> <max>");
            }
            int min = minMax[0];
            int max = minMax[1];
            // The upper bound passed below is exclusive, hence max + 1; that must not overflow
            // and must be strictly greater than min.
            if (min > max || max == Integer.MAX_VALUE) {
                throw new ParseException("Option -random expects <min> <= <max> but got " + min + " and " + max);
            }
            return BiGpairSEQ.getRand().ints(min, max + 1)
                    .limit(numWells)
                    .boxed()
                    .toArray(Integer[]::new);
        }
        if (line.hasOption("pop")) {
            return parseIntValues(line, "pop");
        }
        return new Integer[] {1};
    }

    /** Handles -graph: builds a graph from a cell sample file and a plate file and writes it out. */
    private static void runGraphMode(CommandLineParser parser, String[] optionArgs) throws ParseException {
        CommandLine line = parser.parse(buildGraphOptions(), optionArgs);
        String cellFilename = line.getOptionValue("c");
        String plateFilename = line.getOptionValue("p");
        String outputFilename = line.getOptionValue("o");
        //get cells
        CellSample cells = getCells(cellFilename);
        //get plate
        Plate plate = getPlate(plateFilename);
        GraphWithMapData graph = Simulator.makeGraph(cells, plate, false);
        if (!line.hasOption("no-binary")) { //output binary file unless told not to
            GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph, false);
            writer.writeDataToFile();
        }
        if (line.hasOption("graphml")) { //if told to, output graphml file
            GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph);
            gmlwriter.writeGraphToFile();
        }
    }

    /** Handles -match: runs CDR3 matching on a graph file and optionally writes/prints the results. */
    private static void runMatchMode(CommandLineParser parser, String[] optionArgs) throws ParseException {
        CommandLine line = parser.parse(buildMatchCDR3options(), optionArgs);
        String graphFilename = line.getOptionValue("g");
        // null when -o is absent, in which case no results file is written
        String outputFilename = line.getOptionValue("o");
        Integer minThreshold = parseIntOption(line, "min");
        Integer maxThreshold = parseIntOption(line, "max");
        // the two optional filters default to values that filter nothing out
        int minOverlapPct = line.hasOption("minpct") ? parseIntOption(line, "minpct") : 0;
        int maxOccupancyDiff = line.hasOption("maxdiff") ? parseIntOption(line, "maxdiff") : Integer.MAX_VALUE;

        GraphWithMapData graph = getGraph(graphFilename);
        if (graph == null) {
            // getGraph has already reported why the file could not be read
            return;
        }
        MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
                maxOccupancyDiff, minOverlapPct, false);
        if (outputFilename != null) {
            MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
            writer.writeResultsToFile();
        }
        //can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
        //after I put those flags in the matchOptions
        if (line.hasOption("print-metadata")) {
            for (String k : result.getMetadata().keySet()) {
                System.out.println(k + ": " + result.getMetadata().get(k));
            }
        }
    }

    /** Returns the value of {@code opt}, or throws a ParseException if the option has no value. */
    private static String requireValue(CommandLine line, String opt) throws ParseException {
        String raw = line.getOptionValue(opt);
        if (raw == null) {
            throw new ParseException("Missing value for option: -" + opt);
        }
        return raw;
    }

    /** Builds the ParseException used for a value that is not a valid number, keeping the cause. */
    private static ParseException invalidNumber(String opt, String raw, String expected, NumberFormatException cause) {
        ParseException failure = new ParseException(
                "Option -" + opt + " expects " + expected + " but got '" + raw + "'");
        failure.initCause(cause);
        return failure;
    }

    /** Converts one raw value of {@code opt} to an int, reporting bad input as a ParseException. */
    private static int toInt(String opt, String raw) throws ParseException {
        try {
            return Integer.parseInt(raw);
        }
        catch (NumberFormatException ex) {
            throw invalidNumber(opt, raw, "an integer", ex);
        }
    }

    /** Reads the single value of {@code opt} as an int. */
    private static int parseIntOption(CommandLine line, String opt) throws ParseException {
        return toInt(opt, requireValue(line, opt));
    }

    /** Reads every value of the multi-valued option {@code opt} as an Integer, in order. */
    private static Integer[] parseIntValues(CommandLine line, String opt) throws ParseException {
        String[] raw = line.getOptionValues(opt);
        if (raw == null) {
            throw new ParseException("Missing value for option: -" + opt);
        }
        Integer[] parsed = new Integer[raw.length];
        for (int i = 0; i < raw.length; i++) {
            parsed[i] = toInt(opt, raw[i]);
        }
        return parsed;
    }

    /** Reads the single value of {@code opt} as a double. */
    private static double parseDoubleOption(CommandLine line, String opt) throws ParseException {
        String raw = requireValue(line, opt);
        try {
            return Double.parseDouble(raw);
        }
        catch (NumberFormatException ex) {
            throw invalidNumber(opt, raw, "a number", ex);
        }
    }

    /**
     * Reads the double value of {@code opt}, which is only mandatory because {@code flag}
     * was given; its absence is reported in terms of that flag.
     */
    private static double parseDependentDoubleOption(CommandLine line, String opt, String flag) throws ParseException {
        if (!line.hasOption(opt)) {
            throw new ParseException("Option -" + flag + " requires -" + opt + " <value>");
        }
        return parseDoubleOption(line, opt);
    }

    /** Builds the required -o / --output-file option shared by the cells, plate and graph modes. */
    private static Option outputFileOption() {
        return Option.builder("o")
                .longOpt("output-file")
                .hasArg()
                .argName("filename")
                .desc("Name of output file")
                .required()
                .build();
    }

    /** Builds the top-level mode flags; they form one required, mutually exclusive group. */
    private static Options buildMainOptions() {
        Options mainOptions = new Options();
        Option help = Option.builder("help")
                .desc("Displays this help menu")
                .build();
        Option makeCells = Option.builder("cells")
                .longOpt("make-cells")
                .desc("Makes a cell sample file of distinct T cells")
                .build();
        Option makePlate = Option.builder("plate")
                .longOpt("make-plate")
                .desc("Makes a sample plate file. Requires a cell sample file.")
                .build();
        Option makeGraph = Option.builder("graph")
                .longOpt("make-graph")
                .desc("Makes a graph/data file. Requires a cell sample file and a sample plate file")
                .build();
        Option matchCDR3 = Option.builder("match")
                .longOpt("match-cdr3")
                .desc("Matches CDR3s. Requires a graph/data file.")
                .build();
        Option printVersion = Option.builder("version")
                .desc("Prints the program version number to stdout")
                .build();
        OptionGroup mainGroup = new OptionGroup();
        mainGroup.addOption(help);
        mainGroup.addOption(printVersion);
        mainGroup.addOption(makeCells);
        mainGroup.addOption(makePlate);
        mainGroup.addOption(makeGraph);
        mainGroup.addOption(matchCDR3);
        mainGroup.setRequired(true);
        mainOptions.addOptionGroup(mainGroup);
        return mainOptions;
    }

    /** Builds the options accepted after -cells. */
    private static Options buildCellOptions() {
        Options cellOptions = new Options();
        Option numCells = Option.builder("n")
                .longOpt("num-cells")
                .desc("The number of distinct cells to generate")
                .hasArg()
                .argName("number")
                .required()
                .build();
        Option cdr3Diversity = Option.builder("d")
                .longOpt("diversity-factor")
                .desc("The factor by which unique CDR3s outnumber unique CDR1s")
                .hasArg()
                .argName("factor")
                .required()
                .build();
        cellOptions.addOption(numCells);
        cellOptions.addOption(cdr3Diversity);
        cellOptions.addOption(outputFileOption());
        return cellOptions;
    }

    /** Builds the options accepted after -plate. */
    private static Options buildPlateOptions() {
        Options plateOptions = new Options();
        Option cellFile = Option.builder("c")
                .longOpt("cell-file")
                .desc("The cell sample file to use")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option numWells = Option.builder("w")
                .longOpt("wells")
                .desc("The number of wells on the sample plate")
                .hasArg()
                .argName("number")
                .required()
                .build();
        //options group for choosing which distribution to use (exactly one is required)
        OptionGroup distributions = new OptionGroup();
        distributions.setRequired(true);
        Option poisson = Option.builder("poisson")
                .desc("Use a Poisson distribution for cell sample")
                .build();
        Option gaussian = Option.builder("gaussian")
                .desc("Use a Gaussian distribution for cell sample")
                .build();
        Option exponential = Option.builder("exponential")
                .desc("Use an exponential distribution for cell sample")
                .build();
        distributions.addOption(poisson);
        distributions.addOption(gaussian);
        distributions.addOption(exponential);
        //options group for statistical distribution parameters; the group itself is optional,
        //so plate mode checks that the parameter matching the chosen distribution is present
        OptionGroup statParams = new OptionGroup();
        Option stdDev = Option.builder("stddev")
                .desc("If using -gaussian flag, standard deviation for distribution")
                .hasArg()
                .argName("value")
                .build();
        Option lambda = Option.builder("lambda")
                .desc("If using -exponential flag, lambda value for distribution")
                .hasArg()
                .argName("value")
                .build();
        statParams.addOption(stdDev);
        statParams.addOption(lambda);
        //Option group for random plate or set populations (exactly one is required)
        OptionGroup wellPopOptions = new OptionGroup();
        wellPopOptions.setRequired(true);
        Option randomWellPopulations = Option.builder("random")
                .desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
                .hasArgs()
                .numberOfArgs(2)
                .argName("min> <max")
                .build();
        Option specificWellPopulations = Option.builder("pop")
                .desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
                .hasArgs()
                .argName("number [number]...")
                .build();
        wellPopOptions.addOption(randomWellPopulations);
        wellPopOptions.addOption(specificWellPopulations);
        Option dropoutRate = Option.builder("err")
                .hasArg()
                .desc("The sequence dropout rate due to amplification error. (0.0 - 1.0)")
                .argName("rate")
                .required()
                .build();
        plateOptions.addOption(cellFile);
        plateOptions.addOption(numWells);
        plateOptions.addOptionGroup(distributions);
        plateOptions.addOptionGroup(statParams);
        plateOptions.addOptionGroup(wellPopOptions);
        plateOptions.addOption(dropoutRate);
        plateOptions.addOption(outputFileOption());
        return plateOptions;
    }

    /** Builds the options accepted after -graph. */
    private static Options buildGraphOptions() {
        Options graphOptions = new Options();
        Option cellFilename = Option.builder("c")
                .longOpt("cell-file")
                .desc("Cell sample file to use for checking pairing accuracy")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option plateFilename = Option.builder("p")
                .longOpt("plate-filename")
                .desc("Sample plate file from which to construct graph")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option outputGraphML = Option.builder("graphml")
                .desc("(Optional) Output GraphML file")
                .build();
        Option outputSerializedBinary = Option.builder("nb")
                .longOpt("no-binary")
                .desc("(Optional) Don't output serialized binary file")
                .build();
        graphOptions.addOption(cellFilename);
        graphOptions.addOption(plateFilename);
        graphOptions.addOption(outputFileOption());
        graphOptions.addOption(outputGraphML);
        graphOptions.addOption(outputSerializedBinary);
        return graphOptions;
    }

    /** Builds the options accepted after -match. */
    private static Options buildMatchCDR3options() {
        Options matchCDR3options = new Options();
        Option graphFilename = Option.builder("g")
                .longOpt("graph-file")
                .desc("The graph/data file to use")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option minOccupancyOverlap = Option.builder("min")
                .desc("The minimum number of shared wells to attempt to match a sequence pair")
                .hasArg()
                .argName("number")
                .required()
                .build();
        Option maxOccupancyOverlap = Option.builder("max")
                .desc("The maximum number of shared wells to attempt to match a sequence pair")
                .hasArg()
                .argName("number")
                .required()
                .build();
        Option minOverlapPercent = Option.builder("minpct")
                .desc("(Optional) The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching. (0 - 100) ")
                .hasArg()
                .argName("percent")
                .build();
        Option maxOccupancyDifference = Option.builder("maxdiff")
                .desc("(Optional) The maximum difference in total occupancy between two sequences to attempt matching.")
                .hasArg()
                .argName("number")
                .build();
        Option outputFile = Option.builder("o") //can't call the method this time, because this one's optional
                .longOpt("output-file")
                .hasArg()
                .argName("filename")
                .desc("(Optional) Name of the output file. If not present, no file will be written.")
                .build();
        //options for output to System.out
        //TODO: planned but not yet implemented: --print-error (pairing error rate),
        //--print-attempt (pairing attempt rate), --print-correct (number of correct pairs),
        //--print-incorrect (number of incorrect pairs)
        Option printMetadata = Option.builder().longOpt("print-metadata")
                .desc("(Optional) Print all metadata to stdout")
                .build();
        matchCDR3options.addOption(graphFilename)
                .addOption(minOccupancyOverlap)
                .addOption(maxOccupancyOverlap)
                .addOption(minOverlapPercent)
                .addOption(maxOccupancyDifference)
                .addOption(outputFile)
                .addOption(printMetadata);
        return matchCDR3options;
    }

    /** Reads the cell sample stored in the named cell sample file. */
    private static CellSample getCells(String cellFilename) {
        assert cellFilename != null;
        CellFileReader reader = new CellFileReader(cellFilename);
        return reader.getCellSample();
    }

    /** Reads the sample plate stored in the named plate file. */
    private static Plate getPlate(String plateFilename) {
        assert plateFilename != null;
        PlateFileReader reader = new PlateFileReader(plateFilename);
        return reader.getSamplePlate();
    }

    /**
     * Reads the graph and its associated data from the named graph/data file.
     *
     * @return the graph data, or null if the file could not be read, in which case the
     *         failure has already been reported on System.err
     */
    private static GraphWithMapData getGraph(String graphFilename) {
        assert graphFilename != null;
        try {
            GraphDataObjectReader reader = new GraphDataObjectReader(graphFilename, false);
            return reader.getData();
        }
        catch (IOException ex) {
            System.err.println("Could not read graph file '" + graphFilename + "': " + ex.getMessage());
            return null;
        }
    }

    /**
     * Generates a cell sample and writes it to a file. Public so it can be called directly
     * when running from the command line.
     *
     * @param filename name of the cell sample file to write
     * @param numCells first argument passed to the CellSample constructor (number of cells to generate)
     * @param cdr1Freq second argument passed to the CellSample constructor (the -d diversity factor
     *                 when invoked through the CLI)
     */
    public static void makeCells(String filename, Integer numCells, Integer cdr1Freq) {
        CellSample sample = new CellSample(numCells, cdr1Freq);
        CellFileWriter writer = new CellFileWriter(filename, sample);
        writer.writeCellsToFile();
    }
}