From 5246cc4a0ccf9ba41acb6b0bff70edf47b5243e7 Mon Sep 17 00:00:00 2001 From: efischer Date: Sun, 27 Feb 2022 15:35:07 -0600 Subject: [PATCH] Re-implement command line options --- src/main/java/BiGpairSEQ.java | 4 +- src/main/java/CommandLineInterface.java | 526 ++++++++++++++---------- src/main/java/Plate.java | 4 +- 3 files changed, 307 insertions(+), 227 deletions(-) diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java index 935888c..3e67a68 100644 --- a/src/main/java/BiGpairSEQ.java +++ b/src/main/java/BiGpairSEQ.java @@ -23,8 +23,8 @@ public class BiGpairSEQ { } else { //This will be uncommented when command line arguments are re-implemented. - //CommandLineInterface.startCLI(args); - System.out.println("Command line arguments are still being re-implemented."); + CommandLineInterface.startCLI(args); + //System.out.println("Command line arguments are still being re-implemented."); } } diff --git a/src/main/java/CommandLineInterface.java b/src/main/java/CommandLineInterface.java index 0c527b1..aadfb3a 100644 --- a/src/main/java/CommandLineInterface.java +++ b/src/main/java/CommandLineInterface.java @@ -1,5 +1,8 @@ import org.apache.commons.cli.*; +import java.io.IOException; +import java.util.stream.Stream; + /* * Class for parsing options passed to program from command line * @@ -29,6 +32,8 @@ import org.apache.commons.cli.*; * cellfile : name of the cell sample file to use as input * platefile : name of the sample plate file to use as input * output : name of the output file + * graphml : output a graphml file + * binary : output a serialized binary object file * * Match flags: * graphFile : name of graph and data file to use as input @@ -49,23 +54,156 @@ public class CommandLineInterface { //should be three separate programs. //There might be a way to do it with option parameters? - //main options set + //main options set - for the four different program modes + Options mainOptions = buildMainOptions(); + CommandLineParser parser = new DefaultParser(); + try{ + CommandLine line = parser.parse(mainOptions, args); + + if (line.hasOption("cells")) { + Options cellOptions = buildCellOptions(); + line = parser.parse(cellOptions, args); + Integer number = Integer.valueOf(line.getOptionValue("n")); + Integer diversity = Integer.valueOf(line.getOptionValue("d")); + String filename = line.getOptionValue("o"); + makeCells(filename, number, diversity); + } + + else if (line.hasOption("plate")) { + Options plateOptions = buildPlateOptions(); + line = parser.parse(plateOptions, args); + //get the cells + String cellFilename = line.getOptionValue("c"); + CellSample cells = getCells(cellFilename); + //get the rest of the parameters + Integer[] populations; + String outputFilename = line.getOptionValue("o"); + Integer numWells = Integer.parseInt(line.getOptionValue("w")); + Double dropoutRate = Double.parseDouble(line.getOptionValue("err")); + if (line.hasOption("random")) { + //Array holding values of minimum and maximum populations + Integer[] min_max = Stream.of(line.getOptionValues("random")) + .mapToInt(Integer::parseInt) + .boxed() + .toArray(Integer[]::new); + populations = BiGpairSEQ.getRand().ints(min_max[0], min_max[1] + 1) + .limit(numWells) + .boxed() + .toArray(Integer[]::new); + } + else if (line.hasOption("pop")) { + populations = Stream.of(line.getOptionValues("pop")) + .mapToInt(Integer::parseInt) + .boxed() + .toArray(Integer[]::new); + } + else{ + populations = new Integer[1]; + populations[0] = 1; + } + //make the plate + Plate plate; + if (line.hasOption("poisson")) { + Double stdDev = Math.sqrt(numWells); + plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false); + } + else if (line.hasOption("gaussian")) { + Double stdDev = Double.parseDouble(line.getOptionValue("stddev")); + plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false); + } + else { + assert line.hasOption("exponential"); + Double lambda = Double.parseDouble(line.getOptionValue("lambda")); + plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true); + } + PlateFileWriter writer = new PlateFileWriter(outputFilename, plate); + writer.writePlateFile(); + } + + else if (line.hasOption("graph")) { //Making a graph + Options graphOptions = buildGraphOptions(); + line = parser.parse(graphOptions, args); + String cellFilename = line.getOptionValue("c"); + String plateFilename = line.getOptionValue("p"); + String outputFilename = line.getOptionValue("o"); + //get cells + CellSample cells = getCells(cellFilename); + //get plate + Plate plate = getPlate(plateFilename); + GraphWithMapData graph = Simulator.makeGraph(cells, plate, false); + if (!line.hasOption("no-binary")) { //output binary file unless told not to + GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph); + writer.writeDataToFile(); + } + if (line.hasOption("graphml")) { //if told to, output graphml file + GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph); + gmlwriter.writeGraphToFile(); + } + } + + else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two + Options matchOptions = buildMatchCDR3options(); + line = parser.parse(matchOptions, args); + String graphFilename = line.getOptionValue("g"); + String outputFilename = line.getOptionValue("o"); + Integer minThreshold = Integer.parseInt(line.getOptionValue("min")); + Integer maxThreshold = Integer.parseInt(line.getOptionValue("max")); + Integer minOverlapPct; + if (line.hasOption("minpct")) { //see if this filter is being used + minOverlapPct = Integer.parseInt(line.getOptionValue("minpct")); + } + else { + minOverlapPct = 0; + } + Integer maxOccupancyDiff; + if (line.hasOption("maxdiff")) { //see if this filter is being used + maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff")); + } + else { + maxOccupancyDiff = Integer.MAX_VALUE; + } + GraphWithMapData graph = getGraph(graphFilename); + MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold, + maxOccupancyDiff, minOverlapPct, false); + MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result); + writer.writeResultsToFile(); + //can put a bunch of ifs for outputting various things from the MatchingResult to System.out here + //after I put those flags in the matchOptions + } + } + catch (ParseException exp) { + System.err.println("Parsing failed. Reason: " + exp.getMessage()); + } + } + + private static Option outputFileOption() { + Option outputFile = Option.builder("o") + .longOpt("output-file") + .hasArg() + .argName("filename") + .desc("Name of output file") + .required() + .build(); + return outputFile; + } + + private static Options buildMainOptions() { Options mainOptions = new Options(); Option makeCells = Option.builder("cells") .longOpt("make-cells") - .desc("Makes a file of distinct cells") + .desc("Makes a cell sample file of distinct T cells") .build(); Option makePlate = Option.builder("plates") .longOpt("make-plates") - .desc("Makes a sample plate file") + .desc("Makes a sample plate file. Requires a cell sample file.") .build(); Option makeGraph = Option.builder("graph") .longOpt("make-graph") - .desc("Makes a graph and data file") + .desc("Makes a graph/data file. Requires a cell sample file and a sample plate file") .build(); Option matchCDR3 = Option.builder("match") .longOpt("match-cdr3") - .desc("Match CDR3s. Requires a cell sample file and any number of plate files.") + .desc("Matches CDR3s. Requires a graph/data file.") .build(); OptionGroup mainGroup = new OptionGroup(); mainGroup.addOption(makeCells); @@ -74,255 +212,197 @@ public class CommandLineInterface { mainGroup.addOption(matchCDR3); mainGroup.setRequired(true); mainOptions.addOptionGroup(mainGroup); + return mainOptions; + } - //Reuse clones of this for other options groups, rather than making it lots of times - Option outputFile = Option.builder("o") - .longOpt("output-file") - .hasArg() - .argName("filename") - .desc("Name of output file") - .build(); - mainOptions.addOption(outputFile); - - //Options cellOptions = new Options(); - Option numCells = Option.builder("nc") + private static Options buildCellOptions() { + Options cellOptions = new Options(); + Option numCells = Option.builder("n") .longOpt("num-cells") .desc("The number of distinct cells to generate") .hasArg() .argName("number") - .build(); - mainOptions.addOption(numCells); - Option cdr1Freq = Option.builder("d") - .longOpt("peptide-diversity-factor") + .required().build(); + Option cdr3Diversity = Option.builder("d") + .longOpt("diversity-factor") + .desc("The factor by which unique CDR3s outnumber unique CDR1s") .hasArg() - .argName("number") - .desc("Number of distinct CDR3s for every CDR1") - .build(); - mainOptions.addOption(cdr1Freq); - //Option cellOutput = (Option) outputFile.clone(); - //cellOutput.setRequired(true); - //mainOptions.addOption(cellOutput); + .argName("factor") + .required().build(); + cellOptions.addOption(numCells); + cellOptions.addOption(cdr3Diversity); + cellOptions.addOption(outputFileOption()); + return cellOptions; + } - //Options plateOptions = new Options(); - Option inputCells = Option.builder("c") + private static Options buildPlateOptions() { + Options plateOptions = new Options(); + Option cellFile = Option.builder("c") // add this to plate options .longOpt("cell-file") + .desc("The cell sample file to use") .hasArg() - .argName("file") - .desc("The cell sample file used for filling wells") - .build(); - mainOptions.addOption(inputCells); - Option numWells = Option.builder("w") - .longOpt("num-wells") + .required().build(); + Option numWells = Option.builder("w")// add this to plate options + .longOpt("wells") + .desc("The number of wells on the sample plate") .hasArg() - .argName("number") - .desc("The number of wells on each plate") + .required().build(); + //options group for choosing with distribution to use + OptionGroup distributions = new OptionGroup();// add this to plate options + distributions.setRequired(true); + Option poisson = Option.builder("poisson") + .desc("Use a Poisson distribution for cell sample") .build(); - mainOptions.addOption(numWells); - Option numPlates = Option.builder("np") - .longOpt("num-plates") + Option gaussian = Option.builder("gaussian") + .desc("Use a Gaussian distribution for cell sample") + .build(); + Option exponential = Option.builder("exponential") + .desc("Use an exponential distribution for cell sample") + .build(); + distributions.addOption(poisson); + distributions.addOption(gaussian); + distributions.addOption(exponential); + //options group for statistical distribution parameters + OptionGroup statParams = new OptionGroup();// add this to plate options + Option stdDev = Option.builder("stddev") + .desc("Standard deviation for Gaussian distribution") .hasArg() - .argName("number") - .desc("The number of plate files to output") .build(); - mainOptions.addOption(numPlates); - //Option plateOutput = (Option) outputFile.clone(); - //plateOutput.setRequired(true); - //plateOutput.setDescription("Prefix for plate output filenames"); - //mainOptions.addOption(plateOutput); - Option plateErr = Option.builder("err") - .longOpt("drop-out-rate") + Option lambda = Option.builder("lambda") + .desc("Lambda value for exponential distribution") .hasArg() - .argName("number") - .desc("Well drop-out rate. (Probability between 0 and 1)") .build(); - mainOptions.addOption(plateErr); - Option plateConcentrations = Option.builder("t") - .longOpt("t-cells-per-well") + statParams.addOption(stdDev); + statParams.addOption(lambda); + //Option group for random plate or set populations + OptionGroup wellPopOptions = new OptionGroup(); // add this to plate options + wellPopOptions.setRequired(true); + Option randomWellPopulations = Option.builder("random") + .desc("Randomize well populations on sample plate.") .hasArgs() - .argName("number 1, number 2, ...") - .desc("Number of T cells per well for each plate section") + .argName("MIN_POP MAX_POP") .build(); - mainOptions.addOption(plateConcentrations); - -//different distributions, mutually exclusive - OptionGroup plateDistributions = new OptionGroup(); - Option plateExp = Option.builder("exponential") - .desc("Sample from distinct cells with exponential frequency distribution") - .build(); - plateDistributions.addOption(plateExp); - Option plateGaussian = Option.builder("gaussian") - .desc("Sample from distinct cells with gaussain frequency distribution") - .build(); - plateDistributions.addOption(plateGaussian); - Option platePoisson = Option.builder("poisson") - .desc("Sample from distinct cells with poisson frequency distribution") - .build(); - plateDistributions.addOption(platePoisson); - mainOptions.addOptionGroup(plateDistributions); - - Option plateStdDev = Option.builder("stddev") - .desc("Standard deviation for gaussian distribution") - .hasArg() - .argName("number") - .build(); - mainOptions.addOption(plateStdDev); - - Option plateLambda = Option.builder("lambda") - .desc("Lambda for exponential distribution") - .hasArg() - .argName("number") - .build(); - mainOptions.addOption(plateLambda); - - - -// -// String cellFile, String filename, Double stdDev, -// Integer numWells, Integer numSections, -// Integer[] concentrations, Double dropOutRate -// - - //Options matchOptions = new Options(); - inputCells.setDescription("The cell sample file to be used for matching."); - mainOptions.addOption(inputCells); - Option lowThresh = Option.builder("low") - .longOpt("low-threshold") - .hasArg() - .argName("number") - .desc("Sets the minimum occupancy overlap to attempt matching") - .build(); - mainOptions.addOption(lowThresh); - Option highThresh = Option.builder("high") - .longOpt("high-threshold") - .hasArg() - .argName("number") - .desc("Sets the maximum occupancy overlap to attempt matching") - .build(); - mainOptions.addOption(highThresh); - Option occDiff = Option.builder("occdiff") - .longOpt("occupancy-difference") - .hasArg() - .argName("Number") - .desc("Maximum difference in alpha/beta occupancy to attempt matching") - .build(); - mainOptions.addOption(occDiff); - Option overlapPer = Option.builder("ovper") - .longOpt("overlap-percent") - .hasArg() - .argName("Percent") - .desc("Minimum overlap percent to attempt matching (0 -100)") - .build(); - mainOptions.addOption(overlapPer); - Option inputPlates = Option.builder("p") - .longOpt("plate-files") + Option specificWellPopulations = Option.builder("pop") + .longOpt("populations") + .desc("The well populations for each section of the sample plate") .hasArgs() - .desc("Plate files to match") + .argName("SECTION_1_POP [SECTION_2_POP] [SECTION_3_POP] ...") .build(); - mainOptions.addOption(inputPlates); + Option dropoutRate = Option.builder("err") //add this to plate options + .longOpt("dropout-rate") + .hasArg() + .desc("The sequence dropout rate due to amplification error") + .argName("DROPOUT_RATE (value between 0.0 and 1.0)") + .required() + .build(); + wellPopOptions.addOption(randomWellPopulations); + wellPopOptions.addOption(specificWellPopulations); + plateOptions.addOption(cellFile); + plateOptions.addOption(numWells); + plateOptions.addOptionGroup(distributions); + plateOptions.addOptionGroup(statParams); + plateOptions.addOptionGroup(wellPopOptions); + plateOptions.addOption(dropoutRate); + plateOptions.addOption(outputFileOption()); + return plateOptions; + } + + private static Options buildGraphOptions() { + Options graphOptions = new Options(); + Option cellFilename = Option.builder("c") + .longOpt("cell-file") + .desc("Cell sample file to use for checking accuracy") + .hasArg() + .argName("CELL_FILENAME") + .required().build(); + Option plateFilename = Option.builder("p") + .longOpt("plate-filename") + .desc("Sample plate file (made from given cell sample file) to construct graph from") + .hasArg() + .argName("PLATE_FILENAME") + .required().build(); + Option outputGraphML = Option.builder("graphml") + .desc("Output GraphML file") + .build(); + Option outputSerializedBinary = Option.builder("no-binary") + .desc("Don't output serialized binary file") + .build(); + graphOptions.addOption(cellFilename); + graphOptions.addOption(plateFilename); + graphOptions.addOption(outputFileOption()); + return graphOptions; + } + + private static Options buildMatchCDR3options() { + Options matchCDR3options = new Options(); + Option graphFilename = Option.builder("g") + .longOpt("graph-file") + .desc("Graph/data file to use") + .hasArg() + .argName("GRAPH/DATA_FILENAME") + .required().build(); + Option minOccupancyOverlap = Option.builder("min") + .longOpt("min-overlap-size") + .desc("The minimum number of shared wells to attempt to match a sequence pair") + .hasArg() + .argName("MIN_OVERLAP") + .required().build(); + Option maxOccupancyOverlap = Option.builder("max") + .longOpt("max_overlap_size") + .desc("The maximum number of shared wells to attempt to match a sequence pair") + .hasArg() + .argName("MAX_OVERLAP") + .required().build(); + Option minOverlapPercent = Option.builder("minpct") + .longOpt("min-overlap-percent") + .desc("The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching") + .hasArg() + .build(); + Option maxOccupancyDifference = Option.builder("maxdiff") + .longOpt("max-occupancy-difference") + .desc("The maximum difference in total occupancy between two sequences to attempt matching") + .hasArg() + .build(); + matchCDR3options.addOption(graphFilename); + matchCDR3options.addOption(minOccupancyOverlap); + matchCDR3options.addOption(maxOccupancyOverlap); + matchCDR3options.addOption(minOverlapPercent); + matchCDR3options.addOption(maxOccupancyDifference); + matchCDR3options.addOption(outputFileOption()); + return matchCDR3options; + } - CommandLineParser parser = new DefaultParser(); - try { - CommandLine line = parser.parse(mainOptions, args); - if(line.hasOption("match")){ - //line = parser.parse(mainOptions, args); - //String cellFile = line.getOptionValue("c"); - String graphFile = line.getOptionValue("g"); - Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh)); - Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh)); - Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff)); - Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer)); - for(String plate: line.getOptionValues("p")) { - matchCDR3s(graphFile, lowThreshold, highThreshold, occupancyDifference, overlapPercent); - } - } - else if(line.hasOption("cells")){ - //line = parser.parse(mainOptions, args); - String filename = line.getOptionValue("o"); - Integer numDistCells = Integer.valueOf(line.getOptionValue("nc")); - Integer freq = Integer.valueOf(line.getOptionValue("d")); - makeCells(filename, numDistCells, freq); - } - else if(line.hasOption("plates")){ - //line = parser.parse(mainOptions, args); - String cellFile = line.getOptionValue("c"); - String filenamePrefix = line.getOptionValue("o"); - Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w")); - Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np")); - String[] concentrationsToUseString = line.getOptionValues("t"); - Integer numSections = concentrationsToUseString.length; + private static CellSample getCells(String cellFilename) { + assert cellFilename != null; + CellFileReader reader = new CellFileReader(cellFilename); + return reader.getCellSample(); + } - Integer[] concentrationsToUse = new Integer[numSections]; - for(int i = 0; i