import org.apache.commons.cli.*; import java.io.IOException; import java.util.List; import java.util.Scanner; import java.util.InputMismatchException; import java.util.regex.Matcher; import java.util.regex.Pattern; // public class UserInterface { final static Scanner sc = new Scanner(System.in); static int input; static boolean quit = false; public static void main(String[] args) { //for now, commenting out all the command line argument stuff. // Refactoring to output files of graphs, so it would all need to change anyway. // if(args.length != 0){ // //These command line options are a big mess // //Really, I don't think command line tools are expected to work in this many different modes // //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say // //should be three separate programs. // //There might be a way to do it with option parameters? // // Options mainOptions = new Options(); // Option makeCells = Option.builder("cells") // .longOpt("make-cells") // .desc("Makes a file of distinct cells") // .build(); // Option makePlate = Option.builder("plates") // .longOpt("make-plates") // .desc("Makes a sample plate file") // .build(); // Option matchCDR3 = Option.builder("match") // .longOpt("match-cdr3") // .desc("Match CDR3s. Requires a cell sample file and any number of plate files.") // .build(); // OptionGroup mainGroup = new OptionGroup(); // mainGroup.addOption(makeCells); // mainGroup.addOption(makePlate); // mainGroup.addOption(matchCDR3); // mainGroup.setRequired(true); // mainOptions.addOptionGroup(mainGroup); // // //Reuse clones of this for other options groups, rather than making it lots of times // Option outputFile = Option.builder("o") // .longOpt("output-file") // .hasArg() // .argName("filename") // .desc("Name of output file") // .build(); // mainOptions.addOption(outputFile); // // //Options cellOptions = new Options(); // Option numCells = Option.builder("nc") // .longOpt("num-cells") // .desc("The number of distinct cells to generate") // .hasArg() // .argName("number") // .build(); // mainOptions.addOption(numCells); // Option cdr1Freq = Option.builder("d") // .longOpt("peptide-diversity-factor") // .hasArg() // .argName("number") // .desc("Number of distinct CDR3s for every CDR1") // .build(); // mainOptions.addOption(cdr1Freq); // //Option cellOutput = (Option) outputFile.clone(); // //cellOutput.setRequired(true); // //mainOptions.addOption(cellOutput); // // //Options plateOptions = new Options(); // Option inputCells = Option.builder("c") // .longOpt("cell-file") // .hasArg() // .argName("file") // .desc("The cell sample file used for filling wells") // .build(); // mainOptions.addOption(inputCells); // Option numWells = Option.builder("w") // .longOpt("num-wells") // .hasArg() // .argName("number") // .desc("The number of wells on each plate") // .build(); // mainOptions.addOption(numWells); // Option numPlates = Option.builder("np") // .longOpt("num-plates") // .hasArg() // .argName("number") // .desc("The number of plate files to output") // .build(); // mainOptions.addOption(numPlates); // //Option plateOutput = (Option) outputFile.clone(); // //plateOutput.setRequired(true); // //plateOutput.setDescription("Prefix for plate output filenames"); // //mainOptions.addOption(plateOutput); // Option plateErr = Option.builder("err") // .longOpt("drop-out-rate") // .hasArg() // .argName("number") // .desc("Well drop-out rate. (Probability between 0 and 1)") // .build(); // mainOptions.addOption(plateErr); // Option plateConcentrations = Option.builder("t") // .longOpt("t-cells-per-well") // .hasArgs() // .argName("number 1, number 2, ...") // .desc("Number of T cells per well for each plate section") // .build(); // mainOptions.addOption(plateConcentrations); // ////different distributions, mutually exclusive // OptionGroup plateDistributions = new OptionGroup(); // Option plateExp = Option.builder("exponential") // .desc("Sample from distinct cells with exponential frequency distribution") // .build(); // plateDistributions.addOption(plateExp); // Option plateGaussian = Option.builder("gaussian") // .desc("Sample from distinct cells with gaussain frequency distribution") // .build(); // plateDistributions.addOption(plateGaussian); // Option platePoisson = Option.builder("poisson") // .desc("Sample from distinct cells with poisson frequency distribution") // .build(); // plateDistributions.addOption(platePoisson); // mainOptions.addOptionGroup(plateDistributions); // // Option plateStdDev = Option.builder("stddev") // .desc("Standard deviation for gaussian distribution") // .hasArg() // .argName("number") // .build(); // mainOptions.addOption(plateStdDev); // // Option plateLambda = Option.builder("lambda") // .desc("Lambda for exponential distribution") // .hasArg() // .argName("number") // .build(); // mainOptions.addOption(plateLambda); // // // //// //// String cellFile, String filename, Double stdDev, //// Integer numWells, Integer numSections, //// Integer[] concentrations, Double dropOutRate //// // // //Options matchOptions = new Options(); // inputCells.setDescription("The cell sample file to be used for matching."); // mainOptions.addOption(inputCells); // Option lowThresh = Option.builder("low") // .longOpt("low-threshold") // .hasArg() // .argName("number") // .desc("Sets the minimum occupancy overlap to attempt matching") // .build(); // mainOptions.addOption(lowThresh); // Option highThresh = Option.builder("high") // .longOpt("high-threshold") // .hasArg() // .argName("number") // .desc("Sets the maximum occupancy overlap to attempt matching") // .build(); // mainOptions.addOption(highThresh); // Option occDiff = Option.builder("occdiff") // .longOpt("occupancy-difference") // .hasArg() // .argName("Number") // .desc("Maximum difference in alpha/beta occupancy to attempt matching") // .build(); // mainOptions.addOption(occDiff); // Option overlapPer = Option.builder("ovper") // .longOpt("overlap-percent") // .hasArg() // .argName("Percent") // .desc("Minimum overlap percent to attempt matching (0 -100)") // .build(); // mainOptions.addOption(overlapPer); // Option inputPlates = Option.builder("p") // .longOpt("plate-files") // .hasArgs() // .desc("Plate files to match") // .build(); // mainOptions.addOption(inputPlates); // // // // CommandLineParser parser = new DefaultParser(); // try { // CommandLine line = parser.parse(mainOptions, args); // if(line.hasOption("match")){ // //line = parser.parse(mainOptions, args); // String cellFile = line.getOptionValue("c"); // Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh)); // Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh)); // Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff)); // Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer)); // for(String plate: line.getOptionValues("p")) { // matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent); // } // } // else if(line.hasOption("cells")){ // //line = parser.parse(mainOptions, args); // String filename = line.getOptionValue("o"); // Integer numDistCells = Integer.valueOf(line.getOptionValue("nc")); // Integer freq = Integer.valueOf(line.getOptionValue("d")); // makeCells(filename, numDistCells, freq); // } // else if(line.hasOption("plates")){ // //line = parser.parse(mainOptions, args); // String cellFile = line.getOptionValue("c"); // String filenamePrefix = line.getOptionValue("o"); // Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w")); // Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np")); // String[] concentrationsToUseString = line.getOptionValues("t"); // Integer numSections = concentrationsToUseString.length; // // Integer[] concentrationsToUse = new Integer[numSections]; // for(int i = 0; i makeCells(); case 2 -> makePlate(); case 3 -> makeCDR3Graph(); case 4 -> matchCDR3s(); //case 6 -> matchCellsCDR1(); case 9 -> acknowledge(); case 0 -> quit = true; default -> throw new InputMismatchException("Invalid input."); } } catch (InputMismatchException | IOException ex) { System.out.println(ex); sc.next(); } } sc.close(); // } } private static void makeCells() { String filename = null; Integer numCells = 0; Integer cdr1Freq = 1; try { System.out.println("\nSimulated T-Cells consist of integer values representing:\n" + "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" + "* a pair of alpha and beta CDR1 peptides (not necessarily unique)."); System.out.println("\nThe cells will be written to a CSV file."); System.out.print("Please enter a file name: "); filename = sc.next(); System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences."); System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: "); cdr1Freq = sc.nextInt(); System.out.print("\nPlease enter the number of T-cells to generate: "); numCells = sc.nextInt(); if(numCells <= 0){ throw new InputMismatchException("Number of cells must be a positive integer."); } } catch (InputMismatchException ex) { System.out.println(ex); sc.next(); } CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq); assert filename != null; CellFileWriter writer = new CellFileWriter(filename, sample); writer.writeCellsToFile(); System.gc(); } // //for calling from command line // private static void makeCells(String filename, Integer numCells, Integer cdr1Freq){ // CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq); // CellFileWriter writer = new CellFileWriter(filename, sample); // writer.writeCellsToFile(); // } // // private static void makePlateExp(String cellFile, String filename, Double lambda, // Integer numWells, Integer[] concentrations, Double dropOutRate){ // CellFileReader cellReader = new CellFileReader(cellFile); // Plate samplePlate = new Plate(numWells, dropOutRate, concentrations); // samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda); // PlateFileWriter writer = new PlateFileWriter(filename, samplePlate); // writer.writePlateFile(); // } // // private static void makePlatePoisson(String cellFile, String filename, Integer numWells, // Integer[] concentrations, Double dropOutRate){ // CellFileReader cellReader = new CellFileReader(cellFile); // Double stdDev = Math.sqrt(cellReader.getCellCount()); // Plate samplePlate = new Plate(numWells, dropOutRate, concentrations); // samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev); // PlateFileWriter writer = new PlateFileWriter(filename, samplePlate); // writer.writePlateFile(); // } // // private static void makePlate(String cellFile, String filename, Double stdDev, // Integer numWells, Integer[] concentrations, Double dropOutRate){ // CellFileReader cellReader = new CellFileReader(cellFile); // Plate samplePlate = new Plate(numWells, dropOutRate, concentrations); // samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev); // PlateFileWriter writer = new PlateFileWriter(filename, samplePlate); // writer.writePlateFile(); // } //Output a CSV of sample plate private static void makePlate() { String cellFile = null; String filename = null; Double stdDev = 0.0; Integer numWells = 0; Integer numSections; Integer[] concentrations = {1}; Double dropOutRate = 0.0; boolean poisson = false; boolean exponential = false; double lambda = 1.5; try { System.out.println("\nSimulated sample plates consist of:"); System.out.println("* a number of wells"); System.out.println(" * separated into one or more sections"); System.out.println(" * each of which has a set quantity of cells per well"); System.out.println(" * selected from a statistical distribution of distinct cells"); System.out.println(" * with a set dropout rate for individual sequences within a cell"); System.out.println("\nMaking a sample plate requires a population of distinct cells"); System.out.print("Please enter name of an existing cell sample file: "); cellFile = sc.next(); System.out.println("\nThe sample plate will be written to a CSV file"); System.out.print("Please enter a name for the output file: "); filename = sc.next(); System.out.println("\nSelect T-cell frequency distribution function"); System.out.println("1) Poisson"); System.out.println("2) Gaussian"); System.out.println("3) Exponential"); System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)"); System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)"); System.out.println("(Note: wider distributions are more memory intensive to match)"); System.out.print("Enter selection value: "); input = sc.nextInt(); switch (input) { case 1 -> poisson = true; case 2 -> { System.out.println("How many distinct T-cells within one standard deviation of peak frequency?"); System.out.println("(Note: wider distributions are more memory intensive to match)"); stdDev = sc.nextDouble(); if (stdDev <= 0.0) { throw new InputMismatchException("Value must be positive."); } } case 3 -> { exponential = true; System.out.println("Please enter lambda value for exponential distribution."); lambda = sc.nextDouble(); if (lambda <= 0.0) { throw new InputMismatchException("Value must be positive."); } } default -> { System.out.println("Invalid input. Defaulting to exponential."); exponential = true; } } System.out.print("\nNumber of wells on plate: "); numWells = sc.nextInt(); if(numWells < 1){ throw new InputMismatchException("No wells on plate"); } System.out.println("\nThe plate can be evenly sectioned to allow multiple concentrations of T-cells/well"); System.out.println("How many sections would you like to make (minimum 1)?"); numSections = sc.nextInt(); if(numSections < 1) { throw new InputMismatchException("Too few sections."); } else if (numSections > numWells) { throw new InputMismatchException("Cannot have more sections than wells."); } int i = 1; concentrations = new Integer[numSections]; while(numSections > 0) { System.out.print("Enter number of T-cells per well in section " + i +": "); concentrations[i - 1] = sc.nextInt(); i++; numSections--; } System.out.println("\nErrors in amplification can induce a well dropout rate for sequences"); System.out.print("Enter well dropout rate (0.0 to 1.0): "); dropOutRate = sc.nextDouble(); if(dropOutRate < 0.0 || dropOutRate > 1.0) { throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]"); } }catch(InputMismatchException ex){ System.out.println(ex); sc.next(); } System.out.println("Reading Cell Sample file: " + cellFile); assert cellFile != null; CellFileReader cellReader = new CellFileReader(cellFile); if(exponential){ Plate samplePlate = new Plate(numWells, dropOutRate, concentrations); samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda); PlateFileWriter writer = new PlateFileWriter(filename, samplePlate); writer.writePlateFile(); } else { if (poisson) { stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson } Plate samplePlate = new Plate(numWells, dropOutRate, concentrations); samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev); assert filename != null; PlateFileWriter writer = new PlateFileWriter(filename, samplePlate); System.out.println("Writing Sample Plate to file"); writer.writePlateFile(); System.out.println("Sample Plate written to file: " + filename); System.gc(); } } //Output serialized binary of GraphAndMapData object private static void makeCDR3Graph() { String filename = null; String cellFile = null; String plateFile = null; try { String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data "; str = str.concat("\nrequires a cell sample file and a sample plate file."); System.out.println(str); System.out.print("\nPlease enter name of an existing cell sample file: "); cellFile = sc.next(); System.out.print("\nPlease enter name of an existing sample plate file: "); plateFile = sc.next(); System.out.println("\nThe graph and occupancy data will be written to a serialized binary file."); System.out.print("Please enter a name for the output file: "); filename = sc.next(); } catch (InputMismatchException ex) { System.out.println(ex); sc.next(); } System.out.println("Reading Cell Sample file: " + cellFile); assert cellFile != null; CellFileReader cellReader = new CellFileReader(cellFile); System.out.println("Reading Sample Plate file: " + plateFile); assert plateFile != null; PlateFileReader plateReader = new PlateFileReader(plateFile); Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells()); if (cellReader.getCells().size() == 0){ System.out.println("No cell sample found."); System.out.println("Returning to main menu."); } else if(plate.getWells().size() == 0 || plate.getConcentrations().length == 0){ System.out.println("No sample plate found."); System.out.println("Returning to main menu."); } else{ List cells = cellReader.getCells(); GraphWithMapData data = Simulator.makeGraph(cells, plate, true); assert filename != null; GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data); System.out.println("Writing graph and occupancy data to file. This may take some time."); System.out.println("File I/O time is not included in results."); dataWriter.writeDataToFile(); System.out.println("Graph and Data file written to: " + filename); System.gc(); } } //Simulate matching and output CSV file of results private static void matchCDR3s() throws IOException { String filename = null; String dataFilename = null; Integer lowThreshold = 0; Integer highThreshold = Integer.MAX_VALUE; Integer maxOccupancyDiff = Integer.MAX_VALUE; Integer minOverlapPercent = 0; try { System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file"); System.out.println("Please enter name of an existing graph and occupancy data file: "); dataFilename = sc.next(); System.out.println("The matching results will be written to a file."); System.out.print("Please enter a name for the output file: "); filename = sc.next(); System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?"); lowThreshold = sc.nextInt(); if(lowThreshold < 1){ throw new InputMismatchException("Minimum value for low threshold set to 1"); } System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?"); highThreshold = sc.nextInt(); System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?"); maxOccupancyDiff = sc.nextInt(); System.out.println("\nWell overlap percentage = pair overlap / sequence occupancy"); System.out.println("What is the minimum well overlap percentage to attempt matching? (0 to 100)"); minOverlapPercent = sc.nextInt(); if (minOverlapPercent < 0 || minOverlapPercent > 100) { throw new InputMismatchException("Value outside range. Minimum percent set to 0"); } } catch (InputMismatchException ex) { System.out.println(ex); sc.next(); } //read object data from file System.out.println("Reading graph data from file. This may take some time"); System.out.println("File I/O time is not included in results"); assert dataFilename != null; GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFilename); GraphWithMapData data = dataReader.getData(); //set source file name data.setSourceFilename(dataFilename); //simulate matching MatchingResult results = Simulator.matchCDR3s(data, dataFilename, lowThreshold, highThreshold, maxOccupancyDiff, minOverlapPercent, true); //write results to file assert filename != null; MatchingFileWriter writer = new MatchingFileWriter(filename, results); System.out.println("Writing results to file"); writer.writeResultsToFile(); System.out.println("Results written to file: " + filename); System.gc(); } /////// //Rewrite this to fit new matchCDR3 method with file I/O /////// // public static void matchCellsCDR1(){ // /* // The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by // looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct // match // */ // String filename = null; // String preliminaryResultsFilename = null; // String cellFile = null; // String plateFile = null; // Integer lowThresholdCDR3 = 0; // Integer highThresholdCDR3 = Integer.MAX_VALUE; // Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default // Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default // Integer lowThresholdCDR1 = 0; // Integer highThresholdCDR1 = Integer.MAX_VALUE; // boolean outputCDR3Matches = false; // try { // System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file."); // System.out.print("Please enter name of an existing cell sample file: "); // cellFile = sc.next(); // System.out.print("Please enter name of an existing sample plate file: "); // plateFile = sc.next(); // System.out.println("The matching results will be written to a file."); // System.out.print("Please enter a name for the output file: "); // filename = sc.next(); // System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?"); // lowThresholdCDR3 = sc.nextInt(); // if(lowThresholdCDR3 < 1){ // throw new InputMismatchException("Minimum value for low threshold is 1"); // } // System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?"); // highThresholdCDR3 = sc.nextInt(); // System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?"); // maxOccupancyDiffCDR3 = sc.nextInt(); // System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)"); // minOverlapPercentCDR3 = sc.nextInt(); // if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) { // throw new InputMismatchException("Value outside range. Minimum percent set to 0"); // } // System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?"); // lowThresholdCDR1 = sc.nextInt(); // if(lowThresholdCDR1 < 1){ // throw new InputMismatchException("Minimum value for low threshold is 1"); // } // System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?"); // highThresholdCDR1 = sc.nextInt(); // System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas."); // System.out.println("Output a file for CDR3 alpha/beta match results as well?"); // System.out.print("Please enter y/n: "); // String ans = sc.next(); // Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE); // Matcher matcher = pattern.matcher(ans); // if(matcher.matches()){ // outputCDR3Matches = true; // System.out.println("Please enter filename for CDR3 alpha/beta match results"); // preliminaryResultsFilename = sc.next(); // System.out.println("CDR3 alpha/beta matches will be output to file"); // } // else{ // System.out.println("CDR3 alpha/beta matches will not be output to file"); // } // } catch (InputMismatchException ex) { // System.out.println(ex); // sc.next(); // } // CellFileReader cellReader = new CellFileReader(cellFile); // PlateFileReader plateReader = new PlateFileReader(plateFile); // Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells()); // if (cellReader.getCells().size() == 0){ // System.out.println("No cell sample found."); // System.out.println("Returning to main menu."); // } // else if(plate.getWells().size() == 0){ // System.out.println("No sample plate found."); // System.out.println("Returning to main menu."); // // } // else{ // if(highThresholdCDR3 >= plate.getSize()){ // highThresholdCDR3 = plate.getSize() - 1; // } // if(highThresholdCDR1 >= plate.getSize()){ // highThresholdCDR1 = plate.getSize() - 1; // } // List cells = cellReader.getCells(); // MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3, // maxOccupancyDiffCDR3, minOverlapPercentCDR3, true); // MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1, // highThresholdCDR1, preliminaryResults); // MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]); // writer.writeResultsToFile(); // writer = new MatchingFileWriter(filename + "_SecondPass", results[1]); // writer.writeResultsToFile(); // if(outputCDR3Matches){ // writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults); // writer.writeResultsToFile(); // } // } // } private static void acknowledge(){ System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation"); System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences."); System.out.println(); System.out.println("For full documentation, view readme.md file distributed with this code"); System.out.println("or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ."); System.out.println(); System.out.println("pairSEQ citation:"); System.out.println("Howie, B., Sherwood, A. M., et. al."); System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences."); System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)"); System.out.println(); System.out.println("BiGpairSEQ_Sim by Eugene Fischer, 2021-2022"); } }