diff --git a/out/artifacts/TCellSim_jar/TCellSim.jar b/out/artifacts/TCellSim_jar/TCellSim.jar index b16781f..a397aa9 100644 Binary files a/out/artifacts/TCellSim_jar/TCellSim.jar and b/out/artifacts/TCellSim_jar/TCellSim.jar differ diff --git a/src/main/java/Equations.java b/src/main/java/Equations.java index 471656d..45652b2 100644 --- a/src/main/java/Equations.java +++ b/src/main/java/Equations.java @@ -4,6 +4,10 @@ import java.math.MathContext; public abstract class Equations { + public static int getRandomNumber(int min, int max) { + return (int) ((Math.random() * (max - min)) + min); + } + public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) { int w_ab = (int) w_ab_d; double pv = 0.0; diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index c989c25..32d6ceb 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -1,6 +1,7 @@ import java.util.*; /* +TODO: Implement exponential distribution using inversion method - DONE TODO: Implement discrete frequency distributions using Vose's Alias Method */ @@ -12,13 +13,15 @@ public class Plate { private double error; private Integer[] concentrations; private double stdDev; + private double lambda; + boolean exponential = false; - public Plate (int size, double error, Integer[] concentrations, double stdDev) { + public Plate (int size, double error, Integer[] concentrations) { this.size = size; this.error = error; this.concentrations = concentrations; - this.stdDev = stdDev; + //this.stdDev = stdDev; wells = new ArrayList<>(); } @@ -28,7 +31,47 @@ public class Plate { this.size = wells.size(); } - public void fillWells(String sourceFileName, List cells) { + public void fillWellsExponential(String sourceFileName, List cells, double lambda){ + this.lambda = lambda; + exponential = true; + sourceFile = sourceFileName; + int numSections = concentrations.length; + int section = 0; + double m; + int n; + int test=0; + while (section < numSections){ + for (int i = 0; i < (size / numSections); i++) { + List well = new ArrayList<>(); + for (int j = 0; j < concentrations[section]; j++) { + do { + m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size()); + } while (m >= cells.size() || m < 0); + n = (int) Math.floor(m); + //n = Equations.getRandomNumber(0, cells.size()); + // was testing generating the cell sample file with exponential dist, then sampling flat here + //that would be more realistic + //But would mess up my + if(n > test){ + test = n; + } + Integer[] cellToAdd = cells.get(n).clone(); + for(int k = 0; k < cellToAdd.length; k++){ + if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide + cellToAdd[k] = -1; + } + } + well.add(cellToAdd); + } + wells.add(well); + } + section++; + } + System.out.println("Highest index: " +test); + } + + public void fillWells(String sourceFileName, List cells, double stdDev) { + this.stdDev = stdDev; sourceFile = sourceFileName; int numSections = concentrations.length; int section = 0; @@ -68,6 +111,10 @@ public class Plate { return stdDev; } + public boolean isExponential(){return exponential;} + + public double getLambda(){return lambda;} + public double getError() { return error; } diff --git a/src/main/java/PlateFileWriter.java b/src/main/java/PlateFileWriter.java index f96c07c..e02bd51 100644 --- a/src/main/java/PlateFileWriter.java +++ b/src/main/java/PlateFileWriter.java @@ -13,11 +13,13 @@ public class PlateFileWriter { private int size; private List> wells; private double stdDev; + private double lambda; private Double error; private String filename; private String sourceFileName; private String[] headers; private List concentrations; + private boolean isExponential = false; public PlateFileWriter(String filename, Plate plate) { if(!filename.matches(".*\\.csv")){ @@ -26,7 +28,13 @@ public class PlateFileWriter { this.filename = filename; this.sourceFileName = plate.getSourceFileName(); this.size = plate.getSize(); - this.stdDev = plate.getStdDev(); + this.isExponential = plate.isExponential(); + if(isExponential) { + this.lambda = plate.getLambda(); + } + else{ + this.stdDev = plate.getStdDev(); + } this.error = plate.getError(); this.wells = plate.getWells(); this.concentrations = Arrays.asList(plate.getConcentrations()); @@ -82,7 +90,12 @@ public class PlateFileWriter { printer.printComment("Plate size: " + size); printer.printComment("Error rate: " + error); printer.printComment("Concentrations: " + concenString); - printer.printComment("Std. dev.: " + stdDev); + if(isExponential){ + printer.printComment("Lambda: " + lambda); + } + else { + printer.printComment("Std. dev.: " + stdDev); + } printer.printRecords(wellsAsStrings); } catch(IOException ex){ System.out.println("Could not make new file named "+filename); diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 877d3b7..e645252 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -20,8 +20,57 @@ public class Simulator { private static int cdr1BetaIndex = 3; - - + //Tested generating the cell sample file itself with a set distribution, but it wasn't working well + //realized I'd have to change matching algos as well, so abandoned it. +// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){ +// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s +// //this could be wrong, though. could be less diversity than that. +// //previous sim was only CDR3s +// //Integer numDistinctCells = 1000000; +// //int cdr1Freq = 3; +// +// List numbersCDR3 = new ArrayList<>(); +// List numbersCDR1 = new ArrayList<>(); +// Integer numDistCDR3s = 2 * numDistinctCells + 1; +// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i)); +// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i)); +// Collections.shuffle(numbersCDR3); +// Collections.shuffle(numbersCDR1); +// +// //Each cell represented by 4 values +// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s +// List distinctCells = new ArrayList<>(); +// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ +// Integer tmpCDR3a = numbersCDR3.get(i); +// Integer tmpCDR3b = numbersCDR3.get(i+1); +// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size()); +// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size()); +// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b}; +// distinctCells.add(tmp); +// } +// ListsampleCells = new ArrayList<>(); +// int count; +// int lastFactor = 0; +// int factor = 10; +// int index = 0; +// while(numDistinctCells / factor >= 1){ +// int i = index; +// while(i < index + factor - lastFactor){ +// count = 0; +// while(count < (numDistinctCells/(factor * 10))){ +// sampleCells.add(distinctCells.get(i)); +// count++; +// } +// i++; +// } +// index = i; +// lastFactor = factor; +// factor *=10; +// } +// System.out.println("Total cells in sample: " + sampleCells.size()); +// System.out.println("Distinct cells in sample: " + index); +// return new CellSample(sampleCells, cdr1Freq); +// } public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) { //In real T cells, CDR1s have about one third the diversity of CDR3s //previous sim was only CDR3s diff --git a/src/main/java/UserInterface.java b/src/main/java/UserInterface.java index 2224c0a..9c559c5 100644 --- a/src/main/java/UserInterface.java +++ b/src/main/java/UserInterface.java @@ -16,42 +16,164 @@ public class UserInterface { public static void main(String[] args) { if(args.length != 0){ - Options options = new Options(); - Option matchCDR3 = Option.builder("m") - .longOpt("match") + Options mainOptions = new Options(); + Option makeCells = Option.builder("cells") + .longOpt("make-cells") + .desc("Makes a file of distinct cells") + .build(); + Option makePlate = Option.builder("plates") + .longOpt("make-plates") + .desc("Makes a sample plate file") + .build(); + Option matchCDR3 = Option.builder("match") + .longOpt("match-cdr3") .desc("Match CDR3s. Requires a cell sample file and any number of plate files.") .build(); - options.addOption(matchCDR3); + OptionGroup mainGroup = new OptionGroup(); + mainGroup.addOption(makeCells); + mainGroup.addOption(makePlate); + mainGroup.addOption(matchCDR3); + mainGroup.setRequired(true); + mainOptions.addOptionGroup(mainGroup); + + //Reuse clones of this for other options groups, rather than making it lots of times + Option outputFile = Option.builder("o") + .longOpt("output-file") + .hasArg() + .argName("filename") + .desc("Name of output file") + .build(); + mainOptions.addOption(outputFile); + + //Options cellOptions = new Options(); + Option numCells = Option.builder("nc") + .longOpt("num-cells") + .desc("The number of distinct cells to generate") + .hasArg() + .argName("number") + .build(); + mainOptions.addOption(numCells); + Option cdr1Freq = Option.builder("d") + .longOpt("peptide-diversity-factor") + .hasArg() + .argName("number") + .desc("Number of distinct CDR3s for every CDR1") + .build(); + mainOptions.addOption(cdr1Freq); + //Option cellOutput = (Option) outputFile.clone(); + //cellOutput.setRequired(true); + //mainOptions.addOption(cellOutput); + + //Options plateOptions = new Options(); Option inputCells = Option.builder("c") - .longOpt("cellfile") + .longOpt("cell-file") .hasArg() .argName("file") - .desc("The cell sample file used for matching") - .required().build(); - options.addOption(inputCells); + .desc("The cell sample file used for filling wells") + .build(); + mainOptions.addOption(inputCells); + Option numWells = Option.builder("w") + .longOpt("num-wells") + .hasArg() + .argName("number") + .desc("The number of wells on each plate") + .build(); + mainOptions.addOption(numWells); + Option numPlates = Option.builder("np") + .longOpt("num-plates") + .hasArg() + .argName("number") + .desc("The number of plate files to output") + .build(); + mainOptions.addOption(numPlates); + //Option plateOutput = (Option) outputFile.clone(); + //plateOutput.setRequired(true); + //plateOutput.setDescription("Prefix for plate output filenames"); + //mainOptions.addOption(plateOutput); + Option plateErr = Option.builder("err") + .longOpt("drop-out-rate") + .hasArg() + .argName("number") + .desc("Well drop-out rate. (Probability between 0 and 1)") + .build(); + mainOptions.addOption(plateErr); + Option plateConcentrations = Option.builder("t") + .longOpt("t-cells-per-well") + .hasArgs() + .argName("number 1, number 2, ...") + .desc("Number of T cells per well for each plate section") + .build(); + mainOptions.addOption(plateConcentrations); + +//different distributions, mutually exclusive + OptionGroup plateDistributions = new OptionGroup(); + Option plateExp = Option.builder("exponential") + .desc("Sample from distinct cells with exponential frequency distribution") + .build(); + plateDistributions.addOption(plateExp); + Option plateGaussian = Option.builder("gaussian") + .desc("Sample from distinct cells with gaussain frequency distribution") + .build(); + plateDistributions.addOption(plateGaussian); + Option platePoisson = Option.builder("poisson") + .desc("Sample from distinct cells with poisson frequency distribution") + .build(); + plateDistributions.addOption(platePoisson); + mainOptions.addOptionGroup(plateDistributions); + + Option plateStdDev = Option.builder("stddev") + .desc("Standard deviation for gaussian distribution") + .hasArg() + .argName("number") + .build(); + mainOptions.addOption(plateStdDev); + + Option plateLambda = Option.builder("lambda") + .desc("Lambda for exponential distribution") + .hasArg() + .argName("number") + .build(); + mainOptions.addOption(plateLambda); + + + +// +// String cellFile, String filename, Double stdDev, +// Integer numWells, Integer numSections, +// Integer[] concentrations, Double dropOutRate +// + + //Options matchOptions = new Options(); + inputCells.setDescription("The cell sample file to be used for matching."); + mainOptions.addOption(inputCells); Option lowThresh = Option.builder("low") + .longOpt("low-threshold") .hasArg() .argName("number") .desc("Sets the minimum occupancy overlap to attempt matching") - .required().build(); - options.addOption(lowThresh); + .build(); + mainOptions.addOption(lowThresh); Option highThresh = Option.builder("high") + .longOpt("high-threshold") .hasArg() .argName("number") .desc("Sets the maximum occupancy overlap to attempt matching") - .required().build(); - options.addOption(highThresh); + .build(); + mainOptions.addOption(highThresh); Option inputPlates = Option.builder("p") - .longOpt("platefiles") + .longOpt("plate-files") .hasArgs() .desc("Plate files to match") - .required().build(); - options.addOption(inputPlates); + .build(); + mainOptions.addOption(inputPlates); + + CommandLineParser parser = new DefaultParser(); try { - CommandLine line = parser.parse(options, args); - if(line.hasOption("m")){ + CommandLine line = parser.parse(mainOptions, args); + if(line.hasOption("match")){ + //line = parser.parse(mainOptions, args); String cellFile = line.getOptionValue("c"); Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh)); Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh)); @@ -59,6 +181,49 @@ public class UserInterface { matchCDR3s(cellFile, plate, lowThreshold, highThreshold); } } + else if(line.hasOption("cells")){ + //line = parser.parse(mainOptions, args); + String filename = line.getOptionValue("o"); + Integer numDistCells = Integer.valueOf(line.getOptionValue("nc")); + Integer freq = Integer.valueOf(line.getOptionValue("d")); + makeCells(filename, numDistCells, freq); + } + else if(line.hasOption("plates")){ + //line = parser.parse(mainOptions, args); + String cellFile = line.getOptionValue("c"); + String filenamePrefix = line.getOptionValue("o"); + Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w")); + Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np")); + String[] concentrationsToUseString = line.getOptionValues("t"); + Integer numSections = concentrationsToUseString.length; + + Integer[] concentrationsToUse = new Integer[numSections]; + for(int i = 0; i