Command line arguments working, need better documentation and error handling

This commit is contained in:
2021-11-23 12:24:48 -06:00
parent 32593308df
commit acff88475b
6 changed files with 358 additions and 38 deletions

View File

@@ -4,6 +4,10 @@ import java.math.MathContext;
public abstract class Equations {
public static int getRandomNumber(int min, int max) {
return (int) ((Math.random() * (max - min)) + min);
}
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
int w_ab = (int) w_ab_d;
double pv = 0.0;

View File

@@ -1,6 +1,7 @@
import java.util.*;
/*
TODO: Implement exponential distribution using inversion method - DONE
TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
@@ -12,13 +13,15 @@ public class Plate {
private double error;
private Integer[] concentrations;
private double stdDev;
private double lambda;
boolean exponential = false;
public Plate (int size, double error, Integer[] concentrations, double stdDev) {
public Plate (int size, double error, Integer[] concentrations) {
this.size = size;
this.error = error;
this.concentrations = concentrations;
this.stdDev = stdDev;
//this.stdDev = stdDev;
wells = new ArrayList<>();
}
@@ -28,7 +31,47 @@ public class Plate {
this.size = wells.size();
}
public void fillWells(String sourceFileName, List<Integer[]> cells) {
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){
this.lambda = lambda;
exponential = true;
sourceFile = sourceFileName;
int numSections = concentrations.length;
int section = 0;
double m;
int n;
int test=0;
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < concentrations[section]; j++) {
do {
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
} while (m >= cells.size() || m < 0);
n = (int) Math.floor(m);
//n = Equations.getRandomNumber(0, cells.size());
// was testing generating the cell sample file with exponential dist, then sampling flat here
//that would be more realistic
//But would mess up my
if(n > test){
test = n;
}
Integer[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
cellToAdd[k] = -1;
}
}
well.add(cellToAdd);
}
wells.add(well);
}
section++;
}
System.out.println("Highest index: " +test);
}
public void fillWells(String sourceFileName, List<Integer[]> cells, double stdDev) {
this.stdDev = stdDev;
sourceFile = sourceFileName;
int numSections = concentrations.length;
int section = 0;
@@ -68,6 +111,10 @@ public class Plate {
return stdDev;
}
public boolean isExponential(){return exponential;}
public double getLambda(){return lambda;}
public double getError() {
return error;
}

View File

@@ -13,11 +13,13 @@ public class PlateFileWriter {
private int size;
private List<List<Integer[]>> wells;
private double stdDev;
private double lambda;
private Double error;
private String filename;
private String sourceFileName;
private String[] headers;
private List<Integer> concentrations;
private boolean isExponential = false;
public PlateFileWriter(String filename, Plate plate) {
if(!filename.matches(".*\\.csv")){
@@ -26,7 +28,13 @@ public class PlateFileWriter {
this.filename = filename;
this.sourceFileName = plate.getSourceFileName();
this.size = plate.getSize();
this.isExponential = plate.isExponential();
if(isExponential) {
this.lambda = plate.getLambda();
}
else{
this.stdDev = plate.getStdDev();
}
this.error = plate.getError();
this.wells = plate.getWells();
this.concentrations = Arrays.asList(plate.getConcentrations());
@@ -82,7 +90,12 @@ public class PlateFileWriter {
printer.printComment("Plate size: " + size);
printer.printComment("Error rate: " + error);
printer.printComment("Concentrations: " + concenString);
if(isExponential){
printer.printComment("Lambda: " + lambda);
}
else {
printer.printComment("Std. dev.: " + stdDev);
}
printer.printRecords(wellsAsStrings);
} catch(IOException ex){
System.out.println("Could not make new file named "+filename);

View File

@@ -20,8 +20,57 @@ public class Simulator {
private static int cdr1BetaIndex = 3;
//Tested generating the cell sample file itself with a set distribution, but it wasn't working well
//realized I'd have to change matching algos as well, so abandoned it.
// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){
// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s
// //this could be wrong, though. could be less diversity than that.
// //previous sim was only CDR3s
// //Integer numDistinctCells = 1000000;
// //int cdr1Freq = 3;
//
// List<Integer> numbersCDR3 = new ArrayList<>();
// List<Integer> numbersCDR1 = new ArrayList<>();
// Integer numDistCDR3s = 2 * numDistinctCells + 1;
// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
// Collections.shuffle(numbersCDR3);
// Collections.shuffle(numbersCDR1);
//
// //Each cell represented by 4 values
// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
// List<Integer[]> distinctCells = new ArrayList<>();
// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
// Integer tmpCDR3a = numbersCDR3.get(i);
// Integer tmpCDR3b = numbersCDR3.get(i+1);
// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
// distinctCells.add(tmp);
// }
// List<Integer[]>sampleCells = new ArrayList<>();
// int count;
// int lastFactor = 0;
// int factor = 10;
// int index = 0;
// while(numDistinctCells / factor >= 1){
// int i = index;
// while(i < index + factor - lastFactor){
// count = 0;
// while(count < (numDistinctCells/(factor * 10))){
// sampleCells.add(distinctCells.get(i));
// count++;
// }
// i++;
// }
// index = i;
// lastFactor = factor;
// factor *=10;
// }
// System.out.println("Total cells in sample: " + sampleCells.size());
// System.out.println("Distinct cells in sample: " + index);
// return new CellSample(sampleCells, cdr1Freq);
// }
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
//In real T cells, CDR1s have about one third the diversity of CDR3s
//previous sim was only CDR3s

View File

@@ -16,42 +16,164 @@ public class UserInterface {
public static void main(String[] args) {
if(args.length != 0){
Options options = new Options();
Option matchCDR3 = Option.builder("m")
.longOpt("match")
Options mainOptions = new Options();
Option makeCells = Option.builder("cells")
.longOpt("make-cells")
.desc("Makes a file of distinct cells")
.build();
Option makePlate = Option.builder("plates")
.longOpt("make-plates")
.desc("Makes a sample plate file")
.build();
Option matchCDR3 = Option.builder("match")
.longOpt("match-cdr3")
.desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
.build();
options.addOption(matchCDR3);
OptionGroup mainGroup = new OptionGroup();
mainGroup.addOption(makeCells);
mainGroup.addOption(makePlate);
mainGroup.addOption(matchCDR3);
mainGroup.setRequired(true);
mainOptions.addOptionGroup(mainGroup);
//Reuse clones of this for other options groups, rather than making it lots of times
Option outputFile = Option.builder("o")
.longOpt("output-file")
.hasArg()
.argName("filename")
.desc("Name of output file")
.build();
mainOptions.addOption(outputFile);
//Options cellOptions = new Options();
Option numCells = Option.builder("nc")
.longOpt("num-cells")
.desc("The number of distinct cells to generate")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(numCells);
Option cdr1Freq = Option.builder("d")
.longOpt("peptide-diversity-factor")
.hasArg()
.argName("number")
.desc("Number of distinct CDR3s for every CDR1")
.build();
mainOptions.addOption(cdr1Freq);
//Option cellOutput = (Option) outputFile.clone();
//cellOutput.setRequired(true);
//mainOptions.addOption(cellOutput);
//Options plateOptions = new Options();
Option inputCells = Option.builder("c")
.longOpt("cellfile")
.longOpt("cell-file")
.hasArg()
.argName("file")
.desc("The cell sample file used for matching")
.required().build();
options.addOption(inputCells);
.desc("The cell sample file used for filling wells")
.build();
mainOptions.addOption(inputCells);
Option numWells = Option.builder("w")
.longOpt("num-wells")
.hasArg()
.argName("number")
.desc("The number of wells on each plate")
.build();
mainOptions.addOption(numWells);
Option numPlates = Option.builder("np")
.longOpt("num-plates")
.hasArg()
.argName("number")
.desc("The number of plate files to output")
.build();
mainOptions.addOption(numPlates);
//Option plateOutput = (Option) outputFile.clone();
//plateOutput.setRequired(true);
//plateOutput.setDescription("Prefix for plate output filenames");
//mainOptions.addOption(plateOutput);
Option plateErr = Option.builder("err")
.longOpt("drop-out-rate")
.hasArg()
.argName("number")
.desc("Well drop-out rate. (Probability between 0 and 1)")
.build();
mainOptions.addOption(plateErr);
Option plateConcentrations = Option.builder("t")
.longOpt("t-cells-per-well")
.hasArgs()
.argName("number 1, number 2, ...")
.desc("Number of T cells per well for each plate section")
.build();
mainOptions.addOption(plateConcentrations);
//different distributions, mutually exclusive
OptionGroup plateDistributions = new OptionGroup();
Option plateExp = Option.builder("exponential")
.desc("Sample from distinct cells with exponential frequency distribution")
.build();
plateDistributions.addOption(plateExp);
Option plateGaussian = Option.builder("gaussian")
.desc("Sample from distinct cells with gaussain frequency distribution")
.build();
plateDistributions.addOption(plateGaussian);
Option platePoisson = Option.builder("poisson")
.desc("Sample from distinct cells with poisson frequency distribution")
.build();
plateDistributions.addOption(platePoisson);
mainOptions.addOptionGroup(plateDistributions);
Option plateStdDev = Option.builder("stddev")
.desc("Standard deviation for gaussian distribution")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(plateStdDev);
Option plateLambda = Option.builder("lambda")
.desc("Lambda for exponential distribution")
.hasArg()
.argName("number")
.build();
mainOptions.addOption(plateLambda);
//
// String cellFile, String filename, Double stdDev,
// Integer numWells, Integer numSections,
// Integer[] concentrations, Double dropOutRate
//
//Options matchOptions = new Options();
inputCells.setDescription("The cell sample file to be used for matching.");
mainOptions.addOption(inputCells);
Option lowThresh = Option.builder("low")
.longOpt("low-threshold")
.hasArg()
.argName("number")
.desc("Sets the minimum occupancy overlap to attempt matching")
.required().build();
options.addOption(lowThresh);
.build();
mainOptions.addOption(lowThresh);
Option highThresh = Option.builder("high")
.longOpt("high-threshold")
.hasArg()
.argName("number")
.desc("Sets the maximum occupancy overlap to attempt matching")
.required().build();
options.addOption(highThresh);
.build();
mainOptions.addOption(highThresh);
Option inputPlates = Option.builder("p")
.longOpt("platefiles")
.longOpt("plate-files")
.hasArgs()
.desc("Plate files to match")
.required().build();
options.addOption(inputPlates);
.build();
mainOptions.addOption(inputPlates);
CommandLineParser parser = new DefaultParser();
try {
CommandLine line = parser.parse(options, args);
if(line.hasOption("m")){
CommandLine line = parser.parse(mainOptions, args);
if(line.hasOption("match")){
//line = parser.parse(mainOptions, args);
String cellFile = line.getOptionValue("c");
Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
@@ -59,6 +181,49 @@ public class UserInterface {
matchCDR3s(cellFile, plate, lowThreshold, highThreshold);
}
}
else if(line.hasOption("cells")){
//line = parser.parse(mainOptions, args);
String filename = line.getOptionValue("o");
Integer numDistCells = Integer.valueOf(line.getOptionValue("nc"));
Integer freq = Integer.valueOf(line.getOptionValue("d"));
makeCells(filename, numDistCells, freq);
}
else if(line.hasOption("plates")){
//line = parser.parse(mainOptions, args);
String cellFile = line.getOptionValue("c");
String filenamePrefix = line.getOptionValue("o");
Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
String[] concentrationsToUseString = line.getOptionValues("t");
Integer numSections = concentrationsToUseString.length;
Integer[] concentrationsToUse = new Integer[numSections];
for(int i = 0; i <numSections; i++){
concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]);
}
Double dropOutRate = Double.valueOf(line.getOptionValue("err"));
if(line.hasOption("exponential")){
Double lambda = Double.valueOf(line.getOptionValue("lambda"));
for(int i = 1; i <= numPlatesToMake; i++){
makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
concentrationsToUse,dropOutRate);
}
}
else if(line.hasOption("gaussian")){
Double stdDev = Double.valueOf(line.getOptionValue("std-dev"));
for(int i = 1; i <= numPlatesToMake; i++){
makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
concentrationsToUse,dropOutRate);
}
}
else if(line.hasOption("poisson")){
for(int i = 1; i <= numPlatesToMake; i++){
makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
concentrationsToUse,dropOutRate);
}
}
}
}
catch (ParseException exp) {
System.err.println("Parsing failed. Reason: " + exp.getMessage());
@@ -118,27 +283,47 @@ public class UserInterface {
sc.next();
}
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
//CellSample sample = Simulator.generateCellSampleExp(numCells, cdr1Freq);
CellFileWriter writer = new CellFileWriter(filename, sample);
writer.writeCellsToFile();
}
//for calling from command line
private static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
CellFileWriter writer = new CellFileWriter(filename, sample);
writer.writeCellsToFile();
}
private static void makePlate(String cellFile, String filename, Double stdDev,
Integer numWells, Integer numSections,
Integer[] concentrations, Double dropOutRate){
private static void makePlateExp(String cellFile, String filename, Double lambda,
Integer numWells, Integer[] concentrations, Double dropOutRate){
CellFileReader cellReader = new CellFileReader(cellFile);
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells());
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
writer.writePlateFile();
}
//method to output a CSV of
private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
Integer[] concentrations, Double dropOutRate){
CellFileReader cellReader = new CellFileReader(cellFile);
Double stdDev = Math.sqrt(cellReader.getCellCount());
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
writer.writePlateFile();
}
private static void makePlate(String cellFile, String filename, Double stdDev,
Integer numWells, Integer[] concentrations, Double dropOutRate){
CellFileReader cellReader = new CellFileReader(cellFile);
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
writer.writePlateFile();
}
//method to output a CSV of sample plate
private static void makePlate() {
String cellFile = null;
String filename = null;
@@ -148,6 +333,8 @@ public class UserInterface {
Integer[] concentrations = {1};
Double dropOutRate = 0.0;
boolean poisson = false;
boolean exponential = false;
double lambda = 1.5;
try {
System.out.println("\nMaking a sample plate requires a population of distinct cells");
System.out.println("Please enter name of an existing cell sample file: ");
@@ -158,6 +345,9 @@ public class UserInterface {
System.out.println("Select T-cell frequency distribution function");
System.out.println("1) Poisson");
System.out.println("2) Gaussian");
System.out.println("3) Exponential");
System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6.)");
System.out.println("(approximated from slope of log-log graph in figure 4c)");
System.out.println("(Note: wider distributions are more memory intensive to match)");
System.out.print("Enter selection value: ");
input = sc.nextInt();
@@ -173,9 +363,18 @@ public class UserInterface {
throw new InputMismatchException("Value must be positive.");
}
break;
case 3:
exponential = true;
System.out.println("Please enter lambda value for exponential distribution.");
lambda = sc.nextDouble();
if(lambda <= 0.0){
throw new InputMismatchException("Value must be positive.");
}
break;
default:
System.out.println("Invalid input. Defaulting to Poisson.");
poisson = true;
System.out.println("Invalid input. Defaulting to exponential.");
//poisson = true;
exponential = true;
}
System.out.print("Number of wells on plate: ");
numWells = sc.nextInt();
@@ -210,14 +409,22 @@ public class UserInterface {
sc.next();
}
CellFileReader cellReader = new CellFileReader(cellFile);
if(poisson) {
stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
}
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells());
if(exponential){
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
writer.writePlateFile();
}
else {
if (poisson) {
stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
}
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
writer.writePlateFile();
}
}
private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold){
CellFileReader cellReader = new CellFileReader(cellFile);