Compare commits
4 Commits
161a52aa89
...
v4.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b7597cff2a | ||
|
|
7bbeaf7dad | ||
|
|
945b967382 | ||
|
|
a43ee469ea |
1
.idea/.name
generated
Normal file
1
.idea/.name
generated
Normal file
@@ -0,0 +1 @@
|
||||
BiGpairSEQ
|
||||
13
.idea/libraries/commons_rng_1.xml
generated
Normal file
13
.idea/libraries/commons_rng_1.xml
generated
Normal file
@@ -0,0 +1,13 @@
|
||||
<component name="libraryTable">
|
||||
<library name="commons-rng-1">
|
||||
<CLASSES>
|
||||
<root url="file://$USER_HOME$/Downloads/commons-rng-1.6" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES>
|
||||
<root url="file://$USER_HOME$/Downloads/commons-rng-1.6" />
|
||||
</SOURCES>
|
||||
<jarDirectory url="file://$USER_HOME$/Downloads/commons-rng-1.6" recursive="false" />
|
||||
<jarDirectory url="file://$USER_HOME$/Downloads/commons-rng-1.6" recursive="false" type="SOURCES" />
|
||||
</library>
|
||||
</component>
|
||||
@@ -136,7 +136,7 @@ There are a number of command line options, to allow the program to be used in s
|
||||
`java -jar BiGpairSEQ_Sim.jar -help`
|
||||
|
||||
```
|
||||
usage: BiGpairSEQ_Sim.jar
|
||||
usage: BiGpairSEQ_Sim.jar
|
||||
-cells,--make-cells Makes a cell sample file of distinct T cells
|
||||
-graph,--make-graph Makes a graph/data file. Requires a cell sample
|
||||
file and a sample plate file
|
||||
@@ -156,6 +156,8 @@ usage: BiGpairSEQ_Sim.jar -plate
|
||||
-c,--cell-file <filename> The cell sample file to use
|
||||
-d,--dropout-rate <rate> The sequence dropout rate due to
|
||||
amplification error. (0.0 - 1.0)
|
||||
-exp <value> If using -zipf flag, exponent value for
|
||||
distribution
|
||||
-exponential Use an exponential distribution for cell
|
||||
sample
|
||||
-gaussian Use a Gaussian distribution for cell sample
|
||||
@@ -173,6 +175,7 @@ usage: BiGpairSEQ_Sim.jar -plate
|
||||
-stddev <value> If using -gaussian flag, standard deviation
|
||||
for distrbution
|
||||
-w,--wells <number> The number of wells on the sample plate
|
||||
-zipf Use a Zipf distribution for cell sample
|
||||
|
||||
usage: BiGpairSEQ_Sim.jar -graph
|
||||
-c,--cell-file <filename> Cell sample file to use for
|
||||
@@ -234,7 +237,6 @@ usage: BiGpairSEQ_Sim.jar -match
|
||||
to stdout.
|
||||
-pv,--p-value (Optional) Calculate p-values for sequence
|
||||
pairs.
|
||||
|
||||
```
|
||||
|
||||
### INTERACTIVE INTERFACE
|
||||
@@ -340,6 +342,8 @@ Options when making a Sample Plate file:
|
||||
* Standard deviation size
|
||||
* Exponential
|
||||
* Lambda value
|
||||
* Zipf
|
||||
* Exponent value
|
||||
* Total number of wells on the plate
|
||||
* Well populations random or fixed
|
||||
* If random, minimum and maximum population sizes
|
||||
@@ -630,6 +634,7 @@ a means of exploring some very beautiful math.
|
||||
|
||||
## TODO
|
||||
|
||||
* Update CLI option text in this readme to include Zipf distribution options
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
|
||||
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
|
||||
|
||||
@@ -15,6 +15,7 @@ public class BiGpairSEQ {
|
||||
private static boolean cacheGraph = false;
|
||||
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
|
||||
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
|
||||
private static DistributionType distributionType = DistributionType.ZIPF;
|
||||
private static boolean outputBinary = true;
|
||||
private static boolean outputGraphML = false;
|
||||
private static boolean calculatePValue = false;
|
||||
@@ -60,6 +61,10 @@ public class BiGpairSEQ {
|
||||
return cellFilename;
|
||||
}
|
||||
|
||||
public static DistributionType getDistributionType() {return distributionType;}
|
||||
|
||||
public static void setDistributionType(DistributionType type) {distributionType = type;}
|
||||
|
||||
public static Plate getPlateInMemory() {
|
||||
return plateInMemory;
|
||||
}
|
||||
|
||||
@@ -123,16 +123,20 @@ public class CommandLineInterface {
|
||||
Plate plate;
|
||||
if (line.hasOption("poisson")) {
|
||||
Double stdDev = Math.sqrt(numWells);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
|
||||
}
|
||||
else if (line.hasOption("gaussian")) {
|
||||
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
|
||||
}
|
||||
else if (line.hasOption("zipf")) {
|
||||
Double zipfExponent = Double.parseDouble(line.getOptionValue("exp"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, zipfExponent);
|
||||
}
|
||||
else {
|
||||
assert line.hasOption("exponential");
|
||||
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda);
|
||||
}
|
||||
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
|
||||
writer.writePlateFile();
|
||||
@@ -340,9 +344,13 @@ public class CommandLineInterface {
|
||||
Option exponential = Option.builder("exponential")
|
||||
.desc("Use an exponential distribution for cell sample")
|
||||
.build();
|
||||
Option zipf = Option.builder("zipf")
|
||||
.desc("Use a Zipf distribution for cell sample")
|
||||
.build();
|
||||
distributions.addOption(poisson);
|
||||
distributions.addOption(gaussian);
|
||||
distributions.addOption(exponential);
|
||||
distributions.addOption(zipf);
|
||||
//options group for statistical distribution parameters
|
||||
OptionGroup statParams = new OptionGroup();// add this to plate options
|
||||
Option stdDev = Option.builder("stddev")
|
||||
@@ -355,6 +363,11 @@ public class CommandLineInterface {
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
Option zipfExponent = Option.builder("exp")
|
||||
.desc("If using -zipf flag, exponent value for distribution")
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
statParams.addOption(stdDev);
|
||||
statParams.addOption(lambda);
|
||||
//Option group for random plate or set populations
|
||||
@@ -386,6 +399,7 @@ public class CommandLineInterface {
|
||||
plateOptions.addOptionGroup(statParams);
|
||||
plateOptions.addOptionGroup(wellPopOptions);
|
||||
plateOptions.addOption(dropoutRate);
|
||||
plateOptions.addOption(zipfExponent);
|
||||
plateOptions.addOption(outputFileOption());
|
||||
return plateOptions;
|
||||
}
|
||||
|
||||
6
src/main/java/DistributionType.java
Normal file
6
src/main/java/DistributionType.java
Normal file
@@ -0,0 +1,6 @@
|
||||
public enum DistributionType {
|
||||
POISSON,
|
||||
GAUSSIAN,
|
||||
EXPONENTIAL,
|
||||
ZIPF
|
||||
}
|
||||
@@ -89,14 +89,12 @@ public class InteractiveInterface {
|
||||
private static void makePlate() {
|
||||
String cellFile = null;
|
||||
String filename = null;
|
||||
Double stdDev = 0.0;
|
||||
Double parameter = 0.0;
|
||||
Integer numWells = 0;
|
||||
Integer numSections;
|
||||
Integer[] populations = {1};
|
||||
Double dropOutRate = 0.0;
|
||||
boolean poisson = false;
|
||||
boolean exponential = false;
|
||||
double lambda = 1.5;
|
||||
;
|
||||
try {
|
||||
System.out.println("\nSimulated sample plates consist of:");
|
||||
System.out.println("* a number of wells");
|
||||
@@ -114,33 +112,46 @@ public class InteractiveInterface {
|
||||
System.out.println("1) Poisson");
|
||||
System.out.println("2) Gaussian");
|
||||
System.out.println("3) Exponential");
|
||||
// System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
|
||||
// System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
|
||||
System.out.println("4) Zipf");
|
||||
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
System.out.print("Enter selection value: ");
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> poisson = true;
|
||||
case 1 -> {
|
||||
BiGpairSEQ.setDistributionType(DistributionType.POISSON);
|
||||
}
|
||||
case 2 -> {
|
||||
BiGpairSEQ.setDistributionType(DistributionType.GAUSSIAN);
|
||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
stdDev = sc.nextDouble();
|
||||
if (stdDev <= 0.0) {
|
||||
parameter = sc.nextDouble();
|
||||
if (parameter <= 0.0) {
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
}
|
||||
case 3 -> {
|
||||
exponential = true;
|
||||
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
|
||||
System.out.print("Please enter lambda value for exponential distribution: ");
|
||||
lambda = sc.nextDouble();
|
||||
if (lambda <= 0.0) {
|
||||
lambda = 0.6;
|
||||
System.out.println("Value must be positive. Defaulting to 0.6.");
|
||||
parameter = sc.nextDouble();
|
||||
if (parameter <= 0.0) {
|
||||
parameter = 1.4;
|
||||
System.out.println("Value must be positive. Defaulting to 1.4.");
|
||||
}
|
||||
}
|
||||
case 4 -> {
|
||||
BiGpairSEQ.setDistributionType(DistributionType.ZIPF);
|
||||
System.out.print("Please enter exponent value for Zipf distribution: ");
|
||||
parameter = sc.nextDouble();
|
||||
if (parameter <= 0.0) {
|
||||
parameter = 1.4;
|
||||
System.out.println("Value must be positive. Defaulting to 1.4.");
|
||||
}
|
||||
}
|
||||
default -> {
|
||||
System.out.println("Invalid input. Defaulting to exponential.");
|
||||
exponential = true;
|
||||
parameter = 1.4;
|
||||
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
|
||||
}
|
||||
}
|
||||
System.out.print("\nNumber of wells on plate: ");
|
||||
@@ -226,17 +237,18 @@ public class InteractiveInterface {
|
||||
assert filename != null;
|
||||
Plate samplePlate;
|
||||
PlateFileWriter writer;
|
||||
if(exponential){
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
|
||||
DistributionType type = BiGpairSEQ.getDistributionType();
|
||||
switch(type) {
|
||||
case POISSON -> {
|
||||
parameter = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
else {
|
||||
if (poisson) {
|
||||
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
}
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
|
||||
default -> {
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
}
|
||||
System.out.println("Writing Sample Plate to file");
|
||||
writer.writePlateFile();
|
||||
System.out.println("Sample Plate written to file: " + filename);
|
||||
@@ -605,12 +617,13 @@ public class InteractiveInterface {
|
||||
case 3 -> {
|
||||
BiGpairSEQ.setAuctionAlgorithm();
|
||||
System.out.println("MWM algorithm set to auction");
|
||||
backToOptions = true;
|
||||
}
|
||||
case 4 -> {
|
||||
System.out.println("Scaling integer weight MWM algorithm not yet fully implemented. Sorry.");
|
||||
// BiGpairSEQ.setIntegerWeightScalingAlgorithm();
|
||||
// System.out.println("MWM algorithm set to integer weight scaling algorithm of Duan and Su");
|
||||
backToOptions = true;
|
||||
// backToOptions = true;
|
||||
}
|
||||
case 0 -> backToOptions = true;
|
||||
default -> System.out.println("Invalid input");
|
||||
|
||||
@@ -13,6 +13,11 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||
*/
|
||||
|
||||
|
||||
import org.apache.commons.rng.UniformRandomProvider;
|
||||
import org.apache.commons.rng.core.BaseProvider;
|
||||
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
|
||||
import org.apache.commons.rng.simple.JDKRandomWrapper;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class Plate {
|
||||
@@ -26,25 +31,22 @@ public class Plate {
|
||||
private Integer[] populations;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
boolean exponential = false;
|
||||
private double zipfExponent;
|
||||
private DistributionType distributionType;
|
||||
|
||||
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
|
||||
double dropoutRate, double stdDev_or_lambda, boolean exponential){
|
||||
double dropoutRate, double parameter){
|
||||
this.cells = cells;
|
||||
this.sourceFile = cellFilename;
|
||||
this.size = numWells;
|
||||
this.wells = new ArrayList<>();
|
||||
this.error = dropoutRate;
|
||||
this.populations = populations;
|
||||
this.exponential = exponential;
|
||||
if (this.exponential) {
|
||||
this.lambda = stdDev_or_lambda;
|
||||
fillWellsExponential(cells.getCells(), this.lambda);
|
||||
}
|
||||
else {
|
||||
this.stdDev = stdDev_or_lambda;
|
||||
fillWells(cells.getCells(), this.stdDev);
|
||||
}
|
||||
this.stdDev = parameter;
|
||||
this.lambda = parameter;
|
||||
this.zipfExponent = parameter;
|
||||
this.distributionType = BiGpairSEQ.getDistributionType();
|
||||
fillWells(cells.getCells());
|
||||
}
|
||||
|
||||
|
||||
@@ -85,9 +87,33 @@ public class Plate {
|
||||
}
|
||||
}
|
||||
|
||||
private void fillWellsZipf(List<String[]> cells, double exponent) {
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
int n;
|
||||
RejectionInversionZipfSampler zipfSampler = new RejectionInversionZipfSampler(new JDKRandomWrapper(rand), cells.size(), exponent);
|
||||
while (section < numSections){
|
||||
for (int i = 0; i < (size / numSections); i++) {
|
||||
List<String[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < populations[section]; j++) {
|
||||
do {
|
||||
n = zipfSampler.sample();
|
||||
} while (n >= cells.size() || n < 0);
|
||||
String[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
|
||||
cellToAdd[k] = "-1";
|
||||
}
|
||||
}
|
||||
well.add(cellToAdd);
|
||||
}
|
||||
wells.add(well);
|
||||
}
|
||||
section++;
|
||||
}
|
||||
}
|
||||
|
||||
private void fillWellsExponential(List<String[]> cells, double lambda){
|
||||
this.lambda = lambda;
|
||||
exponential = true;
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
double m;
|
||||
@@ -143,6 +169,24 @@ public class Plate {
|
||||
}
|
||||
}
|
||||
|
||||
private void fillWells(List<String[]> cells){
|
||||
DistributionType type = BiGpairSEQ.getDistributionType();
|
||||
switch (type) {
|
||||
case POISSON, GAUSSIAN -> {
|
||||
fillWells(cells, getStdDev());
|
||||
break;
|
||||
}
|
||||
case EXPONENTIAL -> {
|
||||
fillWellsExponential(cells, getLambda());
|
||||
break;
|
||||
}
|
||||
case ZIPF -> {
|
||||
fillWellsZipf(cells, getZipfExponent());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Integer[] getPopulations(){
|
||||
return populations;
|
||||
}
|
||||
@@ -155,10 +199,12 @@ public class Plate {
|
||||
return stdDev;
|
||||
}
|
||||
|
||||
public boolean isExponential(){return exponential;}
|
||||
public DistributionType getDistributionType() { return distributionType;}
|
||||
|
||||
public double getLambda(){return lambda;}
|
||||
|
||||
public double getZipfExponent(){return zipfExponent;}
|
||||
|
||||
public double getError() {
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -13,11 +13,13 @@ public class PlateFileWriter {
|
||||
private List<List<String[]>> wells;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
private double zipfExponent;
|
||||
private DistributionType distributionType;
|
||||
private Double error;
|
||||
private String filename;
|
||||
private String sourceFileName;
|
||||
private Integer[] populations;
|
||||
private boolean isExponential = false;
|
||||
|
||||
|
||||
public PlateFileWriter(String filename, Plate plate) {
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
@@ -26,12 +28,17 @@ public class PlateFileWriter {
|
||||
this.filename = filename;
|
||||
this.sourceFileName = plate.getSourceFileName();
|
||||
this.size = plate.getSize();
|
||||
this.isExponential = plate.isExponential();
|
||||
if(isExponential) {
|
||||
this.distributionType = plate.getDistributionType();
|
||||
switch(distributionType) {
|
||||
case POISSON, GAUSSIAN -> {
|
||||
this.stdDev = plate.getStdDev();
|
||||
}
|
||||
case EXPONENTIAL -> {
|
||||
this.lambda = plate.getLambda();
|
||||
}
|
||||
else{
|
||||
this.stdDev = plate.getStdDev();
|
||||
case ZIPF -> {
|
||||
this.zipfExponent = plate.getZipfExponent();
|
||||
}
|
||||
}
|
||||
this.error = plate.getError();
|
||||
this.wells = plate.getWells();
|
||||
@@ -95,11 +102,22 @@ public class PlateFileWriter {
|
||||
printer.printComment("Plate size: " + size);
|
||||
printer.printComment("Well populations: " + wellPopulationsString);
|
||||
printer.printComment("Error rate: " + error);
|
||||
if(isExponential){
|
||||
printer.printComment("Lambda: " + lambda);
|
||||
switch (distributionType) {
|
||||
case POISSON -> {
|
||||
printer.printComment("Cell frequency distribution: POISSON");
|
||||
}
|
||||
case GAUSSIAN -> {
|
||||
printer.printComment("Cell frequency distribution: GAUSSIAN");
|
||||
printer.printComment("--Standard deviation: " + stdDev);
|
||||
}
|
||||
case EXPONENTIAL -> {
|
||||
printer.printComment("Cell frequency distribution: EXPONENTIAL");
|
||||
printer.printComment("--Lambda: " + lambda);
|
||||
}
|
||||
case ZIPF -> {
|
||||
printer.printComment("Cell frequency distribution: ZIPF");
|
||||
printer.printComment("--Exponent: " + zipfExponent);
|
||||
}
|
||||
else {
|
||||
printer.printComment("Std. dev.: " + stdDev);
|
||||
}
|
||||
printer.printRecords(wellsAsStrings);
|
||||
} catch(IOException ex){
|
||||
|
||||
Reference in New Issue
Block a user