diff --git a/.idea/.name b/.idea/.name
new file mode 100644
index 0000000..54e592b
--- /dev/null
+++ b/.idea/.name
@@ -0,0 +1 @@
+BiGpairSEQ
\ No newline at end of file
diff --git a/.idea/libraries/commons_rng_1.xml b/.idea/libraries/commons_rng_1.xml
new file mode 100644
index 0000000..8523868
--- /dev/null
+++ b/.idea/libraries/commons_rng_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/main/java/BiGpairSEQ.java b/src/main/java/BiGpairSEQ.java
index 17aad88..674e9d2 100644
--- a/src/main/java/BiGpairSEQ.java
+++ b/src/main/java/BiGpairSEQ.java
@@ -15,6 +15,7 @@ public class BiGpairSEQ {
private static boolean cacheGraph = false;
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
+ private static DistributionType distributionType = DistributionType.ZIPF;
private static boolean outputBinary = true;
private static boolean outputGraphML = false;
private static boolean calculatePValue = false;
@@ -60,6 +61,10 @@ public class BiGpairSEQ {
return cellFilename;
}
+ public static DistributionType getDistributionType() {return distributionType;}
+
+ public static void setDistributionType(DistributionType type) {distributionType = type;}
+
public static Plate getPlateInMemory() {
return plateInMemory;
}
diff --git a/src/main/java/CommandLineInterface.java b/src/main/java/CommandLineInterface.java
index b6ee0fe..18a3fbe 100644
--- a/src/main/java/CommandLineInterface.java
+++ b/src/main/java/CommandLineInterface.java
@@ -123,16 +123,20 @@ public class CommandLineInterface {
Plate plate;
if (line.hasOption("poisson")) {
Double stdDev = Math.sqrt(numWells);
- plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
+ plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
}
else if (line.hasOption("gaussian")) {
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
- plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
+ plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
+ }
+ else if (line.hasOption("zipf")) {
+ Double zipfExponent = Double.parseDouble(line.getOptionValue("exp"));
+ plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, zipfExponent);
}
else {
assert line.hasOption("exponential");
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
- plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
+ plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda);
}
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
writer.writePlateFile();
@@ -340,9 +344,13 @@ public class CommandLineInterface {
Option exponential = Option.builder("exponential")
.desc("Use an exponential distribution for cell sample")
.build();
+ Option zipf = Option.builder("zipf")
+ .desc("Use a Zipf distribution for cell sample")
+ .build();
distributions.addOption(poisson);
distributions.addOption(gaussian);
distributions.addOption(exponential);
+ distributions.addOption(zipf);
//options group for statistical distribution parameters
OptionGroup statParams = new OptionGroup();// add this to plate options
Option stdDev = Option.builder("stddev")
@@ -355,6 +363,11 @@ public class CommandLineInterface {
.hasArg()
.argName("value")
.build();
+ Option zipfExponent = Option.builder("exp")
+ .desc("If using -zipf flag, exponent value for distribution")
+ .hasArg()
+ .argName("value")
+ .build();
statParams.addOption(stdDev);
statParams.addOption(lambda);
//Option group for random plate or set populations
diff --git a/src/main/java/DistributionType.java b/src/main/java/DistributionType.java
new file mode 100644
index 0000000..a0272ab
--- /dev/null
+++ b/src/main/java/DistributionType.java
@@ -0,0 +1,6 @@
+public enum DistributionType {
+ POISSON,
+ GAUSSIAN,
+ EXPONENTIAL,
+ ZIPF
+}
diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java
index 57f1358..027d0b7 100644
--- a/src/main/java/InteractiveInterface.java
+++ b/src/main/java/InteractiveInterface.java
@@ -89,14 +89,12 @@ public class InteractiveInterface {
private static void makePlate() {
String cellFile = null;
String filename = null;
- Double stdDev = 0.0;
+ Double parameter = 0.0;
Integer numWells = 0;
Integer numSections;
Integer[] populations = {1};
Double dropOutRate = 0.0;
- boolean poisson = false;
- boolean exponential = false;
- double lambda = 1.5;
+;
try {
System.out.println("\nSimulated sample plates consist of:");
System.out.println("* a number of wells");
@@ -114,33 +112,46 @@ public class InteractiveInterface {
System.out.println("1) Poisson");
System.out.println("2) Gaussian");
System.out.println("3) Exponential");
-// System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
-// System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
+ System.out.println("4) Zipf");
+
System.out.println("(Note: wider distributions are more memory intensive to match)");
System.out.print("Enter selection value: ");
input = sc.nextInt();
switch (input) {
- case 1 -> poisson = true;
+ case 1 -> {
+ BiGpairSEQ.setDistributionType(DistributionType.POISSON);
+ }
case 2 -> {
+ BiGpairSEQ.setDistributionType(DistributionType.GAUSSIAN);
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
System.out.println("(Note: wider distributions are more memory intensive to match)");
- stdDev = sc.nextDouble();
- if (stdDev <= 0.0) {
+ parameter = sc.nextDouble();
+ if (parameter <= 0.0) {
throw new InputMismatchException("Value must be positive.");
}
}
case 3 -> {
- exponential = true;
+ BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
System.out.print("Please enter lambda value for exponential distribution: ");
- lambda = sc.nextDouble();
- if (lambda <= 0.0) {
- lambda = 0.6;
- System.out.println("Value must be positive. Defaulting to 0.6.");
+ parameter = sc.nextDouble();
+ if (parameter <= 0.0) {
+ parameter = 1.4;
+ System.out.println("Value must be positive. Defaulting to 1.4.");
+ }
+ }
+ case 4 -> {
+ BiGpairSEQ.setDistributionType(DistributionType.ZIPF);
+ System.out.print("Please enter exponent value for Zipf distribution: ");
+ parameter = sc.nextDouble();
+ if (parameter <= 0.0) {
+ parameter = 1.4;
+ System.out.println("Value must be positive. Defaulting to 1.4.");
}
}
default -> {
System.out.println("Invalid input. Defaulting to exponential.");
- exponential = true;
+ parameter = 1.4;
+ BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
}
}
System.out.print("\nNumber of wells on plate: ");
@@ -226,16 +237,17 @@ public class InteractiveInterface {
assert filename != null;
Plate samplePlate;
PlateFileWriter writer;
- if(exponential){
- samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
- writer = new PlateFileWriter(filename, samplePlate);
- }
- else {
- if (poisson) {
- stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
+ DistributionType type = BiGpairSEQ.getDistributionType();
+ switch(type) {
+ case POISSON -> {
+ parameter = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
+ samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
+ writer = new PlateFileWriter(filename, samplePlate);
+ }
+ default -> {
+ samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
+ writer = new PlateFileWriter(filename, samplePlate);
}
- samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
- writer = new PlateFileWriter(filename, samplePlate);
}
System.out.println("Writing Sample Plate to file");
writer.writePlateFile();
@@ -605,12 +617,13 @@ public class InteractiveInterface {
case 3 -> {
BiGpairSEQ.setAuctionAlgorithm();
System.out.println("MWM algorithm set to auction");
+ backToOptions = true;
}
case 4 -> {
System.out.println("Scaling integer weight MWM algorithm not yet fully implemented. Sorry.");
// BiGpairSEQ.setIntegerWeightScalingAlgorithm();
// System.out.println("MWM algorithm set to integer weight scaling algorithm of Duan and Su");
- backToOptions = true;
+// backToOptions = true;
}
case 0 -> backToOptions = true;
default -> System.out.println("Invalid input");
diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java
index d88d7ee..da8ba6b 100644
--- a/src/main/java/Plate.java
+++ b/src/main/java/Plate.java
@@ -13,6 +13,11 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.core.BaseProvider;
+import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
+import org.apache.commons.rng.simple.JDKRandomWrapper;
+
import java.util.*;
public class Plate {
@@ -26,25 +31,22 @@ public class Plate {
private Integer[] populations;
private double stdDev;
private double lambda;
- boolean exponential = false;
+ private double zipfExponent;
+ private DistributionType distributionType;
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
- double dropoutRate, double stdDev_or_lambda, boolean exponential){
+ double dropoutRate, double parameter){
this.cells = cells;
this.sourceFile = cellFilename;
this.size = numWells;
this.wells = new ArrayList<>();
this.error = dropoutRate;
this.populations = populations;
- this.exponential = exponential;
- if (this.exponential) {
- this.lambda = stdDev_or_lambda;
- fillWellsExponential(cells.getCells(), this.lambda);
- }
- else {
- this.stdDev = stdDev_or_lambda;
- fillWells(cells.getCells(), this.stdDev);
- }
+ this.stdDev = parameter;
+ this.lambda = parameter;
+ this.zipfExponent = parameter;
+ this.distributionType = BiGpairSEQ.getDistributionType();
+ fillWells(cells.getCells());
}
@@ -85,9 +87,33 @@ public class Plate {
}
}
+ private void fillWellsZipf(List cells, double exponent) {
+ int numSections = populations.length;
+ int section = 0;
+ int n;
+ RejectionInversionZipfSampler zipfSampler = new RejectionInversionZipfSampler(new JDKRandomWrapper(rand), cells.size(), exponent);
+ while (section < numSections){
+ for (int i = 0; i < (size / numSections); i++) {
+ List well = new ArrayList<>();
+ for (int j = 0; j < populations[section]; j++) {
+ do {
+ n = zipfSampler.sample();
+ } while (n >= cells.size() || n < 0);
+ String[] cellToAdd = cells.get(n).clone();
+ for(int k = 0; k < cellToAdd.length; k++){
+ if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
+ cellToAdd[k] = "-1";
+ }
+ }
+ well.add(cellToAdd);
+ }
+ wells.add(well);
+ }
+ section++;
+ }
+ }
+
private void fillWellsExponential(List cells, double lambda){
- this.lambda = lambda;
- exponential = true;
int numSections = populations.length;
int section = 0;
double m;
@@ -143,6 +169,24 @@ public class Plate {
}
}
+ private void fillWells(List cells){
+ DistributionType type = BiGpairSEQ.getDistributionType();
+ switch (type) {
+ case POISSON, GAUSSIAN -> {
+ fillWells(cells, getStdDev());
+ break;
+ }
+ case EXPONENTIAL -> {
+ fillWellsExponential(cells, getLambda());
+ break;
+ }
+ case ZIPF -> {
+ fillWellsZipf(cells, getZipfExponent());
+ break;
+ }
+ }
+ }
+
public Integer[] getPopulations(){
return populations;
}
@@ -155,10 +199,12 @@ public class Plate {
return stdDev;
}
- public boolean isExponential(){return exponential;}
+ public DistributionType getDistributionType() { return distributionType;}
public double getLambda(){return lambda;}
+ public double getZipfExponent(){return zipfExponent;}
+
public double getError() {
return error;
}
diff --git a/src/main/java/PlateFileWriter.java b/src/main/java/PlateFileWriter.java
index 191ce0e..258e9c7 100644
--- a/src/main/java/PlateFileWriter.java
+++ b/src/main/java/PlateFileWriter.java
@@ -13,11 +13,13 @@ public class PlateFileWriter {
private List> wells;
private double stdDev;
private double lambda;
+ private double zipfExponent;
+ private DistributionType distributionType;
private Double error;
private String filename;
private String sourceFileName;
private Integer[] populations;
- private boolean isExponential = false;
+
public PlateFileWriter(String filename, Plate plate) {
if(!filename.matches(".*\\.csv")){
@@ -26,12 +28,17 @@ public class PlateFileWriter {
this.filename = filename;
this.sourceFileName = plate.getSourceFileName();
this.size = plate.getSize();
- this.isExponential = plate.isExponential();
- if(isExponential) {
- this.lambda = plate.getLambda();
- }
- else{
- this.stdDev = plate.getStdDev();
+ this.distributionType = plate.getDistributionType();
+ switch(distributionType) {
+ case POISSON, GAUSSIAN -> {
+ this.stdDev = plate.getStdDev();
+ }
+ case EXPONENTIAL -> {
+ this.lambda = plate.getLambda();
+ }
+ case ZIPF -> {
+ this.zipfExponent = plate.getZipfExponent();
+ }
}
this.error = plate.getError();
this.wells = plate.getWells();
@@ -95,11 +102,22 @@ public class PlateFileWriter {
printer.printComment("Plate size: " + size);
printer.printComment("Well populations: " + wellPopulationsString);
printer.printComment("Error rate: " + error);
- if(isExponential){
- printer.printComment("Lambda: " + lambda);
- }
- else {
- printer.printComment("Std. dev.: " + stdDev);
+ switch (distributionType) {
+ case POISSON -> {
+ printer.printComment("Cell frequency distribution: POISSON");
+ }
+ case GAUSSIAN -> {
+ printer.printComment("Cell frequency distribution: GAUSSIAN");
+ printer.printComment("--Standard deviation: " + stdDev);
+ }
+ case EXPONENTIAL -> {
+ printer.printComment("Cell frequency distribution: EXPONENTIAL");
+ printer.printComment("--Lambda: " + lambda);
+ }
+ case ZIPF -> {
+ printer.printComment("Cell frequency distribution: ZIPF");
+ printer.printComment("--Exponent: " + zipfExponent);
+ }
}
printer.printRecords(wellsAsStrings);
} catch(IOException ex){