Compare commits
13 Commits
161a52aa89
...
v4.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d1810c453d | ||
|
|
187401f2d6 | ||
|
|
678ce99424 | ||
|
|
c21e375303 | ||
|
|
57fe9c1619 | ||
|
|
e1888a99c6 | ||
|
|
bcf5a4c749 | ||
|
|
81d8a12765 | ||
|
|
b5c0568e22 | ||
|
|
b7597cff2a | ||
|
|
7bbeaf7dad | ||
|
|
945b967382 | ||
|
|
a43ee469ea |
1
.idea/.name
generated
Normal file
1
.idea/.name
generated
Normal file
@@ -0,0 +1 @@
|
||||
BiGpairSEQ
|
||||
27
.idea/artifacts/BiGpairSEQ_Sim_jar.xml
generated
27
.idea/artifacts/BiGpairSEQ_Sim_jar.xml
generated
@@ -1,16 +1,27 @@
|
||||
<component name="ArtifactManager">
|
||||
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
|
||||
<artifact type="jar" name="BiGpairSEQ_Sim:jar">
|
||||
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
|
||||
<root id="archive" name="BiGpairSEQ_Sim.jar">
|
||||
<element id="directory" name="META-INF">
|
||||
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
|
||||
<element id="file-copy" path="$PROJECT_DIR$/META-INF/MANIFEST.MF" />
|
||||
</element>
|
||||
<element id="module-output" name="BigPairSEQ" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
|
||||
<element id="module-output" name="BiGpairSEQ_Sim" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-sampling/1.6/commons-rng-sampling-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/26.0.2/annotations-26.0.2.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-simple/1.6/commons-rng-simple-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-core/1.6/commons-rng-core-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-client-api/1.6/commons-rng-client-api-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar" path-in-jar="/" />
|
||||
</root>
|
||||
</artifact>
|
||||
</component>
|
||||
1
.idea/compiler.xml
generated
1
.idea/compiler.xml
generated
@@ -7,6 +7,7 @@
|
||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
||||
<outputRelativeToContentRoot value="true" />
|
||||
<module name="BigPairSEQ" />
|
||||
<module name="BiGpairSEQ_Sim" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
</component>
|
||||
|
||||
25
.idea/jarRepositories.xml
generated
25
.idea/jarRepositories.xml
generated
@@ -1,20 +1,35 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="RemoteRepositoriesConfiguration">
|
||||
<remote-repository>
|
||||
<option name="id" value="my-internal-site" />
|
||||
<option name="name" value="my-internal-site" />
|
||||
<option name="url" value="https://myserver/repo" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Central Repository" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central repo" />
|
||||
<option name="name" value="central repo" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2/" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Central Repository" />
|
||||
<option name="url" value="https://repo.maven.apache.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Maven Central repository" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="jboss.community" />
|
||||
<option name="name" value="JBoss Community repository" />
|
||||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
|
||||
<option name="name" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
|
||||
<option name="url" value="https://repository.mulesoft.org/nexus/content/repositories/public/" />
|
||||
</remote-repository>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/libraries/apache_commons_csv.xml
generated
6
.idea/libraries/apache_commons_csv.xml
generated
@@ -1,8 +1,10 @@
|
||||
<component name="libraryTable">
|
||||
<library name="apache.commons.csv" type="repository">
|
||||
<properties maven-id="org.apache.commons:commons-csv:1.9.0" />
|
||||
<properties maven-id="org.apache.commons:commons-csv:1.14.0" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
4
.idea/libraries/commons_cli.xml
generated
4
.idea/libraries/commons_cli.xml
generated
@@ -1,8 +1,8 @@
|
||||
<component name="libraryTable">
|
||||
<library name="commons.cli" type="repository">
|
||||
<properties maven-id="commons-cli:commons-cli:1.5.0" />
|
||||
<properties maven-id="commons-cli:commons-cli:1.9.0" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
7
.idea/libraries/jgrapht_core.xml
generated
7
.idea/libraries/jgrapht_core.xml
generated
@@ -1,9 +1,10 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jgrapht.core" type="repository">
|
||||
<properties maven-id="org.jgrapht:jgrapht-core:1.5.1" />
|
||||
<properties maven-id="org.jgrapht:jgrapht-core:1.5.2" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
15
.idea/libraries/jgrapht_io.xml
generated
15
.idea/libraries/jgrapht_io.xml
generated
@@ -1,13 +1,14 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jgrapht.io" type="repository">
|
||||
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
|
||||
<properties maven-id="org.jgrapht:jgrapht-io:1.5.2" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
44
pom.xml
44
pom.xml
@@ -5,7 +5,7 @@
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.example</groupId>
|
||||
<artifactId>TCellSim</artifactId>
|
||||
<artifactId>BiGpairSEQ_Sim</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,8 +26,48 @@
|
||||
<version>RELEASE</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-rng-simple -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-rng-simple</artifactId>
|
||||
<version>1.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-rng-sampling</artifactId>
|
||||
<version>1.6</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-csv</artifactId>
|
||||
<version>1.14.0</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-core -->
|
||||
<dependency>
|
||||
<groupId>org.jgrapht</groupId>
|
||||
<artifactId>jgrapht-core</artifactId>
|
||||
<version>1.5.2</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-io -->
|
||||
<dependency>
|
||||
<groupId>org.jgrapht</groupId>
|
||||
<artifactId>jgrapht-io</artifactId>
|
||||
<version>1.5.2</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jheaps/jheaps -->
|
||||
<dependency>
|
||||
<groupId>org.jheaps</groupId>
|
||||
<artifactId>jheaps</artifactId>
|
||||
<version>0.14</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
<version>1.9.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>11</maven.compiler.source>
|
||||
<maven.compiler.target>11</maven.compiler.target>
|
||||
|
||||
@@ -156,6 +156,8 @@ usage: BiGpairSEQ_Sim.jar -plate
|
||||
-c,--cell-file <filename> The cell sample file to use
|
||||
-d,--dropout-rate <rate> The sequence dropout rate due to
|
||||
amplification error. (0.0 - 1.0)
|
||||
-exp <value> If using -zipf flag, exponent value for
|
||||
distribution
|
||||
-exponential Use an exponential distribution for cell
|
||||
sample
|
||||
-gaussian Use a Gaussian distribution for cell sample
|
||||
@@ -173,6 +175,7 @@ usage: BiGpairSEQ_Sim.jar -plate
|
||||
-stddev <value> If using -gaussian flag, standard deviation
|
||||
for distribution
|
||||
-w,--wells <number> The number of wells on the sample plate
|
||||
-zipf Use a Zipf distribution for cell sample
|
||||
|
||||
usage: BiGpairSEQ_Sim.jar -graph
|
||||
-c,--cell-file <filename> Cell sample file to use for
|
||||
@@ -234,7 +237,6 @@ usage: BiGpairSEQ_Sim.jar -match
|
||||
to stdout.
|
||||
-pv,--p-value (Optional) Calculate p-values for sequence
|
||||
pairs.
|
||||
|
||||
```
|
||||
|
||||
### INTERACTIVE INTERFACE
|
||||
@@ -340,6 +342,8 @@ Options when making a Sample Plate file:
|
||||
* Standard deviation size
|
||||
* Exponential
|
||||
* Lambda value
|
||||
* Zipf
|
||||
* Exponent value
|
||||
* Total number of wells on the plate
|
||||
* Well populations random or fixed
|
||||
* If random, minimum and maximum population sizes
|
||||
@@ -630,6 +634,7 @@ a means of exploring some very beautiful math.
|
||||
|
||||
## TODO
|
||||
|
||||
* Update CLI option text in this readme to include Zipf distribution options
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
|
||||
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
|
||||
|
||||
@@ -13,8 +13,9 @@ public class BiGpairSEQ {
|
||||
private static boolean cacheCells = false;
|
||||
private static boolean cachePlate = false;
|
||||
private static boolean cacheGraph = false;
|
||||
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
|
||||
private static AlgorithmType matchingAlgorithmType = AlgorithmType.HUNGARIAN;
|
||||
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
|
||||
private static DistributionType distributionType = DistributionType.ZIPF;
|
||||
private static boolean outputBinary = true;
|
||||
private static boolean outputGraphML = false;
|
||||
private static boolean calculatePValue = false;
|
||||
@@ -60,6 +61,10 @@ public class BiGpairSEQ {
|
||||
return cellFilename;
|
||||
}
|
||||
|
||||
public static DistributionType getDistributionType() {return distributionType;}
|
||||
|
||||
public static void setDistributionType(DistributionType type) {distributionType = type;}
|
||||
|
||||
public static Plate getPlateInMemory() {
|
||||
return plateInMemory;
|
||||
}
|
||||
@@ -161,13 +166,13 @@ public class BiGpairSEQ {
|
||||
return priorityQueueHeapType;
|
||||
}
|
||||
|
||||
public static AlgorithmType getMatchingAlgoritmType() { return matchingAlgoritmType; }
|
||||
public static AlgorithmType getMatchingAlgorithmType() { return matchingAlgorithmType; }
|
||||
|
||||
public static void setHungarianAlgorithm() { matchingAlgoritmType = AlgorithmType.HUNGARIAN; }
|
||||
public static void setHungarianAlgorithm() { matchingAlgorithmType = AlgorithmType.HUNGARIAN; }
|
||||
|
||||
public static void setIntegerWeightScalingAlgorithm() { matchingAlgoritmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
|
||||
public static void setIntegerWeightScalingAlgorithm() { matchingAlgorithmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
|
||||
|
||||
public static void setAuctionAlgorithm() { matchingAlgoritmType = AlgorithmType.AUCTION; }
|
||||
public static void setAuctionAlgorithm() { matchingAlgorithmType = AlgorithmType.AUCTION; }
|
||||
|
||||
public static void setPairingHeap() {
|
||||
priorityQueueHeapType = HeapType.PAIRING;
|
||||
|
||||
@@ -123,16 +123,20 @@ public class CommandLineInterface {
|
||||
Plate plate;
|
||||
if (line.hasOption("poisson")) {
|
||||
Double stdDev = Math.sqrt(numWells);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
|
||||
}
|
||||
else if (line.hasOption("gaussian")) {
|
||||
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
|
||||
}
|
||||
else if (line.hasOption("zipf")) {
|
||||
Double zipfExponent = Double.parseDouble(line.getOptionValue("exp"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, zipfExponent);
|
||||
}
|
||||
else {
|
||||
assert line.hasOption("exponential");
|
||||
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda);
|
||||
}
|
||||
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
|
||||
writer.writePlateFile();
|
||||
@@ -340,9 +344,13 @@ public class CommandLineInterface {
|
||||
Option exponential = Option.builder("exponential")
|
||||
.desc("Use an exponential distribution for cell sample")
|
||||
.build();
|
||||
Option zipf = Option.builder("zipf")
|
||||
.desc("Use a Zipf distribution for cell sample")
|
||||
.build();
|
||||
distributions.addOption(poisson);
|
||||
distributions.addOption(gaussian);
|
||||
distributions.addOption(exponential);
|
||||
distributions.addOption(zipf);
|
||||
//options group for statistical distribution parameters
|
||||
OptionGroup statParams = new OptionGroup();// add this to plate options
|
||||
Option stdDev = Option.builder("stddev")
|
||||
@@ -355,6 +363,11 @@ public class CommandLineInterface {
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
Option zipfExponent = Option.builder("exp")
|
||||
.desc("If using -zipf flag, exponent value for distribution")
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
statParams.addOption(stdDev);
|
||||
statParams.addOption(lambda);
|
||||
//Option group for random plate or set populations
|
||||
@@ -386,6 +399,7 @@ public class CommandLineInterface {
|
||||
plateOptions.addOptionGroup(statParams);
|
||||
plateOptions.addOptionGroup(wellPopOptions);
|
||||
plateOptions.addOption(dropoutRate);
|
||||
plateOptions.addOption(zipfExponent);
|
||||
plateOptions.addOption(outputFileOption());
|
||||
return plateOptions;
|
||||
}
|
||||
|
||||
6
src/main/java/DistributionType.java
Normal file
6
src/main/java/DistributionType.java
Normal file
@@ -0,0 +1,6 @@
|
||||
/**
 * Frequency distributions that can be selected when sampling cells to fill
 * the wells of a simulated sample plate.
 *
 * <p>The constant order is part of the declared interface; do not reorder.
 */
public enum DistributionType {
    /** Poisson-like sampling. */
    POISSON,

    /** Gaussian sampling, parameterised by a standard deviation. */
    GAUSSIAN,

    /** Exponential sampling, parameterised by a lambda value. */
    EXPONENTIAL,

    /** Zipf sampling, parameterised by an exponent value. */
    ZIPF
}
|
||||
@@ -1,72 +1,54 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
public interface GraphModificationFunctions {
|
||||
|
||||
//remove over- and under-weight edges, return removed edges
|
||||
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
int low, int high, boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
|
||||
if(saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, weight);
|
||||
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
|
||||
removedEdges.put(e, vertices);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
edgesToRemove.add(e);
|
||||
}
|
||||
}
|
||||
edgesToRemove.forEach(graph::removeEdge);
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy, return removed edges
|
||||
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
static Map<DefaultWeightedEdge, Vertex[]> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Integer maxOccupancyDifference, boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
|
||||
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
if (saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, weight);
|
||||
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
|
||||
removedEdges.put(e, vertices);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
edgesToRemove.add(e);
|
||||
}
|
||||
}
|
||||
edgesToRemove.forEach(graph::removeEdge);
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
|
||||
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Integer minOverlapPercent,
|
||||
boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
|
||||
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
|
||||
@@ -74,22 +56,13 @@ public interface GraphModificationFunctions {
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
if (saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer intWeight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, intWeight);
|
||||
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
|
||||
removedEdges.put(e, vertices);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
edgesToRemove.add(e);
|
||||
}
|
||||
}
|
||||
edgesToRemove.forEach(graph::removeEdge);
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
@@ -126,10 +99,10 @@ public interface GraphModificationFunctions {
|
||||
}
|
||||
|
||||
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Map<Vertex[], Integer> removedEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
|
||||
graph.setEdgeWeight(e, removedEdges.get(edge));
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges) {
|
||||
for (DefaultWeightedEdge edge : removedEdges.keySet()) {
|
||||
Vertex[] vertices = removedEdges.get(edge);
|
||||
graph.addEdge(vertices[0], vertices[1], edge);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -89,14 +89,12 @@ public class InteractiveInterface {
|
||||
private static void makePlate() {
|
||||
String cellFile = null;
|
||||
String filename = null;
|
||||
Double stdDev = 0.0;
|
||||
Double parameter = 0.0;
|
||||
Integer numWells = 0;
|
||||
Integer numSections;
|
||||
Integer[] populations = {1};
|
||||
Double dropOutRate = 0.0;
|
||||
boolean poisson = false;
|
||||
boolean exponential = false;
|
||||
double lambda = 1.5;
|
||||
;
|
||||
try {
|
||||
System.out.println("\nSimulated sample plates consist of:");
|
||||
System.out.println("* a number of wells");
|
||||
@@ -114,33 +112,46 @@ public class InteractiveInterface {
|
||||
System.out.println("1) Poisson");
|
||||
System.out.println("2) Gaussian");
|
||||
System.out.println("3) Exponential");
|
||||
// System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
|
||||
// System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
|
||||
System.out.println("4) Zipf");
|
||||
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
System.out.print("Enter selection value: ");
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> poisson = true;
|
||||
case 1 -> {
|
||||
BiGpairSEQ.setDistributionType(DistributionType.POISSON);
|
||||
}
|
||||
case 2 -> {
|
||||
BiGpairSEQ.setDistributionType(DistributionType.GAUSSIAN);
|
||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
stdDev = sc.nextDouble();
|
||||
if (stdDev <= 0.0) {
|
||||
parameter = sc.nextDouble();
|
||||
if (parameter <= 0.0) {
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
}
|
||||
case 3 -> {
|
||||
exponential = true;
|
||||
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
|
||||
System.out.print("Please enter lambda value for exponential distribution: ");
|
||||
lambda = sc.nextDouble();
|
||||
if (lambda <= 0.0) {
|
||||
lambda = 0.6;
|
||||
System.out.println("Value must be positive. Defaulting to 0.6.");
|
||||
parameter = sc.nextDouble();
|
||||
if (parameter <= 0.0) {
|
||||
parameter = 1.4;
|
||||
System.out.println("Value must be positive. Defaulting to 1.4.");
|
||||
}
|
||||
}
|
||||
case 4 -> {
|
||||
BiGpairSEQ.setDistributionType(DistributionType.ZIPF);
|
||||
System.out.print("Please enter exponent value for Zipf distribution: ");
|
||||
parameter = sc.nextDouble();
|
||||
if (parameter <= 0.0) {
|
||||
parameter = 1.4;
|
||||
System.out.println("Value must be positive. Defaulting to 1.4.");
|
||||
}
|
||||
}
|
||||
default -> {
|
||||
System.out.println("Invalid input. Defaulting to exponential.");
|
||||
exponential = true;
|
||||
parameter = 1.4;
|
||||
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
|
||||
}
|
||||
}
|
||||
System.out.print("\nNumber of wells on plate: ");
|
||||
@@ -226,17 +237,18 @@ public class InteractiveInterface {
|
||||
assert filename != null;
|
||||
Plate samplePlate;
|
||||
PlateFileWriter writer;
|
||||
if(exponential){
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
|
||||
DistributionType type = BiGpairSEQ.getDistributionType();
|
||||
switch(type) {
|
||||
case POISSON -> {
|
||||
parameter = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
else {
|
||||
if (poisson) {
|
||||
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
}
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
|
||||
default -> {
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
}
|
||||
System.out.println("Writing Sample Plate to file");
|
||||
writer.writePlateFile();
|
||||
System.out.println("Sample Plate written to file: " + filename);
|
||||
@@ -605,12 +617,13 @@ public class InteractiveInterface {
|
||||
case 3 -> {
|
||||
BiGpairSEQ.setAuctionAlgorithm();
|
||||
System.out.println("MWM algorithm set to auction");
|
||||
backToOptions = true;
|
||||
}
|
||||
case 4 -> {
|
||||
System.out.println("Scaling integer weight MWM algorithm not yet fully implemented. Sorry.");
|
||||
// BiGpairSEQ.setIntegerWeightScalingAlgorithm();
|
||||
// System.out.println("MWM algorithm set to integer weight scaling algorithm of Duan and Su");
|
||||
backToOptions = true;
|
||||
// backToOptions = true;
|
||||
}
|
||||
case 0 -> backToOptions = true;
|
||||
default -> System.out.println("Invalid input");
|
||||
|
||||
@@ -13,6 +13,10 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||
*/
|
||||
|
||||
|
||||
|
||||
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
|
||||
import org.apache.commons.rng.simple.JDKRandomWrapper;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class Plate {
|
||||
@@ -26,25 +30,22 @@ public class Plate {
|
||||
private Integer[] populations;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
boolean exponential = false;
|
||||
private double zipfExponent;
|
||||
private DistributionType distributionType;
|
||||
|
||||
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
|
||||
double dropoutRate, double stdDev_or_lambda, boolean exponential){
|
||||
double dropoutRate, double parameter){
|
||||
this.cells = cells;
|
||||
this.sourceFile = cellFilename;
|
||||
this.size = numWells;
|
||||
this.wells = new ArrayList<>();
|
||||
this.error = dropoutRate;
|
||||
this.populations = populations;
|
||||
this.exponential = exponential;
|
||||
if (this.exponential) {
|
||||
this.lambda = stdDev_or_lambda;
|
||||
fillWellsExponential(cells.getCells(), this.lambda);
|
||||
}
|
||||
else {
|
||||
this.stdDev = stdDev_or_lambda;
|
||||
fillWells(cells.getCells(), this.stdDev);
|
||||
}
|
||||
this.stdDev = parameter;
|
||||
this.lambda = parameter;
|
||||
this.zipfExponent = parameter;
|
||||
this.distributionType = BiGpairSEQ.getDistributionType();
|
||||
fillWells(cells.getCells());
|
||||
}
|
||||
|
||||
|
||||
@@ -85,9 +86,33 @@ public class Plate {
|
||||
}
|
||||
}
|
||||
|
||||
private void fillWellsZipf(List<String[]> cells, double exponent) {
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
int n;
|
||||
RejectionInversionZipfSampler zipfSampler = new RejectionInversionZipfSampler(new JDKRandomWrapper(rand), cells.size(), exponent);
|
||||
while (section < numSections){
|
||||
for (int i = 0; i < (size / numSections); i++) {
|
||||
List<String[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < populations[section]; j++) {
|
||||
do {
|
||||
n = zipfSampler.sample();
|
||||
} while (n >= cells.size() || n < 0);
|
||||
String[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
|
||||
cellToAdd[k] = "-1";
|
||||
}
|
||||
}
|
||||
well.add(cellToAdd);
|
||||
}
|
||||
wells.add(well);
|
||||
}
|
||||
section++;
|
||||
}
|
||||
}
|
||||
|
||||
private void fillWellsExponential(List<String[]> cells, double lambda){
|
||||
this.lambda = lambda;
|
||||
exponential = true;
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
double m;
|
||||
@@ -143,6 +168,24 @@ public class Plate {
|
||||
}
|
||||
}
|
||||
|
||||
private void fillWells(List<String[]> cells){
|
||||
DistributionType type = BiGpairSEQ.getDistributionType();
|
||||
switch (type) {
|
||||
case POISSON, GAUSSIAN -> {
|
||||
fillWells(cells, getStdDev());
|
||||
break;
|
||||
}
|
||||
case EXPONENTIAL -> {
|
||||
fillWellsExponential(cells, getLambda());
|
||||
break;
|
||||
}
|
||||
case ZIPF -> {
|
||||
fillWellsZipf(cells, getZipfExponent());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Integer[] getPopulations(){
|
||||
return populations;
|
||||
}
|
||||
@@ -155,10 +198,12 @@ public class Plate {
|
||||
return stdDev;
|
||||
}
|
||||
|
||||
public boolean isExponential(){return exponential;}
|
||||
/** Returns the distribution type stored in this plate's {@code distributionType} field. */
public DistributionType getDistributionType() { return distributionType;}
|
||||
|
||||
/**
 * Returns the stored lambda value.
 * NOTE(review): the constructor visible in this diff assigns the same generic parameter to
 * stdDev, lambda and zipfExponent, so this value is presumably only meaningful for
 * EXPONENTIAL plates — confirm against callers.
 */
public double getLambda(){return lambda;}
|
||||
|
||||
/**
 * Returns the stored Zipf exponent.
 * NOTE(review): the constructor visible in this diff assigns the same generic parameter to
 * stdDev, lambda and zipfExponent, so this value is presumably only meaningful for
 * ZIPF plates — confirm against callers.
 */
public double getZipfExponent(){return zipfExponent;}
|
||||
|
||||
public double getError() {
|
||||
return error;
|
||||
}
|
||||
@@ -196,7 +241,7 @@ public class Plate {
|
||||
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
|
||||
}
|
||||
//The specific misread hasn't happened before
|
||||
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) {
|
||||
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).isEmpty()) {
|
||||
//The misread doesn't collide with a real sequence already on the plate and some sequences have already been read
|
||||
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
|
||||
StringBuilder spurious = new StringBuilder(currentSequence);
|
||||
|
||||
@@ -13,11 +13,13 @@ public class PlateFileWriter {
|
||||
private List<List<String[]>> wells;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
private double zipfExponent;
|
||||
private DistributionType distributionType;
|
||||
private Double error;
|
||||
private String filename;
|
||||
private String sourceFileName;
|
||||
private Integer[] populations;
|
||||
private boolean isExponential = false;
|
||||
|
||||
|
||||
public PlateFileWriter(String filename, Plate plate) {
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
@@ -26,12 +28,17 @@ public class PlateFileWriter {
|
||||
this.filename = filename;
|
||||
this.sourceFileName = plate.getSourceFileName();
|
||||
this.size = plate.getSize();
|
||||
this.isExponential = plate.isExponential();
|
||||
if(isExponential) {
|
||||
this.distributionType = plate.getDistributionType();
|
||||
switch(distributionType) {
|
||||
case POISSON, GAUSSIAN -> {
|
||||
this.stdDev = plate.getStdDev();
|
||||
}
|
||||
case EXPONENTIAL -> {
|
||||
this.lambda = plate.getLambda();
|
||||
}
|
||||
else{
|
||||
this.stdDev = plate.getStdDev();
|
||||
case ZIPF -> {
|
||||
this.zipfExponent = plate.getZipfExponent();
|
||||
}
|
||||
}
|
||||
this.error = plate.getError();
|
||||
this.wells = plate.getWells();
|
||||
@@ -95,11 +102,22 @@ public class PlateFileWriter {
|
||||
printer.printComment("Plate size: " + size);
|
||||
printer.printComment("Well populations: " + wellPopulationsString);
|
||||
printer.printComment("Error rate: " + error);
|
||||
if(isExponential){
|
||||
printer.printComment("Lambda: " + lambda);
|
||||
switch (distributionType) {
|
||||
case POISSON -> {
|
||||
printer.printComment("Cell frequency distribution: POISSON");
|
||||
}
|
||||
case GAUSSIAN -> {
|
||||
printer.printComment("Cell frequency distribution: GAUSSIAN");
|
||||
printer.printComment("--Standard deviation: " + stdDev);
|
||||
}
|
||||
case EXPONENTIAL -> {
|
||||
printer.printComment("Cell frequency distribution: EXPONENTIAL");
|
||||
printer.printComment("--Lambda: " + lambda);
|
||||
}
|
||||
case ZIPF -> {
|
||||
printer.printComment("Cell frequency distribution: ZIPF");
|
||||
printer.printComment("--Exponent: " + zipfExponent);
|
||||
}
|
||||
else {
|
||||
printer.printComment("Std. dev.: " + stdDev);
|
||||
}
|
||||
printer.printRecords(wellsAsStrings);
|
||||
} catch(IOException ex){
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
||||
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jheaps.tree.FibonacciHeap;
|
||||
import org.jheaps.tree.PairingHeap;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
@@ -70,58 +68,102 @@ public class Simulator implements GraphModificationFunctions {
|
||||
if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);}
|
||||
}
|
||||
|
||||
//construct the graph. For simplicity, going to make
|
||||
if(verbose){System.out.println("Making vertex maps");}
|
||||
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
//distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
//subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||
int vertexStartValue = 0;
|
||||
//keys are sequential integer vertices, values are alphas
|
||||
Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
|
||||
//new start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
vertexStartValue += plateAtoVMap.size();
|
||||
//keys are betas, values are sequential integers
|
||||
Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
|
||||
if(verbose){System.out.println("Vertex maps made");}
|
||||
//make adjacency matrix for bipartite graph generator
|
||||
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
||||
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
||||
if(verbose){System.out.println("Making adjacency matrix");}
|
||||
double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
|
||||
fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
|
||||
if(verbose){System.out.println("Adjacency matrix made");}
|
||||
/*
|
||||
* The commented out code below works beautifully for small enough graphs. However, after implementing a
|
||||
* Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that
|
||||
* this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix.
|
||||
* So I'm going to attempt to build this graph directly and see if that is less memory intensive
|
||||
*/
|
||||
// //construct the graph. For simplicity, going to make
|
||||
// if(verbose){System.out.println("Making vertex maps");}
|
||||
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
// //distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
// //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
// //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
// //subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||
// int vertexStartValue = 0;
|
||||
// //keys are sequential integer vertices, values are alphas
|
||||
// Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
|
||||
// //new start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
// vertexStartValue += plateAtoVMap.size();
|
||||
// //keys are betas, values are sequential integers
|
||||
// Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
|
||||
// if(verbose){System.out.println("Vertex maps made");}
|
||||
// //make adjacency matrix for bipartite graph generator
|
||||
// //(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
||||
// //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
||||
// if(verbose){System.out.println("Making adjacency matrix");}
|
||||
// double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
|
||||
// fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
|
||||
// if(verbose){System.out.println("Adjacency matrix made");}
|
||||
// //make bipartite graph
|
||||
// if(verbose){System.out.println("Making bipartite weighted graph");}
|
||||
// //the graph object
|
||||
// SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
|
||||
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
// //the graph generator
|
||||
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
// //the list of alpha vertices
|
||||
// List<Vertex> alphaVertices = new ArrayList<>();
|
||||
// for (String seq : plateAtoVMap.keySet()) {
|
||||
// Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
|
||||
// alphaVertices.add(alphaVertex);
|
||||
// }
|
||||
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
// Collections.sort(alphaVertices);
|
||||
// //Add ordered list of vertices to the graph
|
||||
// graphGenerator.first(alphaVertices);
|
||||
// //the list of beta vertices
|
||||
// List<Vertex> betaVertices = new ArrayList<>();
|
||||
// for (String seq : plateBtoVMap.keySet()) {
|
||||
// Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
|
||||
// betaVertices.add(betaVertex);
|
||||
// }
|
||||
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
// Collections.sort(betaVertices);
|
||||
// //Add ordered list of vertices to the graph
|
||||
// graphGenerator.second(betaVertices);
|
||||
// //use adjacency matrix of weight created previously
|
||||
// graphGenerator.weights(weights);
|
||||
// graphGenerator.generateGraph(graph);
|
||||
|
||||
//make bipartite graph
|
||||
if(verbose){System.out.println("Making bipartite weighted graph");}
|
||||
//the graph object
|
||||
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
//the graph generator
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
//the list of alpha vertices
|
||||
int vertexLabelValue = 0;
|
||||
//create and add alpha sequence vertices
|
||||
List<Vertex> alphaVertices = new ArrayList<>();
|
||||
for (String seq : plateAtoVMap.keySet()) {
|
||||
Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
|
||||
alphaVertices.add(alphaVertex);
|
||||
for (Map.Entry<String, SequenceRecord> entry: alphaSequences.entrySet()) {
|
||||
alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
|
||||
vertexLabelValue++;
|
||||
}
|
||||
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
Collections.sort(alphaVertices);
|
||||
//Add ordered list of vertices to the graph
|
||||
graphGenerator.first(alphaVertices);
|
||||
//the list of beta vertices
|
||||
alphaVertices.forEach(graph::addVertex);
|
||||
//add beta sequence vertices
|
||||
List<Vertex> betaVertices = new ArrayList<>();
|
||||
for (String seq : plateBtoVMap.keySet()) {
|
||||
Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
|
||||
betaVertices.add(betaVertex);
|
||||
for (Map.Entry<String, SequenceRecord> entry: betaSequences.entrySet()) {
|
||||
betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
|
||||
vertexLabelValue++;
|
||||
}
|
||||
betaVertices.forEach(graph::addVertex);
|
||||
//add edges
|
||||
for(Vertex a: alphaVertices) {
|
||||
for(Vertex b: betaVertices) {
|
||||
Set<Integer> sharedWells = new HashSet<>(a.getRecord().getWells());
|
||||
sharedWells.retainAll(b.getRecord().getWells());
|
||||
double weight = (double) sharedWells.size();
|
||||
if (weight != 0.0) {
|
||||
System.out.println("Edge weight: " + weight);
|
||||
DefaultWeightedEdge edge = graph.addEdge(a, b);
|
||||
graph.setEdgeWeight(edge, weight);
|
||||
}
|
||||
else {
|
||||
System.out.println("No overlap");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
Collections.sort(betaVertices);
|
||||
//Add ordered list of vertices to the graph
|
||||
graphGenerator.second(betaVertices);
|
||||
//use adjacency matrix of weight created previously
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
if(verbose){System.out.println("Graph created");}
|
||||
//stop timing
|
||||
Instant stop = Instant.now();
|
||||
@@ -145,7 +187,7 @@ public class Simulator implements GraphModificationFunctions {
|
||||
Integer minOverlapPercent, boolean verbose, boolean calculatePValue) {
|
||||
Instant start = Instant.now();
|
||||
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
boolean saveEdges = BiGpairSEQ.cacheGraph();
|
||||
int numWells = data.getNumWells();
|
||||
//Integer alphaCount = data.getAlphaCount();
|
||||
@@ -163,6 +205,7 @@ public class Simulator implements GraphModificationFunctions {
|
||||
}
|
||||
Integer graphAlphaCount = alphas.size();
|
||||
Integer graphBetaCount = betas.size();
|
||||
Integer graphEdgeCount = graph.edgeSet().size();
|
||||
|
||||
//remove edges with weights outside given overlap thresholds, add those to removed edge list
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
||||
@@ -182,12 +225,14 @@ public class Simulator implements GraphModificationFunctions {
|
||||
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
||||
"removed");}
|
||||
|
||||
Integer filteredGraphEdgeCount = graph.edgeSet().size();
|
||||
|
||||
//Find Maximum Weight Matching
|
||||
if(verbose){System.out.println("Finding maximum weight matching");}
|
||||
//The matching object
|
||||
MatchingAlgorithm<Vertex, DefaultWeightedEdge> maxWeightMatching;
|
||||
//Determine algorithm type
|
||||
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgoritmType();
|
||||
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType();
|
||||
switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way
|
||||
case AUCTION -> {
|
||||
//create a new MaximumIntegerWeightBipartiteAuctionMatching
|
||||
@@ -333,8 +378,10 @@ public class Simulator implements GraphModificationFunctions {
|
||||
metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString());
|
||||
metadata.put("total alphas read from plate", data.getAlphaCount().toString());
|
||||
metadata.put("total betas read from plate", data.getBetaCount().toString());
|
||||
metadata.put("initial edges in graph", graphEdgeCount.toString());
|
||||
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
|
||||
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
|
||||
metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString());
|
||||
metadata.put("high overlap threshold for pairing", highThreshold.toString());
|
||||
metadata.put("low overlap threshold for pairing", lowThreshold.toString());
|
||||
metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString());
|
||||
|
||||
Reference in New Issue
Block a user