13 Commits

Author  SHA1  Message  Date
eugenefischer  d1810c453d  Even more efficient graph creation (my initial scheme, but this time without accidentally changing what's in the sequence records)  2025-04-10 15:03:10 -05:00
eugenefischer  187401f2d6  More efficient graph creation  2025-04-10 14:06:11 -05:00
eugenefischer  678ce99424  iterate over vertex wells correctly  2025-04-10 13:34:04 -05:00
eugenefischer  c21e375303  fix concurrent modification bug  2025-04-10 13:33:47 -05:00
eugenefischer  57fe9c1619  Update graph modification functions to work with edges directly  2025-04-10 12:42:19 -05:00
eugenefischer  e1888a99c6  refactor to construct the bipartite graph directly, rather than by using an adjacency matrix and a graph generator.  2025-04-10 11:47:15 -05:00
eugenefischer  bcf5a4c749  change artifact details  2025-04-10 11:05:08 -05:00
eugenefischer  81d8a12765  dependency update stuff  2025-04-10 10:54:05 -05:00
eugenefischer  b5c0568e22  Add dependencies  2025-04-10 10:53:42 -05:00
eugenefischer  b7597cff2a  update readme and add Zipf exponent option to CLI  2025-04-09 16:16:46 -05:00
eugenefischer  7bbeaf7dad  update readme  2025-04-09 14:40:49 -05:00
eugenefischer  945b967382  update readme  2025-04-09 14:39:46 -05:00
eugenefischer  a43ee469ea  implement Zipf distribution  2025-04-09 14:32:02 -05:00
18 changed files with 390 additions and 192 deletions

1  .idea/.name  (generated, new file)

@@ -0,0 +1 @@
BiGpairSEQ


@@ -1,16 +1,27 @@
<component name="ArtifactManager">
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
<artifact type="jar" name="BiGpairSEQ_Sim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
<root id="archive" name="BiGpairSEQ_Sim.jar">
<element id="directory" name="META-INF">
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
<element id="file-copy" path="$PROJECT_DIR$/META-INF/MANIFEST.MF" />
</element>
<element id="module-output" name="BigPairSEQ" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
<element id="module-output" name="BiGpairSEQ_Sim" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-sampling/1.6/commons-rng-sampling-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/26.0.2/annotations-26.0.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-simple/1.6/commons-rng-simple-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-core/1.6/commons-rng-core-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-client-api/1.6/commons-rng-client-api-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar" path-in-jar="/" />
</root>
</artifact>
</component>

1  .idea/compiler.xml  (generated)

@@ -7,6 +7,7 @@
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="BigPairSEQ" />
<module name="BiGpairSEQ_Sim" />
</profile>
</annotationProcessing>
</component>


@@ -1,20 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="my-internal-site" />
<option name="name" value="my-internal-site" />
<option name="url" value="https://myserver/repo" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central repo" />
<option name="name" value="central repo" />
<option name="url" value="https://repo1.maven.org/maven2/" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
<remote-repository>
<option name="id" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
<option name="name" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
<option name="url" value="https://repository.mulesoft.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>


@@ -1,8 +1,10 @@
<component name="libraryTable">
<library name="apache.commons.csv" type="repository">
<properties maven-id="org.apache.commons:commons-csv:1.9.0" />
<properties maven-id="org.apache.commons:commons-csv:1.14.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />


@@ -1,8 +1,8 @@
<component name="libraryTable">
<library name="commons.cli" type="repository">
<properties maven-id="commons-cli:commons-cli:1.5.0" />
<properties maven-id="commons-cli:commons-cli:1.9.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />


@@ -1,9 +1,10 @@
<component name="libraryTable">
<library name="jgrapht.core" type="repository">
<properties maven-id="org.jgrapht:jgrapht-core:1.5.1" />
<properties maven-id="org.jgrapht:jgrapht-core:1.5.2" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />


@@ -1,13 +1,14 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<properties maven-id="org.jgrapht:jgrapht-io:1.5.2" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

44  pom.xml

@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>TCellSim</artifactId>
<artifactId>BiGpairSEQ_Sim</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
<plugins>
@@ -26,8 +26,48 @@
<version>RELEASE</version>
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-rng-simple -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-simple</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-sampling</artifactId>
<version>1.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.14.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-core -->
<dependency>
<groupId>org.jgrapht</groupId>
<artifactId>jgrapht-core</artifactId>
<version>1.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-io -->
<dependency>
<groupId>org.jgrapht</groupId>
<artifactId>jgrapht-io</artifactId>
<version>1.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jheaps/jheaps -->
<dependency>
<groupId>org.jheaps</groupId>
<artifactId>jheaps</artifactId>
<version>0.14</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.9.0</version>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>

README

@@ -156,6 +156,8 @@ usage: BiGpairSEQ_Sim.jar -plate
-c,--cell-file <filename> The cell sample file to use
-d,--dropout-rate <rate> The sequence dropout rate due to
amplification error. (0.0 - 1.0)
-exp <value> If using -zipf flag, exponent value for
distribution
-exponential Use an exponential distribution for cell
sample
-gaussian Use a Gaussian distribution for cell sample
@@ -173,6 +175,7 @@ usage: BiGpairSEQ_Sim.jar -plate
-stddev <value> If using -gaussian flag, standard deviation
for distrbution
-w,--wells <number> The number of wells on the sample plate
-zipf Use a Zipf distribution for cell sample
usage: BiGpairSEQ_Sim.jar -graph
-c,--cell-file <filename> Cell sample file to use for
@@ -234,7 +237,6 @@ usage: BiGpairSEQ_Sim.jar -match
to stdout.
-pv,--p-value (Optional) Calculate p-values for sequence
pairs.
```
### INTERACTIVE INTERFACE
@@ -340,6 +342,8 @@ Options when making a Sample Plate file:
* Standard deviation size
* Exponential
* Lambda value
* Zipf
* Exponent value
* Total number of wells on the plate
* Well populations random or fixed
* If random, minimum and maximum population sizes
@@ -630,6 +634,7 @@ a means of exploring some very beautiful math.
## TODO
* Update CLI option text in this readme to include Zipf distribution options
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~

BiGpairSEQ.java

@@ -13,8 +13,9 @@ public class BiGpairSEQ {
private static boolean cacheCells = false;
private static boolean cachePlate = false;
private static boolean cacheGraph = false;
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
private static AlgorithmType matchingAlgorithmType = AlgorithmType.HUNGARIAN;
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
private static DistributionType distributionType = DistributionType.ZIPF;
private static boolean outputBinary = true;
private static boolean outputGraphML = false;
private static boolean calculatePValue = false;
@@ -60,6 +61,10 @@ public class BiGpairSEQ {
return cellFilename;
}
public static DistributionType getDistributionType() {return distributionType;}
public static void setDistributionType(DistributionType type) {distributionType = type;}
public static Plate getPlateInMemory() {
return plateInMemory;
}
@@ -161,13 +166,13 @@ public class BiGpairSEQ {
return priorityQueueHeapType;
}
public static AlgorithmType getMatchingAlgoritmType() { return matchingAlgoritmType; }
public static AlgorithmType getMatchingAlgorithmType() { return matchingAlgorithmType; }
public static void setHungarianAlgorithm() { matchingAlgoritmType = AlgorithmType.HUNGARIAN; }
public static void setHungarianAlgorithm() { matchingAlgorithmType = AlgorithmType.HUNGARIAN; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgoritmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgorithmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setAuctionAlgorithm() { matchingAlgoritmType = AlgorithmType.AUCTION; }
public static void setAuctionAlgorithm() { matchingAlgorithmType = AlgorithmType.AUCTION; }
public static void setPairingHeap() {
priorityQueueHeapType = HeapType.PAIRING;

CommandLineInterface.java

@@ -123,16 +123,20 @@ public class CommandLineInterface {
Plate plate;
if (line.hasOption("poisson")) {
Double stdDev = Math.sqrt(numWells);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
}
else if (line.hasOption("gaussian")) {
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
}
else if (line.hasOption("zipf")) {
Double zipfExponent = Double.parseDouble(line.getOptionValue("exp"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, zipfExponent);
}
else {
assert line.hasOption("exponential");
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda);
}
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
writer.writePlateFile();
@@ -340,9 +344,13 @@ public class CommandLineInterface {
Option exponential = Option.builder("exponential")
.desc("Use an exponential distribution for cell sample")
.build();
Option zipf = Option.builder("zipf")
.desc("Use a Zipf distribution for cell sample")
.build();
distributions.addOption(poisson);
distributions.addOption(gaussian);
distributions.addOption(exponential);
distributions.addOption(zipf);
//options group for statistical distribution parameters
OptionGroup statParams = new OptionGroup();// add this to plate options
Option stdDev = Option.builder("stddev")
@@ -355,6 +363,11 @@ public class CommandLineInterface {
.hasArg()
.argName("value")
.build();
Option zipfExponent = Option.builder("exp")
.desc("If using -zipf flag, exponent value for distribution")
.hasArg()
.argName("value")
.build();
statParams.addOption(stdDev);
statParams.addOption(lambda);
//Option group for random plate or set populations
@@ -386,6 +399,7 @@ public class CommandLineInterface {
plateOptions.addOptionGroup(statParams);
plateOptions.addOptionGroup(wellPopOptions);
plateOptions.addOption(dropoutRate);
plateOptions.addOption(zipfExponent);
plateOptions.addOption(outputFileOption());
return plateOptions;
}

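The hunk above adds -zipf to the mutually exclusive distribution OptionGroup and registers a separate -exp value option on the plate options. A minimal, self-contained sketch of that commons-cli pattern; the class name and the 1.4 fallback exponent are illustrative (the fallback is borrowed from the interactive interface), not project code:

import org.apache.commons.cli.*;

public class ZipfOptionDemo {
    public static void main(String[] args) throws ParseException {
        // Mutually exclusive distribution flags live in one OptionGroup.
        OptionGroup distributions = new OptionGroup();
        distributions.addOption(Option.builder("zipf")
                .desc("Use a Zipf distribution for cell sample").build());
        distributions.addOption(Option.builder("exponential")
                .desc("Use an exponential distribution for cell sample").build());

        // The exponent is a plain option with an argument, added outside the group.
        Option zipfExponent = Option.builder("exp")
                .desc("If using -zipf flag, exponent value for distribution")
                .hasArg().argName("value").build();

        Options options = new Options();
        options.addOptionGroup(distributions);
        options.addOption(zipfExponent);

        CommandLine line = new DefaultParser().parse(options, args);
        if (line.hasOption("zipf")) {
            // Fall back to 1.4 if -exp is omitted (an assumed default for this sketch).
            double exponent = Double.parseDouble(line.getOptionValue("exp", "1.4"));
            System.out.println("Zipf exponent: " + exponent);
        }
    }
}
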
DistributionType.java

@@ -0,0 +1,6 @@
public enum DistributionType {
POISSON,
GAUSSIAN,
EXPONENTIAL,
ZIPF
}

GraphModificationFunctions.java

@@ -1,72 +1,54 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
public interface GraphModificationFunctions {
//remove over- and under-weight edges, return removed edges
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
int low, int high, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
if(saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs with large occupancy discrepancy, return removed edges
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer maxOccupancyDifference, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer minOverlapPercent,
boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
@@ -74,22 +56,13 @@ public interface GraphModificationFunctions {
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer intWeight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, intWeight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
@@ -126,10 +99,10 @@ public interface GraphModificationFunctions {
}
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Map<Vertex[], Integer> removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
graph.setEdgeWeight(e, removedEdges.get(edge));
Map<DefaultWeightedEdge, Vertex[]> removedEdges) {
for (DefaultWeightedEdge edge : removedEdges.keySet()) {
Vertex[] vertices = removedEdges.get(edge);
graph.addEdge(vertices[0], vertices[1], edge);
}
}

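The rewrite above changes the removed-edge bookkeeping from Map<Vertex[], Integer> to Map<DefaultWeightedEdge, Vertex[]> and defers removal to a separate edgesToRemove pass, so graph.edgeSet() is never structurally modified while it is being iterated (the concurrent-modification fix in the commit list). A small standalone sketch of that filter-then-restore pattern with JGraphT, using plain String vertices instead of the project's Vertex class:

import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.*;

public class FilterThenRemoveDemo {
    public static void main(String[] args) {
        SimpleWeightedGraph<String, DefaultWeightedEdge> graph =
                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
        graph.addVertex("a1"); graph.addVertex("a2"); graph.addVertex("b1");
        graph.setEdgeWeight(graph.addEdge("a1", "b1"), 5.0);
        graph.setEdgeWeight(graph.addEdge("a2", "b1"), 42.0);

        double low = 1.0, high = 10.0;
        // Remember the endpoints of each removed edge so it can be re-added later.
        Map<DefaultWeightedEdge, String[]> removedEdges = new HashMap<>();
        Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();

        // Pass 1: only inspect; never structurally modify the graph inside this loop.
        for (DefaultWeightedEdge e : graph.edgeSet()) {
            double w = graph.getEdgeWeight(e);
            if (w > high || w < low) {
                removedEdges.put(e, new String[]{graph.getEdgeSource(e), graph.getEdgeTarget(e)});
                edgesToRemove.add(e);
            }
        }
        // Pass 2: structural modification happens only after iteration is finished.
        edgesToRemove.forEach(graph::removeEdge);

        // Restoring works because addEdge(v1, v2, e) re-inserts the same edge object,
        // and DefaultWeightedEdge carries its weight with it.
        removedEdges.forEach((e, v) -> graph.addEdge(v[0], v[1], e));
        System.out.println(graph.edgeSet().size() + " edges after restore");
    }
}
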
InteractiveInterface.java

@@ -89,14 +89,12 @@ public class InteractiveInterface {
private static void makePlate() {
String cellFile = null;
String filename = null;
Double stdDev = 0.0;
Double parameter = 0.0;
Integer numWells = 0;
Integer numSections;
Integer[] populations = {1};
Double dropOutRate = 0.0;
boolean poisson = false;
boolean exponential = false;
double lambda = 1.5;
;
try {
System.out.println("\nSimulated sample plates consist of:");
System.out.println("* a number of wells");
@@ -114,33 +112,46 @@ public class InteractiveInterface {
System.out.println("1) Poisson");
System.out.println("2) Gaussian");
System.out.println("3) Exponential");
// System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
// System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
System.out.println("4) Zipf");
System.out.println("(Note: wider distributions are more memory intensive to match)");
System.out.print("Enter selection value: ");
input = sc.nextInt();
switch (input) {
case 1 -> poisson = true;
case 1 -> {
BiGpairSEQ.setDistributionType(DistributionType.POISSON);
}
case 2 -> {
BiGpairSEQ.setDistributionType(DistributionType.GAUSSIAN);
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
System.out.println("(Note: wider distributions are more memory intensive to match)");
stdDev = sc.nextDouble();
if (stdDev <= 0.0) {
parameter = sc.nextDouble();
if (parameter <= 0.0) {
throw new InputMismatchException("Value must be positive.");
}
}
case 3 -> {
exponential = true;
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
System.out.print("Please enter lambda value for exponential distribution: ");
lambda = sc.nextDouble();
if (lambda <= 0.0) {
lambda = 0.6;
System.out.println("Value must be positive. Defaulting to 0.6.");
parameter = sc.nextDouble();
if (parameter <= 0.0) {
parameter = 1.4;
System.out.println("Value must be positive. Defaulting to 1.4.");
}
}
case 4 -> {
BiGpairSEQ.setDistributionType(DistributionType.ZIPF);
System.out.print("Please enter exponent value for Zipf distribution: ");
parameter = sc.nextDouble();
if (parameter <= 0.0) {
parameter = 1.4;
System.out.println("Value must be positive. Defaulting to 1.4.");
}
}
default -> {
System.out.println("Invalid input. Defaulting to exponential.");
exponential = true;
parameter = 1.4;
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
}
}
System.out.print("\nNumber of wells on plate: ");
@@ -226,17 +237,18 @@ public class InteractiveInterface {
assert filename != null;
Plate samplePlate;
PlateFileWriter writer;
if(exponential){
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
DistributionType type = BiGpairSEQ.getDistributionType();
switch(type) {
case POISSON -> {
parameter = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
writer = new PlateFileWriter(filename, samplePlate);
}
else {
if (poisson) {
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
}
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
default -> {
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
writer = new PlateFileWriter(filename, samplePlate);
}
}
System.out.println("Writing Sample Plate to file");
writer.writePlateFile();
System.out.println("Sample Plate written to file: " + filename);
@@ -605,12 +617,13 @@ public class InteractiveInterface {
case 3 -> {
BiGpairSEQ.setAuctionAlgorithm();
System.out.println("MWM algorithm set to auction");
backToOptions = true;
}
case 4 -> {
System.out.println("Scaling integer weight MWM algorithm not yet fully implemented. Sorry.");
// BiGpairSEQ.setIntegerWeightScalingAlgorithm();
// System.out.println("MWM algorithm set to integer weight scaling algorithm of Duan and Su");
backToOptions = true;
// backToOptions = true;
}
case 0 -> backToOptions = true;
default -> System.out.println("Invalid input");

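The Poisson branches above reuse the Gaussian fill with a standard deviation of sqrt(count), on the grounds that a Poisson(lambda) distribution has variance equal to its mean, so N(lambda, sqrt(lambda)) approximates it well for large lambda. A quick standalone check of that approximation; the Knuth-style sampler and the numbers are illustrative only, not project code:

import java.util.Random;

public class PoissonGaussianCheck {
    // Knuth's simple Poisson sampler; fine for moderate lambda values.
    static int poisson(Random r, double lambda) {
        double target = Math.exp(-lambda), p = 1.0;
        int k = 0;
        do { k++; p *= r.nextDouble(); } while (p > target);
        return k - 1;
    }

    public static void main(String[] args) {
        Random r = new Random(42);
        double lambda = 100.0;   // hypothetical mean count
        int n = 100_000;
        double sum = 0, sumSq = 0;
        for (int i = 0; i < n; i++) {
            int x = poisson(r, lambda);
            sum += x; sumSq += (double) x * x;
        }
        double mean = sum / n;
        double var = sumSq / n - mean * mean;
        // Both mean and variance land near lambda, so N(lambda, sqrt(lambda)) is a close stand-in.
        System.out.printf("mean=%.2f variance=%.2f sqrt(var)=%.2f%n", mean, var, Math.sqrt(var));
    }
}
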
Plate.java

@@ -13,6 +13,10 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
import org.apache.commons.rng.simple.JDKRandomWrapper;
import java.util.*;
public class Plate {
@@ -26,25 +30,22 @@ public class Plate {
private Integer[] populations;
private double stdDev;
private double lambda;
boolean exponential = false;
private double zipfExponent;
private DistributionType distributionType;
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
double dropoutRate, double stdDev_or_lambda, boolean exponential){
double dropoutRate, double parameter){
this.cells = cells;
this.sourceFile = cellFilename;
this.size = numWells;
this.wells = new ArrayList<>();
this.error = dropoutRate;
this.populations = populations;
this.exponential = exponential;
if (this.exponential) {
this.lambda = stdDev_or_lambda;
fillWellsExponential(cells.getCells(), this.lambda);
}
else {
this.stdDev = stdDev_or_lambda;
fillWells(cells.getCells(), this.stdDev);
}
this.stdDev = parameter;
this.lambda = parameter;
this.zipfExponent = parameter;
this.distributionType = BiGpairSEQ.getDistributionType();
fillWells(cells.getCells());
}
@@ -85,9 +86,33 @@ public class Plate {
}
}
private void fillWellsZipf(List<String[]> cells, double exponent) {
int numSections = populations.length;
int section = 0;
int n;
RejectionInversionZipfSampler zipfSampler = new RejectionInversionZipfSampler(new JDKRandomWrapper(rand), cells.size(), exponent);
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<String[]> well = new ArrayList<>();
for (int j = 0; j < populations[section]; j++) {
do {
n = zipfSampler.sample();
} while (n >= cells.size() || n < 0);
String[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
cellToAdd[k] = "-1";
}
}
well.add(cellToAdd);
}
wells.add(well);
}
section++;
}
}
private void fillWellsExponential(List<String[]> cells, double lambda){
this.lambda = lambda;
exponential = true;
int numSections = populations.length;
int section = 0;
double m;
@@ -143,6 +168,24 @@ public class Plate {
}
}
private void fillWells(List<String[]> cells){
DistributionType type = BiGpairSEQ.getDistributionType();
switch (type) {
case POISSON, GAUSSIAN -> {
fillWells(cells, getStdDev());
break;
}
case EXPONENTIAL -> {
fillWellsExponential(cells, getLambda());
break;
}
case ZIPF -> {
fillWellsZipf(cells, getZipfExponent());
break;
}
}
}
public Integer[] getPopulations(){
return populations;
}
@@ -155,10 +198,12 @@ public class Plate {
return stdDev;
}
public boolean isExponential(){return exponential;}
public DistributionType getDistributionType() { return distributionType;}
public double getLambda(){return lambda;}
public double getZipfExponent(){return zipfExponent;}
public double getError() {
return error;
}
@@ -196,7 +241,7 @@ public class Plate {
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
}
//The specific misread hasn't happened before
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) {
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).isEmpty()) {
//The misread doesn't collide with a real sequence already on the plate and some sequences have already been read
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
StringBuilder spurious = new StringBuilder(currentSequence);

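fillWellsZipf above draws cell indices from commons-rng's RejectionInversionZipfSampler, built over the plate's java.util.Random via JDKRandomWrapper. A minimal sketch of that sampling setup; sample() returns 1-based ranks in [1, numberOfElements], so this sketch subtracts 1 before indexing, and the cell list, seed, and exponent are made up for illustration:

import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
import org.apache.commons.rng.simple.JDKRandomWrapper;

import java.util.List;
import java.util.Random;

public class ZipfSamplingDemo {
    public static void main(String[] args) {
        List<String> cells = List.of("cellA", "cellB", "cellC", "cellD");
        double exponent = 1.4;

        // Adapt a java.util.Random to the commons-rng UniformRandomProvider interface.
        UniformRandomProvider rng = new JDKRandomWrapper(new Random(7));
        RejectionInversionZipfSampler sampler =
                new RejectionInversionZipfSampler(rng, cells.size(), exponent);

        // sample() returns a rank in [1, cells.size()]; rank 1 is drawn most often.
        for (int i = 0; i < 10; i++) {
            int rank = sampler.sample();
            System.out.println(rank + " -> " + cells.get(rank - 1));
        }
    }
}
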
PlateFileWriter.java

@@ -13,11 +13,13 @@ public class PlateFileWriter {
private List<List<String[]>> wells;
private double stdDev;
private double lambda;
private double zipfExponent;
private DistributionType distributionType;
private Double error;
private String filename;
private String sourceFileName;
private Integer[] populations;
private boolean isExponential = false;
public PlateFileWriter(String filename, Plate plate) {
if(!filename.matches(".*\\.csv")){
@@ -26,12 +28,17 @@ public class PlateFileWriter {
this.filename = filename;
this.sourceFileName = plate.getSourceFileName();
this.size = plate.getSize();
this.isExponential = plate.isExponential();
if(isExponential) {
this.distributionType = plate.getDistributionType();
switch(distributionType) {
case POISSON, GAUSSIAN -> {
this.stdDev = plate.getStdDev();
}
case EXPONENTIAL -> {
this.lambda = plate.getLambda();
}
else{
this.stdDev = plate.getStdDev();
case ZIPF -> {
this.zipfExponent = plate.getZipfExponent();
}
}
this.error = plate.getError();
this.wells = plate.getWells();
@@ -95,11 +102,22 @@ public class PlateFileWriter {
printer.printComment("Plate size: " + size);
printer.printComment("Well populations: " + wellPopulationsString);
printer.printComment("Error rate: " + error);
if(isExponential){
printer.printComment("Lambda: " + lambda);
switch (distributionType) {
case POISSON -> {
printer.printComment("Cell frequency distribution: POISSON");
}
case GAUSSIAN -> {
printer.printComment("Cell frequency distribution: GAUSSIAN");
printer.printComment("--Standard deviation: " + stdDev);
}
case EXPONENTIAL -> {
printer.printComment("Cell frequency distribution: EXPONENTIAL");
printer.printComment("--Lambda: " + lambda);
}
case ZIPF -> {
printer.printComment("Cell frequency distribution: ZIPF");
printer.printComment("--Exponent: " + zipfExponent);
}
else {
printer.printComment("Std. dev.: " + stdDev);
}
printer.printRecords(wellsAsStrings);
} catch(IOException ex){

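The writer above records the distribution type and its parameter as CSV comment lines via printer.printComment(). A small sketch of that commons-csv usage; printComment() is skipped when the CSVFormat has no comment marker configured, so the sketch sets one explicitly, and the format settings and values are illustrative rather than the project's:

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;

import java.io.IOException;
import java.io.StringWriter;

public class PlateMetadataCommentDemo {
    public static void main(String[] args) throws IOException {
        CSVFormat format = CSVFormat.DEFAULT.builder()
                .setCommentMarker('#')   // without a marker, printComment() writes nothing
                .build();
        StringWriter out = new StringWriter();
        try (CSVPrinter printer = new CSVPrinter(out, format)) {
            printer.printComment("Cell frequency distribution: ZIPF");
            printer.printComment("--Exponent: 1.4");
            printer.printRecord("seqA", "seqB", "seqC");
        }
        System.out.print(out);
    }
}
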
Simulator.java

@@ -1,9 +1,7 @@
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.tree.FibonacciHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
@@ -70,58 +68,102 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);}
}
//construct the graph. For simplicity, going to make
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
//distinct numbers associated with them. Since I'm using a 2D array, that means
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
//subtract the vertexStartValue from betas to use their vertex labels as array indices
int vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
//new start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateAtoVMap.size();
//keys are betas, values are sequential integers
Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
if(verbose){System.out.println("Vertex maps made");}
//make adjacency matrix for bipartite graph generator
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
if(verbose){System.out.println("Making adjacency matrix");}
double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
if(verbose){System.out.println("Adjacency matrix made");}
/*
* The commented out code below works beautifully for small enough graphs. However, after implementing a
* Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that
* this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix.
* So I'm going to attempt to build this graph directly and see if that is less memory intensive
*/
// //construct the graph. For simplicity, going to make
// if(verbose){System.out.println("Making vertex maps");}
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// //distinct numbers associated with them. Since I'm using a 2D array, that means
// //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// //subtract the vertexStartValue from betas to use their vertex labels as array indices
// int vertexStartValue = 0;
// //keys are sequential integer vertices, values are alphas
// Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
// //new start value for vertex to beta map should be one more than final vertex value in alpha map
// vertexStartValue += plateAtoVMap.size();
// //keys are betas, values are sequential integers
// Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
// if(verbose){System.out.println("Vertex maps made");}
// //make adjacency matrix for bipartite graph generator
// //(technically this is only 1/4 of an adjacency matrix, but that's all you need
// //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
// if(verbose){System.out.println("Making adjacency matrix");}
// double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
// fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
// if(verbose){System.out.println("Adjacency matrix made");}
// //make bipartite graph
// if(verbose){System.out.println("Making bipartite weighted graph");}
// //the graph object
// SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
// //the graph generator
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
// //the list of alpha vertices
// List<Vertex> alphaVertices = new ArrayList<>();
// for (String seq : plateAtoVMap.keySet()) {
// Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
// alphaVertices.add(alphaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(alphaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.first(alphaVertices);
// //the list of beta vertices
// List<Vertex> betaVertices = new ArrayList<>();
// for (String seq : plateBtoVMap.keySet()) {
// Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
// betaVertices.add(betaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(betaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.second(betaVertices);
// //use adjacency matrix of weight created previously
// graphGenerator.weights(weights);
// graphGenerator.generateGraph(graph);
//make bipartite graph
if(verbose){System.out.println("Making bipartite weighted graph");}
//the graph object
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
int vertexLabelValue = 0;
//create and add alpha sequence vertices
List<Vertex> alphaVertices = new ArrayList<>();
for (String seq : plateAtoVMap.keySet()) {
Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
alphaVertices.add(alphaVertex);
for (Map.Entry<String, SequenceRecord> entry: alphaSequences.entrySet()) {
alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
vertexLabelValue++;
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(alphaVertices);
//Add ordered list of vertices to the graph
graphGenerator.first(alphaVertices);
//the list of beta vertices
alphaVertices.forEach(graph::addVertex);
//add beta sequence vertices
List<Vertex> betaVertices = new ArrayList<>();
for (String seq : plateBtoVMap.keySet()) {
Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
betaVertices.add(betaVertex);
for (Map.Entry<String, SequenceRecord> entry: betaSequences.entrySet()) {
betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
vertexLabelValue++;
}
betaVertices.forEach(graph::addVertex);
//add edges
for(Vertex a: alphaVertices) {
for(Vertex b: betaVertices) {
Set<Integer> sharedWells = new HashSet<>(a.getRecord().getWells());
sharedWells.retainAll(b.getRecord().getWells());
double weight = (double) sharedWells.size();
if (weight != 0.0) {
System.out.println("Edge weight: " + weight);
DefaultWeightedEdge edge = graph.addEdge(a, b);
graph.setEdgeWeight(edge, weight);
}
else {
System.out.println("No overlap");
}
}
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(betaVertices);
//Add ordered list of vertices to the graph
graphGenerator.second(betaVertices);
//use adjacency matrix of weight created previously
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
//stop timing
Instant stop = Instant.now();
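
The block retired above (left commented out) built a dense double[alphas][betas] weight matrix for SimpleWeightedBipartiteGraphMatrixGenerator; the replacement adds vertices and only the nonzero-overlap edges directly. The dense matrix is what exhausts the heap: at 8 bytes per cell, roughly 125,000 sequences per side already needs about 125 GB before any graph objects are allocated, in line with the 120 GB heap the comment describes as insufficient. A back-of-the-envelope comparison, with all counts hypothetical and the per-edge overhead a rough estimate:

public class DenseMatrixFootprint {
    public static void main(String[] args) {
        long alphas = 125_000;           // hypothetical sequence counts, not taken from the paper
        long betas  = 125_000;
        long denseBytes = alphas * betas * Double.BYTES;   // double[][] payload, 8 bytes per cell
        System.out.printf("dense matrix: %.1f GB%n", denseBytes / 1e9);

        // Direct construction stores only edges with nonzero well overlap.
        long nonZeroEdges = 50_000_000;  // hypothetical sparsity
        long perEdgeBytes = 48;          // rough guess at edge object + map-entry overhead
        System.out.printf("sparse edges: %.1f GB%n", nonZeroEdges * perEdgeBytes / 1e9);
    }
}
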
@@ -145,7 +187,7 @@ public class Simulator implements GraphModificationFunctions {
Integer minOverlapPercent, boolean verbose, boolean calculatePValue) {
Instant start = Instant.now();
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
boolean saveEdges = BiGpairSEQ.cacheGraph();
int numWells = data.getNumWells();
//Integer alphaCount = data.getAlphaCount();
@@ -163,6 +205,7 @@ public class Simulator implements GraphModificationFunctions {
}
Integer graphAlphaCount = alphas.size();
Integer graphBetaCount = betas.size();
Integer graphEdgeCount = graph.edgeSet().size();
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
@@ -182,12 +225,14 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"removed");}
Integer filteredGraphEdgeCount = graph.edgeSet().size();
//Find Maximum Weight Matching
if(verbose){System.out.println("Finding maximum weight matching");}
//The matching object
MatchingAlgorithm<Vertex, DefaultWeightedEdge> maxWeightMatching;
//Determine algorithm type
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgoritmType();
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType();
switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way
case AUCTION -> {
//create a new MaximumIntegerWeightBipartiteAuctionMatching
@@ -333,8 +378,10 @@ public class Simulator implements GraphModificationFunctions {
metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString());
metadata.put("total alphas read from plate", data.getAlphaCount().toString());
metadata.put("total betas read from plate", data.getBetaCount().toString());
metadata.put("initial edges in graph", graphEdgeCount.toString());
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString());
metadata.put("high overlap threshold for pairing", highThreshold.toString());
metadata.put("low overlap threshold for pairing", lowThreshold.toString());
metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString());
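
For context, the algorithm chosen via getMatchingAlgorithmType() above runs over the filtered bipartite graph, and the import list at the top of Simulator keeps org.jgrapht.alg.matching.MaximumWeightBipartiteMatching, JGraphT's stock maximum-weight bipartite matcher. A minimal sketch of invoking it on a tiny graph; the String vertices and weights are illustrative, and the project's Vertex class and auction variant are not shown here:

import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.Set;

public class MatchingDemo {
    public static void main(String[] args) {
        SimpleWeightedGraph<String, DefaultWeightedEdge> graph =
                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
        Set<String> alphas = Set.of("a1", "a2");
        Set<String> betas = Set.of("b1", "b2");
        alphas.forEach(graph::addVertex);
        betas.forEach(graph::addVertex);
        graph.setEdgeWeight(graph.addEdge("a1", "b1"), 7.0);
        graph.setEdgeWeight(graph.addEdge("a1", "b2"), 2.0);
        graph.setEdgeWeight(graph.addEdge("a2", "b2"), 5.0);

        // The matcher takes the graph plus the two vertex partitions.
        MatchingAlgorithm<String, DefaultWeightedEdge> matcher =
                new MaximumWeightBipartiteMatching<>(graph, alphas, betas);
        MatchingAlgorithm.Matching<String, DefaultWeightedEdge> matching = matcher.getMatching();

        System.out.println("total weight: " + matching.getWeight());
        matching.getEdges().forEach(e ->
                System.out.println(graph.getEdgeSource(e) + " <-> " + graph.getEdgeTarget(e)));
    }
}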