Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4099ec2623 | ||
|
|
7744586e79 | ||
|
|
83eff0d1e7 | ||
|
|
d1810c453d | ||
|
|
187401f2d6 | ||
|
|
678ce99424 | ||
|
|
c21e375303 | ||
|
|
57fe9c1619 | ||
|
|
e1888a99c6 | ||
|
|
bcf5a4c749 | ||
|
|
81d8a12765 | ||
|
|
b5c0568e22 |
27
.idea/artifacts/BiGpairSEQ_Sim_jar.xml
generated
27
.idea/artifacts/BiGpairSEQ_Sim_jar.xml
generated
@@ -1,16 +1,27 @@
|
||||
<component name="ArtifactManager">
|
||||
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
|
||||
<artifact type="jar" name="BiGpairSEQ_Sim:jar">
|
||||
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
|
||||
<root id="archive" name="BiGpairSEQ_Sim.jar">
|
||||
<element id="directory" name="META-INF">
|
||||
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
|
||||
<element id="file-copy" path="$PROJECT_DIR$/META-INF/MANIFEST.MF" />
|
||||
</element>
|
||||
<element id="module-output" name="BigPairSEQ" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
|
||||
<element id="module-output" name="BiGpairSEQ_Sim" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-sampling/1.6/commons-rng-sampling-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/26.0.2/annotations-26.0.2.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-simple/1.6/commons-rng-simple-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-core/1.6/commons-rng-core-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-client-api/1.6/commons-rng-client-api-1.6.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar" path-in-jar="/" />
|
||||
</root>
|
||||
</artifact>
|
||||
</component>
|
||||
1
.idea/compiler.xml
generated
1
.idea/compiler.xml
generated
@@ -7,6 +7,7 @@
|
||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
||||
<outputRelativeToContentRoot value="true" />
|
||||
<module name="BigPairSEQ" />
|
||||
<module name="BiGpairSEQ_Sim" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
</component>
|
||||
|
||||
25
.idea/jarRepositories.xml
generated
25
.idea/jarRepositories.xml
generated
@@ -1,20 +1,35 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="RemoteRepositoriesConfiguration">
|
||||
<remote-repository>
|
||||
<option name="id" value="my-internal-site" />
|
||||
<option name="name" value="my-internal-site" />
|
||||
<option name="url" value="https://myserver/repo" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Central Repository" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central repo" />
|
||||
<option name="name" value="central repo" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2/" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Central Repository" />
|
||||
<option name="url" value="https://repo.maven.apache.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Maven Central repository" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="jboss.community" />
|
||||
<option name="name" value="JBoss Community repository" />
|
||||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
|
||||
<option name="name" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
|
||||
<option name="url" value="https://repository.mulesoft.org/nexus/content/repositories/public/" />
|
||||
</remote-repository>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/libraries/apache_commons_csv.xml
generated
6
.idea/libraries/apache_commons_csv.xml
generated
@@ -1,8 +1,10 @@
|
||||
<component name="libraryTable">
|
||||
<library name="apache.commons.csv" type="repository">
|
||||
<properties maven-id="org.apache.commons:commons-csv:1.9.0" />
|
||||
<properties maven-id="org.apache.commons:commons-csv:1.14.0" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
4
.idea/libraries/commons_cli.xml
generated
4
.idea/libraries/commons_cli.xml
generated
@@ -1,8 +1,8 @@
|
||||
<component name="libraryTable">
|
||||
<library name="commons.cli" type="repository">
|
||||
<properties maven-id="commons-cli:commons-cli:1.5.0" />
|
||||
<properties maven-id="commons-cli:commons-cli:1.9.0" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
13
.idea/libraries/commons_rng_1.xml
generated
13
.idea/libraries/commons_rng_1.xml
generated
@@ -1,13 +0,0 @@
|
||||
<component name="libraryTable">
|
||||
<library name="commons-rng-1">
|
||||
<CLASSES>
|
||||
<root url="file://$USER_HOME$/Downloads/commons-rng-1.6" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES>
|
||||
<root url="file://$USER_HOME$/Downloads/commons-rng-1.6" />
|
||||
</SOURCES>
|
||||
<jarDirectory url="file://$USER_HOME$/Downloads/commons-rng-1.6" recursive="false" />
|
||||
<jarDirectory url="file://$USER_HOME$/Downloads/commons-rng-1.6" recursive="false" type="SOURCES" />
|
||||
</library>
|
||||
</component>
|
||||
7
.idea/libraries/jgrapht_core.xml
generated
7
.idea/libraries/jgrapht_core.xml
generated
@@ -1,9 +1,10 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jgrapht.core" type="repository">
|
||||
<properties maven-id="org.jgrapht:jgrapht-core:1.5.1" />
|
||||
<properties maven-id="org.jgrapht:jgrapht-core:1.5.2" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
15
.idea/libraries/jgrapht_io.xml
generated
15
.idea/libraries/jgrapht_io.xml
generated
@@ -1,13 +1,14 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jgrapht.io" type="repository">
|
||||
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
|
||||
<properties maven-id="org.jgrapht:jgrapht-io:1.5.2" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
|
||||
44
pom.xml
44
pom.xml
@@ -5,7 +5,7 @@
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.example</groupId>
|
||||
<artifactId>TCellSim</artifactId>
|
||||
<artifactId>BiGpairSEQ_Sim</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,8 +26,48 @@
|
||||
<version>RELEASE</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-rng-simple -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-rng-simple</artifactId>
|
||||
<version>1.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-rng-sampling</artifactId>
|
||||
<version>1.6</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-csv</artifactId>
|
||||
<version>1.14.0</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-core -->
|
||||
<dependency>
|
||||
<groupId>org.jgrapht</groupId>
|
||||
<artifactId>jgrapht-core</artifactId>
|
||||
<version>1.5.2</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-io -->
|
||||
<dependency>
|
||||
<groupId>org.jgrapht</groupId>
|
||||
<artifactId>jgrapht-io</artifactId>
|
||||
<version>1.5.2</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jheaps/jheaps -->
|
||||
<dependency>
|
||||
<groupId>org.jheaps</groupId>
|
||||
<artifactId>jheaps</artifactId>
|
||||
<version>0.14</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
<version>1.9.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>11</maven.compiler.source>
|
||||
<maven.compiler.target>11</maven.compiler.target>
|
||||
|
||||
@@ -634,7 +634,10 @@ a means of exploring some very beautiful math.
|
||||
|
||||
## TODO
|
||||
|
||||
* Update CLI option text in this readme to include Zipf distribution options
|
||||
* Consider whether a graph database might be a better option than keeping things in memory.
|
||||
* Look at fastUtil for more performant maps and arrays. Note that there is an optional jGraphT library to work with fastUtil (see FastutilMapIntVertexGraph, for example).
|
||||
* Consider implementing an option to use the jGrapht sparse graph representation for a lower memory cost with very large graphs (tens or hundreds of thousands of distinct sequences).
|
||||
* ~~Update CLI option text in this readme to include Zipf distribution options~~
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
|
||||
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
|
||||
|
||||
@@ -13,7 +13,7 @@ public class BiGpairSEQ {
|
||||
private static boolean cacheCells = false;
|
||||
private static boolean cachePlate = false;
|
||||
private static boolean cacheGraph = false;
|
||||
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
|
||||
private static AlgorithmType matchingAlgorithmType = AlgorithmType.HUNGARIAN;
|
||||
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
|
||||
private static DistributionType distributionType = DistributionType.ZIPF;
|
||||
private static boolean outputBinary = true;
|
||||
@@ -166,13 +166,13 @@ public class BiGpairSEQ {
|
||||
return priorityQueueHeapType;
|
||||
}
|
||||
|
||||
public static AlgorithmType getMatchingAlgoritmType() { return matchingAlgoritmType; }
|
||||
public static AlgorithmType getMatchingAlgorithmType() { return matchingAlgorithmType; }
|
||||
|
||||
public static void setHungarianAlgorithm() { matchingAlgoritmType = AlgorithmType.HUNGARIAN; }
|
||||
public static void setHungarianAlgorithm() { matchingAlgorithmType = AlgorithmType.HUNGARIAN; }
|
||||
|
||||
public static void setIntegerWeightScalingAlgorithm() { matchingAlgoritmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
|
||||
public static void setIntegerWeightScalingAlgorithm() { matchingAlgorithmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
|
||||
|
||||
public static void setAuctionAlgorithm() { matchingAlgoritmType = AlgorithmType.AUCTION; }
|
||||
public static void setAuctionAlgorithm() { matchingAlgorithmType = AlgorithmType.AUCTION; }
|
||||
|
||||
public static void setPairingHeap() {
|
||||
priorityQueueHeapType = HeapType.PAIRING;
|
||||
|
||||
@@ -1,72 +1,54 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
public interface GraphModificationFunctions {
|
||||
|
||||
//remove over- and under-weight edges, return removed edges
|
||||
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
int low, int high, boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
int low, int high, boolean saveEdges) {
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
|
||||
if(saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, weight);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
|
||||
removedEdges.put(e, vertices);
|
||||
}
|
||||
edgesToRemove.add(e);
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
edgesToRemove.forEach(graph::removeEdge);
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy, return removed edges
|
||||
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
static Map<DefaultWeightedEdge, Vertex[]> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Integer maxOccupancyDifference, boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
|
||||
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
if (saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, weight);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
|
||||
removedEdges.put(e, vertices);
|
||||
}
|
||||
edgesToRemove.add(e);
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
edgesToRemove.forEach(graph::removeEdge);
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
|
||||
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Integer minOverlapPercent,
|
||||
boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
|
||||
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
|
||||
@@ -74,22 +56,13 @@ public interface GraphModificationFunctions {
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
if (saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer intWeight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, intWeight);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
|
||||
removedEdges.put(e, vertices);
|
||||
}
|
||||
edgesToRemove.add(e);
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
edgesToRemove.forEach(graph::removeEdge);
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
@@ -126,10 +99,10 @@ public interface GraphModificationFunctions {
|
||||
}
|
||||
|
||||
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Map<Vertex[], Integer> removedEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
|
||||
graph.setEdgeWeight(e, removedEdges.get(edge));
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges) {
|
||||
for (DefaultWeightedEdge edge : removedEdges.keySet()) {
|
||||
Vertex[] vertices = removedEdges.get(edge);
|
||||
graph.addEdge(vertices[0], vertices[1], edge);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,8 +13,7 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||
*/
|
||||
|
||||
|
||||
import org.apache.commons.rng.UniformRandomProvider;
|
||||
import org.apache.commons.rng.core.BaseProvider;
|
||||
|
||||
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
|
||||
import org.apache.commons.rng.simple.JDKRandomWrapper;
|
||||
|
||||
@@ -242,7 +241,7 @@ public class Plate {
|
||||
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
|
||||
}
|
||||
//The specific misread hasn't happened before
|
||||
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) {
|
||||
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).isEmpty()) {
|
||||
//The misread doesn't collide with a real sequence already on the plate and some sequences have already been read
|
||||
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
|
||||
StringBuilder spurious = new StringBuilder(currentSequence);
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import org.jgrapht.Graphs;
|
||||
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
||||
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jheaps.tree.FibonacciHeap;
|
||||
import org.jheaps.tree.PairingHeap;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
@@ -70,58 +69,104 @@ public class Simulator implements GraphModificationFunctions {
|
||||
if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);}
|
||||
}
|
||||
|
||||
//construct the graph. For simplicity, going to make
|
||||
if(verbose){System.out.println("Making vertex maps");}
|
||||
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
//distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
//subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||
int vertexStartValue = 0;
|
||||
//keys are sequential integer vertices, values are alphas
|
||||
Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
|
||||
//new start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
vertexStartValue += plateAtoVMap.size();
|
||||
//keys are betas, values are sequential integers
|
||||
Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
|
||||
if(verbose){System.out.println("Vertex maps made");}
|
||||
//make adjacency matrix for bipartite graph generator
|
||||
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
||||
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
||||
if(verbose){System.out.println("Making adjacency matrix");}
|
||||
double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
|
||||
fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
|
||||
if(verbose){System.out.println("Adjacency matrix made");}
|
||||
/*
|
||||
* The commented out code below works beautifully for small enough graphs. However, after implementing a
|
||||
* Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that
|
||||
* this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix.
|
||||
* So I'm going to attempt to build this graph directly and see if that is less memory intensive
|
||||
*/
|
||||
// //construct the graph. For simplicity, going to make
|
||||
// if(verbose){System.out.println("Making vertex maps");}
|
||||
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
// //distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
// //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
// //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
// //subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||
// int vertexStartValue = 0;
|
||||
// //keys are sequential integer vertices, values are alphas
|
||||
// Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
|
||||
// //new start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
// vertexStartValue += plateAtoVMap.size();
|
||||
// //keys are betas, values are sequential integers
|
||||
// Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
|
||||
// if(verbose){System.out.println("Vertex maps made");}
|
||||
// //make adjacency matrix for bipartite graph generator
|
||||
// //(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
||||
// //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
||||
// if(verbose){System.out.println("Making adjacency matrix");}
|
||||
// double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
|
||||
// fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
|
||||
// if(verbose){System.out.println("Adjacency matrix made");}
|
||||
// //make bipartite graph
|
||||
// if(verbose){System.out.println("Making bipartite weighted graph");}
|
||||
// //the graph object
|
||||
// SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
|
||||
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
// //the graph generator
|
||||
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
// //the list of alpha vertices
|
||||
// List<Vertex> alphaVertices = new ArrayList<>();
|
||||
// for (String seq : plateAtoVMap.keySet()) {
|
||||
// Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
|
||||
// alphaVertices.add(alphaVertex);
|
||||
// }
|
||||
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
// Collections.sort(alphaVertices);
|
||||
// //Add ordered list of vertices to the graph
|
||||
// graphGenerator.first(alphaVertices);
|
||||
// //the list of beta vertices
|
||||
// List<Vertex> betaVertices = new ArrayList<>();
|
||||
// for (String seq : plateBtoVMap.keySet()) {
|
||||
// Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
|
||||
// betaVertices.add(betaVertex);
|
||||
// }
|
||||
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
// Collections.sort(betaVertices);
|
||||
// //Add ordered list of vertices to the graph
|
||||
// graphGenerator.second(betaVertices);
|
||||
// //use adjacency matrix of weight created previously
|
||||
// graphGenerator.weights(weights);
|
||||
// graphGenerator.generateGraph(graph);
|
||||
|
||||
//make bipartite graph
|
||||
if(verbose){System.out.println("Making bipartite weighted graph");}
|
||||
//the graph object
|
||||
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
//the graph generator
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
//the list of alpha vertices
|
||||
int vertexLabelValue = 0;
|
||||
//create and add alpha sequence vertices
|
||||
List<Vertex> alphaVertices = new ArrayList<>();
|
||||
for (String seq : plateAtoVMap.keySet()) {
|
||||
Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
|
||||
alphaVertices.add(alphaVertex);
|
||||
for (Map.Entry<String, SequenceRecord> entry: alphaSequences.entrySet()) {
|
||||
alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
|
||||
vertexLabelValue++;
|
||||
}
|
||||
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
Collections.sort(alphaVertices);
|
||||
//Add ordered list of vertices to the graph
|
||||
graphGenerator.first(alphaVertices);
|
||||
//the list of beta vertices
|
||||
alphaVertices.forEach(graph::addVertex);
|
||||
//add beta sequence vertices
|
||||
List<Vertex> betaVertices = new ArrayList<>();
|
||||
for (String seq : plateBtoVMap.keySet()) {
|
||||
Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
|
||||
betaVertices.add(betaVertex);
|
||||
for (Map.Entry<String, SequenceRecord> entry: betaSequences.entrySet()) {
|
||||
betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
|
||||
vertexLabelValue++;
|
||||
}
|
||||
betaVertices.forEach(graph::addVertex);
|
||||
//add edges (best so far)
|
||||
int edgesAddedCount = 0;
|
||||
for(Vertex a: alphaVertices) {
|
||||
Set<Integer> a_wells = a.getRecord().getWells();
|
||||
for(Vertex b: betaVertices) {
|
||||
Set<Integer> sharedWells = new HashSet<>(a_wells);
|
||||
sharedWells.retainAll(b.getRecord().getWells());
|
||||
if (!sharedWells.isEmpty()) {
|
||||
Graphs.addEdge(graph, a, b, (double) sharedWells.size());
|
||||
}
|
||||
edgesAddedCount++;
|
||||
if (edgesAddedCount % 10000000 == 0) { //collect garbage every 10,000,000 edges
|
||||
System.out.println(edgesAddedCount + " edges added");
|
||||
//request garbage collection
|
||||
System.gc();
|
||||
System.out.println("Garbage collection requested");
|
||||
}
|
||||
}
|
||||
}
|
||||
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
|
||||
Collections.sort(betaVertices);
|
||||
//Add ordered list of vertices to the graph
|
||||
graphGenerator.second(betaVertices);
|
||||
//use adjacency matrix of weight created previously
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
if(verbose){System.out.println("Graph created");}
|
||||
//stop timing
|
||||
Instant stop = Instant.now();
|
||||
@@ -145,7 +190,7 @@ public class Simulator implements GraphModificationFunctions {
|
||||
Integer minOverlapPercent, boolean verbose, boolean calculatePValue) {
|
||||
Instant start = Instant.now();
|
||||
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
|
||||
boolean saveEdges = BiGpairSEQ.cacheGraph();
|
||||
int numWells = data.getNumWells();
|
||||
//Integer alphaCount = data.getAlphaCount();
|
||||
@@ -163,6 +208,7 @@ public class Simulator implements GraphModificationFunctions {
|
||||
}
|
||||
Integer graphAlphaCount = alphas.size();
|
||||
Integer graphBetaCount = betas.size();
|
||||
Integer graphEdgeCount = graph.edgeSet().size();
|
||||
|
||||
//remove edges with weights outside given overlap thresholds, add those to removed edge list
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
||||
@@ -182,12 +228,14 @@ public class Simulator implements GraphModificationFunctions {
|
||||
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
||||
"removed");}
|
||||
|
||||
Integer filteredGraphEdgeCount = graph.edgeSet().size();
|
||||
|
||||
//Find Maximum Weight Matching
|
||||
if(verbose){System.out.println("Finding maximum weight matching");}
|
||||
//The matching object
|
||||
MatchingAlgorithm<Vertex, DefaultWeightedEdge> maxWeightMatching;
|
||||
//Determine algorithm type
|
||||
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgoritmType();
|
||||
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType();
|
||||
switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way
|
||||
case AUCTION -> {
|
||||
//create a new MaximumIntegerWeightBipartiteAuctionMatching
|
||||
@@ -333,8 +381,10 @@ public class Simulator implements GraphModificationFunctions {
|
||||
metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString());
|
||||
metadata.put("total alphas read from plate", data.getAlphaCount().toString());
|
||||
metadata.put("total betas read from plate", data.getBetaCount().toString());
|
||||
metadata.put("initial edges in graph", graphEdgeCount.toString());
|
||||
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
|
||||
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
|
||||
metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString());
|
||||
metadata.put("high overlap threshold for pairing", highThreshold.toString());
|
||||
metadata.put("low overlap threshold for pairing", lowThreshold.toString());
|
||||
metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString());
|
||||
|
||||
Reference in New Issue
Block a user