9 Commits
v4.3 ... v4.4

Author SHA1 Message Date
eugenefischer
d1810c453d Even more efficient graph creation (my initial scheme, but this time without accidentally changing what's in the sequence records) 2025-04-10 15:03:10 -05:00
eugenefischer
187401f2d6 More efficient graph creation 2025-04-10 14:06:11 -05:00
eugenefischer
678ce99424 iterate over vertex wells correctly 2025-04-10 13:34:04 -05:00
eugenefischer
c21e375303 fix concurrent modification bug 2025-04-10 13:33:47 -05:00
eugenefischer
57fe9c1619 Update graph modification functions to work with edges directly 2025-04-10 12:42:19 -05:00
eugenefischer
e1888a99c6 refactor to construct the bipartite graph directly, rather than by using an adjacency matrix and a graph generator. 2025-04-10 11:47:15 -05:00
eugenefischer
bcf5a4c749 change artifact details 2025-04-10 11:05:08 -05:00
eugenefischer
81d8a12765 dependency update stuff 2025-04-10 10:54:05 -05:00
eugenefischer
b5c0568e22 Add dependencies 2025-04-10 10:53:42 -05:00
13 changed files with 228 additions and 151 deletions

View File

@@ -1,16 +1,27 @@
<component name="ArtifactManager">
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
<artifact type="jar" name="BiGpairSEQ_Sim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
<root id="archive" name="BiGpairSEQ_Sim.jar">
<element id="directory" name="META-INF">
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
<element id="file-copy" path="$PROJECT_DIR$/META-INF/MANIFEST.MF" />
</element>
<element id="module-output" name="BigPairSEQ" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
<element id="module-output" name="BiGpairSEQ_Sim" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-sampling/1.6/commons-rng-sampling-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/26.0.2/annotations-26.0.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-simple/1.6/commons-rng-simple-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-core/1.6/commons-rng-core-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-client-api/1.6/commons-rng-client-api-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar" path-in-jar="/" />
</root>
</artifact>
</component>

1
.idea/compiler.xml generated
View File

@@ -7,6 +7,7 @@
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="BigPairSEQ" />
<module name="BiGpairSEQ_Sim" />
</profile>
</annotationProcessing>
</component>

View File

@@ -1,20 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="my-internal-site" />
<option name="name" value="my-internal-site" />
<option name="url" value="https://myserver/repo" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central repo" />
<option name="name" value="central repo" />
<option name="url" value="https://repo1.maven.org/maven2/" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
<remote-repository>
<option name="id" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
<option name="name" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
<option name="url" value="https://repository.mulesoft.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>

View File

@@ -1,8 +1,10 @@
<component name="libraryTable">
<library name="apache.commons.csv" type="repository">
<properties maven-id="org.apache.commons:commons-csv:1.9.0" />
<properties maven-id="org.apache.commons:commons-csv:1.14.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

View File

@@ -1,8 +1,8 @@
<component name="libraryTable">
<library name="commons.cli" type="repository">
<properties maven-id="commons-cli:commons-cli:1.5.0" />
<properties maven-id="commons-cli:commons-cli:1.9.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

View File

@@ -1,13 +0,0 @@
<component name="libraryTable">
<library name="commons-rng-1">
<CLASSES>
<root url="file://$USER_HOME$/Downloads/commons-rng-1.6" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="file://$USER_HOME$/Downloads/commons-rng-1.6" />
</SOURCES>
<jarDirectory url="file://$USER_HOME$/Downloads/commons-rng-1.6" recursive="false" />
<jarDirectory url="file://$USER_HOME$/Downloads/commons-rng-1.6" recursive="false" type="SOURCES" />
</library>
</component>

View File

@@ -1,9 +1,10 @@
<component name="libraryTable">
<library name="jgrapht.core" type="repository">
<properties maven-id="org.jgrapht:jgrapht-core:1.5.1" />
<properties maven-id="org.jgrapht:jgrapht-core:1.5.2" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

View File

@@ -1,13 +1,14 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<properties maven-id="org.jgrapht:jgrapht-io:1.5.2" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

44
pom.xml
View File

@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>TCellSim</artifactId>
<artifactId>BiGpairSEQ_Sim</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
<plugins>
@@ -26,8 +26,48 @@
<version>RELEASE</version>
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-rng-simple -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-simple</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-sampling</artifactId>
<version>1.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.14.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-core -->
<dependency>
<groupId>org.jgrapht</groupId>
<artifactId>jgrapht-core</artifactId>
<version>1.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-io -->
<dependency>
<groupId>org.jgrapht</groupId>
<artifactId>jgrapht-io</artifactId>
<version>1.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jheaps/jheaps -->
<dependency>
<groupId>org.jheaps</groupId>
<artifactId>jheaps</artifactId>
<version>0.14</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.9.0</version>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>

View File

@@ -13,7 +13,7 @@ public class BiGpairSEQ {
private static boolean cacheCells = false;
private static boolean cachePlate = false;
private static boolean cacheGraph = false;
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
private static AlgorithmType matchingAlgorithmType = AlgorithmType.HUNGARIAN;
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
private static DistributionType distributionType = DistributionType.ZIPF;
private static boolean outputBinary = true;
@@ -166,13 +166,13 @@ public class BiGpairSEQ {
return priorityQueueHeapType;
}
public static AlgorithmType getMatchingAlgoritmType() { return matchingAlgoritmType; }
public static AlgorithmType getMatchingAlgorithmType() { return matchingAlgorithmType; }
public static void setHungarianAlgorithm() { matchingAlgoritmType = AlgorithmType.HUNGARIAN; }
public static void setHungarianAlgorithm() { matchingAlgorithmType = AlgorithmType.HUNGARIAN; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgoritmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgorithmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setAuctionAlgorithm() { matchingAlgoritmType = AlgorithmType.AUCTION; }
public static void setAuctionAlgorithm() { matchingAlgorithmType = AlgorithmType.AUCTION; }
public static void setPairingHeap() {
priorityQueueHeapType = HeapType.PAIRING;

View File

@@ -1,72 +1,54 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
public interface GraphModificationFunctions {
//remove over- and under-weight edges, return removed edges
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
int low, int high, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
int low, int high, boolean saveEdges) {
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
if(saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
}
else {
graph.setEdgeWeight(e, 0.0);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
edgesToRemove.add(e);
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs with large occupancy discrepancy, return removed edges
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer maxOccupancyDifference, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
}
else {
graph.setEdgeWeight(e, 0.0);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
edgesToRemove.add(e);
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer minOverlapPercent,
boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
@@ -74,22 +56,13 @@ public interface GraphModificationFunctions {
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer intWeight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, intWeight);
}
else {
graph.setEdgeWeight(e, 0.0);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
edgesToRemove.add(e);
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
@@ -126,10 +99,10 @@ public interface GraphModificationFunctions {
}
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Map<Vertex[], Integer> removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
graph.setEdgeWeight(e, removedEdges.get(edge));
Map<DefaultWeightedEdge, Vertex[]> removedEdges) {
for (DefaultWeightedEdge edge : removedEdges.keySet()) {
Vertex[] vertices = removedEdges.get(edge);
graph.addEdge(vertices[0], vertices[1], edge);
}
}

View File

@@ -13,8 +13,7 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.core.BaseProvider;
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
import org.apache.commons.rng.simple.JDKRandomWrapper;
@@ -242,7 +241,7 @@ public class Plate {
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
}
//The specific misread hasn't happened before
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) {
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).isEmpty()) {
//The misread doesn't collide with a real sequence already on the plate and some sequences have already been read
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
StringBuilder spurious = new StringBuilder(currentSequence);

View File

@@ -1,9 +1,7 @@
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.tree.FibonacciHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
@@ -70,58 +68,102 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);}
}
//construct the graph. For simplicity, going to make
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
//distinct numbers associated with them. Since I'm using a 2D array, that means
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
//subtract the vertexStartValue from betas to use their vertex labels as array indices
int vertexStartValue = 0;
//keys are alphas, values are sequential integers
Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
//new start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateAtoVMap.size();
//keys are betas, values are sequential integers
Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
if(verbose){System.out.println("Vertex maps made");}
//make adjacency matrix for bipartite graph generator
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
if(verbose){System.out.println("Making adjacency matrix");}
double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
if(verbose){System.out.println("Adjacency matrix made");}
/*
* The commented out code below works beautifully for small enough graphs. However, after implementing a
* Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that
* this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix.
* So I'm going to attempt to build this graph directly and see if that is less memory intensive
*/
// //construct the graph. For simplicity, going to make
// if(verbose){System.out.println("Making vertex maps");}
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// //distinct numbers associated with them. Since I'm using a 2D array, that means
// //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// //subtract the vertexStartValue from betas to use their vertex labels as array indices
// int vertexStartValue = 0;
// //keys are alphas, values are sequential integers
// Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
// //new start value for vertex to beta map should be one more than final vertex value in alpha map
// vertexStartValue += plateAtoVMap.size();
// //keys are betas, values are sequential integers
// Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
// if(verbose){System.out.println("Vertex maps made");}
// //make adjacency matrix for bipartite graph generator
// //(technically this is only 1/4 of an adjacency matrix, but that's all you need
// //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
// if(verbose){System.out.println("Making adjacency matrix");}
// double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
// fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
// if(verbose){System.out.println("Adjacency matrix made");}
// //make bipartite graph
// if(verbose){System.out.println("Making bipartite weighted graph");}
// //the graph object
// SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
// //the graph generator
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
// //the list of alpha vertices
// List<Vertex> alphaVertices = new ArrayList<>();
// for (String seq : plateAtoVMap.keySet()) {
// Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
// alphaVertices.add(alphaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(alphaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.first(alphaVertices);
// //the list of beta vertices
// List<Vertex> betaVertices = new ArrayList<>();
// for (String seq : plateBtoVMap.keySet()) {
// Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
// betaVertices.add(betaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(betaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.second(betaVertices);
// //use adjacency matrix of weight created previously
// graphGenerator.weights(weights);
// graphGenerator.generateGraph(graph);
//make bipartite graph
if(verbose){System.out.println("Making bipartite weighted graph");}
//the graph object
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
int vertexLabelValue = 0;
//create and add alpha sequence vertices
List<Vertex> alphaVertices = new ArrayList<>();
for (String seq : plateAtoVMap.keySet()) {
Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
alphaVertices.add(alphaVertex);
for (Map.Entry<String, SequenceRecord> entry: alphaSequences.entrySet()) {
alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
vertexLabelValue++;
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(alphaVertices);
//Add ordered list of vertices to the graph
graphGenerator.first(alphaVertices);
//the list of beta vertices
alphaVertices.forEach(graph::addVertex);
//add beta sequence vertices
List<Vertex> betaVertices = new ArrayList<>();
for (String seq : plateBtoVMap.keySet()) {
Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
betaVertices.add(betaVertex);
for (Map.Entry<String, SequenceRecord> entry: betaSequences.entrySet()) {
betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
vertexLabelValue++;
}
betaVertices.forEach(graph::addVertex);
//add edges
for(Vertex a: alphaVertices) {
for(Vertex b: betaVertices) {
Set<Integer> sharedWells = new HashSet<>(a.getRecord().getWells());
sharedWells.retainAll(b.getRecord().getWells());
double weight = (double) sharedWells.size();
if (weight != 0.0) {
System.out.println("Edge weight: " + weight);
DefaultWeightedEdge edge = graph.addEdge(a, b);
graph.setEdgeWeight(edge, weight);
}
else {
System.out.println("No overlap");
}
}
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(betaVertices);
//Add ordered list of vertices to the graph
graphGenerator.second(betaVertices);
//use adjacency matrix of weight created previously
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
//stop timing
Instant stop = Instant.now();
@@ -145,7 +187,7 @@ public class Simulator implements GraphModificationFunctions {
Integer minOverlapPercent, boolean verbose, boolean calculatePValue) {
Instant start = Instant.now();
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
boolean saveEdges = BiGpairSEQ.cacheGraph();
int numWells = data.getNumWells();
//Integer alphaCount = data.getAlphaCount();
@@ -163,6 +205,7 @@ public class Simulator implements GraphModificationFunctions {
}
Integer graphAlphaCount = alphas.size();
Integer graphBetaCount = betas.size();
Integer graphEdgeCount = graph.edgeSet().size();
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
@@ -182,12 +225,14 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"removed");}
Integer filteredGraphEdgeCount = graph.edgeSet().size();
//Find Maximum Weight Matching
if(verbose){System.out.println("Finding maximum weight matching");}
//The matching object
MatchingAlgorithm<Vertex, DefaultWeightedEdge> maxWeightMatching;
//Determine algorithm type
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgoritmType();
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType();
switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way
case AUCTION -> {
//create a new MaximumIntegerWeightBipartiteAuctionMatching
@@ -333,8 +378,10 @@ public class Simulator implements GraphModificationFunctions {
metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString());
metadata.put("total alphas read from plate", data.getAlphaCount().toString());
metadata.put("total betas read from plate", data.getBetaCount().toString());
metadata.put("initial edges in graph", graphEdgeCount.toString());
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString());
metadata.put("high overlap threshold for pairing", highThreshold.toString());
metadata.put("low overlap threshold for pairing", lowThreshold.toString());
metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString());