13 Commits

Author  SHA1  Message  Date
eugenefischer  d1810c453d  Even more efficient graph creation (my initial scheme, but this time without accidentally changing what's in the sequence records)  2025-04-10 15:03:10 -05:00
eugenefischer  187401f2d6  More efficient graph creation  2025-04-10 14:06:11 -05:00
eugenefischer  678ce99424  iterate over vertex wells correctly  2025-04-10 13:34:04 -05:00
eugenefischer  c21e375303  fix concurrent modification bug  2025-04-10 13:33:47 -05:00
eugenefischer  57fe9c1619  Update graph modification functions to work with edges directly  2025-04-10 12:42:19 -05:00
eugenefischer  e1888a99c6  refactor to construct the bipartite graph directly, rather than by using an adjacency matrix and a graph generator.  2025-04-10 11:47:15 -05:00
eugenefischer  bcf5a4c749  change artifact details  2025-04-10 11:05:08 -05:00
eugenefischer  81d8a12765  dependency update stuff  2025-04-10 10:54:05 -05:00
eugenefischer  b5c0568e22  Add dependencies  2025-04-10 10:53:42 -05:00
eugenefischer  b7597cff2a  update readme and add Zipf exponent option to CLI  2025-04-09 16:16:46 -05:00
eugenefischer  7bbeaf7dad  update readme  2025-04-09 14:40:49 -05:00
eugenefischer  945b967382  update readme  2025-04-09 14:39:46 -05:00
eugenefischer  a43ee469ea  implement Zipf distribution  2025-04-09 14:32:02 -05:00
18 changed files with 390 additions and 192 deletions

1  .idea/.name  (generated, new file)

@@ -0,0 +1 @@
BiGpairSEQ


@@ -1,16 +1,27 @@
<component name="ArtifactManager">
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
<artifact type="jar" name="BiGpairSEQ_Sim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
<root id="archive" name="BiGpairSEQ_Sim.jar">
<element id="directory" name="META-INF">
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
<element id="file-copy" path="$PROJECT_DIR$/META-INF/MANIFEST.MF" />
</element>
<element id="module-output" name="BigPairSEQ" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
<element id="module-output" name="BiGpairSEQ_Sim" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-sampling/1.6/commons-rng-sampling-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/26.0.2/annotations-26.0.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-simple/1.6/commons-rng-simple-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-core/1.6/commons-rng-core-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-rng-client-api/1.6/commons-rng-client-api-1.6.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar" path-in-jar="/" />
</root>
</artifact>
</component>

1  .idea/compiler.xml  (generated)

@@ -7,6 +7,7 @@
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="BigPairSEQ" />
<module name="BiGpairSEQ_Sim" />
</profile>
</annotationProcessing>
</component>


@@ -1,20 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="my-internal-site" />
<option name="name" value="my-internal-site" />
<option name="url" value="https://myserver/repo" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central repo" />
<option name="name" value="central repo" />
<option name="url" value="https://repo1.maven.org/maven2/" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
<remote-repository>
<option name="id" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
<option name="name" value="34d16bdc-85f0-48ee-8e8b-144091765be1" />
<option name="url" value="https://repository.mulesoft.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>


@@ -1,8 +1,10 @@
<component name="libraryTable">
<library name="apache.commons.csv" type="repository">
<properties maven-id="org.apache.commons:commons-csv:1.9.0" />
<properties maven-id="org.apache.commons:commons-csv:1.14.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.14.0/commons-csv-1.14.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.18.0/commons-io-2.18.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.18.0/commons-codec-1.18.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />


@@ -1,8 +1,8 @@
<component name="libraryTable">
<library name="commons.cli" type="repository">
<properties maven-id="commons-cli:commons-cli:1.5.0" />
<properties maven-id="commons-cli:commons-cli:1.9.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.9.0/commons-cli-1.9.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />


@@ -1,9 +1,10 @@
<component name="libraryTable">
<library name="jgrapht.core" type="repository">
<properties maven-id="org.jgrapht:jgrapht-core:1.5.1" />
<properties maven-id="org.jgrapht:jgrapht-core:1.5.2" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />


@@ -1,13 +1,14 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<properties maven-id="org.jgrapht:jgrapht-io:1.5.2" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.2/jgrapht-io-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.2/jgrapht-core-1.5.2.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apfloat/apfloat/1.10.1/apfloat-1.10.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.12.0/antlr4-runtime-4.12.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

44  pom.xml

@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>TCellSim</artifactId>
<artifactId>BiGpairSEQ_Sim</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
<plugins>
@@ -26,8 +26,48 @@
<version>RELEASE</version>
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-rng-simple -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-simple</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-rng-sampling</artifactId>
<version>1.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.14.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-core -->
<dependency>
<groupId>org.jgrapht</groupId>
<artifactId>jgrapht-core</artifactId>
<version>1.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jgrapht/jgrapht-io -->
<dependency>
<groupId>org.jgrapht</groupId>
<artifactId>jgrapht-io</artifactId>
<version>1.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jheaps/jheaps -->
<dependency>
<groupId>org.jheaps</groupId>
<artifactId>jheaps</artifactId>
<version>0.14</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.9.0</version>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>

README

@@ -156,6 +156,8 @@ usage: BiGpairSEQ_Sim.jar -plate
-c,--cell-file <filename> The cell sample file to use
-d,--dropout-rate <rate> The sequence dropout rate due to
amplification error. (0.0 - 1.0)
-exp <value> If using -zipf flag, exponent value for
distribution
-exponential Use an exponential distribution for cell
sample
-gaussian Use a Gaussian distribution for cell sample
@@ -173,6 +175,7 @@ usage: BiGpairSEQ_Sim.jar -plate
-stddev <value> If using -gaussian flag, standard deviation
for distrbution
-w,--wells <number> The number of wells on the sample plate
-zipf Use a Zipf distribution for cell sample
usage: BiGpairSEQ_Sim.jar -graph
-c,--cell-file <filename> Cell sample file to use for
@@ -234,7 +237,6 @@ usage: BiGpairSEQ_Sim.jar -match
to stdout.
-pv,--p-value (Optional) Calculate p-values for sequence
pairs.
```
### INTERACTIVE INTERFACE
@@ -340,6 +342,8 @@ Options when making a Sample Plate file:
* Standard deviation size
* Exponential
* Lambda value
* Zipf
* Exponent value
* Total number of wells on the plate
* Well populations random or fixed
* If random, minimum and maximum population sizes
@@ -630,6 +634,7 @@ a means of exploring some very beautiful math.
## TODO
* Update CLI option text in this readme to include Zipf distribution options
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~

BiGpairSEQ.java

@@ -13,8 +13,9 @@ public class BiGpairSEQ {
private static boolean cacheCells = false;
private static boolean cachePlate = false;
private static boolean cacheGraph = false;
private static AlgorithmType matchingAlgoritmType = AlgorithmType.HUNGARIAN;
private static AlgorithmType matchingAlgorithmType = AlgorithmType.HUNGARIAN;
private static HeapType priorityQueueHeapType = HeapType.PAIRING;
private static DistributionType distributionType = DistributionType.ZIPF;
private static boolean outputBinary = true;
private static boolean outputGraphML = false;
private static boolean calculatePValue = false;
@@ -60,6 +61,10 @@ public class BiGpairSEQ {
return cellFilename;
}
public static DistributionType getDistributionType() {return distributionType;}
public static void setDistributionType(DistributionType type) {distributionType = type;}
public static Plate getPlateInMemory() {
return plateInMemory;
}
@@ -161,13 +166,13 @@ public class BiGpairSEQ {
return priorityQueueHeapType;
}
public static AlgorithmType getMatchingAlgoritmType() { return matchingAlgoritmType; }
public static AlgorithmType getMatchingAlgorithmType() { return matchingAlgorithmType; }
public static void setHungarianAlgorithm() { matchingAlgoritmType = AlgorithmType.HUNGARIAN; }
public static void setHungarianAlgorithm() { matchingAlgorithmType = AlgorithmType.HUNGARIAN; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgoritmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setIntegerWeightScalingAlgorithm() { matchingAlgorithmType = AlgorithmType.INTEGER_WEIGHT_SCALING; }
public static void setAuctionAlgorithm() { matchingAlgoritmType = AlgorithmType.AUCTION; }
public static void setAuctionAlgorithm() { matchingAlgorithmType = AlgorithmType.AUCTION; }
public static void setPairingHeap() {
priorityQueueHeapType = HeapType.PAIRING;

CommandLineInterface.java

@@ -123,16 +123,20 @@ public class CommandLineInterface {
Plate plate;
if (line.hasOption("poisson")) {
Double stdDev = Math.sqrt(numWells);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
}
else if (line.hasOption("gaussian")) {
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev);
}
else if (line.hasOption("zipf")) {
Double zipfExponent = Double.parseDouble(line.getOptionValue("exp"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, zipfExponent);
}
else {
assert line.hasOption("exponential");
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda);
}
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
writer.writePlateFile();
@@ -340,9 +344,13 @@ public class CommandLineInterface {
Option exponential = Option.builder("exponential")
.desc("Use an exponential distribution for cell sample")
.build();
Option zipf = Option.builder("zipf")
.desc("Use a Zipf distribution for cell sample")
.build();
distributions.addOption(poisson);
distributions.addOption(gaussian);
distributions.addOption(exponential);
distributions.addOption(zipf);
//options group for statistical distribution parameters
OptionGroup statParams = new OptionGroup();// add this to plate options
Option stdDev = Option.builder("stddev")
@@ -355,6 +363,11 @@ public class CommandLineInterface {
.hasArg()
.argName("value")
.build();
Option zipfExponent = Option.builder("exp")
.desc("If using -zipf flag, exponent value for distribution")
.hasArg()
.argName("value")
.build();
statParams.addOption(stdDev);
statParams.addOption(lambda);
//Option group for random plate or set populations
@@ -386,6 +399,7 @@ public class CommandLineInterface {
plateOptions.addOptionGroup(statParams);
plateOptions.addOptionGroup(wellPopOptions);
plateOptions.addOption(dropoutRate);
plateOptions.addOption(zipfExponent);
plateOptions.addOption(outputFileOption());
return plateOptions;
}

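The hunk above adds -zipf to the mutually exclusive distribution OptionGroup and registers a separate -exp value option on the plate options. A minimal, self-contained sketch of that commons-cli pattern; the class name and the 1.4 fallback exponent are illustrative (the fallback is borrowed from the interactive interface), not project code:

import org.apache.commons.cli.*;

public class ZipfOptionDemo {
    public static void main(String[] args) throws ParseException {
        // Mutually exclusive distribution flags live in one OptionGroup.
        OptionGroup distributions = new OptionGroup();
        distributions.addOption(Option.builder("zipf")
                .desc("Use a Zipf distribution for cell sample").build());
        distributions.addOption(Option.builder("exponential")
                .desc("Use an exponential distribution for cell sample").build());

        // The exponent is a plain option with an argument, added outside the group.
        Option zipfExponent = Option.builder("exp")
                .desc("If using -zipf flag, exponent value for distribution")
                .hasArg().argName("value").build();

        Options options = new Options();
        options.addOptionGroup(distributions);
        options.addOption(zipfExponent);

        CommandLine line = new DefaultParser().parse(options, args);
        if (line.hasOption("zipf")) {
            // Fall back to 1.4 if -exp is omitted (an assumed default for this sketch).
            double exponent = Double.parseDouble(line.getOptionValue("exp", "1.4"));
            System.out.println("Zipf exponent: " + exponent);
        }
    }
}
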
DistributionType.java

@@ -0,0 +1,6 @@
public enum DistributionType {
POISSON,
GAUSSIAN,
EXPONENTIAL,
ZIPF
}

GraphModificationFunctions.java

@@ -1,72 +1,54 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
public interface GraphModificationFunctions {
//remove over- and under-weight edges, return removed edges
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
int low, int high, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
if(saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs with large occupancy discrepancy, return removed edges
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer maxOccupancyDifference, boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer weight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, weight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
//Remove edges for pairs where overlap size is significantly lower than the well occupancy, return removed edges
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
static Map<DefaultWeightedEdge, Vertex[]> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Integer minOverlapPercent,
boolean saveEdges) {
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
@@ -74,22 +56,13 @@ public interface GraphModificationFunctions {
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
if (saveEdges) {
Vertex source = graph.getEdgeSource(e);
Vertex target = graph.getEdgeTarget(e);
Integer intWeight = (int) graph.getEdgeWeight(e);
Vertex[] edge = {source, target};
removedEdges.put(edge, intWeight);
Vertex[] vertices = {graph.getEdgeSource(e), graph.getEdgeTarget(e)};
removedEdges.put(e, vertices);
}
else {
graph.setEdgeWeight(e, 0.0);
}
}
}
if(saveEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
graph.removeEdge(edge[0], edge[1]);
edgesToRemove.add(e);
}
}
edgesToRemove.forEach(graph::removeEdge);
return removedEdges;
}
@@ -126,10 +99,10 @@ public interface GraphModificationFunctions {
}
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
Map<Vertex[], Integer> removedEdges) {
for (Vertex[] edge : removedEdges.keySet()) {
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
graph.setEdgeWeight(e, removedEdges.get(edge));
Map<DefaultWeightedEdge, Vertex[]> removedEdges) {
for (DefaultWeightedEdge edge : removedEdges.keySet()) {
Vertex[] vertices = removedEdges.get(edge);
graph.addEdge(vertices[0], vertices[1], edge);
}
}

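The rewrite above changes the removed-edge bookkeeping from Map<Vertex[], Integer> to Map<DefaultWeightedEdge, Vertex[]> and defers removal to a separate edgesToRemove pass, so graph.edgeSet() is never structurally modified while it is being iterated (the concurrent-modification fix in the commit list). A small standalone sketch of that filter-then-restore pattern with JGraphT, using plain String vertices instead of the project's Vertex class:

import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.*;

public class FilterThenRemoveDemo {
    public static void main(String[] args) {
        SimpleWeightedGraph<String, DefaultWeightedEdge> graph =
                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
        graph.addVertex("a1"); graph.addVertex("a2"); graph.addVertex("b1");
        graph.setEdgeWeight(graph.addEdge("a1", "b1"), 5.0);
        graph.setEdgeWeight(graph.addEdge("a2", "b1"), 42.0);

        double low = 1.0, high = 10.0;
        // Remember the endpoints of each removed edge so it can be re-added later.
        Map<DefaultWeightedEdge, String[]> removedEdges = new HashMap<>();
        Set<DefaultWeightedEdge> edgesToRemove = new HashSet<>();

        // Pass 1: only inspect; never structurally modify the graph inside this loop.
        for (DefaultWeightedEdge e : graph.edgeSet()) {
            double w = graph.getEdgeWeight(e);
            if (w > high || w < low) {
                removedEdges.put(e, new String[]{graph.getEdgeSource(e), graph.getEdgeTarget(e)});
                edgesToRemove.add(e);
            }
        }
        // Pass 2: structural modification happens only after iteration is finished.
        edgesToRemove.forEach(graph::removeEdge);

        // Restoring works because addEdge(v1, v2, e) re-inserts the same edge object,
        // and DefaultWeightedEdge carries its weight with it.
        removedEdges.forEach((e, v) -> graph.addEdge(v[0], v[1], e));
        System.out.println(graph.edgeSet().size() + " edges after restore");
    }
}
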
InteractiveInterface.java

@@ -89,14 +89,12 @@ public class InteractiveInterface {
private static void makePlate() {
String cellFile = null;
String filename = null;
Double stdDev = 0.0;
Double parameter = 0.0;
Integer numWells = 0;
Integer numSections;
Integer[] populations = {1};
Double dropOutRate = 0.0;
boolean poisson = false;
boolean exponential = false;
double lambda = 1.5;
;
try {
System.out.println("\nSimulated sample plates consist of:");
System.out.println("* a number of wells");
@@ -114,33 +112,46 @@ public class InteractiveInterface {
System.out.println("1) Poisson");
System.out.println("2) Gaussian");
System.out.println("3) Exponential");
// System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
// System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
System.out.println("4) Zipf");
System.out.println("(Note: wider distributions are more memory intensive to match)");
System.out.print("Enter selection value: ");
input = sc.nextInt();
switch (input) {
case 1 -> poisson = true;
case 1 -> {
BiGpairSEQ.setDistributionType(DistributionType.POISSON);
}
case 2 -> {
BiGpairSEQ.setDistributionType(DistributionType.GAUSSIAN);
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
System.out.println("(Note: wider distributions are more memory intensive to match)");
stdDev = sc.nextDouble();
if (stdDev <= 0.0) {
parameter = sc.nextDouble();
if (parameter <= 0.0) {
throw new InputMismatchException("Value must be positive.");
}
}
case 3 -> {
exponential = true;
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
System.out.print("Please enter lambda value for exponential distribution: ");
lambda = sc.nextDouble();
if (lambda <= 0.0) {
lambda = 0.6;
System.out.println("Value must be positive. Defaulting to 0.6.");
parameter = sc.nextDouble();
if (parameter <= 0.0) {
parameter = 1.4;
System.out.println("Value must be positive. Defaulting to 1.4.");
}
}
case 4 -> {
BiGpairSEQ.setDistributionType(DistributionType.ZIPF);
System.out.print("Please enter exponent value for Zipf distribution: ");
parameter = sc.nextDouble();
if (parameter <= 0.0) {
parameter = 1.4;
System.out.println("Value must be positive. Defaulting to 1.4.");
}
}
default -> {
System.out.println("Invalid input. Defaulting to exponential.");
exponential = true;
parameter = 1.4;
BiGpairSEQ.setDistributionType(DistributionType.EXPONENTIAL);
}
}
System.out.print("\nNumber of wells on plate: ");
@@ -226,17 +237,18 @@ public class InteractiveInterface {
assert filename != null;
Plate samplePlate;
PlateFileWriter writer;
if(exponential){
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
DistributionType type = BiGpairSEQ.getDistributionType();
switch(type) {
case POISSON -> {
parameter = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
writer = new PlateFileWriter(filename, samplePlate);
}
else {
if (poisson) {
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
}
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
default -> {
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, parameter);
writer = new PlateFileWriter(filename, samplePlate);
}
}
System.out.println("Writing Sample Plate to file");
writer.writePlateFile();
System.out.println("Sample Plate written to file: " + filename);
@@ -605,12 +617,13 @@ public class InteractiveInterface {
case 3 -> {
BiGpairSEQ.setAuctionAlgorithm();
System.out.println("MWM algorithm set to auction");
backToOptions = true;
}
case 4 -> {
System.out.println("Scaling integer weight MWM algorithm not yet fully implemented. Sorry.");
// BiGpairSEQ.setIntegerWeightScalingAlgorithm();
// System.out.println("MWM algorithm set to integer weight scaling algorithm of Duan and Su");
backToOptions = true;
// backToOptions = true;
}
case 0 -> backToOptions = true;
default -> System.out.println("Invalid input");

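The Poisson branches above reuse the Gaussian fill with a standard deviation of sqrt(count), on the grounds that a Poisson(lambda) distribution has variance equal to its mean, so N(lambda, sqrt(lambda)) approximates it well for large lambda. A quick standalone check of that approximation; the Knuth-style sampler and the numbers are illustrative only, not project code:

import java.util.Random;

public class PoissonGaussianCheck {
    // Knuth's simple Poisson sampler; fine for moderate lambda values.
    static int poisson(Random r, double lambda) {
        double target = Math.exp(-lambda), p = 1.0;
        int k = 0;
        do { k++; p *= r.nextDouble(); } while (p > target);
        return k - 1;
    }

    public static void main(String[] args) {
        Random r = new Random(42);
        double lambda = 100.0;   // hypothetical mean count
        int n = 100_000;
        double sum = 0, sumSq = 0;
        for (int i = 0; i < n; i++) {
            int x = poisson(r, lambda);
            sum += x; sumSq += (double) x * x;
        }
        double mean = sum / n;
        double var = sumSq / n - mean * mean;
        // Both mean and variance land near lambda, so N(lambda, sqrt(lambda)) is a close stand-in.
        System.out.printf("mean=%.2f variance=%.2f sqrt(var)=%.2f%n", mean, var, Math.sqrt(var));
    }
}
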
Plate.java

@@ -13,6 +13,10 @@ TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
import org.apache.commons.rng.simple.JDKRandomWrapper;
import java.util.*;
public class Plate {
@@ -26,25 +30,22 @@ public class Plate {
private Integer[] populations;
private double stdDev;
private double lambda;
boolean exponential = false;
private double zipfExponent;
private DistributionType distributionType;
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
double dropoutRate, double stdDev_or_lambda, boolean exponential){
double dropoutRate, double parameter){
this.cells = cells;
this.sourceFile = cellFilename;
this.size = numWells;
this.wells = new ArrayList<>();
this.error = dropoutRate;
this.populations = populations;
this.exponential = exponential;
if (this.exponential) {
this.lambda = stdDev_or_lambda;
fillWellsExponential(cells.getCells(), this.lambda);
}
else {
this.stdDev = stdDev_or_lambda;
fillWells(cells.getCells(), this.stdDev);
}
this.stdDev = parameter;
this.lambda = parameter;
this.zipfExponent = parameter;
this.distributionType = BiGpairSEQ.getDistributionType();
fillWells(cells.getCells());
}
@@ -85,9 +86,33 @@ public class Plate {
}
}
private void fillWellsZipf(List<String[]> cells, double exponent) {
int numSections = populations.length;
int section = 0;
int n;
RejectionInversionZipfSampler zipfSampler = new RejectionInversionZipfSampler(new JDKRandomWrapper(rand), cells.size(), exponent);
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<String[]> well = new ArrayList<>();
for (int j = 0; j < populations[section]; j++) {
do {
n = zipfSampler.sample();
} while (n >= cells.size() || n < 0);
String[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
cellToAdd[k] = "-1";
}
}
well.add(cellToAdd);
}
wells.add(well);
}
section++;
}
}
private void fillWellsExponential(List<String[]> cells, double lambda){
this.lambda = lambda;
exponential = true;
int numSections = populations.length;
int section = 0;
double m;
@@ -143,6 +168,24 @@ public class Plate {
}
}
private void fillWells(List<String[]> cells){
DistributionType type = BiGpairSEQ.getDistributionType();
switch (type) {
case POISSON, GAUSSIAN -> {
fillWells(cells, getStdDev());
break;
}
case EXPONENTIAL -> {
fillWellsExponential(cells, getLambda());
break;
}
case ZIPF -> {
fillWellsZipf(cells, getZipfExponent());
break;
}
}
}
public Integer[] getPopulations(){
return populations;
}
@@ -155,10 +198,12 @@ public class Plate {
return stdDev;
}
public boolean isExponential(){return exponential;}
public DistributionType getDistributionType() { return distributionType;}
public double getLambda(){return lambda;}
public double getZipfExponent(){return zipfExponent;}
public double getError() {
return error;
}
@@ -196,7 +241,7 @@ public class Plate {
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
}
//The specific misread hasn't happened before
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) {
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).isEmpty()) {
//The misread doesn't collide with a real sequence already on the plate and some sequences have already been read
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
StringBuilder spurious = new StringBuilder(currentSequence);

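fillWellsZipf above draws cell indices from commons-rng's RejectionInversionZipfSampler, built over the plate's java.util.Random via JDKRandomWrapper. A minimal sketch of that sampling setup; sample() returns 1-based ranks in [1, numberOfElements], so this sketch subtracts 1 before indexing, and the cell list, seed, and exponent are made up for illustration:

import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
import org.apache.commons.rng.simple.JDKRandomWrapper;

import java.util.List;
import java.util.Random;

public class ZipfSamplingDemo {
    public static void main(String[] args) {
        List<String> cells = List.of("cellA", "cellB", "cellC", "cellD");
        double exponent = 1.4;

        // Adapt a java.util.Random to the commons-rng UniformRandomProvider interface.
        UniformRandomProvider rng = new JDKRandomWrapper(new Random(7));
        RejectionInversionZipfSampler sampler =
                new RejectionInversionZipfSampler(rng, cells.size(), exponent);

        // sample() returns a rank in [1, cells.size()]; rank 1 is drawn most often.
        for (int i = 0; i < 10; i++) {
            int rank = sampler.sample();
            System.out.println(rank + " -> " + cells.get(rank - 1));
        }
    }
}
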
PlateFileWriter.java

@@ -13,11 +13,13 @@ public class PlateFileWriter {
private List<List<String[]>> wells;
private double stdDev;
private double lambda;
private double zipfExponent;
private DistributionType distributionType;
private Double error;
private String filename;
private String sourceFileName;
private Integer[] populations;
private boolean isExponential = false;
public PlateFileWriter(String filename, Plate plate) {
if(!filename.matches(".*\\.csv")){
@@ -26,12 +28,17 @@ public class PlateFileWriter {
this.filename = filename;
this.sourceFileName = plate.getSourceFileName();
this.size = plate.getSize();
this.isExponential = plate.isExponential();
if(isExponential) {
this.distributionType = plate.getDistributionType();
switch(distributionType) {
case POISSON, GAUSSIAN -> {
this.stdDev = plate.getStdDev();
}
case EXPONENTIAL -> {
this.lambda = plate.getLambda();
}
else{
this.stdDev = plate.getStdDev();
case ZIPF -> {
this.zipfExponent = plate.getZipfExponent();
}
}
this.error = plate.getError();
this.wells = plate.getWells();
@@ -95,11 +102,22 @@ public class PlateFileWriter {
printer.printComment("Plate size: " + size);
printer.printComment("Well populations: " + wellPopulationsString);
printer.printComment("Error rate: " + error);
if(isExponential){
printer.printComment("Lambda: " + lambda);
switch (distributionType) {
case POISSON -> {
printer.printComment("Cell frequency distribution: POISSON");
}
case GAUSSIAN -> {
printer.printComment("Cell frequency distribution: GAUSSIAN");
printer.printComment("--Standard deviation: " + stdDev);
}
case EXPONENTIAL -> {
printer.printComment("Cell frequency distribution: EXPONENTIAL");
printer.printComment("--Lambda: " + lambda);
}
case ZIPF -> {
printer.printComment("Cell frequency distribution: ZIPF");
printer.printComment("--Exponent: " + zipfExponent);
}
else {
printer.printComment("Std. dev.: " + stdDev);
}
printer.printRecords(wellsAsStrings);
} catch(IOException ex){

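The writer above records the distribution type and its parameter as CSV comment lines via printer.printComment(). A small sketch of that commons-csv usage; printComment() is skipped when the CSVFormat has no comment marker configured, so the sketch sets one explicitly, and the format settings and values are illustrative rather than the project's:

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;

import java.io.IOException;
import java.io.StringWriter;

public class PlateMetadataCommentDemo {
    public static void main(String[] args) throws IOException {
        CSVFormat format = CSVFormat.DEFAULT.builder()
                .setCommentMarker('#')   // without a marker, printComment() writes nothing
                .build();
        StringWriter out = new StringWriter();
        try (CSVPrinter printer = new CSVPrinter(out, format)) {
            printer.printComment("Cell frequency distribution: ZIPF");
            printer.printComment("--Exponent: 1.4");
            printer.printRecord("seqA", "seqB", "seqC");
        }
        System.out.print(out);
    }
}
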
Simulator.java

@@ -1,9 +1,7 @@
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.tree.FibonacciHeap;
import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal;
@@ -70,58 +68,102 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);}
}
//construct the graph. For simplicity, going to make
if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
//distinct numbers associated with them. Since I'm using a 2D array, that means
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
//subtract the vertexStartValue from betas to use their vertex labels as array indices
int vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas
Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
//new start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateAtoVMap.size();
//keys are betas, values are sequential integers
Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
if(verbose){System.out.println("Vertex maps made");}
//make adjacency matrix for bipartite graph generator
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
if(verbose){System.out.println("Making adjacency matrix");}
double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
if(verbose){System.out.println("Adjacency matrix made");}
/*
* The commented out code below works beautifully for small enough graphs. However, after implementing a
* Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that
* this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix.
* So I'm going to attempt to build this graph directly and see if that is less memory intensive
*/
// //construct the graph. For simplicity, going to make
// if(verbose){System.out.println("Making vertex maps");}
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// //distinct numbers associated with them. Since I'm using a 2D array, that means
// //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// //subtract the vertexStartValue from betas to use their vertex labels as array indices
// int vertexStartValue = 0;
// //keys are sequential integer vertices, values are alphas
// Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
// //new start value for vertex to beta map should be one more than final vertex value in alpha map
// vertexStartValue += plateAtoVMap.size();
// //keys are betas, values are sequential integers
// Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
// if(verbose){System.out.println("Vertex maps made");}
// //make adjacency matrix for bipartite graph generator
// //(technically this is only 1/4 of an adjacency matrix, but that's all you need
// //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
// if(verbose){System.out.println("Making adjacency matrix");}
// double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
// fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
// if(verbose){System.out.println("Adjacency matrix made");}
// //make bipartite graph
// if(verbose){System.out.println("Making bipartite weighted graph");}
// //the graph object
// SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
// //the graph generator
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
// //the list of alpha vertices
// List<Vertex> alphaVertices = new ArrayList<>();
// for (String seq : plateAtoVMap.keySet()) {
// Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
// alphaVertices.add(alphaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(alphaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.first(alphaVertices);
// //the list of beta vertices
// List<Vertex> betaVertices = new ArrayList<>();
// for (String seq : plateBtoVMap.keySet()) {
// Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
// betaVertices.add(betaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(betaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.second(betaVertices);
// //use adjacency matrix of weight created previously
// graphGenerator.weights(weights);
// graphGenerator.generateGraph(graph);
//make bipartite graph
if(verbose){System.out.println("Making bipartite weighted graph");}
//the graph object
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices
int vertexLabelValue = 0;
//create and add alpha sequence vertices
List<Vertex> alphaVertices = new ArrayList<>();
for (String seq : plateAtoVMap.keySet()) {
Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
alphaVertices.add(alphaVertex);
for (Map.Entry<String, SequenceRecord> entry: alphaSequences.entrySet()) {
alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
vertexLabelValue++;
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(alphaVertices);
//Add ordered list of vertices to the graph
graphGenerator.first(alphaVertices);
//the list of beta vertices
alphaVertices.forEach(graph::addVertex);
//add beta sequence vertices
List<Vertex> betaVertices = new ArrayList<>();
for (String seq : plateBtoVMap.keySet()) {
Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
betaVertices.add(betaVertex);
for (Map.Entry<String, SequenceRecord> entry: betaSequences.entrySet()) {
betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
vertexLabelValue++;
}
betaVertices.forEach(graph::addVertex);
//add edges
for(Vertex a: alphaVertices) {
for(Vertex b: betaVertices) {
Set<Integer> sharedWells = new HashSet<>(a.getRecord().getWells());
sharedWells.retainAll(b.getRecord().getWells());
double weight = (double) sharedWells.size();
if (weight != 0.0) {
System.out.println("Edge weight: " + weight);
DefaultWeightedEdge edge = graph.addEdge(a, b);
graph.setEdgeWeight(edge, weight);
}
else {
System.out.println("No overlap");
}
}
}
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix
Collections.sort(betaVertices);
//Add ordered list of vertices to the graph
graphGenerator.second(betaVertices);
//use adjacency matrix of weight created previously
graphGenerator.weights(weights);
graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");}
//stop timing
Instant stop = Instant.now();
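
The block retired above (left commented out) built a dense double[alphas][betas] weight matrix for SimpleWeightedBipartiteGraphMatrixGenerator; the replacement adds vertices and only the nonzero-overlap edges directly. The dense matrix is what exhausts the heap: at 8 bytes per cell, roughly 125,000 sequences per side already needs about 125 GB before any graph objects are allocated, in line with the 120 GB heap the comment describes as insufficient. A back-of-the-envelope comparison, with all counts hypothetical and the per-edge overhead a rough estimate:

public class DenseMatrixFootprint {
    public static void main(String[] args) {
        long alphas = 125_000;           // hypothetical sequence counts, not taken from the paper
        long betas  = 125_000;
        long denseBytes = alphas * betas * Double.BYTES;   // double[][] payload, 8 bytes per cell
        System.out.printf("dense matrix: %.1f GB%n", denseBytes / 1e9);

        // Direct construction stores only edges with nonzero well overlap.
        long nonZeroEdges = 50_000_000;  // hypothetical sparsity
        long perEdgeBytes = 48;          // rough guess at edge object + map-entry overhead
        System.out.printf("sparse edges: %.1f GB%n", nonZeroEdges * perEdgeBytes / 1e9);
    }
}
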
@@ -145,7 +187,7 @@ public class Simulator implements GraphModificationFunctions {
Integer minOverlapPercent, boolean verbose, boolean calculatePValue) {
Instant start = Instant.now();
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
Map<Vertex[], Integer> removedEdges = new HashMap<>();
Map<DefaultWeightedEdge, Vertex[]> removedEdges = new HashMap<>();
boolean saveEdges = BiGpairSEQ.cacheGraph();
int numWells = data.getNumWells();
//Integer alphaCount = data.getAlphaCount();
@@ -163,6 +205,7 @@ public class Simulator implements GraphModificationFunctions {
}
Integer graphAlphaCount = alphas.size();
Integer graphBetaCount = betas.size();
Integer graphEdgeCount = graph.edgeSet().size();
//remove edges with weights outside given overlap thresholds, add those to removed edge list
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
@@ -182,12 +225,14 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
"removed");}
Integer filteredGraphEdgeCount = graph.edgeSet().size();
//Find Maximum Weight Matching
if(verbose){System.out.println("Finding maximum weight matching");}
//The matching object
MatchingAlgorithm<Vertex, DefaultWeightedEdge> maxWeightMatching;
//Determine algorithm type
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgoritmType();
AlgorithmType algorithm = BiGpairSEQ.getMatchingAlgorithmType();
switch (algorithm) { //Only two options now, but I have room to add more algorithms in the future this way
case AUCTION -> {
//create a new MaximumIntegerWeightBipartiteAuctionMatching
@@ -333,8 +378,10 @@ public class Simulator implements GraphModificationFunctions {
metadata.put("real sequence collision rate", data.getRealSequenceCollisionRate().toString());
metadata.put("total alphas read from plate", data.getAlphaCount().toString());
metadata.put("total betas read from plate", data.getBetaCount().toString());
metadata.put("initial edges in graph", graphEdgeCount.toString());
metadata.put("alphas in graph (after pre-filtering)", graphAlphaCount.toString());
metadata.put("betas in graph (after pre-filtering)", graphBetaCount.toString());
metadata.put("final edges in graph (after pre-filtering)", filteredGraphEdgeCount.toString());
metadata.put("high overlap threshold for pairing", highThreshold.toString());
metadata.put("low overlap threshold for pairing", lowThreshold.toString());
metadata.put("minimum overlap percent for pairing", minOverlapPercent.toString());
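
For context, the algorithm chosen via getMatchingAlgorithmType() above runs over the filtered bipartite graph, and the import list at the top of Simulator keeps org.jgrapht.alg.matching.MaximumWeightBipartiteMatching, JGraphT's stock maximum-weight bipartite matcher. A minimal sketch of invoking it on a tiny graph; the String vertices and weights are illustrative, and the project's Vertex class and auction variant are not shown here:

import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.Set;

public class MatchingDemo {
    public static void main(String[] args) {
        SimpleWeightedGraph<String, DefaultWeightedEdge> graph =
                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
        Set<String> alphas = Set.of("a1", "a2");
        Set<String> betas = Set.of("b1", "b2");
        alphas.forEach(graph::addVertex);
        betas.forEach(graph::addVertex);
        graph.setEdgeWeight(graph.addEdge("a1", "b1"), 7.0);
        graph.setEdgeWeight(graph.addEdge("a1", "b2"), 2.0);
        graph.setEdgeWeight(graph.addEdge("a2", "b2"), 5.0);

        // The matcher takes the graph plus the two vertex partitions.
        MatchingAlgorithm<String, DefaultWeightedEdge> matcher =
                new MaximumWeightBipartiteMatching<>(graph, alphas, betas);
        MatchingAlgorithm.Matching<String, DefaultWeightedEdge> matching = matcher.getMatching();

        System.out.println("total weight: " + matching.getWeight());
        matching.getEdges().forEach(e ->
                System.out.println(graph.getEdgeSource(e) + " <-> " + graph.getEdgeTarget(e)));
    }
}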