Update readme with CLI arguments

2022-02-27 17:01:12 -06:00 · 2022-02-27 17:00:54 -06:00 · 2022-02-27 16:56:58 -06:00 · 2022-02-27 16:53:46 -06:00 · 2022-02-27 16:45:30 -06:00 · 2022-02-27 16:16:24 -06:00
29 changed files with 2845 additions and 1280 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 /out/
--- a/.idea/artifacts/BiGpairSEQ_Sim_jar.xml
+++ b/.idea/artifacts/BiGpairSEQ_Sim_jar.xml
@@ -1,15 +1,16 @@
 <component name="ArtifactManager">
-  <artifact type="jar" name="TCellSim:jar">
+  <artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
-    <output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path>
+    <output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
-    <root id="archive" name="TCellSim.jar">
+    <root id="archive" name="BiGpairSEQ_Sim.jar">
      <element id="directory" name="META-INF">
        <element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
      </element>
-      <element id="module-output" name="TCellSim" />
+      <element id="module-output" name="BigPairSEQ" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
    </root>
  </artifact>
 </component>
--- a/.idea/compiler.xml
+++ b/.idea/compiler.xml
@@ -6,7 +6,7 @@
        <sourceOutputDir name="target/generated-sources/annotations" />
        <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
        <outputRelativeToContentRoot value="true" />
-        <module name="TCellSim" />
+        <module name="BigPairSEQ" />
      </profile>
    </annotationProcessing>
  </component>
--- a/.idea/libraries/commons_cli.xml
+++ b/.idea/libraries/commons_cli.xml
@@ -0,0 +1,10 @@
 <component name="libraryTable">
  <library name="commons.cli" type="repository">
    <properties maven-id="commons-cli:commons-cli:1.5.0" />
    <CLASSES>
      <root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
    </CLASSES>
    <JAVADOC />
    <SOURCES />
  </library>
 </component>
--- a/.idea/libraries/jgrapht_io.xml
+++ b/.idea/libraries/jgrapht_io.xml
@@ -0,0 +1,15 @@
 <component name="libraryTable">
  <library name="jgrapht.io" type="repository">
    <properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
    <CLASSES>
      <root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
      <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
    </CLASSES>
    <JAVADOC />
    <SOURCES />
  </library>
 </component>
--- a/.idea/libraries/jheaps.xml
+++ b/.idea/libraries/jheaps.xml
@@ -0,0 +1,10 @@
 <component name="libraryTable">
  <library name="jheaps" type="repository">
    <properties maven-id="org.jheaps:jheaps:0.14" />
    <CLASSES>
      <root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
    </CLASSES>
    <JAVADOC />
    <SOURCES />
  </library>
 </component>
--- a/out/artifacts/TCellSim_jar/TCellSim.jar
+++ b/out/artifacts/TCellSim_jar/TCellSim.jar
--- a/readme.md
+++ b/readme.md
@@ -0,0 +1,324 @@
 # BiGpairSEQ SIMULATOR
 ## ABOUT
 This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
 of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
 ## THEORY
 Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
 against a null distribution, BiGpairSEQ does not do any statistical calculations
 directly.
 BiGpairSEQ creates a [weighted bipartite graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
 The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that shares a well
 is connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
 (Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
 The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
 matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
 This is a well-studied combinatorial optimization problem, with many known solutions.
 The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
 is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N, 
 their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is 
 bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
 Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
 So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
 runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Mehlhorn and Näher (1999).
 ## USAGE
 ### RUNNING THE PROGRAM
 [Download the current version of BiGpairSEQ_Sim.](https://gitea.ejsf.synology.me/efischer/BiGpairSEQ/releases)
 BiGpairSEQ_Sim is an executable .jar file. Requires Java 14 or higher. [OpenJDK 17](https://jdk.java.net/17/)
 recommended.
 Run with the command:
 `java -jar BiGpairSEQ_Sim.jar`
 Processing sample plates with tens of thousands of sequences may require large amounts 
 of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
 For example, to run the program with 32 gigabytes of memory, use the command:
 `java -Xmx32G -jar BiGpairSEQ_Sim.jar`
 There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
 use the -help flag:
 `java -jar BiGpairSEQ_Sim.jar -help`
 If no command line arguments are given, BiGpairSEQ_Sim will launch with an interactive, menu-driven CLI for 
 generating files and simulating TCR pairing. The main menu looks like this:
 ```
 --------BiGPairSEQ SIMULATOR--------
 ALPHA/BETA T CELL RECEPTOR MATCHING
  USING WEIGHTED BIPARTITE GRAPHS  
 ------------------------------------
 Please select an option:
 1) Generate a population of distinct cells
 2) Generate a sample plate of T cells
 3) Generate CDR3 alpha/beta occupancy data and overlap graph
 4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
 8) Options
 9) About/Acknowledgments
 0) Exit
 ```
 By default, the Options menu looks like this:
 ```
 --------------OPTIONS---------------
 1) Turn on cell sample file caching
 2) Turn on plate file caching
 3) Turn on graph/data file caching
 4) Turn off serialized binary graph output
 5) Turn on GraphML graph output
 6) Maximum weight matching algorithm options
 0) Return to main menu
 ```
 ### INPUT/OUTPUT
 To run the simulation, the program reads and writes 4 kinds of files:
 * Cell Sample files in CSV format
 * Sample Plate files in CSV format
 * Graph/Data files in binary object serialization format
 * Matching Results files in CSV format
 These files are often generated in sequence. When entering filenames, it is not necessary to include the file extension
 (.csv or .ser). When reading or writing files, the program will automatically add the correct extension to any filename 
 without one.
 To save file I/O time, the most recent instance of each of these four
 files either generated or read from disk can be cached in program memory. When caching is active, subsequent uses of the 
 same data file won't need to be read in again until another file of that type is used or generated,
 or caching is turned off for that file type. The program checks whether it needs to update its cached data by comparing
 filenames as entered by the user. On encountering a new filename, the program flushes its cache and reads in the new file.
 (Note that cached Graph/Data files must be transformed back into their original state after a matching experiment, which
 may take some time. Whether file I/O or graph transformation takes longer for graph/data files is likely to be
 device-specific.)
 The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
 The program can optionally output Graph/Data files in .GraphML format (.graphml) for data portability. This can be 
 turned on in the Options menu. By default, GraphML output is OFF.
 ---
 #### Cell Sample Files
 Cell Sample files consist of any number of distinct "T cells." Every cell contains 
 four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
 random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
 are not necessarily unique; the relative diversity can be set when making the file.
 (Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
 Options when making a Cell Sample file:
 * Number of T cells to generate
 * Factor by which CDR3s are more diverse than CDR1s
 Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
 Comments are preceded by `#`
 Structure:
    # Sample contains 1 unique CDR1 for every 4 unique CDR3s.
 | Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
 |---|---|---|---|
 |unique number|unique number|number|number|
 ---
 #### Sample Plate Files
 Sample Plate files consist of any number of "wells" containing any number of T cells (as 
 described above). The wells are filled randomly from a Cell Sample file, according to a selected
 frequency distribution. Additionally, every individual sequence within each cell may, with some
 given dropout probability, be omitted from the file; this simulates the effect of amplification errors
 prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a 
 different concentration of T cells per well.
 Options when making a Sample Plate file:
 * Cell Sample file to use
 * Statistical distribution to apply to Cell Sample file
  * Poisson
  * Gaussian
    * Standard deviation size 
  * Exponential
    * Lambda value
      * *(Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was approximately exponential with a lambda ~0.6. (Howie, et al. 2015))*
 * Total number of wells on the plate
 * Well populations random or fixed
  * If random, minimum and maximum population sizes
  * If fixed
    * Number of sections on plate
    * Number of T cells per well
      * per section, if more than one section
 * Dropout rate
 Files are in CSV format. There are no header labels. Every row represents a well. 
 Every value represents an individual cell, containing four sequences, depicted as an array string:
 `[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this: 
 `[525902, 791533, -1, 866282]`
 Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
 Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
 Structure:
 ```
 # Cell source file name:
 # Each row represents one well on the plate
 # Plate size:
 # Concentrations:
 # Lambda -or- StdDev: 
 ```
 | Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
 |---|---|---|---|
 | **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
 | **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
 | **...** | **...** | **...** | **...** |
 ---
 #### Graph/Data Files
 Graph/Data files are serialized binaries of a Java object containing the weighted bipartite graph representation of a
 Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file 
 (to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a 
 Sample Plate file (to construct the associated occupancy graph).
 These files can be several gigabytes in size. Writing them to a file lets us generate a graph and its metadata once,
 then use it for multiple different BiGpairSEQ simulations.
 Options for creating a Graph/Data file:
 * The Cell Sample file to use
 * The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
 These files do not have a human-readable structure, and are not portable to other programs.
 (For portability to other software, turn on GraphML output in the Options menu. This will produce a .graphml file
 for the weighted graph, with vertex attributes sequence, type, and occupancy data.)
 ---
 #### Matching Results Files 
 Matching results files consist of the results of a BiGpairSEQ matching simulation. Making them requires a serialized
 binary Graph/Data file (.ser). (Because .graphML files are larger than .ser files, BiGpairSEQ_Sim supports .graphML
 output only. Graph/data input must use a serialized binary.)
 Matching results files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
 Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
 Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
 * The minimum number of alpha/beta overlap wells to attempt to match
  * (must be >= 1)
 * The maximum number of alpha/beta overlap wells to attempt to match
  * (must be <= the number of wells on the plate - 1)
 * The maximum difference in alpha/beta occupancy to attempt to match
  * (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
 * The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as value in range 0 - 100.
  * (Optional. To skip using this filter, enter 0)
 Example output:
 ```
 # Source Sample Plate file: 4MilCellsPlate.csv
 # Source Graph and Data file: 4MilCellsPlateGraph.ser
 # T cell counts in sample plate wells: 30000
 # Total alphas found: 11813
 # Total betas found: 11808
 # High overlap threshold: 94
 # Low overlap threshold: 3
 # Minimum overlap percent: 0
 # Maximum occupancy difference: 96
 # Pairing attempt rate: 0.438
 # Correct pairings: 5151
 # Incorrect pairings: 18
 # Pairing error rate: 0.00348
 # Simulation time: 862 seconds
 ```
 | Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
 |---|---|---|---|---|---|---|
 |5242972|17|1571520|18|17|true|1.41E-18|
 |5161027|18|2072219|18|18|true|7.31E-20|
 |4145198|33|1064455|30|29|true|2.65E-21|
 |7700582|18|112748|18|18|true|7.31E-20|
 |...|...|...|...|...|...|...|
 ---
 **NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all. 
 P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only, 
 using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
 ### PERFORMANCE
 Performance details of the example excerpted above:
 On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel), 
 the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
 taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
 With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151 
 correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
 The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
 Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
 with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took 
 slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
 ## TODO
 * ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
 * ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
  * ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
  * Might have figured out a way to do it, by taking edges out and then putting them back into the graph. This may actually be possible.
  * It is possible, though the modifications to the graph incur their own performance penalties. Need testing to see which option is best.
 * ~~Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm~~ DONE
  * ~~in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage~~
  * ~~Add controllable heap-type parameter?~~
    * Parameter implemented. Fibonacci heap the current default.
 * ~~Implement sample plates with random numbers of T cells per well.~~ DONE
  * Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
    * preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
 * See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows. 
  * ~~Problem is variable number of cells in a well~~
  * ~~Apache Commons CSV library writes entries a row at a time~~ 
    * _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Tidyverse transposes CSV matrices in R, might not even be worth fixing._
 * ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE
  * ~~Custom vertex type with attribute for sequence occupancy?~~ ABANDONED
    * Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.
 * ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE
 * Re-implement CDR1 matching method
 * Implement Duan and Su's maximum weight matching algorithm
    * Add controllable algorithm-type parameter?
    * This would be fun and valuable, but probably take more time than I have for a hobby project.
 * Implement Vose's alias method for arbitrary statistical distributions of cells
 ## CITATIONS
 * Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
 * Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
 * Mehlhorn, K., Näher, St. [The LEDA Platform of Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
 * Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596–615 (1987)
 ## EXTERNAL LIBRARIES USED
 * [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
 * [JHeaps](https://www.jheaps.org) -- For the priority queue heap implementations used in the maximum weight matching algorithm
 * [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
 * [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting.
 ## ACKNOWLEDGEMENTS
 BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original 
 pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
 ## AUTHOR
 BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.
--- a/src/main/java/BiGpairSEQ.java
+++ b/src/main/java/BiGpairSEQ.java
@@ -0,0 +1,176 @@
 import java.util.Random;
 //main class. For choosing interface type and holding settings
 /**
  * Entry point of the simulator and holder of program-wide settings.
  *
  * Everything here is static: the shared random number generator, the most
  * recently cached cell sample / plate / graph (each stored together with the
  * filename it was cached under), the per-file-type caching switches, the
  * priority queue heap type, and the graph output format switches.
  */
 public class BiGpairSEQ {

    //Names reported by getPriorityQueueHeapType()
    private static final String HEAP_FIBONACCI = "FIBONACCI";
    private static final String HEAP_PAIRING = "PAIRING";

    //Single shared source of randomness, handed out through getRand()
    private static final Random rand = new Random();

    //Cached data objects and the filenames they were cached under (null when nothing is cached)
    private static CellSample cellSampleInMemory = null;
    private static String cellFilename = null;
    private static Plate plateInMemory = null;
    private static String plateFilename = null;
    private static GraphWithMapData graphInMemory = null;
    private static String graphFilename = null;

    //Caching switches, all off by default
    private static boolean cacheCells = false;
    private static boolean cachePlate = false;
    private static boolean cacheGraph = false;

    //Heap type setting; Fibonacci by default
    private static String priorityQueueHeapType = HEAP_FIBONACCI;

    //Graph output format switches: binary on, GraphML off by default
    private static boolean outputBinary = true;
    private static boolean outputGraphML = false;

    /**
     * Starts the interactive interface when no arguments are given;
     * otherwise hands the arguments to the command line interface.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        boolean noArguments = args.length == 0;
        if (noArguments) {
            InteractiveInterface.startInteractive();
            return;
        }
        CommandLineInterface.startCLI(args);
    }

    /** @return the shared Random instance */
    public static Random getRand() {
        return rand;
    }

    /** @return the cached cell sample, or null if none is cached */
    public static CellSample getCellSampleInMemory() {
        return cellSampleInMemory;
    }

    /**
     * Caches a cell sample under the given filename, clearing any
     * previously cached cell sample first.
     */
    public static void setCellSampleInMemory(CellSample cellSample, String filename) {
        boolean somethingCached = cellSampleInMemory != null;
        if (somethingCached) {
            clearCellSampleInMemory();
        }
        cellFilename = filename;
        cellSampleInMemory = cellSample;
        System.out.println("Cell sample file " + filename + " cached.");
    }

    /** Drops the cached cell sample and its filename, then requests garbage collection. */
    public static void clearCellSampleInMemory() {
        cellFilename = null;
        cellSampleInMemory = null;
        System.gc();
        System.out.println("Cell sample file cache cleared.");
    }

    /** @return filename of the cached cell sample, or null if none is cached */
    public static String getCellFilename() {
        return cellFilename;
    }

    /** @return the cached plate, or null if none is cached */
    public static Plate getPlateInMemory() {
        return plateInMemory;
    }

    /**
     * Caches a plate under the given filename, clearing any
     * previously cached plate first.
     */
    public static void setPlateInMemory(Plate plate, String filename) {
        boolean somethingCached = plateInMemory != null;
        if (somethingCached) {
            clearPlateInMemory();
        }
        plateFilename = filename;
        plateInMemory = plate;
        System.out.println("Sample plate file " + filename + " cached.");
    }

    /** Drops the cached plate and its filename, then requests garbage collection. */
    public static void clearPlateInMemory() {
        plateFilename = null;
        plateInMemory = null;
        System.gc();
        System.out.println("Sample plate file cache cleared.");
    }

    /** @return filename of the cached plate, or null if none is cached */
    public static String getPlateFilename() {
        return plateFilename;
    }

    /** @return the cached graph and data object, or null if none is cached */
    public static GraphWithMapData getGraphInMemory() {
        return graphInMemory;
    }

    /**
     * Caches a graph and data object under the given filename, clearing any
     * previously cached graph first.
     */
    public static void setGraphInMemory(GraphWithMapData g, String filename) {
        boolean somethingCached = graphInMemory != null;
        if (somethingCached) {
            clearGraphInMemory();
        }
        graphFilename = filename;
        graphInMemory = g;
        System.out.println("Graph and data file " + filename + " cached.");
    }

    /** Drops the cached graph and its filename, then requests garbage collection. */
    public static void clearGraphInMemory() {
        graphFilename = null;
        graphInMemory = null;
        System.gc();
        System.out.println("Graph and data file cache cleared.");
    }

    /** @return filename of the cached graph, or null if none is cached */
    public static String getGraphFilename() {
        return graphFilename;
    }

    /** @return true if cell sample caching is switched on */
    public static boolean cacheCells() {
        return cacheCells;
    }

    /**
     * Switches cell sample caching on or off. Switching it off also
     * clears the cell sample cache.
     */
    public static void setCacheCells(boolean cacheCells) {
        if (cacheCells) {
            System.out.println("Cell sample file caching: ON.");
        } else {
            //not caching any more, so release the cached data
            BiGpairSEQ.clearCellSampleInMemory();
            System.out.println("Cell sample file caching: OFF.");
        }
        BiGpairSEQ.cacheCells = cacheCells;
    }

    /** @return true if sample plate caching is switched on */
    public static boolean cachePlate() {
        return cachePlate;
    }

    /**
     * Switches sample plate caching on or off. Switching it off also
     * clears the plate cache.
     */
    public static void setCachePlate(boolean cachePlate) {
        if (cachePlate) {
            System.out.println("Sample plate file caching: ON.");
        } else {
            //not caching any more, so release the cached data
            BiGpairSEQ.clearPlateInMemory();
            System.out.println("Sample plate file caching: OFF.");
        }
        BiGpairSEQ.cachePlate = cachePlate;
    }

    /** @return true if graph/data caching is switched on */
    public static boolean cacheGraph() {
        return cacheGraph;
    }

    /**
     * Switches graph/data caching on or off. Switching it off also
     * clears the graph cache.
     */
    public static void setCacheGraph(boolean cacheGraph) {
        if (cacheGraph) {
            System.out.println("Graph/data file caching: ON.");
        } else {
            //not caching any more, so release the cached data
            BiGpairSEQ.clearGraphInMemory();
            System.out.println("Graph/data file caching: OFF.");
        }
        BiGpairSEQ.cacheGraph = cacheGraph;
    }

    /** @return the current heap type setting, "FIBONACCI" or "PAIRING" */
    public static String getPriorityQueueHeapType() {
        return priorityQueueHeapType;
    }

    /** Selects the pairing heap setting. */
    public static void setPairingHeap() {
        priorityQueueHeapType = HEAP_PAIRING;
    }

    /** Selects the Fibonacci heap setting. */
    public static void setFibonacciHeap() {
        priorityQueueHeapType = HEAP_FIBONACCI;
    }

    /** @return the binary output switch */
    public static boolean outputBinary() {
        return outputBinary;
    }

    /** Sets the binary output switch. */
    public static void setOutputBinary(boolean b) {
        outputBinary = b;
    }

    /** @return the GraphML output switch */
    public static boolean outputGraphML() {
        return outputGraphML;
    }

    /** Sets the GraphML output switch. */
    public static void setOutputGraphML(boolean b) {
        outputGraphML = b;
    }
 }
--- a/src/main/java/CellFileReader.java
+++ b/src/main/java/CellFileReader.java
@@ -11,17 +11,20 @@ import java.util.List;
 public class CellFileReader {
    private String filename;
    private List<Integer[]> distinctCells = new ArrayList<>();
    private Integer cdr1Freq;
    public CellFileReader(String filename) {
        if(!filename.matches(".*\\.csv")){
            filename = filename + ".csv";
        }
        this.filename = filename;
        CSVFormat cellFileFormat = CSVFormat.Builder.create()
                .setHeader("Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1")
                .setSkipHeaderRecord(true)
                .setCommentMarker('#')
                .build();
        try(//don't need to close reader bc of try-with-resources auto-closing
@@ -36,17 +39,37 @@ public class CellFileReader {
                cell[3] = Integer.valueOf(record.get("Beta CDR1"));
                distinctCells.add(cell);
            }
        } catch(IOException ex){
            System.out.println("cell file " + filename + " not found.");
            System.err.println(ex);
        }
        //get CDR1 frequency
        ArrayList<Integer> cdr1Alphas = new ArrayList<>();
        for (Integer[] cell : distinctCells) {
            cdr1Alphas.add(cell[3]);
        }
        double count = cdr1Alphas.stream().distinct().count();
        count = Math.ceil(distinctCells.size() / count);
        cdr1Freq = (int) count;
    }
-    public List<Integer[]> getCells(){
+    public CellSample getCellSample() {
        return new CellSample(distinctCells, cdr1Freq);
    }
    public String getFilename() { return filename;}
    //Refactor everything that uses this to have access to a Cell Sample and get the cells there instead.
    public List<Integer[]> getListOfDistinctCellsDEPRECATED(){
        return distinctCells;
    }
-    public Integer getCellCount() {
+    public Integer getCellCountDEPRECATED() {
        //Refactor everything that uses this to have access to a Cell Sample and get the count there instead.
        return distinctCells.size();
    }
 }
--- a/src/main/java/CellFileWriter.java
+++ b/src/main/java/CellFileWriter.java
@@ -13,6 +13,7 @@ public class CellFileWriter {
    private String[] headers = {"Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1"};
    List<Integer[]> cells;
    String filename;
    Integer cdr1Freq;
    public CellFileWriter(String filename, CellSample cells) {
        if(!filename.matches(".*\\.csv")){
@@ -20,15 +21,18 @@ public class CellFileWriter {
        }
        this.filename = filename;
        this.cells = cells.getCells();
        this.cdr1Freq = cells.getCdr1Freq();
    }
    public void writeCellsToFile() {
        CSVFormat cellFileFormat = CSVFormat.Builder.create()
                .setCommentMarker('#')
                .setHeader(headers)
                .build();
        try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
            CSVPrinter printer = new CSVPrinter(writer, cellFileFormat);
        ){
            printer.printComment("Sample contains 1 unique CDR1 for every " + cdr1Freq + "unique CDR3s.");
            printer.printRecords(cells);
        } catch(IOException ex){
            System.out.println("Could not make new file named "+filename);
--- a/src/main/java/CellSample.java
+++ b/src/main/java/CellSample.java
@@ -1,18 +1,51 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.stream.IntStream;
 public class CellSample {
    private List<Integer[]> cells;
    private Integer cdr1Freq;
-    public CellSample(List<Integer[]> cells){
+    public CellSample(Integer numDistinctCells, Integer cdr1Freq){
        //Generates numDistinctCells cells from shuffled pools of integer sequence IDs.
        //cdr1Freq is stored as given and used as the divisor when sizing the CDR1 pool,
        //so it must be non-zero (integer division).
        this.cdr1Freq = cdr1Freq;
        List<Integer> numbersCDR3 = new ArrayList<>();
        List<Integer> numbersCDR1 = new ArrayList<>();
        //CDR3 pool holds the values 1..(2 * numDistinctCells + 1): two per cell, plus one
        //value that the pairing loop below never consumes.
        Integer numDistCDR3s = 2 * numDistinctCells + 1;
        IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
        //CDR1 pool starts right after the last CDR3 value, so the two pools do not overlap.
        //It holds (numDistCDR3s / cdr1Freq) + 1 values.
        IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
        Collections.shuffle(numbersCDR3);
        Collections.shuffle(numbersCDR1);
        //Each cell represented by 4 values
        //two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
        List<Integer[]> distinctCells = new ArrayList<>();
        //Step through the CDR3 pool two entries at a time; each CDR3 value is used at most once.
        for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
            Integer tmpCDR3a = numbersCDR3.get(i);
            Integer tmpCDR3b = numbersCDR3.get(i+1);
            //CDR1 indices wrap around the (smaller) CDR1 pool, so CDR1 values can repeat across cells.
            Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
            Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
            Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
            distinctCells.add(tmp);
        }
        this.cells = distinctCells;
    }
    public CellSample(List<Integer[]> cells, Integer cdr1Freq){
        this.cells = cells;
        this.cdr1Freq = cdr1Freq;
    }
    public List<Integer[]> getCells(){
        return cells;
    }
-    public Integer population(){
+    public Integer getCdr1Freq() {
        return cdr1Freq;
    }
    public Integer getCellCount(){
        return cells.size();
    }
--- a/src/main/java/CommandLineInterface.java
+++ b/src/main/java/CommandLineInterface.java
@@ -0,0 +1,427 @@
 import org.apache.commons.cli.*;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.stream.Stream;
 /*
 * Class for parsing options passed to program from command line
 *
 * Top-level flags:
 * cells : to make a cell sample file
 * plate : to make a sample plate file
 * graph : to make a graph and data file
 * match : to do a cdr3 matching (WITH OR WITHOUT MAKING A RESULTS FILE. May just want to print summary for piping.)
 *
 * Cell flags:
 * count : number of cells to generate
 * diversity factor : factor by which CDR3s are more diverse than CDR1s
 * output : name of the output file
 *
 * Plate flags:
 * cellfile : name of the cell sample file to use as input
 * wells : the number of wells on the plate
 * dist : the statistical distribution to use
 *      (if exponential) lambda : the lambda value of the exponential distribution
 *      (if gaussian) stddev : the standard deviation of the gaussian distribution
 * rand : randomize well populations, take a minimum argument and a maximum argument
 * populations : number of t cells per well per section (number of arguments determines number of sections)
 * dropout : plate dropout rate, double from 0.0 to 1.0
 * output : name of the output file
 *
 * Graph flags:
 * cellfile : name of the cell sample file to use as input
 * platefile : name of the sample plate file to use as input
 * output : name of the output file
 * graphml : also output a GraphML file
 * no-binary : do not output the serialized binary object file (it is written by default)
 *
 * Match flags:
 * graphFile : name of graph and data file to use as input
 * min : minimum number of overlap wells to attempt a matching
 * max : the maximum number of overlap wells to attempt a matching
 * maxdiff : (optional) the maximum difference in occupancy to attempt a matching
 * minpct : (optional) the minimum percent overlap to attempt a matching.
 * output : the filename to write results to (currently required)
 * (planned) flags selecting which result values to print to System.out for piping
 *
 */
 public class CommandLineInterface {
    //Entry point for command-line use. The first argument selects the mode (-cells, -plate, -graph,
    //-match or -help); all remaining arguments are parsed against that mode's own option set.
    //Missing or malformed arguments are reported on System.err instead of escaping as uncaught exceptions.
    public static void startCLI(String[] args) {
        //Nothing to parse without at least a mode flag
        if (args == null || args.length == 0) {
            System.err.println("Parsing failed.  Reason: no arguments given (use -help for usage)");
            return;
        }
        //Options sets for the different modes
        Options mainOptions = buildMainOptions();
        Options cellOptions = buildCellOptions();
        Options plateOptions = buildPlateOptions();
        Options graphOptions = buildGraphOptions();
        Options matchOptions = buildMatchCDR3options();
        CommandLineParser parser = new DefaultParser();
        //Everything after the mode flag belongs to the selected mode
        String[] modeArgs = Arrays.copyOfRange(args, 1, args.length);
        try{
            //Only the first argument is checked against the mode flags
            CommandLine line = parser.parse(mainOptions, Arrays.copyOfRange(args, 0, 1));
            if (line.hasOption("help")) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("BiGpairSEQ_Sim", mainOptions);
                System.out.println();
                formatter.printHelp("BiGpairSEQ_Sim -cells", cellOptions);
                System.out.println();
                formatter.printHelp("BiGpairSEQ_Sim -plate", plateOptions);
                System.out.println();
                formatter.printHelp("BiGpairSEQ_Sim -graph", graphOptions);
                System.out.println();
                formatter.printHelp("BiGpairSEQ_Sim -match", matchOptions);
            }
            else if (line.hasOption("cells")) { //Making a cell sample file
                line = parser.parse(cellOptions, modeArgs);
                Integer number = Integer.valueOf(line.getOptionValue("n"));
                Integer diversity = Integer.valueOf(line.getOptionValue("d"));
                String filename = line.getOptionValue("o");
                makeCells(filename, number, diversity);
            }
            else if (line.hasOption("plate")) { //Making a sample plate file
                line = parser.parse(plateOptions, modeArgs);
                //get the cells
                String cellFilename = line.getOptionValue("c");
                CellSample cells = getCells(cellFilename);
                //get the rest of the parameters
                Integer[] populations;
                String outputFilename = line.getOptionValue("o");
                Integer numWells = Integer.parseInt(line.getOptionValue("w"));
                Double dropoutRate = Double.parseDouble(line.getOptionValue("err"));
                if (line.hasOption("random")) {
                    //Array holding values of minimum and maximum populations
                    Integer[] min_max = Stream.of(line.getOptionValues("random"))
                            .mapToInt(Integer::parseInt)
                            .boxed()
                            .toArray(Integer[]::new);
                    //An inverted range would make the random stream below throw IllegalArgumentException
                    if (min_max.length != 2 || min_max[0] > min_max[1]) {
                        throw new ParseException("-random requires a minimum followed by a maximum that is not smaller");
                    }
                    //One random population per well, drawn from [minimum, maximum]
                    populations = BiGpairSEQ.getRand().ints(min_max[0], min_max[1] + 1)
                            .limit(numWells)
                            .boxed()
                            .toArray(Integer[]::new);
                }
                else if (line.hasOption("pop")) {
                    populations = Stream.of(line.getOptionValues("pop"))
                            .mapToInt(Integer::parseInt)
                            .boxed()
                            .toArray(Integer[]::new);
                }
                else{
                    populations = new Integer[1];
                    populations[0] = 1;
                }
                //make the plate
                Plate plate;
                if (line.hasOption("poisson")) {
                    //NOTE(review): the standard deviation here is derived from the well count - confirm this is intended
                    Double stdDev = Math.sqrt(numWells);
                    plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
                }
                else if (line.hasOption("gaussian")) {
                    //-stddev is not enforced by the option set, so check it before parsing its value
                    if (!line.hasOption("stddev")) {
                        throw new ParseException("-gaussian requires -stddev <value>");
                    }
                    Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
                    plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
                }
                else {
                    //The distribution group is required, so -exponential is the only remaining choice.
                    //-lambda is not enforced by the option set, so check it before parsing its value
                    if (!line.hasOption("lambda")) {
                        throw new ParseException("-exponential requires -lambda <value>");
                    }
                    Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
                    plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
                }
                PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
                writer.writePlateFile();
            }
            else if (line.hasOption("graph")) { //Making a graph
                line = parser.parse(graphOptions, modeArgs);
                String cellFilename = line.getOptionValue("c");
                String plateFilename = line.getOptionValue("p");
                String outputFilename = line.getOptionValue("o");
                //get cells
                CellSample cells = getCells(cellFilename);
                //get plate
                Plate plate = getPlate(plateFilename);
                GraphWithMapData graph = Simulator.makeGraph(cells, plate, false);
                if (!line.hasOption("no-binary")) { //output binary file unless told not to
                    GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph, false);
                    writer.writeDataToFile();
                }
                if (line.hasOption("graphml")) { //if told to, output graphml file
                    GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph);
                    gmlwriter.writeGraphToFile();
                }
            }
            else if (line.hasOption("match")) { //can add a flag for which match type in future, split this in two
                line = parser.parse(matchOptions, modeArgs);
                String graphFilename = line.getOptionValue("g");
                String outputFilename = line.getOptionValue("o");
                Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
                Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
                //Optional filters fall back to values that filter nothing
                Integer minOverlapPct = 0;
                if (line.hasOption("minpct")) { //see if this filter is being used
                    minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
                }
                Integer maxOccupancyDiff = Integer.MAX_VALUE;
                if (line.hasOption("maxdiff")) { //see if this filter is being used
                    maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
                }
                GraphWithMapData graph = getGraph(graphFilename);
                //getGraph returns null when the file could not be read; there is nothing to match then
                if (graph == null) {
                    System.err.println("Could not read graph data from " + graphFilename);
                    return;
                }
                MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
                        maxOccupancyDiff, minOverlapPct, false);
                MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
                writer.writeResultsToFile();
                //can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
                //after I put those flags in the matchOptions
            }
        }
        catch (ParseException exp) {
            System.err.println("Parsing failed.  Reason: " + exp.getMessage());
        }
        catch (NumberFormatException exp) {
            //Raised by the Integer/Double parsing of option values above
            System.err.println("Parsing failed.  Reason: invalid numeric argument. " + exp.getMessage());
        }
    }
    //Builds the required -o / --output-file option (one filename argument) that every mode's option set includes
    private static Option outputFileOption() {
        return Option.builder("o")
                .longOpt("output-file")
                .desc("Name of output file")
                .hasArg()
                .argName("filename")
                .required()
                .build();
    }
    //Builds the top-level option set: a required group holding the help flag and the four mode flags
    private static Options buildMainOptions() {
        Option helpFlag = Option.builder("help")
                .desc("Displays this help menu")
                .build();
        Option cellsFlag = Option.builder("cells")
                .longOpt("make-cells")
                .desc("Makes a cell sample file of distinct T cells")
                .build();
        Option plateFlag = Option.builder("plate")
                .longOpt("make-plate")
                .desc("Makes a sample plate file. Requires a cell sample file.")
                .build();
        Option graphFlag = Option.builder("graph")
                .longOpt("make-graph")
                .desc("Makes a graph/data file. Requires a cell sample file and a sample plate file")
                .build();
        Option matchFlag = Option.builder("match")
                .longOpt("match-cdr3")
                .desc("Matches CDR3s. Requires a graph/data file.")
                .build();
        //All five flags share one group, and the group itself is marked required
        OptionGroup modes = new OptionGroup();
        modes.addOption(helpFlag)
                .addOption(cellsFlag)
                .addOption(plateFlag)
                .addOption(graphFlag)
                .addOption(matchFlag);
        modes.setRequired(true);
        Options mainOptions = new Options();
        mainOptions.addOptionGroup(modes);
        return mainOptions;
    }
    //Builds the option set for -cells mode: number of cells, diversity factor and output file, all required
    private static Options buildCellOptions() {
        Option cellCount = Option.builder("n")
                .longOpt("num-cells")
                .desc("The number of distinct cells to generate")
                .hasArg()
                .argName("number")
                .required()
                .build();
        Option diversityFactor = Option.builder("d")
                .longOpt("diversity-factor")
                .desc("The factor by which unique CDR3s outnumber unique CDR1s")
                .hasArg()
                .argName("factor")
                .required()
                .build();
        return new Options()
                .addOption(cellCount)
                .addOption(diversityFactor)
                .addOption(outputFileOption());
    }
    //Builds the option set for -plate mode.
    //Required: cell file, well count, dropout rate, output file, one distribution flag,
    //and one of -random / -pop. The -stddev and -lambda parameters form an optional group.
    private static Options buildPlateOptions() {
        Options plateOptions = new Options();
        Option cellFile = Option.builder("c")
                .longOpt("cell-file")
                .desc("The cell sample file to use")
                .hasArg()
                .argName("filename")
                .required().build();
        Option numWells = Option.builder("w")
                .longOpt("wells")
                .desc("The number of wells on the sample plate")
                .hasArg()
                .argName("number")
                .required().build();
        //options group for choosing which distribution to use
        OptionGroup distributions = new OptionGroup();
        distributions.setRequired(true);
        Option poisson = Option.builder("poisson")
                .desc("Use a Poisson distribution for cell sample")
                .build();
        Option gaussian = Option.builder("gaussian")
                .desc("Use a Gaussian distribution for cell sample")
                .build();
        Option exponential = Option.builder("exponential")
                .desc("Use an exponential distribution for cell sample")
                .build();
        distributions.addOption(poisson);
        distributions.addOption(gaussian);
        distributions.addOption(exponential);
        //options group for statistical distribution parameters
        //(the group is not marked required, so the parser alone does not enforce either parameter)
        OptionGroup statParams = new OptionGroup();
        Option stdDev = Option.builder("stddev")
                .desc("If using -gaussian flag, standard deviation for distribution")
                .hasArg()
                .argName("value")
                .build();
        Option lambda = Option.builder("lambda")
                .desc("If using -exponential flag, lambda value for distribution")
                .hasArg()
                .argName("value")
                .build();
        statParams.addOption(stdDev);
        statParams.addOption(lambda);
        //Option group for random plate or set populations
        OptionGroup wellPopOptions = new OptionGroup();
        wellPopOptions.setRequired(true);
        Option randomWellPopulations = Option.builder("random")
                .desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
                .hasArgs()
                .numberOfArgs(2)
                .argName("minimum maximum")
                .build();
        Option specificWellPopulations = Option.builder("pop")
                .desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
                .hasArgs()
                .argName("number [number]...")
                .build();
        wellPopOptions.addOption(randomWellPopulations);
        wellPopOptions.addOption(specificWellPopulations);
        Option dropoutRate = Option.builder("err")
                .hasArg()
                .desc("The sequence dropout rate due to amplification error. (0.0 - 1.0)")
                .argName("rate")
                .required()
                .build();
        //assemble the complete plate option set
        plateOptions.addOption(cellFile);
        plateOptions.addOption(numWells);
        plateOptions.addOptionGroup(distributions);
        plateOptions.addOptionGroup(statParams);
        plateOptions.addOptionGroup(wellPopOptions);
        plateOptions.addOption(dropoutRate);
        plateOptions.addOption(outputFileOption());
        return plateOptions;
    }
    //Builds the option set for -graph mode: required cell file, plate file and output file,
    //plus the optional -graphml and -nb / --no-binary switches
    private static Options buildGraphOptions() {
        Option cellFile = Option.builder("c")
                .longOpt("cell-file")
                .desc("Cell sample file to use for checking accuracy")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option plateFile = Option.builder("p")
                .longOpt("plate-filename")
                .desc("Sample plate file (made from given cell sample file) to construct graph from")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option writeGraphML = Option.builder("graphml")
                .desc("Output GraphML file")
                .build();
        Option suppressBinary = Option.builder("nb")
                .longOpt("no-binary")
                .desc("Don't output serialized binary file")
                .build();
        return new Options()
                .addOption(cellFile)
                .addOption(plateFile)
                .addOption(outputFileOption())
                .addOption(writeGraphML)
                .addOption(suppressBinary);
    }
    //Builds the option set for -match mode: required graph file, min/max overlap and output file,
    //plus the optional -minpct and -maxdiff filters
    private static Options buildMatchCDR3options() {
        Option graphFile = Option.builder("g")
                .longOpt("graph-file")
                .desc("The graph/data file to use")
                .hasArg()
                .argName("filename")
                .required()
                .build();
        Option minOverlap = Option.builder("min")
                .desc("The minimum number of shared wells to attempt to match a sequence pair")
                .hasArg()
                .argName("number")
                .required()
                .build();
        Option maxOverlap = Option.builder("max")
                .desc("The maximum number of shared wells to attempt to match a sequence pair")
                .hasArg()
                .argName("number")
                .required()
                .build();
        Option minPercent = Option.builder("minpct")
                .desc("(Optional) The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching. (0 - 100) ")
                .hasArg()
                .argName("percent")
                .build();
        Option maxDifference = Option.builder("maxdiff")
                .desc("(Optional) The maximum difference in total occupancy between two sequences to attempt matching.")
                .hasArg()
                .argName("number")
                .build();
        //options for output to System.out have not been added yet
        //Option printPairingErrorRate = Option.builder()
        return new Options()
                .addOption(graphFile)
                .addOption(minOverlap)
                .addOption(maxOverlap)
                .addOption(minPercent)
                .addOption(maxDifference)
                .addOption(outputFileOption());
    }
    //Reads the named cell sample file through CellFileReader and returns the resulting CellSample
    private static CellSample getCells(String cellFilename) {
        assert cellFilename != null;
        return new CellFileReader(cellFilename).getCellSample();
    }
    //Reads the named sample plate file through PlateFileReader and returns the resulting Plate
    private static Plate getPlate(String plateFilename) {
        assert plateFilename != null;
        return new PlateFileReader(plateFilename).getSamplePlate();
    }
    //Loads graph data from the named file with reader messages switched off.
    //Returns null (after printing the stack trace) when the reader throws an IOException.
    private static GraphWithMapData getGraph(String graphFilename) {
        assert graphFilename != null;
        GraphWithMapData loaded = null;
        try{
            loaded = new GraphDataObjectReader(graphFilename, false).getData();
        }
        catch (IOException ex) {
            ex.printStackTrace();
        }
        return loaded;
    }
    //for calling from command line
    //Generates a new CellSample from the two parameters and writes it to the named file
    public static void makeCells(String filename, Integer numCells, Integer cdr1Freq) {
        CellSample generated = new CellSample(numCells, cdr1Freq);
        new CellFileWriter(filename, generated).writeCellsToFile();
    }
 }
--- a/src/main/java/Equations.java
+++ b/src/main/java/Equations.java
@@ -4,6 +4,9 @@ import java.math.MathContext;
 public abstract class Equations {
    //pValue calculation as described in original pairSEQ paper.
    //Included for comparison with original results.
    //Not used by BiGpairSEQ for matching.
    public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
        int w_ab = (int) w_ab_d;
        double pv = 0.0;
@@ -14,6 +17,9 @@ public abstract class Equations {
        return pv;
    }
    //Implementation of the (corrected) probability equation from pairSEQ paper.
    //Included for comparison with original results.
    //Not used by BiGpairSEQ for matching.
    private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
        BigInteger numer1 = choose(w, w_ab);
        BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
@@ -26,10 +32,9 @@ public abstract class Equations {
        return prob.doubleValue();
    }
-    /*
+
-     * This works because nC(k+1) = nCk * (n-k)/(k+1)
+     //This works because nC(k+1) = nCk * (n-k)/(k+1)
-     * Since nC0 = 1, can start there and generate all the rest.
+     //Since nC0 = 1, can start there and generate all the rest.
     */
     public static BigInteger choose(final int N, final int K) {
         BigInteger nCk = BigInteger.ONE;
         for (int k = 0; k < K; k++) {
--- a/src/main/java/GraphDataObjectReader.java
+++ b/src/main/java/GraphDataObjectReader.java
@@ -0,0 +1,33 @@
 import java.io.*;
 //Reads a serialized GraphWithMapData object from a ".ser" file.
 //The file is read once, in the constructor; the result is then available through getData().
 public class GraphDataObjectReader {
    private GraphWithMapData data;
    private String filename;
    private boolean verbose = true;
    //filename : file to read; ".ser" is appended when the name does not already end with it
    //verbose : when true, progress messages are printed to System.out before reading
    //Throws IOException for read failures other than a missing file.
    //A missing file or unknown class only prints a stack trace, leaving getData() returning null.
    public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
        //Store the flag so the messages below respect the caller's choice
        this.verbose = verbose;
        if(!filename.matches(".*\\.ser")){
            filename = filename + ".ser";
        }
        this.filename = filename;
        try(//don't need to close these because of try-with-resources
            BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
                ObjectInputStream in = new ObjectInputStream(fileIn))
        {
            if(verbose) {
                System.out.println("Reading graph data from file. This may take some time");
                System.out.println("File I/O time is not included in results");
            }
            //NOTE(review): Java deserialization executes code chosen by the file's contents;
            //only read files from a trusted source
            data = (GraphWithMapData) in.readObject();
        } catch (FileNotFoundException | ClassNotFoundException ex) {
            ex.printStackTrace();
        }
    }
    //Returns the deserialized data, or null if reading failed in the constructor
    public GraphWithMapData getData() {
        return data;
    }
    //Returns the filename actually read, including any ".ser" extension that was appended
    public String getFilename() {
        return filename;
    }
 }
--- a/src/main/java/GraphDataObjectWriter.java
+++ b/src/main/java/GraphDataObjectWriter.java
@@ -0,0 +1,45 @@
 import org.jgrapht.Graph;
 import java.io.BufferedOutputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.ObjectOutputStream;
 //Serializes a GraphWithMapData object to a ".ser" file
 public class GraphDataObjectWriter {
    private GraphWithMapData data;
    private String filename;
    private boolean verbose = true;
    //Creates a writer that prints progress messages
    public GraphDataObjectWriter(String filename, GraphWithMapData data) {
        this(filename, data, true);
    }
    //filename : output file; ".ser" is appended when the name does not already end with it
    //data : the object to serialize
    //verbose : when true, progress messages are printed to System.out while writing
    public GraphDataObjectWriter(String filename, GraphWithMapData data, boolean verbose) {
        this.verbose = verbose;
        String target = filename;
        if(!target.matches(".*\\.ser")){
            target = target + ".ser";
        }
        this.filename = target;
        this.data = data;
    }
    //Writes the object to the file; an IOException is reported by printing its stack trace
    public void writeDataToFile() {
        try (BufferedOutputStream fileOut = new BufferedOutputStream(new FileOutputStream(filename));
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)
        ){
            if(verbose) {
                System.out.println("Writing graph and occupancy data to file. This may take some time.");
                System.out.println("File I/O time is not included in results.");
            }
            objectOut.writeObject(data);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }
 }
--- a/src/main/java/GraphMLFileWriter.java
+++ b/src/main/java/GraphMLFileWriter.java
@@ -0,0 +1,84 @@
 import org.jgrapht.graph.DefaultWeightedEdge;
 import org.jgrapht.graph.SimpleWeightedGraph;
 import org.jgrapht.nio.Attribute;
 import org.jgrapht.nio.AttributeType;
 import org.jgrapht.nio.DefaultAttribute;
 import org.jgrapht.nio.dot.DOTExporter;
 import org.jgrapht.nio.graphml.GraphMLExporter;
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 public class GraphMLFileWriter {
    String filename;
    GraphWithMapData data;
    //filename : output file; ".graphml" is appended when the name does not already end with it
    //data : the graph and map data to export
    public GraphMLFileWriter(String filename, GraphWithMapData data) {
        String target = filename;
        if(!target.matches(".*\\.graphml")){
            target = target + ".graphml";
        }
        this.data = data;
        this.filename = target;
    }
 //    public void writeGraphToFile() {
 //        try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
 //        ){
 //            GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
 //            exporter.exportGraph(graph, writer);
 //        } catch(IOException ex){
 //            System.out.println("Could not make new file named "+filename);
 //            System.err.println(ex);
 //        }
 //    }
    //Exports the graph to a new GraphML file, with edge weights and, for every vertex,
    //its type (CDR3 alpha or beta), sequence and occupancy as node attributes.
    //The file must not exist yet (CREATE_NEW); an IOException is reported on the console.
    public void writeGraphToFile() {
        SimpleWeightedGraph graph = data.getGraph();
        Map<Integer, Integer> vertexToAlphaMap = data.getPlateVtoAMap();
        Map<Integer, Integer> vertexToBetaMap = data.getPlateVtoBMap();
        Map<Integer, Integer> alphaOccs = data.getAlphaWellCounts();
        Map<Integer, Integer> betaOccs = data.getBetaWellCounts();
        try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW)
        ){
            //create exporter. Let the vertex labels be the unique ids for the vertices.
            //Type parameters are <vertex type, edge type>: vertices are Integer ids, edges are DefaultWeightedEdge
            GraphMLExporter<Integer, DefaultWeightedEdge> exporter = new GraphMLExporter<>(v -> v.toString());
            //set to export weights
            exporter.setExportEdgeWeights(true);
            //set type, sequence, and occupancy attributes for each vertex
            exporter.setVertexAttributeProvider( v -> {
                Map<String, Attribute> attributes = new HashMap<>();
                if(vertexToAlphaMap.containsKey(v)) {
                    Integer sequence = vertexToAlphaMap.get(v);
                    attributes.put("type", DefaultAttribute.createAttribute("CDR3 Alpha"));
                    attributes.put("sequence", DefaultAttribute.createAttribute(sequence));
                    attributes.put("occupancy", DefaultAttribute.createAttribute(alphaOccs.get(sequence)));
                }
                else if(vertexToBetaMap.containsKey(v)) {
                    Integer sequence = vertexToBetaMap.get(v);
                    attributes.put("type", DefaultAttribute.createAttribute("CDR3 Beta"));
                    attributes.put("sequence", DefaultAttribute.createAttribute(sequence));
                    attributes.put("occupancy", DefaultAttribute.createAttribute(betaOccs.get(sequence)));
                }
                //a vertex found in neither map is exported without attributes
                return attributes;
            });
            //register the attributes
            //NOTE(review): sequence and occupancy are Integer values but are declared as STRING here - confirm intended
            exporter.registerAttribute("type", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
            exporter.registerAttribute("sequence", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
            exporter.registerAttribute("occupancy", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
            //export the graph
            exporter.exportGraph(graph, writer);
        } catch(IOException ex){
            System.out.println("Could not make new file named "+filename);
            System.err.println(ex);
        }
    }
 }
--- a/src/main/java/GraphModificationFunctions.java
+++ b/src/main/java/GraphModificationFunctions.java
@@ -0,0 +1,111 @@
 import org.jgrapht.graph.DefaultWeightedEdge;
 import org.jgrapht.graph.SimpleWeightedGraph;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 public interface GraphModificationFunctions {
    //Filters edges whose weight is above high or below low.
    //saveEdges true : such edges are removed from the graph and returned as {source, target, weight} triples
    //saveEdges false : such edges stay in the graph with weight 0.0 and the returned list is empty
    static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                            int low, int high, boolean saveEdges) {
        List<Integer[]> removedEdges = new ArrayList<>();
        for (DefaultWeightedEdge candidate : graph.edgeSet()) {
            double weight = graph.getEdgeWeight(candidate);
            boolean outOfRange = (weight > high) || (weight < low);
            if (!outOfRange) {
                continue;
            }
            if (!saveEdges) {
                graph.setEdgeWeight(candidate, 0.0);
                continue;
            }
            removedEdges.add(new Integer[]{
                    graph.getEdgeSource(candidate), graph.getEdgeTarget(candidate), (int) weight});
        }
        //Recorded edges are taken out of the graph only after the pass over edgeSet() has finished
        if (saveEdges) {
            removedEdges.forEach(removed -> graph.removeEdge(removed[0], removed[1]));
        }
        return removedEdges;
    }
    //Filters edges whose endpoints differ in well occupancy by maxOccupancyDifference or more.
    //Occupancies are looked up by mapping the edge source through plateVtoAMap / alphaWellCounts
    //and the edge target through plateVtoBMap / betaWellCounts.
    //saveEdges true : such edges are removed from the graph and returned as {source, target, weight} triples
    //saveEdges false : such edges stay in the graph with weight 0.0 and the returned list is empty
    //NOTE(review): a difference exactly equal to maxOccupancyDifference is filtered too - confirm intended
    static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                  Map<Integer, Integer> alphaWellCounts,
                                                  Map<Integer, Integer> betaWellCounts,
                                                  Map<Integer, Integer> plateVtoAMap,
                                                  Map<Integer, Integer> plateVtoBMap,
                                                  Integer maxOccupancyDifference, boolean saveEdges) {
        List<Integer[]> removedEdges = new ArrayList<>();
        for (DefaultWeightedEdge candidate : graph.edgeSet()) {
            Integer source = graph.getEdgeSource(candidate);
            Integer target = graph.getEdgeTarget(candidate);
            Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(source));
            Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(target));
            boolean tooDifferent = Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference;
            if (!tooDifferent) {
                continue;
            }
            if (!saveEdges) {
                graph.setEdgeWeight(candidate, 0.0);
                continue;
            }
            removedEdges.add(new Integer[]{source, target, (int) graph.getEdgeWeight(candidate)});
        }
        //Recorded edges are taken out of the graph only after the pass over edgeSet() has finished
        if (saveEdges) {
            removedEdges.forEach(removed -> graph.removeEdge(removed[0], removed[1]));
        }
        return removedEdges;
    }
    //Filters edges whose weight, as a fraction of either endpoint's well occupancy,
    //is below minOverlapPercent / 100.
    //saveEdges true : such edges are removed from the graph and returned as {source, target, weight} triples
    //saveEdges false : such edges stay in the graph with weight 0.0 and the returned list is empty
    static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                         Map<Integer, Integer> alphaWellCounts,
                                                         Map<Integer, Integer> betaWellCounts,
                                                         Map<Integer, Integer> plateVtoAMap,
                                                         Map<Integer, Integer> plateVtoBMap,
                                                         Integer minOverlapPercent,
                                                         boolean saveEdges) {
        List<Integer[]> removedEdges = new ArrayList<>();
        for (DefaultWeightedEdge candidate : graph.edgeSet()) {
            Integer source = graph.getEdgeSource(candidate);
            Integer target = graph.getEdgeTarget(candidate);
            Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(source));
            Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(target));
            double weight = graph.getEdgeWeight(candidate);
            double minFraction = minOverlapPercent / 100.0;
            boolean overlapTooSmall = (weight / alphaOcc < minFraction) || (weight / betaOcc < minFraction);
            if (!overlapTooSmall) {
                continue;
            }
            if (!saveEdges) {
                graph.setEdgeWeight(candidate, 0.0);
                continue;
            }
            removedEdges.add(new Integer[]{source, target, (int) weight});
        }
        //Recorded edges are taken out of the graph only after the pass over edgeSet() has finished
        if (saveEdges) {
            removedEdges.forEach(removed -> graph.removeEdge(removed[0], removed[1]));
        }
        return removedEdges;
    }
    //Adds each {source, target, weight} triple in removedEdges back to the graph as a weighted edge
    static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                       List<Integer[]> removedEdges) {
        removedEdges.forEach(removed -> {
            DefaultWeightedEdge restored = graph.addEdge(removed[0], removed[1]);
            graph.setEdgeWeight(restored, removed[2].doubleValue());
        });
    }
 }
--- a/src/main/java/GraphWithMapData.java
+++ b/src/main/java/GraphWithMapData.java
@@ -0,0 +1,106 @@
 import org.jgrapht.graph.SimpleWeightedGraph;
 import java.time.Duration;
 import java.util.Map;
 //Can't just write the graph, because I need the occupancy data too.
 //Makes most sense to serialize object and write that to a file.
 //Which means there's no reason to split map data and graph data up.
 /**
  * Serializable holder that bundles a weighted graph with the maps and counts stored alongside it.
  *
  * All values except the source filename are supplied once through the constructor and are kept
  * by reference; no defensive copies are made, and the getters hand back the same objects.
  *
  * NOTE(review): no serialVersionUID is declared, so Java derives one from the class structure.
  * Adding, removing, or changing members may therefore make previously serialized files unreadable.
  */
 public class GraphWithMapData implements java.io.Serializable {
    //Not set by the constructor; null until setSourceFilename is called
    private String sourceFilename;
    //Declared with the raw SimpleWeightedGraph type (no vertex/edge type parameters)
    private final SimpleWeightedGraph graph;
    private Integer numWells;
    //Populated from the constructor parameter named wellConcentrations
    private Integer[] wellPopulations;
    private Integer alphaCount;
    private Integer betaCount;
    private final Map<Integer, Integer> distCellsMapAlphaKey;
    //Elsewhere in this file, an edge's source vertex is looked up here to get the key used in alphaWellCounts
    private final Map<Integer, Integer> plateVtoAMap;
    //Elsewhere in this file, an edge's target vertex is looked up here to get the key used in betaWellCounts
    private final Map<Integer, Integer> plateVtoBMap;
    //presumably the inverse of plateVtoAMap -- TODO confirm against the code that builds it
    private final Map<Integer, Integer> plateAtoVMap;
    //presumably the inverse of plateVtoBMap -- TODO confirm against the code that builds it
    private final Map<Integer, Integer> plateBtoVMap;
    private final Map<Integer, Integer> alphaWellCounts;
    private final Map<Integer, Integer> betaWellCounts;
    //presumably the time taken to build the graph -- TODO confirm against caller
    private final Duration time;
    /**
     * Stores every argument in the corresponding field. The source filename is not set here.
     *
     * @param graph the weighted graph
     * @param numWells value returned by getNumWells()
     * @param wellConcentrations value returned by getWellPopulations()
     * @param alphaCount value returned by getAlphaCount()
     * @param betaCount value returned by getBetaCount()
     * @param distCellsMapAlphaKey value returned by getDistCellsMapAlphaKey()
     * @param plateVtoAMap value returned by getPlateVtoAMap()
     * @param plateVtoBMap value returned by getPlateVtoBMap()
     * @param plateAtoVMap value returned by getPlateAtoVMap()
     * @param plateBtoVMap value returned by getPlateBtoVMap()
     * @param alphaWellCounts value returned by getAlphaWellCounts()
     * @param betaWellCounts value returned by getBetaWellCounts()
     * @param time value returned by getTime()
     */
    public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
                            Integer alphaCount, Integer betaCount,
                            Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
                            Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
                            Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
                            Map<Integer, Integer> betaWellCounts, Duration time) {
        this.graph = graph;
        this.numWells = numWells;
        this.wellPopulations = wellConcentrations;
        this.alphaCount = alphaCount;
        this.betaCount = betaCount;
        this.distCellsMapAlphaKey = distCellsMapAlphaKey;
        this.plateVtoAMap = plateVtoAMap;
        this.plateVtoBMap = plateVtoBMap;
        this.plateAtoVMap = plateAtoVMap;
        this.plateBtoVMap = plateBtoVMap;
        this.alphaWellCounts = alphaWellCounts;
        this.betaWellCounts = betaWellCounts;
        this.time = time;
    }
    /** @return the graph passed to the constructor (raw type) */
    public SimpleWeightedGraph getGraph() {
        return graph;
    }
    /** @return the numWells value passed to the constructor */
    public Integer getNumWells() {
        return numWells;
    }
    /** @return the array passed to the constructor as wellConcentrations (same instance, not a copy) */
    public Integer[] getWellPopulations() {
        return wellPopulations;
    }
    /** @return the alphaCount value passed to the constructor */
    public Integer getAlphaCount() {
        return alphaCount;
    }
    /** @return the betaCount value passed to the constructor */
    public Integer getBetaCount() {
        return betaCount;
    }
    /** @return the distCellsMapAlphaKey map passed to the constructor */
    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return distCellsMapAlphaKey;
    }
    /** @return the plateVtoAMap map passed to the constructor */
    public Map<Integer, Integer> getPlateVtoAMap() {
        return plateVtoAMap;
    }
    /** @return the plateVtoBMap map passed to the constructor */
    public Map<Integer, Integer> getPlateVtoBMap() {
        return plateVtoBMap;
    }
    /** @return the plateAtoVMap map passed to the constructor */
    public Map<Integer, Integer> getPlateAtoVMap() {
        return plateAtoVMap;
    }
    /** @return the plateBtoVMap map passed to the constructor */
    public Map<Integer, Integer> getPlateBtoVMap() {
        return plateBtoVMap;
    }
    /** @return the alphaWellCounts map passed to the constructor */
    public Map<Integer, Integer> getAlphaWellCounts() {
        return alphaWellCounts;
    }
    /** @return the betaWellCounts map passed to the constructor */
    public Map<Integer, Integer> getBetaWellCounts() {
        return betaWellCounts;
    }
    /** @return the Duration passed to the constructor */
    public Duration getTime() {
        return time;
    }
    /** @param filename the name to record as this object's source file */
    public void setSourceFilename(String filename) {
        this.sourceFilename = filename;
    }
    /** @return the recorded source filename, or null if setSourceFilename has not been called */
    public String getSourceFilename() {
        return sourceFilename;
    }
 }
--- a/src/main/java/InteractiveInterface.java
+++ b/src/main/java/InteractiveInterface.java
@@ -0,0 +1,586 @@
 import java.io.IOException;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 //
 public class InteractiveInterface {
    private static final Random rand = BiGpairSEQ.getRand();
    private static final Scanner sc = new Scanner(System.in);
    private static int input;
    private static boolean quit = false;
    /**
     * Runs the interactive main menu until the user selects "0) Exit", then closes the
     * shared Scanner.
     *
     * Each pass prints the menu, reads an integer selection, and dispatches to the matching
     * action. A selection that cannot be parsed as an integer is reported and discarded.
     * An IOException raised by an action is reported and the menu is shown again.
     */
    public static void startInteractive() {
        while (!quit) {
            System.out.println();
            System.out.println("--------BiGPairSEQ SIMULATOR--------");
            System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
            System.out.println("  USING WEIGHTED BIPARTITE GRAPHS  ");
            System.out.println("------------------------------------");
            System.out.println("Please select an option:");
            System.out.println("1) Generate a population of distinct cells");
            System.out.println("2) Generate a sample plate of T cells");
            System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
            System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
            //Need to re-do the CDR3/CDR1 matching to correspond to new pattern
            //System.out.println("5) Generate CDR3/CDR1 occupancy graph");
            //System.out.println("6) Simulate CDR3/CDR1 T cell matching");
            System.out.println("8) Options");
            System.out.println("9) About/Acknowledgments");
            System.out.println("0) Exit");
            try {
                input = sc.nextInt();
                switch (input) {
                    case 1 -> makeCells();
                    case 2 -> makePlate();
                    case 3 -> makeCDR3Graph();
                    case 4 -> matchCDR3s();
                    //case 6 -> matchCellsCDR1();
                    case 8 -> mainOptions();
                    case 9 -> acknowledge();
                    case 0 -> quit = true;
                    default -> System.out.println("Invalid input.");
                }
            } catch (InputMismatchException ex) {
                //The unparsable token is still in the Scanner; discard it so the next read can proceed
                System.out.println(ex);
                sc.next();
            } catch (IOException ex) {
                //No bad token is pending after an I/O failure, so nothing is discarded here;
                //calling sc.next() would swallow (or block waiting for) the user's next entry
                System.out.println(ex);
            }
        }
        sc.close();
    }
    /**
     * Prompts for an output file name, the CDR3/CDR1 diversity factor, and a cell count,
     * then generates a cell sample and writes it to a CSV file.
     *
     * If an entry cannot be parsed, or the cell count is not positive, the problem is reported
     * and the method returns without generating or writing anything. When cell caching is
     * enabled, the new sample is also kept in memory under the given file name.
     */
    private static void makeCells() {
        String filename;
        Integer numCells;
        Integer cdr1Freq;
        try {
            System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                    "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                    "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
            System.out.println("\nThe cells will be written to a CSV file.");
            System.out.print("Please enter a file name: ");
            filename = sc.next();
            System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
            System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
            cdr1Freq = sc.nextInt();
            System.out.print("\nPlease enter the number of T-cells to generate: ");
            numCells = sc.nextInt();
            if(numCells <= 0){
                throw new IllegalArgumentException("Number of cells must be a positive integer.");
            }
        } catch (InputMismatchException ex) {
            //The unparsable token is still in the Scanner; discard it before returning to the menu
            System.out.println(ex);
            sc.next();
            System.out.println("Returning to main menu.");
            return;
        } catch (IllegalArgumentException ex) {
            //The value was read successfully but is invalid; there is no pending token to discard
            System.out.println(ex.getMessage());
            System.out.println("Returning to main menu.");
            return;
        }
        CellSample sample = new CellSample(numCells, cdr1Freq);
        System.out.println("Writing cells to file");
        CellFileWriter writer = new CellFileWriter(filename, sample);
        writer.writeCellsToFile();
        System.out.println("Cell sample written to: " + filename);
        if(BiGpairSEQ.cacheCells()) {
            BiGpairSEQ.setCellSampleInMemory(sample, filename);
        }
    }
    /**
     * Interactively collects sample plate parameters, builds the plate from a cell sample,
     * and writes it to a CSV file.
     *
     * Prompts for a cell sample file, an output file name, a frequency distribution (Poisson,
     * Gaussian, or exponential), the number of wells, the per-well cell populations (random
     * or by section), and the dropout rate. If any entry cannot be parsed or fails validation,
     * the problem is reported and the method returns without reading the cell sample or
     * writing a plate file. When plate caching is enabled, the new plate is also kept in memory.
     */
    private static void makePlate() {
        String cellFile;
        String filename;
        Double stdDev = 0.0;
        Integer numWells;
        Integer numSections;
        Integer[] populations;
        Double dropOutRate;
        boolean poisson = false;
        boolean exponential = false;
        double lambda = 1.5;
        try {
            System.out.println("\nSimulated sample plates consist of:");
            System.out.println("* a number of wells");
            System.out.println("  * separated into one or more sections");
            System.out.println("    * each of which has a set quantity of cells per well");
            System.out.println("    * selected from a statistical distribution of distinct cells");
            System.out.println("    * with a set dropout rate for individual sequences within a cell");
            System.out.println("\nMaking a sample plate requires a population of distinct cells");
            System.out.print("Please enter name of an existing cell sample file: ");
            cellFile = sc.next();
            System.out.println("\nThe sample plate will be written to a CSV file");
            System.out.print("Please enter a name for the output file: ");
            filename = sc.next();
            System.out.println("\nSelect T-cell frequency distribution function");
            System.out.println("1) Poisson");
            System.out.println("2) Gaussian");
            System.out.println("3) Exponential");
            System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
            System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
            System.out.println("(Note: wider distributions are more memory intensive to match)");
            System.out.print("Enter selection value: ");
            input = sc.nextInt();
            switch (input) {
                case 1 -> poisson = true;
                case 2 -> {
                    System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                    System.out.println("(Note: wider distributions are more memory intensive to match)");
                    stdDev = sc.nextDouble();
                    if (stdDev <= 0.0) {
                        throw new IllegalArgumentException("Value must be positive.");
                    }
                }
                case 3 -> {
                    exponential = true;
                    System.out.print("Please enter lambda value for exponential distribution: ");
                    lambda = sc.nextDouble();
                    if (lambda <= 0.0) {
                        lambda = 0.6;
                        System.out.println("Value must be positive. Defaulting to 0.6.");
                    }
                }
                default -> {
                    System.out.println("Invalid input. Defaulting to exponential.");
                    exponential = true;
                }
            }
            System.out.print("\nNumber of wells on plate: ");
            numWells = sc.nextInt();
            if(numWells < 1){
                throw new IllegalArgumentException("No wells on plate");
            }
            //choose whether to make T cell population/well random
            System.out.println("Randomize number of T cells in each well? (y/n)");
            String ans = sc.next();
            Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
            boolean randomWellPopulations = pattern.matcher(ans).matches();
            if(randomWellPopulations) { //if T cell population/well is random
                numSections = numWells;
                Integer minPop;
                Integer maxPop;
                System.out.print("Please enter minimum number of T cells in a well: ");
                minPop = sc.nextInt();
                if(minPop < 1) {
                    throw new IllegalArgumentException("Minimum well population must be positive");
                }
                System.out.println("Please enter maximum number of T cells in a well: ");
                maxPop = sc.nextInt();
                if(maxPop < minPop) {
                    throw new IllegalArgumentException("Max well population must be greater than min well population");
                }
                //maximum should be inclusive, so need to add one to max of randomly generated values
                populations = rand.ints(minPop, maxPop + 1)
                        .limit(numSections)
                        .boxed()
                        .toArray(Integer[]::new);
                System.out.print("Populations: ");
                System.out.println(Arrays.toString(populations));
            }
            else{ //if T cell population/well is not random
                System.out.println("\nThe plate can be evenly sectioned to allow different numbers of T cells per well.");
                System.out.println("How many sections would you like to make (minimum 1)?");
                numSections = sc.nextInt();
                if (numSections < 1) {
                    throw new IllegalArgumentException("Too few sections.");
                } else if (numSections > numWells) {
                    throw new IllegalArgumentException("Cannot have more sections than wells.");
                }
                populations = new Integer[numSections];
                for (int section = 1; section <= numSections; section++) {
                    System.out.print("Enter number of T cells per well in section " + section + ": ");
                    populations[section - 1] = sc.nextInt();
                }
            }
            System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
            System.out.print("Enter well dropout rate (0.0 to 1.0): ");
            dropOutRate = sc.nextDouble();
            if(dropOutRate < 0.0 || dropOutRate > 1.0) {
                throw new IllegalArgumentException("The well dropout rate must be in the range [0.0, 1.0]");
            }
        } catch (InputMismatchException ex) {
            //The unparsable token is still in the Scanner; discard it before returning to the menu
            System.out.println(ex);
            sc.next();
            System.out.println("Returning to main menu.");
            return;
        } catch (IllegalArgumentException ex) {
            //The value was read successfully but is invalid; there is no pending token to discard
            System.out.println(ex.getMessage());
            System.out.println("Returning to main menu.");
            return;
        }
        CellSample cells;
        //check if cells are already in memory
        if (cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null){
            cells = BiGpairSEQ.getCellSampleInMemory();
        }
        else {
            System.out.println("Reading Cell Sample file: " + cellFile);
            CellFileReader cellReader = new CellFileReader(cellFile);
            cells = cellReader.getCellSample();
            if(BiGpairSEQ.cacheCells()) {
                BiGpairSEQ.setCellSampleInMemory(cells, cellFile);
            }
        }
        Plate samplePlate;
        if(exponential){
            samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
        }
        else {
            if (poisson) {
                stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
            }
            samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
        }
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        System.out.println("Writing Sample Plate to file");
        writer.writePlateFile();
        System.out.println("Sample Plate written to file: " + filename);
        if(BiGpairSEQ.cachePlate()) {
            BiGpairSEQ.setPlateInMemory(samplePlate, filename);
        }
    }
    /**
     * Prompts for a cell sample file, a sample plate file, and an output name, then builds
     * the graph and occupancy data and writes it out.
     *
     * The cell sample and plate are taken from memory when the requested file name matches
     * the cached one and a cached object is actually present; otherwise they are read from
     * file (and cached when the corresponding caching option is on). Output is a serialized
     * binary GraphWithMapData file and/or a GraphML file, depending on the output options.
     * Nothing is built or written if the cell sample or the plate turns out to be empty.
     */
    private static void makeCDR3Graph() {
        String filename = null;
        String cellFile = null;
        String plateFile = null;
        try {
            String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
            str = str.concat("\nrequires a cell sample file and a sample plate file.");
            System.out.println(str);
            System.out.print("\nPlease enter name of an existing cell sample file: ");
            cellFile = sc.next();
            System.out.print("\nPlease enter name of an existing sample plate file: ");
            plateFile = sc.next();
            System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
            System.out.print("Please enter a name for the output file: ");
            filename = sc.next();
        } catch (InputMismatchException ex) {
            System.out.println(ex);
            sc.next();
        }
        assert cellFile != null;
        CellSample cellSample;
        //check if cells are already in memory
        if(cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
            cellSample = BiGpairSEQ.getCellSampleInMemory();
        }
        else {
            System.out.println("Reading Cell Sample file: " + cellFile);
            CellFileReader cellReader = new CellFileReader(cellFile);
            cellSample = cellReader.getCellSample();
            if(BiGpairSEQ.cacheCells()) {
                BiGpairSEQ.setCellSampleInMemory(cellSample, cellFile);
            }
        }
        assert plateFile != null;
        Plate plate;
        //check if plate is already in memory; fall back to the file if the name matches but no plate is cached
        if(plateFile.equals(BiGpairSEQ.getPlateFilename()) && BiGpairSEQ.getPlateInMemory() != null){
            plate = BiGpairSEQ.getPlateInMemory();
        }
        else {
            System.out.println("Reading Sample Plate file: " + plateFile);
            PlateFileReader plateReader = new PlateFileReader(plateFile);
            plate = plateReader.getSamplePlate();
            if(BiGpairSEQ.cachePlate()) {
                BiGpairSEQ.setPlateInMemory(plate, plateFile);
            }
        }
        if (cellSample.getCells().size() == 0){
            System.out.println("No cell sample found.");
            System.out.println("Returning to main menu.");
        }
        else if(plate.getWells().size() == 0 || plate.getPopulations().length == 0){
            System.out.println("No sample plate found.");
            System.out.println("Returning to main menu.");
        }
        else{
            GraphWithMapData data = Simulator.makeGraph(cellSample, plate, true);
            assert filename != null;
            if(BiGpairSEQ.outputBinary()) {
                GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
                dataWriter.writeDataToFile();
                System.out.println("Serialized binary graph/data file written to: " + filename);
            }
            if(BiGpairSEQ.outputGraphML()) {
                GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data);
                graphMLWriter.writeGraphToFile();
                System.out.println("GraphML file written to: " + filename);
            }
            if(BiGpairSEQ.cacheGraph()) {
                BiGpairSEQ.setGraphInMemory(data, filename);
            }
        }
    }
    /**
     * Prompts for a graph/data file, an output file name, and the matching filters, then runs
     * the matching simulation and writes the results to file.
     *
     * Out-of-range filter values are replaced with the nearest permitted value and the
     * replacement is reported. If an entry cannot be parsed, the exception is printed, the
     * bad token is discarded, and the simulation runs with the values read so far plus the
     * defaults for the rest. The graph is taken from memory when the requested file name
     * matches the cached one and a cached object is present; otherwise it is read from file.
     *
     * @throws IOException declared on the original signature; the visible code does not show
     *                     which call raises it
     */
    private static void matchCDR3s() throws IOException {
        String filename = null;
        String graphFilename = null;
        Integer lowThreshold = 0;
        Integer highThreshold = Integer.MAX_VALUE;
        Integer maxOccupancyDiff = Integer.MAX_VALUE;
        Integer minOverlapPercent = 0;
        try {
            System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
            System.out.println("Please enter name of an existing graph and occupancy data file: ");
            graphFilename = sc.next();
            System.out.println("The matching results will be written to a file.");
            System.out.print("Please enter a name for the output file: ");
            filename = sc.next();
            System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
            lowThreshold = sc.nextInt();
            if(lowThreshold < 1){
                lowThreshold = 1;
                System.out.println("Value for low occupancy overlap threshold must be positive");
                System.out.println("Value for low occupancy overlap threshold set to 1");
            }
            System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
            highThreshold = sc.nextInt();
            if(highThreshold < lowThreshold) {
                highThreshold = lowThreshold;
                System.out.println("Value for high occupancy overlap threshold must be >= low overlap threshold");
                System.out.println("Value for high occupancy overlap threshold set to " + lowThreshold);
            }
            System.out.println("What is the minimum percentage of a sequence's wells in alpha/beta overlap to attempt matching? (0 - 100)");
            minOverlapPercent = sc.nextInt();
            if (minOverlapPercent < 0 || minOverlapPercent > 100) {
                //Actually apply the fallback that the message below announces
                minOverlapPercent = 0;
                System.out.println("Value outside range. Minimum occupancy overlap percentage set to 0");
            }
            System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
            maxOccupancyDiff = sc.nextInt();
            if (maxOccupancyDiff < 0) {
                maxOccupancyDiff = 0;
                System.out.println("Maximum allowable difference in alpha/beta occupancy must be nonnegative");
                System.out.println("Maximum allowable difference in alpha/beta occupancy set to 0");
            }
        } catch (InputMismatchException ex) {
            System.out.println(ex);
            sc.next();
        }
        assert graphFilename != null;
        //check if this is the same graph we already have in memory;
        //fall back to the file if the name matches but no graph is cached
        GraphWithMapData data;
        if(graphFilename.equals(BiGpairSEQ.getGraphFilename()) && BiGpairSEQ.getGraphInMemory() != null) {
            data = BiGpairSEQ.getGraphInMemory();
        }
        else {
            GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename, true);
            data = dataReader.getData();
            if(BiGpairSEQ.cacheGraph()) {
                BiGpairSEQ.setGraphInMemory(data, graphFilename);
            }
        }
        //simulate matching
        MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
                minOverlapPercent, true);
        //write results to file
        assert filename != null;
        MatchingFileWriter writer = new MatchingFileWriter(filename, results);
        System.out.println("Writing results to file");
        writer.writeResultsToFile();
        System.out.println("Results written to file: " + filename);
    }
    ///////
    //Rewrite this to fit new matchCDR3 method with file I/O
    ///////
 //    public static void matchCellsCDR1(){
 //    /*
 //    The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
 //    looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
 //    match
 //     */
 //        String filename = null;
 //        String preliminaryResultsFilename = null;
 //        String cellFile = null;
 //        String plateFile = null;
 //        Integer lowThresholdCDR3 = 0;
 //        Integer highThresholdCDR3 = Integer.MAX_VALUE;
 //        Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
 //        Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
 //        Integer lowThresholdCDR1 = 0;
 //        Integer highThresholdCDR1 = Integer.MAX_VALUE;
 //        boolean outputCDR3Matches = false;
 //        try {
 //            System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
 //            System.out.print("Please enter name of an existing cell sample file: ");
 //            cellFile = sc.next();
 //            System.out.print("Please enter name of an existing sample plate file: ");
 //            plateFile = sc.next();
 //            System.out.println("The matching results will be written to a file.");
 //            System.out.print("Please enter a name for the output file: ");
 //            filename = sc.next();
 //            System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
 //            lowThresholdCDR3 = sc.nextInt();
 //            if(lowThresholdCDR3 < 1){
 //                throw new InputMismatchException("Minimum value for low threshold is 1");
 //            }
 //            System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
 //            highThresholdCDR3 = sc.nextInt();
 //            System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
 //            maxOccupancyDiffCDR3 = sc.nextInt();
 //            System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
 //            minOverlapPercentCDR3 = sc.nextInt();
 //            if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
 //                throw new InputMismatchException("Value outside range. Minimum percent set to 0");
 //            }
 //            System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
 //            lowThresholdCDR1 = sc.nextInt();
 //            if(lowThresholdCDR1 < 1){
 //                throw new InputMismatchException("Minimum value for low threshold is 1");
 //            }
 //            System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
 //            highThresholdCDR1 = sc.nextInt();
 //            System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
 //            System.out.println("Output a file for CDR3 alpha/beta match results as well?");
 //            System.out.print("Please enter y/n: ");
 //            String ans = sc.next();
 //            Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
 //            Matcher matcher = pattern.matcher(ans);
 //            if(matcher.matches()){
 //                outputCDR3Matches = true;
 //                System.out.println("Please enter filename for CDR3 alpha/beta match results");
 //                preliminaryResultsFilename = sc.next();
 //                System.out.println("CDR3 alpha/beta matches will be output to file");
 //            }
 //            else{
 //                System.out.println("CDR3 alpha/beta matches will not be output to file");
 //            }
 //        } catch (InputMismatchException ex) {
 //            System.out.println(ex);
 //            sc.next();
 //        }
 //        CellFileReader cellReader = new CellFileReader(cellFile);
 //        PlateFileReader plateReader = new PlateFileReader(plateFile);
 //        Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
 //        if (cellReader.getCells().size() == 0){
 //            System.out.println("No cell sample found.");
 //            System.out.println("Returning to main menu.");
 //        }
 //        else if(plate.getWells().size() == 0){
 //            System.out.println("No sample plate found.");
 //            System.out.println("Returning to main menu.");
 //
 //        }
 //        else{
 //            if(highThresholdCDR3 >= plate.getSize()){
 //                highThresholdCDR3 = plate.getSize() - 1;
 //            }
 //            if(highThresholdCDR1 >= plate.getSize()){
 //                highThresholdCDR1 = plate.getSize() - 1;
 //            }
 //            List<Integer[]> cells = cellReader.getCells();
 //            MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
 //                    maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
 //            MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
 //                    highThresholdCDR1, preliminaryResults);
 //            MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
 //            writer.writeResultsToFile();
 //            writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
 //            writer.writeResultsToFile();
 //            if(outputCDR3Matches){
 //                writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
 //                writer.writeResultsToFile();
 //            }
 //        }
 //    }
    /**
     * Shows the options menu repeatedly until the user chooses to return to the main menu.
     *
     * Entries 1-5 each flip one boolean setting on BiGpairSEQ; the menu text names the state
     * the setting would be switched to. Entry 6 opens the algorithm options menu. Input that
     * cannot be parsed as an integer is reported and discarded.
     */
    private static void mainOptions(){
        boolean done = false;
        do {
            System.out.println("\n--------------OPTIONS---------------");
            System.out.println("1) Turn " + getOnOff(!BiGpairSEQ.cacheCells()) + " cell sample file caching");
            System.out.println("2) Turn " + getOnOff(!BiGpairSEQ.cachePlate()) + " plate file caching");
            System.out.println("3) Turn " + getOnOff(!BiGpairSEQ.cacheGraph()) + " graph/data file caching");
            System.out.println("4) Turn " + getOnOff(!BiGpairSEQ.outputBinary()) + " serialized binary graph output");
            System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output");
            System.out.println("6) Maximum weight matching algorithm options");
            System.out.println("0) Return to main menu");
            try {
                input = sc.nextInt();
                if (input == 1) {
                    BiGpairSEQ.setCacheCells(!BiGpairSEQ.cacheCells());
                } else if (input == 2) {
                    BiGpairSEQ.setCachePlate(!BiGpairSEQ.cachePlate());
                } else if (input == 3) {
                    BiGpairSEQ.setCacheGraph(!BiGpairSEQ.cacheGraph());
                } else if (input == 4) {
                    BiGpairSEQ.setOutputBinary(!BiGpairSEQ.outputBinary());
                } else if (input == 5) {
                    BiGpairSEQ.setOutputGraphML(!BiGpairSEQ.outputGraphML());
                } else if (input == 6) {
                    algorithmOptions();
                } else if (input == 0) {
                    done = true;
                } else {
                    System.out.println("Invalid input");
                }
            } catch (InputMismatchException badToken) {
                //Report the problem, then drop the unparsable token so the next read can proceed
                System.out.println(badToken);
                sc.next();
            }
        } while (!done);
    }
    /**
     * Converts a boolean into the word used in the mainOptions() menu entries.
     *
     * @param b - the boolean value to describe
     * @return String "on" when b is true, "off" when b is false
     */
    private static String getOnOff(boolean b) {
        return b ? "on" : "off";
    }
    /**
     * Shows the matching-algorithm menu until a heap implementation is chosen or the user
     * asks to go back.
     *
     * Entry 1 only prints a "not yet implemented" notice and the menu is shown again.
     * Entries 2 and 3 call the corresponding BiGpairSEQ setter and then leave the menu.
     * Input that cannot be parsed as an integer is reported and discarded.
     */
    private static void algorithmOptions(){
        boolean finished = false;
        do {
            System.out.println("\n---------ALGORITHM OPTIONS----------");
            System.out.println("1) Use scaling algorithm by Duan and Su.");
            System.out.println("2) Use LEDA book algorithm with Fibonacci heap priority queue");
            System.out.println("3) Use LEDA book algorithm with pairing heap priority queue");
            System.out.println("0) Return to Options menu");
            try {
                input = sc.nextInt();
                switch (input) {
                    case 1:
                        System.out.println("This option is not yet implemented. Choose another.");
                        break;
                    case 2:
                        BiGpairSEQ.setFibonacciHeap();
                        System.out.println("MWM algorithm set to LEDA with Fibonacci heap");
                        finished = true;
                        break;
                    case 3:
                        BiGpairSEQ.setPairingHeap();
                        System.out.println("MWM algorithm set to LEDA with pairing heap");
                        finished = true;
                        break;
                    case 0:
                        finished = true;
                        break;
                    default:
                        System.out.println("Invalid input");
                        break;
                }
            } catch (InputMismatchException badToken) {
                //Report the problem, then drop the unparsable token so the next read can proceed
                System.out.println(badToken);
                sc.next();
            }
        } while (!finished);
    }
    /**
     * Prints the about/acknowledgments text: a short description of the program, where to
     * find the documentation, the pairSEQ citation, and the author credit.
     */
    private static void acknowledge(){
        //An empty entry produces a blank line in the output
        String[] aboutLines = {
                "This program simulates BiGpairSEQ, a graph theory based adaptation",
                "of the pairSEQ algorithm for pairing T cell receptor sequences.",
                "",
                "For full documentation, view readme.md file distributed with this code",
                "or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.",
                "",
                "pairSEQ citation:",
                "Howie, B., Sherwood, A. M., et. al.",
                "High-throughput pairing of T cell receptor alpha and beta sequences.",
                "Sci. Transl. Med. 7, 301ra131 (2015)",
                "",
                "BiGpairSEQ_Sim by Eugene Fischer, 2021-2022"
        };
        for (String line : aboutLines) {
            System.out.println(line);
        }
    }
 }
--- a/src/main/java/META-INF/MANIFEST.MF
+++ b/src/main/java/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
 Manifest-Version: 1.0
 Main-Class: BiGpairSEQ
--- a/src/main/java/MatchingFileWriter.java
+++ b/src/main/java/MatchingFileWriter.java
@@ -8,24 +8,30 @@ import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
 import java.util.List;
 public class MatchingFileWriter {
    private String filename;
    private List<String> comments;
    private List<String> headers;
-    private List<List<String>> results;
+    private List<List<String>> allResults;
-    public MatchingFileWriter(String filename, List<String> comments, List<String> headers, List<List<String>> results){
+    public MatchingFileWriter(String filename, MatchingResult result){
        if(!filename.matches(".*\\.csv")){
            filename = filename + ".csv";
        }
        this.filename = filename;
-        this.comments = comments;
+        this.comments = result.getComments();
-        this.headers = headers;
+        this.headers = result.getHeaders();
-        this.results = results;
+        this.allResults = result.getAllResults();
    }
    /**
     * Prints every comment line that reports the pairing error rate to standard output.
     *
     * A line qualifies when it consists of the label "pairing error rate: " (any
     * capitalisation) followed by a non-negative decimal number such as "0.05", ".5" or "3".
     */
    public void writeErrorRateToTerminal(){
        for(String s: comments){
            // (?i)  : comments built as key + ": " + value carry the key's own capitalisation,
            //         so the label must not be matched case-sensitively.
            // \\.?  : a literal, optional decimal point; an unescaped '.' would accept any character.
            if(s.matches("(?i)(pairing error rate: )(\\d*\\.?\\d+)")){
                System.out.println(s);
            }
        }
    }
    public void writeResultsToFile(){
        String[] headerStrings = new String[headers.size()];
        for(int i = 0; i < headers.size(); i++){
@@ -41,8 +47,8 @@ public class MatchingFileWriter {
            for(String comment: comments){
                printer.printComment(comment);
            }
-            results.add(0, headers);
+            allResults.add(0, headers);
-            printer.printRecords(results);
+            printer.printRecords(allResults);
        } catch(IOException ex){
            System.out.println("Could not make new file named "+filename);
--- a/src/main/java/MatchingResult.java
+++ b/src/main/java/MatchingResult.java
@@ -1,16 +1,41 @@
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 public class MatchingResult {
    private List<String> comments;
    private List<String> headers;
    private List<List<String>> allResults;
    private Map<Integer, Integer> matchMap;
    private Duration time;
-    public MatchingResult(List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
+    private final Map<String, String> metadata;
-        this.comments = comments;
+    private final List<String> comments;
    private final List<String> headers;
    private final List<List<String>> allResults;
    private final Map<Integer, Integer> matchMap;
    private final Duration time;
    public MatchingResult(Map<String, String> metadata, List<String> headers,
                          List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
        /*
         * POSSIBLE KEYS FOR METADATA MAP ARE:
         * sample plate filename *
         * graph filename *
         * well populations *
         * total alphas found *
         * total betas found *
         * high overlap threshold *
         * low overlap threshold *
         * maximum occupancy difference *
         * minimum overlap percent *
         * pairing attempt rate *
         * correct pairing count *
         * incorrect pairing count *
         * pairing error rate *
         * simulation time (seconds)
         */
        this.metadata = metadata;
        this.comments = new ArrayList<>();
        for (String key : metadata.keySet()) {
            comments.add(key +": " + metadata.get(key));
        }
        this.headers = headers;
        this.allResults = allResults;
        this.matchMap = matchMap;
@@ -18,6 +43,8 @@ public class MatchingResult {
    }
    /** Returns the metadata map this result was constructed with (the same instance, not a copy). */
    public Map<String, String> getMetadata() {
        return this.metadata;
    }
    /** Returns the list of comment lines held by this result (the same instance, not a copy). */
    public List<String> getComments() {
        return this.comments;
    }
@@ -37,4 +64,49 @@ public class MatchingResult {
    /** Returns the duration stored in this result. */
    public Duration getTime() {
        return this.time;
    }
    /** Returns the "sample plate filename" metadata entry, or null when the key is absent. */
    public String getPlateFilename() {
        final String key = "sample plate filename";
        return metadata.get(key);
    }
    /** Returns the "graph filename" metadata entry, or null when the key is absent. */
    public String getGraphFilename() {
        final String key = "graph filename";
        return metadata.get(key);
    }
    /**
     * Parses the "well populations" metadata entry, a list of integers separated by ", ",
     * into an array that keeps the order of the entries.
     *
     * @return one Integer per entry in the metadata string
     * @throws NumberFormatException if any entry is not a valid integer
     */
    public Integer[] getWellPopulations() {
        String[] tokens = metadata.get("well populations").split(", ");
        Integer[] parsed = new Integer[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            parsed[i] = Integer.parseInt(tokens[i]);
        }
        return parsed;
    }
    /**
     * Returns the total number of alphas recorded in the metadata.
     *
     * Looks up "total alpha count" first and falls back to "total alphas found",
     * the key named in the constructor's list of possible metadata keys.
     *
     * @throws NumberFormatException if neither key is present or the value is not an integer
     */
    public Integer getAlphaCount() {
        String count = metadata.get("total alpha count");
        if (count == null) {
            count = metadata.get("total alphas found");
        }
        return Integer.parseInt(count);
    }
    /**
     * Returns the total number of betas recorded in the metadata.
     *
     * Looks up "total beta count" first and falls back to "total betas found",
     * the key named in the constructor's list of possible metadata keys.
     *
     * @throws NumberFormatException if neither key is present or the value is not an integer
     */
    public Integer getBetaCount() {
        String count = metadata.get("total beta count");
        if (count == null) {
            count = metadata.get("total betas found");
        }
        return Integer.parseInt(count);
    }
    /** Parses the "high overlap threshold" metadata entry as an integer. */
    public Integer getHighOverlapThreshold() {
        final String raw = metadata.get("high overlap threshold");
        return Integer.parseInt(raw);
    }
    /** Parses the "low overlap threshold" metadata entry as an integer. */
    public Integer getLowOverlapThreshold() {
        final String raw = metadata.get("low overlap threshold");
        return Integer.parseInt(raw);
    }
    /** Parses the "maximum occupancy difference" metadata entry as an integer. */
    public Integer getMaxOccupancyDifference() {
        final String raw = metadata.get("maximum occupancy difference");
        return Integer.parseInt(raw);
    }
    /** Parses the "minimum overlap percent" metadata entry as an integer. */
    public Integer getMinOverlapPercent() {
        final String raw = metadata.get("minimum overlap percent");
        return Integer.parseInt(raw);
    }
    /** Parses the "pairing attempt rate" metadata entry as a double. */
    public Double getPairingAttemptRate() {
        final String raw = metadata.get("pairing attempt rate");
        return Double.parseDouble(raw);
    }
    /** Parses the "correct pairing count" metadata entry as an integer. */
    public Integer getCorrectPairingCount() {
        final String raw = metadata.get("correct pairing count");
        return Integer.parseInt(raw);
    }
    /** Parses the "incorrect pairing count" metadata entry as an integer. */
    public Integer getIncorrectPairingCount() {
        final String raw = metadata.get("incorrect pairing count");
        return Integer.parseInt(raw);
    }
    /** Parses the "pairing error rate" metadata entry as a double. */
    public Double getPairingErrorRate() {
        final String raw = metadata.get("pairing error rate");
        return Double.parseDouble(raw);
    }
    /** Returns the "simulation time (seconds)" metadata entry unparsed, or null when the key is absent. */
    public String getSimulationTime() {
        final String key = "simulation time (seconds)";
        return metadata.get(key);
    }
 }
--- a/src/main/java/Plate.java
+++ b/src/main/java/Plate.java
@@ -1,46 +1,89 @@
 /*
 TODO: Implement exponential distribution using inversion method - DONE
 TODO: Implement discrete frequency distributions using Vose's Alias Method
 */
 import java.util.*;
 //Need to write function to output plate to a file that I can read in.
 public class Plate {
    private CellSample cells;
    private String sourceFile;
    private String filename;
    private List<List<Integer[]>> wells;
-    private Random rand = new Random();
+    private final Random rand = BiGpairSEQ.getRand();
    private int size;
    private double error;
-    private Integer[] concentrations;
+    private Integer[] populations;
    private double stdDev;
    private double lambda;
    boolean exponential = false;
-    public Plate (int size, double error, Integer[] concentrations, double stdDev) {
+    public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
                 double dropoutRate, double stdDev_or_lambda, boolean exponential){
        this.cells = cells;
        this.sourceFile = cellFilename;
        this.size = numWells;
        this.wells = new ArrayList<>();
        this.error = dropoutRate;
        this.populations = populations;
        this.exponential = exponential;
        if (this.exponential) {
            this.lambda = stdDev_or_lambda;
            fillWellsExponential(cells.getCells(), this.lambda);
        }
        else {
            this.stdDev = stdDev_or_lambda;
            fillWells(cells.getCells(), this.stdDev);
        }
    }
    public Plate(int size, double error, Integer[] populations) {
        this.size = size;
        this.error = error;
-        this.concentrations = concentrations;
+        this.populations = populations;
        this.stdDev = stdDev;
        wells = new ArrayList<>();
    }
-    public Plate(List<List<Integer[]>> wells){
+    //constructor for returning a Plate from a PlateFileReader
    /**
     * Builds a plate from wells that already exist.
     *
     * The plate size is the number of wells. The populations array holds each distinct
     * well size once, in the order the sizes first appear in {@code wells}.
     *
     * @param filename name recorded as this plate's file name
     * @param wells    the wells, each a list of cells
     */
    public Plate(String filename, List<List<Integer[]>> wells) {
        this.filename = filename;
        this.wells = wells;
        this.size = wells.size();
        List<Integer> distinctSizes = new ArrayList<>();
        for (List<Integer[]> well : wells) {
            Integer wellSize = well.size();
            if (distinctSizes.indexOf(wellSize) < 0) {
                distinctSizes.add(wellSize);
            }
        }
        this.populations = distinctSizes.toArray(new Integer[0]);
    }
-    public void fillWells(List<Integer[]> cells) {
+    private void fillWellsExponential(List<Integer[]> cells, double lambda){
-        int numSections = concentrations.length;
+        this.lambda = lambda;
        exponential = true;
        int numSections = populations.length;
        int section = 0;
        double m;
        int n;
        //testing
        //System.out.println("Cell size: " + cells.get(0).length);
        while (section < numSections){
            for (int i = 0; i < (size / numSections); i++) {
                List<Integer[]> well = new ArrayList<>();
-                for (int j = 0; j < concentrations[section]; j++) {
+                for (int j = 0; j < populations[section]; j++) {
                    do {
-                        m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
+                        //inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
                        m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
                    } while (m >= cells.size() || m < 0);
                    n = (int) Math.floor(m);
                    Integer[] cellToAdd = cells.get(n).clone();
                    for(int k = 0; k < cellToAdd.length; k++){
-                        if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
+                        if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce
                            cellToAdd[k] = -1;
                        }
                    }
@@ -52,13 +95,36 @@ public class Plate {
        }
    }
-    public void writePlateToFile(String filename) {
+    private void fillWells( List<Integer[]> cells, double stdDev) {
-
+        this.stdDev = stdDev;
-
+        int numSections = populations.length;
        int section = 0;
        double m;
        int n;
        while (section < numSections){
            for (int i = 0; i < (size / numSections); i++) {
                List<Integer[]> well = new ArrayList<>();
                for (int j = 0; j < populations[section]; j++) {
                    do {
                        m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
                    } while (m >= cells.size() || m < 0);
                    n = (int) Math.floor(m);
                    Integer[] cellToAdd = cells.get(n).clone();
                    for(int k = 0; k < cellToAdd.length; k++){
                        if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
                            cellToAdd[k] = -1;
                        }
                    }
                    well.add(cellToAdd);
                }
                wells.add(well);
            }
            section++;
        }
    }
-    public Integer[] getConcentrations(){
+    public Integer[] getPopulations(){
-        return concentrations;
+        return populations;
    }
    public int getSize(){
@@ -69,6 +135,10 @@ public class Plate {
        return stdDev;
    }
    /** Returns the value of this plate's {@code exponential} flag. */
    public boolean isExponential(){
        return this.exponential;
    }
    /** Returns the lambda value stored on this plate. */
    public double getLambda(){
        return this.lambda;
    }
    /** Returns the error value stored on this plate. */
    public double getError() {
        return this.error;
    }
@@ -77,142 +147,38 @@ public class Plate {
        return wells;
    }
-
+    //returns a map of the counts of the sequence at cell index sIndex, in all wells
-    //returns a map of counts of all the CDR3s (alphas and betas) in all wells
+    public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
-    public Map<Integer, Integer>assayWellsCDR3(){
+        return this.assayWellsSequenceS(0, size, sIndices);
        return this.assayWellsCDR3(0, size);
    }
    /** Counts the CDR3 alphas over all wells by delegating to the ranged overload with [0, size). */
    public Map<Integer, Integer> assayWellsCDR3Alpha() {
        final int firstWell = 0;
        return assayWellsCDR3Alpha(firstWell, this.size);
    }
    /** Counts the CDR3 betas over all wells by delegating to the ranged overload with [0, size). */
    public Map<Integer, Integer> assayWellsCDR3Beta() {
        final int firstWell = 0;
        return assayWellsCDR3Beta(firstWell, this.size);
    }
    /** Counts the CDR1s (alphas and betas) over all wells by delegating to the ranged overload with [0, size). */
    public Map<Integer, Integer> assayWellsCDR1(){
        final int firstWell = 0;
        return assayWellsCDR1(firstWell, this.size);
    }
    /** Counts the CDR1 alphas over all wells by delegating to the ranged overload with [0, size). */
    public Map<Integer, Integer> assayWellsCDR1Alpha() {
        final int firstWell = 0;
        return assayWellsCDR1Alpha(firstWell, this.size);
    }
    /** Counts the CDR1 betas over all wells by delegating to the ranged overload with [0, size). */
    public Map<Integer, Integer> assayWellsCDR1Beta() {
        final int firstWell = 0;
        return assayWellsCDR1Beta(firstWell, this.size);
    }
-    //returns a map of counts of the CDR3s (alphas and betas) in a specific well
+    //returns a map of the counts of the sequence at cell index sIndex, in a specific well
-    public Map<Integer, Integer>assayWellsCDR3(int n){
+    public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
        return this.assayWellsCDR3(n, n+1);
    }
    /** Counts the CDR1s (alphas and betas) in well {@code n} by delegating to the ranged overload with [n, n+1). */
    public Map<Integer, Integer> assayWellsCDR1(int n){
        final int endExclusive = n + 1;
        return assayWellsCDR1(n, endExclusive);
    }
    /** Counts the CDR3 alphas in well {@code n} by delegating to the ranged overload with [n, n+1). */
    public Map<Integer, Integer> assayWellsCDR3Alpha(int n) {
        final int endExclusive = n + 1;
        return assayWellsCDR3Alpha(n, endExclusive);
    }
    /** Counts the CDR3 betas in well {@code n} by delegating to the ranged overload with [n, n+1). */
    public Map<Integer, Integer> assayWellsCDR3Beta(int n) {
        final int endExclusive = n + 1;
        return assayWellsCDR3Beta(n, endExclusive);
    }
    /** Counts the CDR1 alphas in well {@code n} by delegating to the ranged overload with [n, n+1). */
    public Map<Integer, Integer> assayWellsCDR1Alpha(int n) {
        final int endExclusive = n + 1;
        return assayWellsCDR1Alpha(n, endExclusive);
    }
    /** Counts the CDR1 betas in well {@code n} by delegating to the ranged overload with [n, n+1). */
    public Map<Integer, Integer> assayWellsCDR1Beta(int n) {
        final int endExclusive = n + 1;
        return assayWellsCDR1Beta(n, endExclusive);
    }
-
+    //returns a map of the counts of the sequence at cell index sIndex, in a range of wells
-    //returns a map of the counts of the CDR3s (alphas and betas) in a range of wells
+    public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
    public Map<Integer, Integer>assayWellsCDR3(int start, int end){
        Map<Integer,Integer> assay = new HashMap<>();
-        for(int i = start; i < end; i++){
+        for(int pIndex: sIndices){
-            countCDR3Alphas(assay, wells.get(i));
+            for(int i = start; i < end; i++){
-            countCDR3Betas(assay,wells.get(i));
+                countSequences(assay, wells.get(i), pIndex);
            }
        }
        return assay;
    }
-    //returns a map of the counts of the CDR1s (alphas and betas) in a range of wells
+    //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
-    public Map<Integer, Integer>assayWellsCDR1(int start, int end){
+    private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
        Map<Integer,Integer> assay = new HashMap<>();
        for(int i = start; i < end; i++){
            countCDR1Alphas(assay, wells.get(i));
            countCDR1Betas(assay,wells.get(i));
        }
        return assay;
    }
    /**
     * Tallies the CDR3 alphas found in wells {@code start} (inclusive) to {@code end} (exclusive).
     *
     * @return a new map from alpha value to the number of times it was counted
     */
    public Map<Integer, Integer> assayWellsCDR3Alpha(int start, int end) {
        Map<Integer, Integer> counts = new HashMap<>();
        int wellIndex = start;
        while (wellIndex < end) {
            countCDR3Alphas(counts, wells.get(wellIndex));
            wellIndex++;
        }
        return counts;
    }
    /**
     * Tallies the CDR3 betas found in wells {@code start} (inclusive) to {@code end} (exclusive).
     *
     * @return a new map from beta value to the number of times it was counted
     */
    public Map<Integer, Integer> assayWellsCDR3Beta(int start, int end) {
        Map<Integer, Integer> counts = new HashMap<>();
        int wellIndex = start;
        while (wellIndex < end) {
            countCDR3Betas(counts, wells.get(wellIndex));
            wellIndex++;
        }
        return counts;
    }
    /**
     * Tallies the CDR1 alphas found in wells {@code start} (inclusive) to {@code end} (exclusive).
     *
     * @return a new map from alpha value to the number of times it was counted
     */
    public Map<Integer, Integer> assayWellsCDR1Alpha(int start, int end) {
        Map<Integer, Integer> counts = new HashMap<>();
        int wellIndex = start;
        while (wellIndex < end) {
            countCDR1Alphas(counts, wells.get(wellIndex));
            wellIndex++;
        }
        return counts;
    }
    /**
     * Tallies the CDR1 betas found in wells {@code start} (inclusive) to {@code end} (exclusive).
     *
     * @return a new map from beta value to the number of times it was counted
     */
    public Map<Integer, Integer> assayWellsCDR1Beta(int start, int end) {
        Map<Integer, Integer> counts = new HashMap<>();
        int wellIndex = start;
        while (wellIndex < end) {
            countCDR1Betas(counts, wells.get(wellIndex));
            wellIndex++;
        }
        return counts;
    }
    //given a map, counts distinct CDR3 alphas in a well
    private void countCDR3Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
        for(Integer[] cell : well) {
-            if(cell[0] != -1){
+            for(int sIndex: sIndices){
-                //keys are alphas, value is how many of them have been assayed
+                if(cell[sIndex] != -1){
-                wellMap.merge(cell[0], 1, (oldValue, newValue) -> oldValue + newValue);
+                    wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
-            }
+                }
        }
    }
    /**
     * Adds one to the tally in {@code wellMap} for the value at index 1 (the CDR3 beta)
     * of every cell in {@code well}. Cells holding -1 at that index are skipped.
     */
    private void countCDR3Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
        final int betaIndex = 1;
        for (Integer[] cell : well) {
            Integer sequence = cell[betaIndex];
            if (sequence == -1) {
                continue;
            }
            wellMap.merge(sequence, 1, Integer::sum);
        }
    }
    /**
     * Adds one to the tally in {@code wellMap} for the value at index 2 (the CDR1 alpha)
     * of every cell in {@code well}. Cells holding -1 at that index are skipped.
     */
    private void countCDR1Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
        final int alphaIndex = 2;
        for (Integer[] cell : well) {
            Integer sequence = cell[alphaIndex];
            if (sequence == -1) {
                continue;
            }
            wellMap.merge(sequence, 1, Integer::sum);
        }
    }
    /**
     * Adds one to the tally in {@code wellMap} for the value at index 3 (the CDR1 beta)
     * of every cell in {@code well}. Cells holding -1 at that index are skipped.
     */
    private void countCDR1Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
        final int betaIndex = 3;
        for (Integer[] cell : well) {
            Integer sequence = cell[betaIndex];
            if (sequence == -1) {
                continue;
            }
            wellMap.merge(sequence, 1, Integer::sum);
        }
    }
    /** Returns the source file name stored on this plate. */
    public String getSourceFileName() {
        return this.sourceFile;
    }
    /** Returns the file name stored on this plate. */
    public String getFilename() {
        return this.filename;
    }
 }
--- a/src/main/java/PlateFileReader.java
+++ b/src/main/java/PlateFileReader.java
@@ -14,12 +14,14 @@ import java.util.regex.Pattern;
 public class PlateFileReader {
    private List<List<Integer[]>> wells = new ArrayList<>();
    private String filename;
    public PlateFileReader(String filename){
        if(!filename.matches(".*\\.csv")){
            filename = filename + ".csv";
        }
        this.filename = filename;
        CSVFormat plateFileFormat = CSVFormat.Builder.create()
                .setCommentMarker('#')
@@ -54,8 +56,8 @@ public class PlateFileReader {
    }
-    public List<List<Integer[]>> getWells() {
+    public Plate getSamplePlate() {
-        return wells;
+        return new Plate(filename, wells);
    }
 }
--- a/src/main/java/PlateFileWriter.java
+++ b/src/main/java/PlateFileWriter.java
@@ -7,35 +7,39 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
 import java.util.*;
 import java.util.regex.Pattern;
 public class PlateFileWriter {
    private int size;
    private List<List<Integer[]>> wells;
    private double stdDev;
    private double lambda;
    private Double error;
    private String filename;
-    private String[] headers;
+    private String sourceFileName;
-    private List<Integer> concentrations;
+    private Integer[] populations;
    private boolean isExponential = false;
    public PlateFileWriter(String filename, Plate plate) {
        if(!filename.matches(".*\\.csv")){
            filename = filename + ".csv";
        }
        this.filename = filename;
        this.sourceFileName = plate.getSourceFileName();
        this.size = plate.getSize();
-        this.stdDev = plate.getStdDev();
+        this.isExponential = plate.isExponential();
        if(isExponential) {
            this.lambda = plate.getLambda();
        }
        else{
            this.stdDev = plate.getStdDev();
        }
        this.error = plate.getError();
        this.wells = plate.getWells();
-        this.concentrations = Arrays.asList(plate.getConcentrations());
+        this.populations = plate.getPopulations();
-        concentrations.sort(Comparator.reverseOrder());
+        Arrays.sort(populations);
    }
    public void writePlateFile(){
        //works as is, but too many columns in csv, need to make them all rows.
        //will now redo it so that every column is a well, with well names as headers
        //need to give plate error, sample pop size, stdDev, num sections, concentration per section as comments
        Comparator<List<Integer[]>> listLengthDescending = Comparator.comparingInt(List::size);
        wells.sort(listLengthDescending.reversed());
        int maxLength = wells.get(0).size();
@@ -53,37 +57,50 @@ public class PlateFileWriter {
            }
        }
-        //this took forever
+//        //this took forever and I don't use it
-        List<List<String>> rows = new ArrayList<>();
+//        //if I wanted to use it, I'd replace printer.printRecords(wellsAsStrings) with printer.printRecords(rows)
-        List<String> tmp = new ArrayList<>();
+//        List<List<String>> rows = new ArrayList<>();
-        for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
+//        List<String> tmp = new ArrayList<>();
-            tmp.add("well " + (i+1));
+//        for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
-        }
+//            tmp.add("well " + (i+1));
-        rows.add(tmp);
+//        }
-        for(int row = 0; row < maxLength; row++){
+//        rows.add(tmp);
-            tmp = new ArrayList<>();
+//        for(int row = 0; row < maxLength; row++){
-            for(List<String> c: wellsAsStrings){
+//            tmp = new ArrayList<>();
-                tmp.add(c.get(row));
+//            for(List<String> c: wellsAsStrings){
-            }
+//                tmp.add(c.get(row));
-            rows.add(tmp);
+//            }
-        }
+//            rows.add(tmp);
-        StringBuilder concen = new StringBuilder();
+//        }
        for(Integer i: concentrations){
            concen.append(i.toString());
            concen.append(" ");
        }
        String concenString = concen.toString();
-        CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build();
+        //make string out of populations array
        StringBuilder populationsStringBuilder = new StringBuilder();
        populationsStringBuilder.append(populations[0].toString());
        for(int i = 1; i < populations.length; i++){
            populationsStringBuilder.append(", ");
            populationsStringBuilder.append(populations[i].toString());
        }
        String wellPopulationsString = populationsStringBuilder.toString();
        //set CSV format
        CSVFormat plateFileFormat = CSVFormat.Builder.create()
                .setCommentMarker('#')
                .build();
        try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
            CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
        ){
            printer.printComment("Cell source file name: " + sourceFileName);
            printer.printComment("Each row represents one well on the plate.");
            printer.printComment("Plate size: " + size);
            printer.printComment("Error rate: " + error);
-            printer.printComment("Concentrations: " + concenString);
+            printer.printComment("Well populations: " + wellPopulationsString);
-            printer.printComment("Std. dev.: " + stdDev);
+            if(isExponential){
                printer.printComment("Lambda: " + lambda);
            }
            else {
                printer.printComment("Std. dev.: " + stdDev);
            }
            printer.printRecords(wellsAsStrings);
        } catch(IOException ex){
            System.out.println("Could not make new file named "+filename);
--- a/src/main/java/Simulator.java
+++ b/src/main/java/Simulator.java
--- a/src/main/java/UserInterface.java
+++ b/src/main/java/UserInterface.java
@@ -1,279 +0,0 @@
 import java.util.List;
 import java.util.Scanner;
 import java.util.InputMismatchException;
 //
 public class UserInterface {
    final static Scanner sc = new Scanner(System.in);
    static int input;
    static boolean quit = false;
    /**
     * Runs the interactive main menu until the user selects 0, then closes the scanner.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String args[]) {
        while(!quit) {
            System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
            System.out.println("Please select an option:");
            System.out.println("1) Generate a population of distinct cells");
            System.out.println("2) Generate a sample plate of T cells");
            System.out.println("3) Simulate CDR3 alpha/beta T cell matching");
            System.out.println("4) Simulate CDR3/CDR1 T cell matching");
            System.out.println("5) Acknowledgements");
            System.out.println("0) Exit");
            try {
                input = sc.nextInt();
                switch(input){
                    case 1 -> makeCells();
                    case 2 -> makePlate();
                    case 3 -> matchCells();
                    case 4 -> matchCellsExpanded();
                    case 5 -> acknowledge();
                    case 0 -> quit = true;
                    // An out-of-range number has already been consumed by nextInt(), so it is
                    // reported here instead of being thrown: the catch block below would
                    // otherwise wait for, and throw away, the user's next token.
                    default -> System.out.println("Invalid input.");
                }
            }catch(InputMismatchException ex){
                System.out.println(ex);
                // nextInt() leaves the unparsable token in the scanner; discard it.
                sc.next();
            }
        }
        sc.close();
    }
    /**
     * Prompts for an output file name and a cell count, generates that many cells with
     * Simulator.generateExpandedCellSample, and writes them with a CellFileWriter.
     *
     * Returns to the caller without generating anything when the input is invalid.
     */
    private static void makeCells() {
        String filename = null;
        Integer numCells = 0;
        try {
            System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                    "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                    "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
            System.out.println("\nThe cells will be written to a file.");
            System.out.print("Please enter a file name: ");
            filename = sc.next();
            System.out.print("Please enter the number of T-cells to generate: ");
            numCells = sc.nextInt();
            if(numCells <= 0){
                throw new IllegalArgumentException("Number of cells must be a positive integer.");
            }
        } catch (InputMismatchException ex) {
            System.out.println(ex);
            // nextInt() leaves the unparsable token in the scanner; discard it.
            sc.next();
            System.out.println("Returning to main menu.");
            return;
        } catch (IllegalArgumentException ex) {
            // The offending value was already consumed, so nothing is discarded here.
            System.out.println(ex.getMessage());
            System.out.println("Returning to main menu.");
            return;
        }
        CellSample sample = Simulator.generateExpandedCellSample(numCells);
        CellFileWriter writer = new CellFileWriter(filename, sample);
        writer.writeCellsToFile();
    }
    /**
     * Prompts for the plate parameters (cell sample file, output file, distribution,
     * well count, sections with their per-well cell counts, dropout rate), fills a
     * Plate from the cell sample and writes it with a PlateFileWriter.
     *
     * Returns to the caller without creating a plate when the input is invalid.
     */
    private static void makePlate() {
        String cellFile = null;
        String filename = null;
        Double stdDev = 0.0;
        Integer numWells = 0;
        Integer numSections = 0;
        Integer[] concentrations = {1};
        Double dropOutRate = 0.0;
        boolean poisson = false;
        try {
            System.out.println("\nMaking a sample plate requires a population of distinct cells");
            System.out.println("Please enter name of an existing cell sample file: ");
            cellFile = sc.next();
            System.out.println("\nThe sample plate will be written to file");
            System.out.print("Please enter a name for the output file: ");
            filename = sc.next();
            System.out.println("Select T-cell frequency distribution function");
            System.out.println("1) Poisson");
            System.out.println("2) Gaussian");
            System.out.println("(Note: wider distributions are more memory intensive to match)");
            System.out.print("Enter selection value: ");
            input = sc.nextInt();
            switch(input) {
                case 1:
                    poisson = true;
                    break;
                case 2:
                    System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                    System.out.println("(Note: wider distributions are more memory intensive to match)");
                    stdDev = sc.nextDouble();
                    if(stdDev <= 0.0){
                        throw new IllegalArgumentException("Value must be positive.");
                    }
                    break;
                default:
                    System.out.println("Invalid input. Defaulting to Poisson.");
                    poisson = true;
            }
            System.out.print("Number of wells on plate: ");
            numWells = sc.nextInt();
            if(numWells < 1){
                throw new IllegalArgumentException("No wells on plate");
            }
            System.out.println("The plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
            System.out.println("How many sections would you like to make (minimum 1)?");
            numSections = sc.nextInt();
            if(numSections < 1) {
                throw new IllegalArgumentException("Too few sections.");
            }
            else if (numSections > numWells) {
                throw new IllegalArgumentException("Cannot have more sections than wells.");
            }
            int i = 1;
            concentrations = new Integer[numSections];
            while(numSections > 0) {
                System.out.print("Enter number of T-cells per well in section " + i +": ");
                concentrations[i - 1] = sc.nextInt();
                i++;
                numSections--;
            }
            System.out.println("Errors in amplification can induce a well dropout rate for peptides");
            System.out.print("Enter well dropout rate (0.0 to 1.0): ");
            dropOutRate = sc.nextDouble();
            if(dropOutRate < 0.0 || dropOutRate > 1.0) {
                throw new IllegalArgumentException("The well dropout rate must be in the range [0.0, 1.0]");
            }
        }catch(InputMismatchException ex){
            System.out.println(ex);
            // The scanner leaves the unparsable token in place; discard it.
            sc.next();
            System.out.println("Returning to main menu.");
            return;
        }catch(IllegalArgumentException ex){
            // The offending value was already consumed, so nothing is discarded here.
            // Returning keeps a rejected value (e.g. an out-of-range dropout rate) from being used.
            System.out.println(ex.getMessage());
            System.out.println("Returning to main menu.");
            return;
        }
        CellFileReader cellReader = new CellFileReader(cellFile);
        if(poisson) {
            stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
        }
        Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
        samplePlate.fillWells(cellReader.getCells());
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        writer.writePlateFile();
    }
    /**
     * Prompts for a cell sample file, a sample plate file, an output file name and the
     * low/high overlap thresholds, runs Simulator.matchCDR3s and writes the results
     * with a MatchingFileWriter.
     *
     * Returns to the caller without matching when the input is invalid, or when the
     * cell sample or the plate turns out to be empty.
     */
    private static void matchCells() {
        String filename = null;
        String cellFile = null;
        String plateFile = null;
        Integer lowThreshold = 0;
        Integer highThreshold = Integer.MAX_VALUE;
        try {
            System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
            System.out.print("Please enter name of an existing cell sample file: ");
            cellFile = sc.next();
            System.out.print("Please enter name of an existing sample plate file: ");
            plateFile = sc.next();
            System.out.println("The matching results will be written to a file.");
            System.out.print("Please enter a name for the output file: ");
            filename = sc.next();
            System.out.println("What is the minimum number of alpha/beta overlap wells to attempt matching?");
            lowThreshold = sc.nextInt();
            if(lowThreshold < 1){
                throw new IllegalArgumentException("Minimum value for low threshold is 1");
            }
            System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
            highThreshold = sc.nextInt();
        } catch (InputMismatchException ex) {
            System.out.println(ex);
            // nextInt() leaves the unparsable token in the scanner; discard it.
            sc.next();
            System.out.println("Returning to main menu.");
            return;
        } catch (IllegalArgumentException ex) {
            // The offending value was already consumed, so nothing is discarded here.
            System.out.println(ex.getMessage());
            System.out.println("Returning to main menu.");
            return;
        }
        CellFileReader cellReader = new CellFileReader(cellFile);
        PlateFileReader plateReader = new PlateFileReader(plateFile);
        Plate plate = new Plate(plateReader.getWells());
        if (cellReader.getCells().size() == 0){
            System.out.println("No cell sample found.");
            System.out.println("Returning to main menu.");
        }
        else if(plate.getWells().size() == 0){
            System.out.println("No sample plate found.");
            System.out.println("Returning to main menu.");
        }
        else{
            // Cap the high threshold at one less than the number of wells on the plate.
            if(highThreshold >= plate.getSize()){
                highThreshold = plate.getSize() - 1;
            }
            List<Integer[]> cells = cellReader.getCells();
            MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold);
            //result writer
            MatchingFileWriter writer = new MatchingFileWriter(filename, results.getComments(),
                    results.getHeaders(), results.getAllResults());
            writer.writeResultsToFile();
        }
    }
    /**
     * Interactively runs the expanded (CDR3 then CDR1) matching experiment.
     *
     * The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match
     * CDR3s to CDR1s by looking at the top two matches for each CDR3. If CDR3s in the same cell
     * simply swap CDR1s, we assume a correct match.
     *
     * Prompts (via the shared scanner) for a cell sample file, a sample plate file, an output
     * file name, and low/high overlap-well thresholds for both the CDR3 and the CDR3/CDR1
     * matching passes. Both high thresholds are clamped to {@code plate.getSize() - 1}. Two
     * result files are written, named by appending "First" and "Dual" to the output name.
     *
     * If any prompt receives invalid input, or either low threshold is below 1, the problem is
     * reported and the method returns without running the experiment, rather than continuing
     * with missing file names or unvalidated thresholds.
     */
    public static void matchCellsExpanded(){
        String filename = null;
        String cellFile = null;
        String plateFile = null;
        Integer lowThresholdCDR3 = 0;
        Integer highThresholdCDR3 = Integer.MAX_VALUE;
        Integer lowThresholdCDR1 = 0;
        Integer highThresholdCDR1 = Integer.MAX_VALUE;
        try {
            System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
            System.out.print("Please enter name of an existing cell sample file: ");
            cellFile = sc.next();
            System.out.print("Please enter name of an existing sample plate file: ");
            plateFile = sc.next();
            System.out.println("The matching results will be written to a file.");
            System.out.print("Please enter a name for the output file: ");
            filename = sc.next();
            System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
            lowThresholdCDR3 = sc.nextInt();
            if (lowThresholdCDR3 < 1) {
                // The integer token was already consumed, so nothing must be discarded from the
                // scanner here; report the range error and abandon the experiment.
                System.out.println("Minimum value for low threshold is 1");
                System.out.println("Returning to main menu.");
                return;
            }
            System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
            highThresholdCDR3 = sc.nextInt();
            System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
            lowThresholdCDR1 = sc.nextInt();
            if (lowThresholdCDR1 < 1) {
                System.out.println("Minimum value for low threshold is 1");
                System.out.println("Returning to main menu.");
                return;
            }
            System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
            highThresholdCDR1 = sc.nextInt();
        } catch (InputMismatchException ex) {
            // Only the scanner throws here, and it leaves the offending token in the input:
            // discard it so the next prompt does not re-read it, then abandon the experiment.
            System.out.println(ex);
            sc.next();
            System.out.println("Returning to main menu.");
            return;
        }
        CellFileReader cellReader = new CellFileReader(cellFile);
        PlateFileReader plateReader = new PlateFileReader(plateFile);
        Plate plate = new Plate(plateReader.getWells());
        if (cellReader.getCells().size() == 0) {
            System.out.println("No cell sample found.");
            System.out.println("Returning to main menu.");
            return;
        }
        if (plate.getWells().size() == 0) {
            System.out.println("No sample plate found.");
            System.out.println("Returning to main menu.");
            return;
        }
        // Both high thresholds are capped at one less than the plate size.
        if (highThresholdCDR3 >= plate.getSize()) {
            highThresholdCDR3 = plate.getSize() - 1;
        }
        if (highThresholdCDR1 >= plate.getSize()) {
            highThresholdCDR1 = plate.getSize() - 1;
        }
        List<Integer[]> cells = cellReader.getCells();
        // First pass: CDR3 matching. Its match map and time are passed on to the CDR1 pass.
        MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3);
        MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
                highThresholdCDR1, preliminaryResults.getMatchMap(), preliminaryResults.getTime());
        //result writer
        MatchingFileWriter writer = new MatchingFileWriter(filename + "First", results[0].getComments(),
                results[0].getHeaders(), results[0].getAllResults());
        writer.writeResultsToFile();
        writer = new MatchingFileWriter(filename + "Dual", results[1].getComments(),
                results[1].getHeaders(), results[1].getAllResults());
        writer.writeResultsToFile();
    }
    /**
     * Prints the literature citation the simulation is based on, followed by a blank line and
     * the simulation author credit, one line each to standard output.
     */
    private static void acknowledge(){
        final String[] creditLines = {
                "Simulation based on:",
                "Howie, B., Sherwood, A. M., et. al.",
                "High-throughput pairing of T cell receptor alpha and beta sequences.",
                "Sci. Transl. Med. 7, 301ra131 (2015)",
                "",
                "Simulation by Eugene Fischer, 2021"
        };
        for (String creditLine : creditLines) {
            System.out.println(creditLine);
        }
    }
 }
--- a/src/main/java/Vertex.java
+++ b/src/main/java/Vertex.java
@@ -0,0 +1,23 @@
 /**
  * Immutable holder for three Integer values: a vertex label, a sequence, and an occupancy.
  * All three are fixed at construction and exposed only through getters.
  * NOTE(review): presumably used as the vertex type of the matching graph - confirm against callers.
  */
 public class Vertex {
    // Set once by the constructor and never reassigned.
    private final Integer vertexLabel;
    private final Integer sequence;
    private final Integer occupancy;

    /**
     * Stores the three supplied values as given; no validation or copying is performed.
     *
     * @param vertexLabel value returned by {@link #getVertexLabel()}
     * @param sequence value returned by {@link #getSequence()}
     * @param occupancy value returned by {@link #getOccupancy()}
     */
    public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) {
        this.occupancy = occupancy;
        this.sequence = sequence;
        this.vertexLabel = vertexLabel;
    }

    /** @return the label passed to the constructor */
    public Integer getVertexLabel() {
        return this.vertexLabel;
    }

    /** @return the sequence passed to the constructor */
    public Integer getSequence() { return this.sequence; }

    /** @return the occupancy passed to the constructor */
    public Integer getOccupancy() { return this.occupancy; }
 }
Author	SHA1	Message	Date
efischer	6f5afbc6ec	Update readme with CLI arguments	2022-02-27 17:01:12 -06:00
efischer	fb4d22e7a4	Update readme with CLI arguments	2022-02-27 17:00:54 -06:00
efischer	e10350c214	Update readme with CLI arguments	2022-02-27 16:56:58 -06:00
efischer	b1155f8100	Format -help CLI option	2022-02-27 16:53:46 -06:00
efischer	12b003a69f	Add -help CLI option	2022-02-27 16:45:30 -06:00
efischer	32c5bcaaff	Deactivate file I/O announcement for CLI	2022-02-27 16:16:24 -06:00
efischer	2485ac4cf6	Add getters to MatchingResult	2022-02-27 16:15:26 -06:00
efischer	05556bce0c	Add units to metadata	2022-02-27 16:08:59 -06:00
efischer	a822f69ea4	Control verbose output	2022-02-27 16:07:17 -06:00
efischer	3d1f8668ee	Control verbose output	2022-02-27 16:03:57 -06:00
efischer	40c743308b	Initialize wells	2022-02-27 15:54:47 -06:00
efischer	5246cc4a0c	Re-implement command line options	2022-02-27 15:35:07 -06:00
efischer	a5f7c0641d	Refactor for better encapsulation with CellSamples	2022-02-27 14:51:53 -06:00
efischer	8ebfc1469f	Refactor plate to fill its own wells in its constructor	2022-02-27 14:25:53 -06:00
efischer	b53f5f1cc0	Refactor plate to fill its own wells in its constructor	2022-02-27 14:17:16 -06:00
efischer	974d2d650c	Refactor plate to fill its own wells in its constructor	2022-02-27 14:17:11 -06:00
efischer	6b5837e6ce	Add Vose's alias method to to-dos	2022-02-27 11:46:11 -06:00
efischer	b4cc240048	Update Readme	2022-02-26 11:03:31 -06:00
efischer	ff72c9b359	Update Readme	2022-02-26 11:02:23 -06:00
efischer	88eb8aca50	Update Readme	2022-02-26 11:01:44 -06:00
efischer	98bf452891	Update Readme	2022-02-26 11:01:20 -06:00
efischer	c2db4f87c1	Update Readme	2022-02-26 11:00:18 -06:00
efischer	8935407ade	Get rid of GraphML reader, those files are larger than serialized files	2022-02-26 10:38:10 -06:00
efischer	9fcc20343d	Fix GraphML writer	2022-02-26 10:36:00 -06:00
efischer	e4d094d796	Adding GraphML output to options menu	2022-02-24 17:22:07 -06:00
efischer	f385ebc31f	Update vertex class	2022-02-24 16:25:01 -06:00
efischer	8745550e11	add MWM algorithm type to matching metadata	2022-02-24 16:24:48 -06:00
efischer	41805135b3	remove unused import	2022-02-24 16:04:30 -06:00
efischer	373a5e02f9	Refactor to make CellSample class more self-contained	2022-02-24 16:03:49 -06:00
efischer	7f18311054	fix typos	2022-02-24 15:55:32 -06:00
efischer	bcb816c3e6	Reformat TODO	2022-02-24 15:48:10 -06:00
efischer	dad0fd35fd	Update readme to reflect wells with random population implemented	2022-02-24 15:47:08 -06:00
efischer	35d580cfcf	Update readme to reflect wells with random population implemented	2022-02-24 15:45:03 -06:00
efischer	ab8d98ed81	Update readme to reflect new default caching behavior.	2022-02-24 15:39:15 -06:00
efischer	3d9890e16a	Change GraphModificationFunctions to only save edges if graph data is cached	2022-02-24 15:32:27 -06:00
efischer	dd64ac2731	Change GraphModificationFunctions to interface	2022-02-24 15:18:09 -06:00
efischer	a5238624f1	Change default graph caching behavior to false	2022-02-24 15:14:28 -06:00
efischer	d8ba42b801	Fix Algorithm Options menu output	2022-02-24 14:59:08 -06:00
efischer	8edd89d784	Added heap type selection, fixed error handling	2022-02-24 14:48:19 -06:00
efischer	2829b88689	Update readme to reflect caching changes	2022-02-24 12:47:26 -06:00
efischer	108b0ec13f	Improve options menu wording	2022-02-24 12:42:09 -06:00
efischer	a8b58d3f79	Output new setting when changing options	2022-02-24 12:38:15 -06:00
efischer	bf64d57731	implement option menu for file caching	2022-02-24 12:30:47 -06:00
efischer	c068c3db3c	implement option menu for file caching	2022-02-23 20:35:31 -06:00
efischer	4bcda9b66c	update readme	2022-02-23 13:22:04 -06:00
efischer	17ae763c6c	Generate populations correctly	2022-02-23 10:37:40 -06:00
efischer	decdb147a9	Cache everything	2022-02-23 10:30:42 -06:00
efischer	74ffbfd8ac	make everything use same random number generator	2022-02-23 09:29:21 -06:00
efischer	08699ce8ce	Change output order to match interactive UI	2022-02-23 08:56:09 -06:00
efischer	69b0cc535c	Error checking	2022-02-23 08:55:07 -06:00
efischer	e58f7b0a55	checking for possible divide by zero error.	2022-02-23 08:54:14 -06:00
efischer	dd2164c250	implement sample plates with random well populations	2022-02-23 08:14:17 -06:00
efischer	7323093bdc	change "getRandomNumber" to "getRandomInt" for consistency.	2022-02-23 08:13:52 -06:00
efischer	f904cf6672	add more data caching code	2022-02-23 08:13:06 -06:00
efischer	3ccee9891b	change "concentrations" to "populations" for consistency	2022-02-23 08:12:48 -06:00
efischer	40c2be1cfb	create populations string correctly	2022-02-23 08:11:01 -06:00
efischer	4b597c4e5e	remove old testing code	2022-02-23 08:10:35 -06:00
efischer	b2398531a3	Update readme	2022-02-23 05:11:36 +00:00
efischer	8e9a250890	Cache graph data on creation	2022-02-22 22:23:55 -06:00
efischer	e2a996c997	update readme	2022-02-22 22:23:40 -06:00
efischer	a5db89cb0b	update readme	2022-02-22 22:13:01 -06:00
efischer	1630f9ccba	Moved I/O alert to file reader	2022-02-22 22:11:50 -06:00
efischer	d785aa0da2	Moved I/O alert to file reader	2022-02-22 22:10:31 -06:00
efischer	a7afeb6119	bugfixes	2022-02-22 22:10:09 -06:00
efischer	f8167b0774	Add .jar manifest to repo	2022-02-22 21:45:46 -06:00
efischer	68ee9e4bb6	Implemented storing graphs in memory for multiple pairing experiments	2022-02-22 21:30:00 -06:00
efischer	fd2ec76b71	Realized how to store graph in memory	2022-02-22 19:42:35 -06:00
efischer	875f457a2d	reimplement CLI (in progress)	2022-02-22 19:42:23 -06:00
efischer	906c06062f	Added metadata to MatchingResult to enable CLI options	2022-02-22 18:36:30 -06:00
efischer	90ae2ff474	Re-implemeting CLI options (in progress)	2022-02-22 17:37:00 -06:00
efischer	7d983076f3	Add link to releases page for download	2022-02-22 16:34:24 -06:00
efischer	4b053e6ec4	Remove artifacts from tracking to stop repo bloat.	2022-02-22 16:14:50 -06:00
efischer	44784b7976	Remove artifacts from tracking to stop repo bloat.	2022-02-22 16:10:22 -06:00
efischer	7c19896dc9	update readme	2022-02-22 16:09:50 -06:00
efischer	aec7e3016f	Typos in documentation	2022-02-21 11:19:54 -06:00
efischer	5c75c1ac09	Update readme.md	2022-02-21 06:53:30 +00:00
efischer	cb1f7adece	Change "peptide" references in code to "sequence", adding comments	2022-02-21 00:29:34 -06:00
efischer	370de79546	Add performance section to readme	2022-02-21 00:02:49 -06:00
efischer	a803336f56	Add performance section to readme	2022-02-21 00:01:20 -06:00
efischer	94b54b3416	Add performance section to readme	2022-02-20 23:31:25 -06:00
efischer	601e141fd0	Update readme	2022-02-20 22:51:49 -06:00
efischer	8f9c6b7d33	Update readme TODO	2022-02-20 20:59:05 -06:00
efischer	e5ddc73723	Finish reverting back to wells-as-rows	2022-02-20 20:54:44 -06:00
efischer	9b18fac74f	Invoke garbage collection	2022-02-20 20:47:12 -06:00
efischer	63ef6aa7a0	Revert attempt to switch plate output format. It worked, but introduced a bug in graph filtering I don't want to chase down	2022-02-20 20:45:35 -06:00
efischer	7558455f39	Correct errors in output and documentation	2022-02-20 20:13:38 -06:00
efischer	410f0ae547	Remove testing code, add garbage collection calls	2022-02-20 20:06:45 -06:00
efischer	1bc6a11545	Change plate reader/writer to use columns as wells	2022-02-20 19:58:24 -06:00
efischer	2b13e10e95	Change plate reader/writer to use columns as wells	2022-02-20 19:48:09 -06:00
efischer	4fd5baeb98	Change plate reader/writer to use columns as wells	2022-02-20 19:41:06 -06:00
efischer	b2a4e9a42b	Change plate reader/writer to use columns as wells	2022-02-20 19:17:56 -06:00
efischer	d1bb49b482	Change plate reader/writer to use columns as wells	2022-02-20 19:12:11 -06:00
efischer	9adb7dffb8	Change plate reader/writer to use columns as wells	2022-02-20 19:08:04 -06:00
efischer	2023bb9d7e	Cleanup file output, add UI verbosity	2022-02-20 18:31:31 -06:00
efischer	405fbf17ff	improve documentation	2022-02-20 17:11:39 -06:00
efischer	24519f4a52	improve documentation	2022-02-20 17:04:25 -06:00
efischer	2afd01eeef	improve documentation	2022-02-20 15:48:11 -06:00
efischer	10d0b711bf	improve documentation	2022-02-20 15:38:40 -06:00
efischer	8f98baf44e	improve documentation	2022-02-20 15:37:39 -06:00
efischer	d6c7c40c96	improve documentation	2022-02-20 13:23:15 -06:00
efischer	61c14b2ecf	improve documentation	2022-02-20 13:20:47 -06:00
efischer	22fc4aedfe	improve documentation	2022-02-20 13:18:49 -06:00
efischer	5d24dc6f70	improve documentation	2022-02-20 13:15:32 -06:00
efischer	2c01a0211c	move readme	2022-02-20 12:02:27 -06:00
efischer	f2b5d9e1b7	Rename and update readme	2022-02-20 11:58:12 -06:00
efischer	74c8cafd81	scan for filename	2022-02-20 03:08:31 -06:00
efischer	d1c37b5ccd	Relocate overlap threshold filters	2022-02-20 03:05:56 -06:00
efischer	cb2c5a6024	Add plate well concentrations to output data	2022-02-20 02:29:42 -06:00
efischer	284a5b3a40	Add plate well concentrations to output data	2022-02-20 02:23:31 -06:00
efischer	52afb1edc2	Add plate well concentrations to output data	2022-02-20 02:17:36 -06:00
efischer	9c52bc878a	Add plate well concentrations to output data	2022-02-20 02:13:13 -06:00
efischer	248fe4d662	Add plate well concentrations to output data	2022-02-20 02:09:22 -06:00
efischer	5d0e60708c	Add plate well concentrations to output data	2022-02-20 01:53:34 -06:00
efischer	c96b7237e9	Add plate well concentrations to output data	2022-02-20 01:40:01 -06:00
efischer	0b28259800	Add plate well concentrations to output data	2022-02-20 01:13:22 -06:00
efischer	837ef7bfe4	UI cleanup, some code cleanup	2022-02-20 01:05:28 -06:00
efischer	0bebbc7602	Add missing filtering code	2022-02-19 22:56:38 -06:00
efischer	84f7ddb696	Fix interactive output	2022-02-19 22:49:50 -06:00
efischer	c4633da9eb	Correct propogation of peptide counts	2022-02-19 22:33:38 -06:00
efischer	5b2ed165d0	Clean up interactive text, bugfix	2022-02-19 22:21:09 -06:00
efischer	0026d8cdfe	Use buffered input/output streams	2022-02-19 22:04:41 -06:00
efischer	13fb7168bf	Refactor to read/write files of graph and map data	2022-02-19 21:46:01 -06:00
efischer	568a6be3c7	Refactoring to allow graphs from file	2022-02-19 17:23:55 -06:00
efischer	cfa473c7ce	Adding parameters to filter by occupancy difference and percent overlap	2022-02-19 14:06:11 -06:00
efischer	6faacd9a82	Adding parameters to filter by occupancy difference and percent overlap	2022-02-19 14:05:26 -06:00
efischer	ce88e170c1	Update readme with max memory flag	2022-02-18 17:48:25 -06:00
efischer	47e23addfa	Do new filtering before matching	2022-02-18 17:42:05 -06:00
efischer	b9ee31b64c	Do new filtering before matching	2022-02-18 17:28:24 -06:00
efischer	bf32a55e4b	Implement matching using jheaps PairingHeap	2022-02-18 16:09:23 -06:00
efischer	acff88475b	Command line arguments working, need better documentation and error handling	2021-11-23 12:24:48 -06:00
efischer	32593308df	Command line arguments for CDR3 matching implemented	2021-11-20 10:43:57 -06:00
efischer	981e24011d	First attempt at command line arguments	2021-11-19 17:31:18 -06:00
efischer	3d0a843cea	Adding command line argument parsing library	2021-11-18 16:19:11 -06:00
efischer	c09ef27822	Adding command line argument parsing library	2021-11-18 16:15:05 -06:00
efischer	2ab93dd4b7	Recording source file names in output files, allowing output of intermediate results	2021-11-18 15:38:29 -06:00
efischer	09aa5961f3	New executable build after code refactor.	2021-11-18 14:16:42 -06:00
efischer	34e96d3b3d	Refactor to reduce code repetition	2021-11-18 14:11:04 -06:00
efischer	2064d7e9fc	Implemented parameter for CDR1 frequency	2021-11-18 11:55:54 -06:00
efischer	4157cfb556	Usage instructions for executable .jar	2021-11-16 21:27:54 -06:00
		`@@ -0,0 +1,3 @@`
							`Manifest-Version: 1.0`
							`Main-Class: BiGpairSEQ`