Compare commits
174 Commits
cb6330166f
...
v3.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4e5a1f979 | ||
|
|
73c83bf35d | ||
|
|
06e72314b0 | ||
|
|
63317f2aa0 | ||
|
|
a054c0c20a | ||
|
|
29b844afd2 | ||
|
|
3ba305abdb | ||
|
|
3707923398 | ||
|
|
cf771ce574 | ||
| f980722b56 | |||
| 1df86f01df | |||
| 96ba57d653 | |||
| b602fb02f1 | |||
| 325e1ebe2b | |||
| df047267ee | |||
| 03e8d31210 | |||
| 582dc3ef40 | |||
| 4c872ed48e | |||
| 3fc39302c7 | |||
| 578bdc0fbf | |||
| 8275cf7740 | |||
| 64209691f0 | |||
| 1886800873 | |||
| bedf0894bc | |||
| 2ac3451842 | |||
| 67ec3f3764 | |||
| b5a8b7e2d5 | |||
| 9fb3095f0f | |||
| 25acf920c2 | |||
| f301327693 | |||
| e04d2d6777 | |||
| 3e41afaa64 | |||
| bc5d67680d | |||
| f2347e8fc2 | |||
| c8364d8a6e | |||
| 6f5afbc6ec | |||
| fb4d22e7a4 | |||
| e10350c214 | |||
| b1155f8100 | |||
| 12b003a69f | |||
| 32c5bcaaff | |||
| 2485ac4cf6 | |||
| 05556bce0c | |||
| a822f69ea4 | |||
| 3d1f8668ee | |||
| 40c743308b | |||
| 5246cc4a0c | |||
| a5f7c0641d | |||
| 8ebfc1469f | |||
| b53f5f1cc0 | |||
| 974d2d650c | |||
| 6b5837e6ce | |||
| b4cc240048 | |||
| ff72c9b359 | |||
| 88eb8aca50 | |||
| 98bf452891 | |||
| c2db4f87c1 | |||
| 8935407ade | |||
| 9fcc20343d | |||
| e4d094d796 | |||
| f385ebc31f | |||
| 8745550e11 | |||
| 41805135b3 | |||
| 373a5e02f9 | |||
| 7f18311054 | |||
| bcb816c3e6 | |||
| dad0fd35fd | |||
| 35d580cfcf | |||
| ab8d98ed81 | |||
| 3d9890e16a | |||
| dd64ac2731 | |||
| a5238624f1 | |||
| d8ba42b801 | |||
| 8edd89d784 | |||
| 2829b88689 | |||
| 108b0ec13f | |||
| a8b58d3f79 | |||
| bf64d57731 | |||
| c068c3db3c | |||
| 4bcda9b66c | |||
| 17ae763c6c | |||
| decdb147a9 | |||
| 74ffbfd8ac | |||
| 08699ce8ce | |||
| 69b0cc535c | |||
| e58f7b0a55 | |||
| dd2164c250 | |||
| 7323093bdc | |||
| f904cf6672 | |||
| 3ccee9891b | |||
| 40c2be1cfb | |||
| 4b597c4e5e | |||
| b2398531a3 | |||
| 8e9a250890 | |||
| e2a996c997 | |||
| a5db89cb0b | |||
| 1630f9ccba | |||
| d785aa0da2 | |||
| a7afeb6119 | |||
| f8167b0774 | |||
| 68ee9e4bb6 | |||
| fd2ec76b71 | |||
| 875f457a2d | |||
| 906c06062f | |||
| 90ae2ff474 | |||
| 7d983076f3 | |||
| 4b053e6ec4 | |||
| 44784b7976 | |||
| 7c19896dc9 | |||
| aec7e3016f | |||
| 5c75c1ac09 | |||
| cb1f7adece | |||
| 370de79546 | |||
| a803336f56 | |||
| 94b54b3416 | |||
| 601e141fd0 | |||
| 8f9c6b7d33 | |||
| e5ddc73723 | |||
| 9b18fac74f | |||
| 63ef6aa7a0 | |||
| 7558455f39 | |||
| 410f0ae547 | |||
| 1bc6a11545 | |||
| 2b13e10e95 | |||
| 4fd5baeb98 | |||
| b2a4e9a42b | |||
| d1bb49b482 | |||
| 9adb7dffb8 | |||
| 2023bb9d7e | |||
| 405fbf17ff | |||
| 24519f4a52 | |||
| 2afd01eeef | |||
| 10d0b711bf | |||
| 8f98baf44e | |||
| d6c7c40c96 | |||
| 61c14b2ecf | |||
| 22fc4aedfe | |||
| 5d24dc6f70 | |||
| 2c01a0211c | |||
| f2b5d9e1b7 | |||
| 74c8cafd81 | |||
| d1c37b5ccd | |||
| cb2c5a6024 | |||
| 284a5b3a40 | |||
| 52afb1edc2 | |||
| 9c52bc878a | |||
| 248fe4d662 | |||
| 5d0e60708c | |||
| c96b7237e9 | |||
| 0b28259800 | |||
| 837ef7bfe4 | |||
| 0bebbc7602 | |||
| 84f7ddb696 | |||
| c4633da9eb | |||
| 5b2ed165d0 | |||
| 0026d8cdfe | |||
| 13fb7168bf | |||
| 568a6be3c7 | |||
| cfa473c7ce | |||
| 6faacd9a82 | |||
| ce88e170c1 | |||
| 47e23addfa | |||
| b9ee31b64c | |||
| bf32a55e4b | |||
| acff88475b | |||
| 32593308df | |||
| 981e24011d | |||
| 3d0a843cea | |||
| c09ef27822 | |||
| 2ab93dd4b7 | |||
| 09aa5961f3 | |||
| 34e96d3b3d | |||
| 2064d7e9fc | |||
| 4157cfb556 |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/out/
|
||||
@@ -1,15 +1,16 @@
|
||||
<component name="ArtifactManager">
|
||||
<artifact type="jar" name="TCellSim:jar">
|
||||
<output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path>
|
||||
<root id="archive" name="TCellSim.jar">
|
||||
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
|
||||
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
|
||||
<root id="archive" name="BiGpairSEQ_Sim.jar">
|
||||
<element id="directory" name="META-INF">
|
||||
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
|
||||
</element>
|
||||
<element id="module-output" name="TCellSim" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
|
||||
<element id="module-output" name="BigPairSEQ" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
|
||||
</root>
|
||||
</artifact>
|
||||
</component>
|
||||
2
.idea/compiler.xml
generated
2
.idea/compiler.xml
generated
@@ -6,7 +6,7 @@
|
||||
<sourceOutputDir name="target/generated-sources/annotations" />
|
||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
||||
<outputRelativeToContentRoot value="true" />
|
||||
<module name="TCellSim" />
|
||||
<module name="BigPairSEQ" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
</component>
|
||||
|
||||
10
.idea/libraries/commons_cli.xml
generated
Normal file
10
.idea/libraries/commons_cli.xml
generated
Normal file
@@ -0,0 +1,10 @@
|
||||
<component name="libraryTable">
|
||||
<library name="commons.cli" type="repository">
|
||||
<properties maven-id="commons-cli:commons-cli:1.5.0" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
||||
15
.idea/libraries/jgrapht_io.xml
generated
Normal file
15
.idea/libraries/jgrapht_io.xml
generated
Normal file
@@ -0,0 +1,15 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jgrapht.io" type="repository">
|
||||
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
||||
10
.idea/libraries/jheaps.xml
generated
Normal file
10
.idea/libraries/jheaps.xml
generated
Normal file
@@ -0,0 +1,10 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jheaps" type="repository">
|
||||
<properties maven-id="org.jheaps:jheaps:0.14" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
||||
Binary file not shown.
384
readme.md
Normal file
384
readme.md
Normal file
@@ -0,0 +1,384 @@
|
||||
# BiGpairSEQ SIMULATOR
|
||||
|
||||
|
||||
## ABOUT
|
||||
|
||||
This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
|
||||
of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
|
||||
|
||||
## THEORY
|
||||
|
||||
Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
|
||||
against a null distribution, BiGpairSEQ does not do any statistical calculations
|
||||
directly.
|
||||
|
||||
BiGpairSEQ creates a [weighted bipartite graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
|
||||
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that shares a well
|
||||
is connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
|
||||
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
|
||||
The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
|
||||
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
||||
|
||||
This is a well-studied combinatorial optimization problem, with many known solutions.
|
||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral
|
||||
weights is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
||||
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
||||
|
||||
Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
|
||||
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
|
||||
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Mehlhorn and Näher (1999).
|
||||
|
||||
## USAGE
|
||||
|
||||
### RUNNING THE PROGRAM
|
||||
|
||||
[Download the current version of BiGpairSEQ_Sim.](https://gitea.ejsf.synology.me/efischer/BiGpairSEQ/releases)
|
||||
|
||||
BiGpairSEQ_Sim is an executable .jar file. Requires Java 14 or higher. [OpenJDK 17](https://jdk.java.net/17/)
|
||||
recommended.
|
||||
|
||||
Run with the command:
|
||||
|
||||
`java -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
Processing sample plates with tens of thousands of sequences may require large amounts
|
||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the `-Xmx` flag.
|
||||
For example, to run the program with 32 gigabytes of memory, use the command:
|
||||
|
||||
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
|
||||
use the `-help` flag:
|
||||
|
||||
`java -jar BiGpairSEQ_Sim.jar -help`
|
||||
|
||||
If no command line arguments are given, BiGpairSEQ_Sim will launch with an interactive, menu-driven CLI for
|
||||
generating files and simulating TCR pairing. The main menu looks like this:
|
||||
|
||||
```
|
||||
--------BiGPairSEQ SIMULATOR--------
|
||||
ALPHA/BETA T CELL RECEPTOR MATCHING
|
||||
USING WEIGHTED BIPARTITE GRAPHS
|
||||
------------------------------------
|
||||
Please select an option:
|
||||
1) Generate a population of distinct cells
|
||||
2) Generate a sample plate of T cells
|
||||
3) Generate CDR3 alpha/beta occupancy data and overlap graph
|
||||
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
|
||||
8) Options
|
||||
9) About/Acknowledgments
|
||||
0) Exit
|
||||
```
|
||||
|
||||
By default, the Options menu looks like this:
|
||||
```
|
||||
--------------OPTIONS---------------
|
||||
1) Turn on cell sample file caching
|
||||
2) Turn on plate file caching
|
||||
3) Turn on graph/data file caching
|
||||
4) Turn off serialized binary graph output
|
||||
5) Turn on GraphML graph output
|
||||
6) Maximum weight matching algorithm options
|
||||
0) Return to main menu
|
||||
```
|
||||
|
||||
|
||||
### INPUT/OUTPUT
|
||||
|
||||
To run the simulation, the program reads and writes 4 kinds of files:
|
||||
* Cell Sample files in CSV format
|
||||
* Sample Plate files in CSV format
|
||||
* Graph/Data files in binary object serialization format
|
||||
* Matching Results files in CSV format
|
||||
|
||||
These files are often generated in sequence. When entering filenames, it is not necessary to include the file extension
|
||||
(.csv or .ser). When reading or writing files, the program will automatically add the correct extension to any filename
|
||||
without one.
|
||||
|
||||
To save file I/O time, the most recent instance of each of these four
|
||||
files either generated or read from disk can be cached in program memory. When caching is active, subsequent uses of the
|
||||
same data file won't need to be read in again until another file of that type is used or generated,
|
||||
or caching is turned off for that file type. The program checks whether it needs to update its cached data by comparing
|
||||
filenames as entered by the user. On encountering a new filename, the program flushes its cache and reads in the new file.
|
||||
|
||||
(Note that cached Graph/Data files must be transformed back into their original state after a matching experiment, which
|
||||
may take some time. Whether file I/O or graph transformation takes longer for graph/data files is likely to be
|
||||
device-specific.)
|
||||
|
||||
The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
|
||||
|
||||
The program can optionally output Graph/Data files in GraphML format (.graphml) for data portability. This can be
|
||||
turned on in the Options menu. By default, GraphML output is OFF.
|
||||
|
||||
---
|
||||
#### Cell Sample Files
|
||||
Cell Sample files consist of any number of distinct "T cells." Every cell contains
|
||||
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
|
||||
random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
|
||||
are not necessarily unique; the relative diversity can be set when making the file.
|
||||
|
||||
(Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
|
||||
|
||||
Options when making a Cell Sample file:
|
||||
* Number of T cells to generate
|
||||
* Factor by which CDR3s are more diverse than CDR1s
|
||||
|
||||
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
|
||||
Comments are preceded by `#`
|
||||
|
||||
Structure:
|
||||
|
||||
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
|
||||
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|
||||
|---|---|---|---|
|
||||
|unique number|unique number|number|number|
|
||||
|
||||
---
|
||||
|
||||
#### Sample Plate Files
|
||||
Sample Plate files consist of any number of "wells" containing any number of T cells (as
|
||||
described above). The wells are filled randomly from a Cell Sample file, according to a selected
|
||||
frequency distribution. Additionally, every individual sequence within each cell may, with some
|
||||
given dropout probability, be omitted from the file; this simulates the effect of amplification errors
|
||||
prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a
|
||||
different concentration of T cells per well.
|
||||
|
||||
Options when making a Sample Plate file:
|
||||
* Cell Sample file to use
|
||||
* Statistical distribution to apply to Cell Sample file
|
||||
* Poisson
|
||||
* Gaussian
|
||||
* Standard deviation size
|
||||
* Exponential
|
||||
* Lambda value
|
||||
* *(Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was approximately exponential with a lambda ~0.6. (Howie, et al. 2015))*
|
||||
* Total number of wells on the plate
|
||||
* Well populations random or fixed
|
||||
* If random, minimum and maximum population sizes
|
||||
* If fixed
|
||||
* Number of sections on plate
|
||||
* Number of T cells per well
|
||||
* per section, if more than one section
|
||||
* Dropout rate
|
||||
|
||||
Files are in CSV format. There are no header labels. Every row represents a well.
|
||||
Every value represents an individual cell, containing four sequences, depicted as an array string:
|
||||
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
|
||||
|
||||
`[525902, 791533, -1, 866282]`
|
||||
|
||||
Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
|
||||
Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
|
||||
|
||||
Structure:
|
||||
|
||||
```
|
||||
# Cell source file name:
|
||||
# Each row represents one well on the plate
|
||||
# Plate size:
|
||||
# Concentrations:
|
||||
# Lambda -or- StdDev:
|
||||
```
|
||||
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|
||||
|---|---|---|---|
|
||||
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
|
||||
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
|
||||
| **...** | **...** | **...** | **...** |
|
||||
|
||||
---
|
||||
|
||||
#### Graph/Data Files
|
||||
Graph/Data files are serialized binaries of a Java object containing the weighted bipartite graph representation of a
|
||||
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
|
||||
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
|
||||
Sample Plate file (to construct the associated occupancy graph).
|
||||
|
||||
These files can be several gigabytes in size. Writing them to a file lets us generate a graph and its metadata once,
|
||||
then use it for multiple different BiGpairSEQ simulations.
|
||||
|
||||
Options for creating a Graph/Data file:
|
||||
* The Cell Sample file to use
|
||||
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
|
||||
|
||||
These files do not have a human-readable structure, and are not portable to other programs.
|
||||
|
||||
*Optional GraphML output*
|
||||
|
||||
For portability of graph data to other software, turn on [GraphML](http://graphml.graphdrawing.org/index.html) output
|
||||
in the Options menu in interactive mode, or use the `-graphml` command line argument. This will produce a .graphml file
|
||||
for the weighted graph, with vertex attributes for sequence, type, and occupancy data. This graph contains all the data
|
||||
necessary for the BiGpairSEQ matching algorithm. It does not include the data to measure pairing accuracy; for that,
|
||||
compare the matching results to the original Cell Sample .csv file.
|
||||
|
||||
---
|
||||
|
||||
#### Matching Results Files
|
||||
Matching results files consist of the results of a BiGpairSEQ matching simulation. Making them requires a serialized
|
||||
binary Graph/Data file (.ser). (Because .graphml files are larger than .ser files, BiGpairSEQ_Sim supports GraphML
|
||||
for output only. Graph/data input must use a serialized binary.)
|
||||
|
||||
Matching results files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
|
||||
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
|
||||
|
||||
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
|
||||
* The minimum number of alpha/beta overlap wells to attempt to match
|
||||
* (must be >= 1)
|
||||
* The maximum number of alpha/beta overlap wells to attempt to match
|
||||
* (must be <= the number of wells on the plate - 1)
|
||||
* The maximum difference in alpha/beta occupancy to attempt to match
|
||||
* (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
|
||||
* The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as a value in the range 0 - 100.
|
||||
* (Optional. To skip using this filter, enter 0)
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
# Source Sample Plate file: 4MilCellsPlate.csv
|
||||
# Source Graph and Data file: 4MilCellsPlateGraph.ser
|
||||
# T cell counts in sample plate wells: 30000
|
||||
# Total alphas found: 11813
|
||||
# Total betas found: 11808
|
||||
# High overlap threshold: 94
|
||||
# Low overlap threshold: 3
|
||||
# Minimum overlap percent: 0
|
||||
# Maximum occupancy difference: 96
|
||||
# Pairing attempt rate: 0.438
|
||||
# Correct pairings: 5151
|
||||
# Incorrect pairings: 18
|
||||
# Pairing error rate: 0.00348
|
||||
# Simulation time: 862 seconds
|
||||
```
|
||||
|
||||
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|
||||
|---|---|---|---|---|---|---|
|
||||
|5242972|17|1571520|18|17|true|1.41E-18|
|
||||
|5161027|18|2072219|18|18|true|7.31E-20|
|
||||
|4145198|33|1064455|30|29|true|2.65E-21|
|
||||
|7700582|18|112748|18|18|true|7.31E-20|
|
||||
|...|...|...|...|...|...|...|
|
||||
|
||||
---
|
||||
|
||||
**NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all.
|
||||
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
|
||||
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
|
||||
|
||||
|
||||
## PERFORMANCE
|
||||
|
||||
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
|
||||
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
|
||||
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution (lambda 0.6).
|
||||
|
||||
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
|
||||
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
|
||||
|
||||
The total simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
|
||||
|
||||
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
|
||||
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
|
||||
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
|
||||
|
||||
As mentioned in the theory section, performance could be improved by implementing a more efficient algorithm for finding
|
||||
the maximum weighted matching.
|
||||
|
||||
## BEHAVIOR WITH RANDOMIZED WELL POPULATIONS
|
||||
|
||||
A series of BiGpairSEQ simulations were conducted using a cell sample file of 3.5 million unique T cells. From these cells,
|
||||
10 sample plate files were created. All of these sample plates had 96 wells, used an exponential distribution with a lambda of 0.6, and
|
||||
had a sequence dropout rate of 10%.
|
||||
|
||||
The well populations of the plates were:
|
||||
* One sample plate with 1000 T cells/well
|
||||
* One sample plate with 2000 T cells/well
|
||||
* One sample plate with 3000 T cells/well
|
||||
* One sample plate with 4000 T cells/well
|
||||
* One sample plate with 5000 T cells/well
|
||||
* Five sample plates with each individual well's population randomized, from 1000 to 5000 T cells. (Average population ~3000 T cells/well.)
|
||||
|
||||
All BiGpairSEQ simulations were run with a low overlap threshold of 3 and a high overlap threshold of 94.
|
||||
No optional filters were used, so pairing was attempted for all sequences with overlaps within the threshold values.
|
||||
|
||||
Constant well population plate results:
|
||||
|
||||
| |1000 Cell/Well Plate|2000 Cell/Well Plate|3000 Cell/Well Plate|4000 Cell/Well Plate|5000 Cell/Well Plate|
|
||||
|---|---|---|---|---|---|
|
||||
|Total Alphas Found|6407|7330|7936|8278|8553|
|
||||
|Total Betas Found|6405|7333|7968|8269|8582|
|
||||
|Pairing Attempt Rate|0.661|0.653|0.600|0.579|0.559|
|
||||
|Correct Pairing Count|4231|4749|4723|4761|4750|
|
||||
|Incorrect Pairing Count|3|34|40|26|29|
|
||||
|Pairing Error Rate|0.000709|0.00711|0.00840|0.00543|0.00607|
|
||||
|Simulation Time (Seconds)|500|643|700|589|598|
|
||||
|
||||
Randomized well population plate results:
|
||||
|
||||
| |Random Plate 1 | Random Plate 2|Random Plate 3|Random Plate 4|Random Plate 5|Average|
|
||||
|---|---|---|---|---|---|---|
|
||||
|Total Alphas Found|7853|7904|7964|7898|7917|7907|
|
||||
|Total Betas Found|7851|7891|7920|7910|7894|7893|
|
||||
|Pairing Attempt Rate|0.607|0.610|0.601|0.605|0.603|0.605|
|
||||
|Correct Pairing Count|4718|4782|4721|4755|4731|4741|
|
||||
|Incorrect Pairing Count|51|35|42|27|29|37|
|
||||
|Pairing Error Rate|0.0107|0.00727|0.00882|0.00565|0.00609|0.00771|
|
||||
|Simulation Time (Seconds)|590|677|730|618|615|646|
|
||||
|
||||
The average results for the randomized plates are closest to the constant plate with 3000 T cells/well.
|
||||
This and several other tests indicate that BiGpairSEQ treats a sample plate with a highly variable number of T cells/well
|
||||
roughly as though it had a constant well population equal to the plate's average well population.
|
||||
|
||||
## TODO
|
||||
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
|
||||
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
|
||||
* Might have figured out a way to do it, by taking edges out and then putting them back into the graph. This may actually be possible.
|
||||
* It is possible, though the modifications to the graph incur their own performance penalties. Need testing to see which option is best. It may be computer-specific.
|
||||
* ~~Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm~~ DONE
|
||||
* ~~in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage~~
|
||||
* ~~Add controllable heap-type parameter?~~
|
||||
* Parameter implemented. Fibonacci heap the current default.
|
||||
* ~~Implement sample plates with random numbers of T cells per well.~~ DONE
|
||||
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
|
||||
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
|
||||
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
|
||||
* ~~Problem is variable number of cells in a well~~
|
||||
* ~~Apache Commons CSV library writes entries a row at a time~~
|
||||
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Tidyverse transposes CSV matrices in R, it might not even be worth fixing._
|
||||
* ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE
|
||||
* ~~Custom vertex type with attribute for sequence occupancy?~~ DONE
|
||||
* Advantage: would eliminate the need to use maps to associate vertices with sequences, which would make the code easier to understand.
|
||||
* ~~Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.~~
|
||||
* ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE
|
||||
* Re-implement CDR1 matching method
|
||||
* Implement Duan and Su's maximum weight matching algorithm
|
||||
* Add controllable algorithm-type parameter?
|
||||
* This would be fun and valuable, but probably take more time than I have for a hobby project.
|
||||
* Implement an algorithm for approximating a maximum weight matching
|
||||
* Some of these run in linear or near-linear time
|
||||
* given that the underlying biological samples have many, many sources of error, this would probably be the most useful option in practice. It seems less mathematically elegant, though, and so less fun for me.
|
||||
* Implement Vose's alias method for arbitrary statistical distributions of cells
|
||||
* Should probably refactor to use apache commons rng for this
|
||||
* Use commons JCS for caching
|
||||
* Enable post-filtering instead of pre-filtering. Pre-filtering of things like singleton sequences or saturating-occupancy sequences reduces graph size, but could conceivably reduce pairing accuracy by throwing away data. While these sequences have very little signal, it would be interesting to compare unfiltered results to filtered results. This would require a much, much faster MWM algorithm, though, to handle the much larger graphs. Possibly one of the linear-time approximation algorithms.
|
||||
|
||||
|
||||
## CITATIONS
|
||||
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
|
||||
* Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
|
||||
* Mehlhorn, K., Näher, St. [LEDA: A Platform for Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
|
||||
* Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596–615 (1987)
|
||||
|
||||
## EXTERNAL LIBRARIES USED
|
||||
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
|
||||
* [JHeaps](https://www.jheaps.org) -- For the heap-based priority queue (Fibonacci heap by default, pairing heap optional) used in the maximum weight matching algorithm
|
||||
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
|
||||
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting.
|
||||
|
||||
## ACKNOWLEDGEMENTS
|
||||
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
|
||||
pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
|
||||
|
||||
## AUTHOR
|
||||
BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.
|
||||
177
src/main/java/BiGpairSEQ.java
Normal file
177
src/main/java/BiGpairSEQ.java
Normal file
@@ -0,0 +1,177 @@
|
||||
import java.util.Random;
|
||||
|
||||
//main class. For choosing interface type and holding settings
|
||||
public class BiGpairSEQ {
|
||||
|
||||
private static final Random rand = new Random();
|
||||
private static CellSample cellSampleInMemory = null;
|
||||
private static String cellFilename = null;
|
||||
private static Plate plateInMemory = null;
|
||||
private static String plateFilename = null;
|
||||
private static GraphWithMapData graphInMemory = null;
|
||||
private static String graphFilename = null;
|
||||
private static boolean cacheCells = false;
|
||||
private static boolean cachePlate = false;
|
||||
private static boolean cacheGraph = false;
|
||||
private static String priorityQueueHeapType = "FIBONACCI";
|
||||
private static boolean outputBinary = true;
|
||||
private static boolean outputGraphML = false;
|
||||
private static final String version = "version 3.0";
|
||||
|
||||
public static void main(String[] args) {
|
||||
if (args.length == 0) {
|
||||
InteractiveInterface.startInteractive();
|
||||
}
|
||||
else {
|
||||
//This will be uncommented when command line arguments are re-implemented.
|
||||
CommandLineInterface.startCLI(args);
|
||||
//System.out.println("Command line arguments are still being re-implemented.");
|
||||
}
|
||||
}
|
||||
|
||||
public static Random getRand() {
|
||||
return rand;
|
||||
}
|
||||
|
||||
public static CellSample getCellSampleInMemory() {
|
||||
return cellSampleInMemory;
|
||||
}
|
||||
|
||||
public static void setCellSampleInMemory(CellSample cellSample, String filename) {
|
||||
if(cellSampleInMemory != null) {
|
||||
clearCellSampleInMemory();
|
||||
}
|
||||
cellSampleInMemory = cellSample;
|
||||
cellFilename = filename;
|
||||
System.out.println("Cell sample file " + filename + " cached.");
|
||||
}
|
||||
|
||||
public static void clearCellSampleInMemory() {
|
||||
cellSampleInMemory = null;
|
||||
cellFilename = null;
|
||||
System.gc();
|
||||
System.out.println("Cell sample file cache cleared.");
|
||||
|
||||
}
|
||||
|
||||
public static String getCellFilename() {
|
||||
return cellFilename;
|
||||
}
|
||||
|
||||
public static Plate getPlateInMemory() {
|
||||
return plateInMemory;
|
||||
}
|
||||
|
||||
public static void setPlateInMemory(Plate plate, String filename) {
|
||||
if(plateInMemory != null) {
|
||||
clearPlateInMemory();
|
||||
}
|
||||
plateInMemory = plate;
|
||||
plateFilename = filename;
|
||||
System.out.println("Sample plate file " + filename + " cached.");
|
||||
}
|
||||
|
||||
public static void clearPlateInMemory() {
|
||||
plateInMemory = null;
|
||||
plateFilename = null;
|
||||
System.gc();
|
||||
System.out.println("Sample plate file cache cleared.");
|
||||
|
||||
}
|
||||
|
||||
public static String getPlateFilename() {
|
||||
return plateFilename;
|
||||
}
|
||||
|
||||
|
||||
public static GraphWithMapData getGraphInMemory() {return graphInMemory;
|
||||
}
|
||||
|
||||
public static void setGraphInMemory(GraphWithMapData g, String filename) {
|
||||
if (graphInMemory != null) {
|
||||
clearGraphInMemory();
|
||||
}
|
||||
graphInMemory = g;
|
||||
graphFilename = filename;
|
||||
System.out.println("Graph and data file " + filename + " cached.");
|
||||
}
|
||||
|
||||
public static void clearGraphInMemory() {
|
||||
graphInMemory = null;
|
||||
graphFilename = null;
|
||||
System.gc();
|
||||
System.out.println("Graph and data file cache cleared.");
|
||||
}
|
||||
|
||||
public static String getGraphFilename() {
|
||||
return graphFilename;
|
||||
}
|
||||
|
||||
|
||||
public static boolean cacheCells() {
|
||||
return cacheCells;
|
||||
}
|
||||
|
||||
public static void setCacheCells(boolean cacheCells) {
|
||||
//if not caching, clear the memory
|
||||
if(!cacheCells){
|
||||
BiGpairSEQ.clearCellSampleInMemory();
|
||||
System.out.println("Cell sample file caching: OFF.");
|
||||
}
|
||||
else {
|
||||
System.out.println("Cell sample file caching: ON.");
|
||||
}
|
||||
BiGpairSEQ.cacheCells = cacheCells;
|
||||
}
|
||||
|
||||
public static boolean cachePlate() {
|
||||
return cachePlate;
|
||||
}
|
||||
|
||||
public static void setCachePlate(boolean cachePlate) {
|
||||
//if not caching, clear the memory
|
||||
if(!cachePlate) {
|
||||
BiGpairSEQ.clearPlateInMemory();
|
||||
System.out.println("Sample plate file caching: OFF.");
|
||||
}
|
||||
else {
|
||||
System.out.println("Sample plate file caching: ON.");
|
||||
}
|
||||
BiGpairSEQ.cachePlate = cachePlate;
|
||||
}
|
||||
|
||||
public static boolean cacheGraph() {
|
||||
return cacheGraph;
|
||||
}
|
||||
|
||||
public static void setCacheGraph(boolean cacheGraph) {
|
||||
//if not caching, clear the memory
|
||||
if(!cacheGraph) {
|
||||
BiGpairSEQ.clearGraphInMemory();
|
||||
System.out.println("Graph/data file caching: OFF.");
|
||||
}
|
||||
else {
|
||||
System.out.println("Graph/data file caching: ON.");
|
||||
}
|
||||
BiGpairSEQ.cacheGraph = cacheGraph;
|
||||
}
|
||||
|
||||
public static String getPriorityQueueHeapType() {
|
||||
return priorityQueueHeapType;
|
||||
}
|
||||
|
||||
public static void setPairingHeap() {
|
||||
priorityQueueHeapType = "PAIRING";
|
||||
}
|
||||
|
||||
public static void setFibonacciHeap() {
|
||||
priorityQueueHeapType = "FIBONACCI";
|
||||
}
|
||||
|
||||
public static boolean outputBinary() {return outputBinary;}
|
||||
public static void setOutputBinary(boolean b) {outputBinary = b;}
|
||||
|
||||
public static boolean outputGraphML() {return outputGraphML;}
|
||||
public static void setOutputGraphML(boolean b) {outputGraphML = b;}
|
||||
public static String getVersion() { return version; }
|
||||
}
|
||||
@@ -11,17 +11,20 @@ import java.util.List;
|
||||
|
||||
public class CellFileReader {
|
||||
|
||||
private String filename;
|
||||
private List<Integer[]> distinctCells = new ArrayList<>();
|
||||
private Integer cdr1Freq;
|
||||
|
||||
public CellFileReader(String filename) {
|
||||
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
filename = filename + ".csv";
|
||||
}
|
||||
this.filename = filename;
|
||||
|
||||
CSVFormat cellFileFormat = CSVFormat.Builder.create()
|
||||
.setHeader("Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1")
|
||||
.setSkipHeaderRecord(true)
|
||||
.setCommentMarker('#')
|
||||
.build();
|
||||
|
||||
try(//don't need to close reader bc of try-with-resources auto-closing
|
||||
@@ -36,17 +39,37 @@ public class CellFileReader {
|
||||
cell[3] = Integer.valueOf(record.get("Beta CDR1"));
|
||||
distinctCells.add(cell);
|
||||
}
|
||||
|
||||
|
||||
} catch(IOException ex){
|
||||
System.out.println("cell file " + filename + " not found.");
|
||||
System.err.println(ex);
|
||||
}
|
||||
|
||||
//get CDR1 frequency
|
||||
ArrayList<Integer> cdr1Alphas = new ArrayList<>();
|
||||
for (Integer[] cell : distinctCells) {
|
||||
cdr1Alphas.add(cell[3]);
|
||||
}
|
||||
double count = cdr1Alphas.stream().distinct().count();
|
||||
count = Math.ceil(distinctCells.size() / count);
|
||||
cdr1Freq = (int) count;
|
||||
|
||||
}
|
||||
|
||||
public List<Integer[]> getCells(){
|
||||
public CellSample getCellSample() {
|
||||
return new CellSample(distinctCells, cdr1Freq);
|
||||
}
|
||||
|
||||
/** @return the filename held by this reader */
public String getFilename() {
    return filename;
}
|
||||
|
||||
//Refactor everything that uses this to have access to a Cell Sample and get the cells there instead.
|
||||
public List<Integer[]> getListOfDistinctCellsDEPRECATED(){
|
||||
return distinctCells;
|
||||
}
|
||||
|
||||
public Integer getCellCount() {
|
||||
public Integer getCellCountDEPRECATED() {
|
||||
//Refactor everything that uses this to have access to a Cell Sample and get the count there instead.
|
||||
return distinctCells.size();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ public class CellFileWriter {
|
||||
private String[] headers = {"Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1"};
|
||||
List<Integer[]> cells;
|
||||
String filename;
|
||||
Integer cdr1Freq;
|
||||
|
||||
public CellFileWriter(String filename, CellSample cells) {
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
@@ -20,15 +21,18 @@ public class CellFileWriter {
|
||||
}
|
||||
this.filename = filename;
|
||||
this.cells = cells.getCells();
|
||||
this.cdr1Freq = cells.getCdr1Freq();
|
||||
}
|
||||
|
||||
public void writeCellsToFile() {
|
||||
CSVFormat cellFileFormat = CSVFormat.Builder.create()
|
||||
.setCommentMarker('#')
|
||||
.setHeader(headers)
|
||||
.build();
|
||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
CSVPrinter printer = new CSVPrinter(writer, cellFileFormat);
|
||||
){
|
||||
printer.printComment("Sample contains 1 unique CDR1 for every " + cdr1Freq + "unique CDR3s.");
|
||||
printer.printRecords(cells);
|
||||
} catch(IOException ex){
|
||||
System.out.println("Could not make new file named "+filename);
|
||||
|
||||
@@ -1,18 +1,51 @@
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class CellSample {
|
||||
|
||||
private List<Integer[]> cells;
|
||||
private Integer cdr1Freq;
|
||||
|
||||
public CellSample(List<Integer[]> cells){
|
||||
public CellSample(Integer numDistinctCells, Integer cdr1Freq){
|
||||
this.cdr1Freq = cdr1Freq;
|
||||
List<Integer> numbersCDR3 = new ArrayList<>();
|
||||
List<Integer> numbersCDR1 = new ArrayList<>();
|
||||
Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||
IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
|
||||
IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
|
||||
Collections.shuffle(numbersCDR3);
|
||||
Collections.shuffle(numbersCDR1);
|
||||
|
||||
//Each cell represented by 4 values
|
||||
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
|
||||
List<Integer[]> distinctCells = new ArrayList<>();
|
||||
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||
Integer tmpCDR3a = numbersCDR3.get(i);
|
||||
Integer tmpCDR3b = numbersCDR3.get(i+1);
|
||||
Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
|
||||
Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
|
||||
Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
|
||||
distinctCells.add(tmp);
|
||||
}
|
||||
this.cells = distinctCells;
|
||||
}
|
||||
|
||||
/**
 * Wraps an existing list of cells.
 *
 * @param cells    the cells making up the sample; the list is stored as given, not copied
 * @param cdr1Freq the CDR1 frequency value to report for this sample
 */
public CellSample(List<Integer[]> cells, Integer cdr1Freq){
    this.cdr1Freq = cdr1Freq;
    this.cells = cells;
}
|
||||
|
||||
public List<Integer[]> getCells(){
|
||||
return cells;
|
||||
}
|
||||
|
||||
public Integer population(){
|
||||
public Integer getCdr1Freq() {
|
||||
return cdr1Freq;
|
||||
}
|
||||
|
||||
public Integer getCellCount(){
|
||||
return cells.size();
|
||||
}
|
||||
|
||||
|
||||
499
src/main/java/CommandLineInterface.java
Normal file
499
src/main/java/CommandLineInterface.java
Normal file
@@ -0,0 +1,499 @@
|
||||
import org.apache.commons.cli.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/*
|
||||
* Class for parsing options passed to program from command line
|
||||
*
|
||||
* Top-level flags:
|
||||
* cells : to make a cell sample file
|
||||
* plate : to make a sample plate file
|
||||
* graph : to make a graph and data file
|
||||
* match : to do a cdr3 matching (WITH OR WITHOUT MAKING A RESULTS FILE. May just want to print summary for piping.)
|
||||
*
|
||||
* Cell flags:
|
||||
* count : number of cells to generate
|
||||
* diversity factor : factor by which CDR3s are more diverse than CDR1s
|
||||
* output : name of the output file
|
||||
*
|
||||
* Plate flags:
|
||||
* cellfile : name of the cell sample file to use as input
|
||||
* wells : the number of wells on the plate
|
||||
* dist : the statistical distribution to use
|
||||
* (if exponential) lambda : the lambda value of the exponential distribution
|
||||
* (if gaussian) stddev : the standard deviation of the gaussian distribution
|
||||
* rand : randomize well populations, take a minimum argument and a maximum argument
|
||||
* populations : number of t cells per well per section (number of arguments determines number of sections)
|
||||
* dropout : plate dropout rate, double from 0.0 to 1.0
|
||||
* output : name of the output file
|
||||
*
|
||||
* Graph flags:
|
||||
* cellfile : name of the cell sample file to use as input
|
||||
* platefile : name of the sample plate file to use as input
|
||||
* output : name of the output file
|
||||
* graphml : output a graphml file
|
||||
* binary : output a serialized binary object file
|
||||
*
|
||||
* Match flags:
|
||||
* graphFile : name of graph and data file to use as input
|
||||
* min : minimum number of overlap wells to attempt a matching
|
||||
* max : the maximum number of overlap wells to attempt a matching
|
||||
* maxdiff : (optional) the maximum difference in occupancy to attempt a matching
|
||||
* minpercent : (optional) the minimum percent overlap to attempt a matching.
|
||||
* writefile : (optional) the filename to write results to
|
||||
* output : the values to print to System.out for piping
|
||||
*
|
||||
*/
|
||||
public class CommandLineInterface {
|
||||
|
||||
public static void startCLI(String[] args) {
|
||||
//Options sets for the different modes
|
||||
Options mainOptions = buildMainOptions();
|
||||
Options cellOptions = buildCellOptions();
|
||||
Options plateOptions = buildPlateOptions();
|
||||
Options graphOptions = buildGraphOptions();
|
||||
Options matchOptions = buildMatchCDR3options();
|
||||
|
||||
CommandLineParser parser = new DefaultParser();
|
||||
try{
|
||||
CommandLine line = parser.parse(mainOptions, Arrays.copyOfRange(args, 0, 1));
|
||||
|
||||
if (line.hasOption("help")) {
|
||||
HelpFormatter formatter = new HelpFormatter();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar", mainOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -cells", cellOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -plate", plateOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -graph", graphOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -match", matchOptions);
|
||||
}
|
||||
else if (line.hasOption("version")) {
|
||||
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||
}
|
||||
else if (line.hasOption("cells")) {
|
||||
line = parser.parse(cellOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
Integer number = Integer.valueOf(line.getOptionValue("n"));
|
||||
Integer diversity = Integer.valueOf(line.getOptionValue("d"));
|
||||
String filename = line.getOptionValue("o");
|
||||
makeCells(filename, number, diversity);
|
||||
}
|
||||
|
||||
else if (line.hasOption("plate")) {
|
||||
line = parser.parse(plateOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
//get the cells
|
||||
String cellFilename = line.getOptionValue("c");
|
||||
CellSample cells = getCells(cellFilename);
|
||||
//get the rest of the parameters
|
||||
Integer[] populations;
|
||||
String outputFilename = line.getOptionValue("o");
|
||||
Integer numWells = Integer.parseInt(line.getOptionValue("w"));
|
||||
Double dropoutRate = Double.parseDouble(line.getOptionValue("err"));
|
||||
if (line.hasOption("random")) {
|
||||
//Array holding values of minimum and maximum populations
|
||||
Integer[] min_max = Stream.of(line.getOptionValues("random"))
|
||||
.mapToInt(Integer::parseInt)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
populations = BiGpairSEQ.getRand().ints(min_max[0], min_max[1] + 1)
|
||||
.limit(numWells)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
}
|
||||
else if (line.hasOption("pop")) {
|
||||
populations = Stream.of(line.getOptionValues("pop"))
|
||||
.mapToInt(Integer::parseInt)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
}
|
||||
else{
|
||||
populations = new Integer[1];
|
||||
populations[0] = 1;
|
||||
}
|
||||
//make the plate
|
||||
Plate plate;
|
||||
if (line.hasOption("poisson")) {
|
||||
Double stdDev = Math.sqrt(numWells);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
}
|
||||
else if (line.hasOption("gaussian")) {
|
||||
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
}
|
||||
else {
|
||||
assert line.hasOption("exponential");
|
||||
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
|
||||
}
|
||||
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
|
||||
writer.writePlateFile();
|
||||
}
|
||||
|
||||
else if (line.hasOption("graph")) { //Making a graph
|
||||
line = parser.parse(graphOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
String cellFilename = line.getOptionValue("c");
|
||||
String plateFilename = line.getOptionValue("p");
|
||||
String outputFilename = line.getOptionValue("o");
|
||||
//get cells
|
||||
CellSample cells = getCells(cellFilename);
|
||||
//get plate
|
||||
Plate plate = getPlate(plateFilename);
|
||||
GraphWithMapData graph = Simulator.makeGraph(cells, plate, false);
|
||||
if (!line.hasOption("no-binary")) { //output binary file unless told not to
|
||||
GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph, false);
|
||||
writer.writeDataToFile();
|
||||
}
|
||||
if (line.hasOption("graphml")) { //if told to, output graphml file
|
||||
GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph);
|
||||
gmlwriter.writeGraphToFile();
|
||||
}
|
||||
}
|
||||
|
||||
else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two
|
||||
line = parser.parse(matchOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
String graphFilename = line.getOptionValue("g");
|
||||
|
||||
String outputFilename;
|
||||
if(line.hasOption("o")) {
|
||||
outputFilename = line.getOptionValue("o");
|
||||
}
|
||||
else {
|
||||
outputFilename = null;
|
||||
}
|
||||
Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
|
||||
Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
|
||||
int minOverlapPct;
|
||||
if (line.hasOption("minpct")) { //see if this filter is being used
|
||||
minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
|
||||
}
|
||||
else {
|
||||
minOverlapPct = 0;
|
||||
}
|
||||
int maxOccupancyDiff;
|
||||
if (line.hasOption("maxdiff")) { //see if this filter is being used
|
||||
maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
|
||||
}
|
||||
else {
|
||||
maxOccupancyDiff = Integer.MAX_VALUE;
|
||||
}
|
||||
GraphWithMapData graph = getGraph(graphFilename);
|
||||
MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
|
||||
maxOccupancyDiff, minOverlapPct, false);
|
||||
if(outputFilename != null){
|
||||
MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
|
||||
writer.writeResultsToFile();
|
||||
}
|
||||
//can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
|
||||
//after I put those flags in the matchOptions
|
||||
if(line.hasOption("print-metadata")) {
|
||||
for (String k : result.getMetadata().keySet()) {
|
||||
System.out.println(k + ": " + result.getMetadata().get(k));
|
||||
}
|
||||
}
|
||||
if(line.hasOption("print-error")) {
|
||||
System.out.println("pairing error rate: " + result.getPairingErrorRate());
|
||||
}
|
||||
if(line.hasOption("print-attempt")) {
|
||||
System.out.println("pairing attempt rate: " +result.getPairingAttemptRate());
|
||||
}
|
||||
if(line.hasOption("print-correct")) {
|
||||
System.out.println("correct pairings: " + result.getCorrectPairingCount());
|
||||
}
|
||||
if(line.hasOption("print-incorrect")) {
|
||||
System.out.println("incorrect pairings: " + result.getIncorrectPairingCount());
|
||||
}
|
||||
if(line.hasOption("print-alphas")) {
|
||||
System.out.println("total alphas found: " + result.getAlphaCount());
|
||||
}
|
||||
if(line.hasOption("print-betas")) {
|
||||
System.out.println("total betas found: " + result.getBetaCount());
|
||||
}
|
||||
if(line.hasOption("print-time")) {
|
||||
System.out.println("simulation time (seconds): " + result.getSimulationTime());
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (ParseException exp) {
|
||||
System.err.println("Parsing failed. Reason: " + exp.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private static Option outputFileOption() {
|
||||
Option outputFile = Option.builder("o")
|
||||
.longOpt("output-file")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.desc("Name of output file")
|
||||
.required()
|
||||
.build();
|
||||
return outputFile;
|
||||
}
|
||||
|
||||
private static Options buildMainOptions() {
|
||||
Options mainOptions = new Options();
|
||||
Option help = Option.builder("help")
|
||||
.desc("Displays this help menu")
|
||||
.build();
|
||||
Option makeCells = Option.builder("cells")
|
||||
.longOpt("make-cells")
|
||||
.desc("Makes a cell sample file of distinct T cells")
|
||||
.build();
|
||||
Option makePlate = Option.builder("plate")
|
||||
.longOpt("make-plate")
|
||||
.desc("Makes a sample plate file. Requires a cell sample file.")
|
||||
.build();
|
||||
Option makeGraph = Option.builder("graph")
|
||||
.longOpt("make-graph")
|
||||
.desc("Makes a graph/data file. Requires a cell sample file and a sample plate file")
|
||||
.build();
|
||||
Option matchCDR3 = Option.builder("match")
|
||||
.longOpt("match-cdr3")
|
||||
.desc("Matches CDR3s. Requires a graph/data file.")
|
||||
.build();
|
||||
Option printVersion = Option.builder("version")
|
||||
.desc("Prints the program version number to stdout").build();
|
||||
OptionGroup mainGroup = new OptionGroup();
|
||||
mainGroup.addOption(help);
|
||||
mainGroup.addOption(printVersion);
|
||||
mainGroup.addOption(makeCells);
|
||||
mainGroup.addOption(makePlate);
|
||||
mainGroup.addOption(makeGraph);
|
||||
mainGroup.addOption(matchCDR3);
|
||||
mainGroup.setRequired(true);
|
||||
mainOptions.addOptionGroup(mainGroup);
|
||||
return mainOptions;
|
||||
}
|
||||
|
||||
private static Options buildCellOptions() {
|
||||
Options cellOptions = new Options();
|
||||
Option numCells = Option.builder("n")
|
||||
.longOpt("num-cells")
|
||||
.desc("The number of distinct cells to generate")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
Option cdr3Diversity = Option.builder("d")
|
||||
.longOpt("diversity-factor")
|
||||
.desc("The factor by which unique CDR3s outnumber unique CDR1s")
|
||||
.hasArg()
|
||||
.argName("factor")
|
||||
.required().build();
|
||||
cellOptions.addOption(numCells);
|
||||
cellOptions.addOption(cdr3Diversity);
|
||||
cellOptions.addOption(outputFileOption());
|
||||
return cellOptions;
|
||||
}
|
||||
|
||||
private static Options buildPlateOptions() {
|
||||
Options plateOptions = new Options();
|
||||
Option cellFile = Option.builder("c") // add this to plate options
|
||||
.longOpt("cell-file")
|
||||
.desc("The cell sample file to use")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option numWells = Option.builder("w")// add this to plate options
|
||||
.longOpt("wells")
|
||||
.desc("The number of wells on the sample plate")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
//options group for choosing with distribution to use
|
||||
OptionGroup distributions = new OptionGroup();// add this to plate options
|
||||
distributions.setRequired(true);
|
||||
Option poisson = Option.builder("poisson")
|
||||
.desc("Use a Poisson distribution for cell sample")
|
||||
.build();
|
||||
Option gaussian = Option.builder("gaussian")
|
||||
.desc("Use a Gaussian distribution for cell sample")
|
||||
.build();
|
||||
Option exponential = Option.builder("exponential")
|
||||
.desc("Use an exponential distribution for cell sample")
|
||||
.build();
|
||||
distributions.addOption(poisson);
|
||||
distributions.addOption(gaussian);
|
||||
distributions.addOption(exponential);
|
||||
//options group for statistical distribution parameters
|
||||
OptionGroup statParams = new OptionGroup();// add this to plate options
|
||||
Option stdDev = Option.builder("stddev")
|
||||
.desc("If using -gaussian flag, standard deviation for distrbution")
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
Option lambda = Option.builder("lambda")
|
||||
.desc("If using -exponential flag, lambda value for distribution")
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
statParams.addOption(stdDev);
|
||||
statParams.addOption(lambda);
|
||||
//Option group for random plate or set populations
|
||||
OptionGroup wellPopOptions = new OptionGroup(); // add this to plate options
|
||||
wellPopOptions.setRequired(true);
|
||||
Option randomWellPopulations = Option.builder("random")
|
||||
.desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
|
||||
.hasArgs()
|
||||
.numberOfArgs(2)
|
||||
.argName("min> <max")
|
||||
.build();
|
||||
Option specificWellPopulations = Option.builder("pop")
|
||||
.desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
|
||||
.hasArgs()
|
||||
.argName("number [number]...")
|
||||
.build();
|
||||
Option dropoutRate = Option.builder("err") //add this to plate options
|
||||
.hasArg()
|
||||
.desc("The sequence dropout rate due to amplification error. (0.0 - 1.0)")
|
||||
.argName("rate")
|
||||
.required()
|
||||
.build();
|
||||
wellPopOptions.addOption(randomWellPopulations);
|
||||
wellPopOptions.addOption(specificWellPopulations);
|
||||
plateOptions.addOption(cellFile);
|
||||
plateOptions.addOption(numWells);
|
||||
plateOptions.addOptionGroup(distributions);
|
||||
plateOptions.addOptionGroup(statParams);
|
||||
plateOptions.addOptionGroup(wellPopOptions);
|
||||
plateOptions.addOption(dropoutRate);
|
||||
plateOptions.addOption(outputFileOption());
|
||||
return plateOptions;
|
||||
}
|
||||
|
||||
private static Options buildGraphOptions() {
|
||||
Options graphOptions = new Options();
|
||||
Option cellFilename = Option.builder("c")
|
||||
.longOpt("cell-file")
|
||||
.desc("Cell sample file to use for checking pairing accuracy")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option plateFilename = Option.builder("p")
|
||||
.longOpt("plate-filename")
|
||||
.desc("Sample plate file from which to construct graph")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option outputGraphML = Option.builder("graphml")
|
||||
.desc("(Optional) Output GraphML file")
|
||||
.build();
|
||||
Option outputSerializedBinary = Option.builder("nb")
|
||||
.longOpt("no-binary")
|
||||
.desc("(Optional) Don't output serialized binary file")
|
||||
.build();
|
||||
graphOptions.addOption(cellFilename);
|
||||
graphOptions.addOption(plateFilename);
|
||||
graphOptions.addOption(outputFileOption());
|
||||
graphOptions.addOption(outputGraphML);
|
||||
graphOptions.addOption(outputSerializedBinary);
|
||||
return graphOptions;
|
||||
}
|
||||
|
||||
private static Options buildMatchCDR3options() {
|
||||
Options matchCDR3options = new Options();
|
||||
Option graphFilename = Option.builder("g")
|
||||
.longOpt("graph-file")
|
||||
.desc("The graph/data file to use")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option minOccupancyOverlap = Option.builder("min")
|
||||
.desc("The minimum number of shared wells to attempt to match a sequence pair")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
Option maxOccupancyOverlap = Option.builder("max")
|
||||
.desc("The maximum number of shared wells to attempt to match a sequence pair")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
Option minOverlapPercent = Option.builder("minpct")
|
||||
.desc("(Optional) The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching. (0 - 100) ")
|
||||
.hasArg()
|
||||
.argName("percent")
|
||||
.build();
|
||||
Option maxOccupancyDifference = Option.builder("maxdiff")
|
||||
.desc("(Optional) The maximum difference in total occupancy between two sequences to attempt matching.")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.build();
|
||||
Option outputFile = Option.builder("o") //can't call the method this time, because this one's optional
|
||||
.longOpt("output-file")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.desc("(Optional) Name of output the output file. If not present, no file will be written.")
|
||||
.build();
|
||||
matchCDR3options.addOption(graphFilename)
|
||||
.addOption(minOccupancyOverlap)
|
||||
.addOption(maxOccupancyOverlap)
|
||||
.addOption(minOverlapPercent)
|
||||
.addOption(maxOccupancyDifference)
|
||||
.addOption(outputFile);
|
||||
|
||||
//options for output to System.out
|
||||
Option printAlphaCount = Option.builder().longOpt("print-alphas")
|
||||
.desc("(Optional) Print the number of distinct alpha sequences to stdout.").build();
|
||||
Option printBetaCount = Option.builder().longOpt("print-betas")
|
||||
.desc("(Optional) Print the number of distinct beta sequences to stdout.").build();
|
||||
Option printTime = Option.builder().longOpt("print-time")
|
||||
.desc("(Optional) Print the total simulation time to stdout.").build();
|
||||
Option printErrorRate = Option.builder().longOpt("print-error")
|
||||
.desc("(Optional) Print the pairing error rate to stdout").build();
|
||||
Option printAttempt = Option.builder().longOpt("print-attempt")
|
||||
.desc("(Optional) Print the pairing attempt rate to stdout").build();
|
||||
Option printCorrect = Option.builder().longOpt("print-correct")
|
||||
.desc("(Optional) Print the number of correct pairs to stdout").build();
|
||||
Option printIncorrect = Option.builder().longOpt("print-incorrect")
|
||||
.desc("(Optional) Print the number of incorrect pairs to stdout").build();
|
||||
Option printMetadata = Option.builder().longOpt("print-metadata")
|
||||
.desc("(Optional) Print a full summary of the matching results to stdout.").build();
|
||||
|
||||
matchCDR3options
|
||||
.addOption(printErrorRate)
|
||||
.addOption(printAttempt)
|
||||
.addOption(printCorrect)
|
||||
.addOption(printIncorrect)
|
||||
.addOption(printMetadata)
|
||||
.addOption(printAlphaCount)
|
||||
.addOption(printBetaCount)
|
||||
.addOption(printTime);
|
||||
return matchCDR3options;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static CellSample getCells(String cellFilename) {
|
||||
assert cellFilename != null;
|
||||
CellFileReader reader = new CellFileReader(cellFilename);
|
||||
return reader.getCellSample();
|
||||
}
|
||||
|
||||
private static Plate getPlate(String plateFilename) {
|
||||
assert plateFilename != null;
|
||||
PlateFileReader reader = new PlateFileReader(plateFilename);
|
||||
return reader.getSamplePlate();
|
||||
}
|
||||
|
||||
private static GraphWithMapData getGraph(String graphFilename) {
|
||||
assert graphFilename != null;
|
||||
try{
|
||||
GraphDataObjectReader reader = new GraphDataObjectReader(graphFilename, false);
|
||||
return reader.getData();
|
||||
|
||||
}
|
||||
catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
//for calling from command line
|
||||
public static void makeCells(String filename, Integer numCells, Integer cdr1Freq) {
|
||||
CellSample sample = new CellSample(numCells, cdr1Freq);
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,9 @@ import java.math.MathContext;
|
||||
|
||||
public abstract class Equations {
|
||||
|
||||
//pValue calculation as described in original pairSEQ paper.
|
||||
//Included for comparison with original results.
|
||||
//Not used by BiGpairSEQ for matching.
|
||||
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
|
||||
int w_ab = (int) w_ab_d;
|
||||
double pv = 0.0;
|
||||
@@ -14,6 +17,9 @@ public abstract class Equations {
|
||||
return pv;
|
||||
}
|
||||
|
||||
//Implementation of the (corrected) probability equation from pairSEQ paper.
|
||||
//Included for comparison with original results.
|
||||
//Not used by BiGpairSEQ for matching.
|
||||
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
|
||||
BigInteger numer1 = choose(w, w_ab);
|
||||
BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
|
||||
@@ -26,10 +32,9 @@ public abstract class Equations {
|
||||
return prob.doubleValue();
|
||||
}
|
||||
|
||||
/*
|
||||
* This works because nC(k+1) = nCk * (n-k)/(k+1)
|
||||
* Since nC0 = 1, can start there and generate all the rest.
|
||||
*/
|
||||
|
||||
//This works because nC(k+1) = nCk * (n-k)/(k+1)
|
||||
//Since nC0 = 1, can start there and generate all the rest.
|
||||
public static BigInteger choose(final int N, final int K) {
|
||||
BigInteger nCk = BigInteger.ONE;
|
||||
for (int k = 0; k < K; k++) {
|
||||
|
||||
36
src/main/java/GraphDataObjectReader.java
Normal file
36
src/main/java/GraphDataObjectReader.java
Normal file
@@ -0,0 +1,36 @@
|
||||
import java.io.*;
|
||||
|
||||
public class GraphDataObjectReader {
|
||||
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
|
||||
|
||||
public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
this.filename = filename;
|
||||
try(//don't need to close these because of try-with-resources
|
||||
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
||||
ObjectInputStream in = new ObjectInputStream(fileIn))
|
||||
{
|
||||
if (verbose) {
|
||||
System.out.println("Reading graph data from file. This may take some time");
|
||||
System.out.println("File I/O time is not included in results");
|
||||
}
|
||||
data = (GraphWithMapData) in.readObject();
|
||||
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
||||
System.out.println("Graph/data file " + filename + " not found.");
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public GraphWithMapData getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public String getFilename() {
|
||||
return filename;
|
||||
}
|
||||
}
|
||||
45
src/main/java/GraphDataObjectWriter.java
Normal file
45
src/main/java/GraphDataObjectWriter.java
Normal file
@@ -0,0 +1,45 @@
|
||||
import org.jgrapht.Graph;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectOutputStream;
|
||||
|
||||
public class GraphDataObjectWriter {
|
||||
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
private boolean verbose = true;
|
||||
|
||||
public GraphDataObjectWriter(String filename, GraphWithMapData data) {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public GraphDataObjectWriter(String filename, GraphWithMapData data, boolean verbose) {
|
||||
this.verbose = verbose;
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public void writeDataToFile() {
|
||||
try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename));
|
||||
|
||||
ObjectOutputStream out = new ObjectOutputStream(bufferedOut);
|
||||
){
|
||||
if(verbose) {
|
||||
System.out.println("Writing graph and occupancy data to file. This may take some time.");
|
||||
System.out.println("File I/O time is not included in results.");
|
||||
}
|
||||
out.writeObject(data);
|
||||
} catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
93
src/main/java/GraphMLFileWriter.java
Normal file
93
src/main/java/GraphMLFileWriter.java
Normal file
@@ -0,0 +1,93 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jgrapht.nio.Attribute;
|
||||
import org.jgrapht.nio.AttributeType;
|
||||
import org.jgrapht.nio.DefaultAttribute;
|
||||
import org.jgrapht.nio.graphml.GraphMLExporter;
|
||||
import org.jgrapht.nio.graphml.GraphMLExporter.AttributeCategory;
|
||||
import org.w3c.dom.Attr;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class GraphMLFileWriter {
|
||||
|
||||
String filename;
|
||||
SimpleWeightedGraph graph;
|
||||
GraphWithMapData data;
|
||||
Map<String, Attribute> graphAttributes;
|
||||
|
||||
public GraphMLFileWriter(String filename, GraphWithMapData data) {
|
||||
if(!filename.matches(".*\\.graphml")){
|
||||
filename = filename + ".graphml";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.data = data;
|
||||
this.graph = data.getGraph();
|
||||
graphAttributes = createGraphAttributes();
|
||||
}
|
||||
|
||||
public GraphMLFileWriter(String filename, SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph) {
|
||||
if(!filename.matches(".*\\.graphml")){
|
||||
filename = filename + ".graphml";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.graph = graph;
|
||||
}
|
||||
|
||||
private Map<String, Attribute> createGraphAttributes(){
|
||||
Map<String, Attribute> ga = new HashMap<>();
|
||||
//Sample plate filename
|
||||
ga.put("sample plate filename", DefaultAttribute.createAttribute(data.getSourceFilename()));
|
||||
// Number of wells
|
||||
ga.put("well count", DefaultAttribute.createAttribute(data.getNumWells().toString()));
|
||||
//Well populations
|
||||
Integer[] wellPopulations = data.getWellPopulations();
|
||||
StringBuilder populationsStringBuilder = new StringBuilder();
|
||||
populationsStringBuilder.append(wellPopulations[0].toString());
|
||||
for(int i = 1; i < wellPopulations.length; i++){
|
||||
populationsStringBuilder.append(", ");
|
||||
populationsStringBuilder.append(wellPopulations[i].toString());
|
||||
}
|
||||
String wellPopulationsString = populationsStringBuilder.toString();
|
||||
ga.put("well populations", DefaultAttribute.createAttribute(wellPopulationsString));
|
||||
return ga;
|
||||
}
|
||||
|
||||
public void writeGraphToFile() {
|
||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
){
|
||||
//create exporter. Let the vertex labels be the unique ids for the vertices
|
||||
GraphMLExporter<Vertex, SimpleWeightedGraph<Vertex, DefaultWeightedEdge>> exporter = new GraphMLExporter<>(v -> v.getVertexLabel().toString());
|
||||
//set to export weights
|
||||
exporter.setExportEdgeWeights(true);
|
||||
//Set graph attributes
|
||||
exporter.setGraphAttributeProvider( () -> graphAttributes);
|
||||
//set type, sequence, and occupancy attributes for each vertex
|
||||
exporter.setVertexAttributeProvider( v -> {
|
||||
Map<String, Attribute> attributes = new HashMap<>();
|
||||
attributes.put("type", DefaultAttribute.createAttribute(v.getType().name()));
|
||||
attributes.put("sequence", DefaultAttribute.createAttribute(v.getSequence()));
|
||||
attributes.put("occupancy", DefaultAttribute.createAttribute(v.getOccupancy()));
|
||||
return attributes;
|
||||
});
|
||||
//register the attributes
|
||||
for(String s : graphAttributes.keySet()) {
|
||||
exporter.registerAttribute(s, AttributeCategory.GRAPH, AttributeType.STRING);
|
||||
}
|
||||
exporter.registerAttribute("type", AttributeCategory.NODE, AttributeType.STRING);
|
||||
exporter.registerAttribute("sequence", AttributeCategory.NODE, AttributeType.STRING);
|
||||
exporter.registerAttribute("occupancy", AttributeCategory.NODE, AttributeType.STRING);
|
||||
//export the graph
|
||||
exporter.exportGraph(graph, writer);
|
||||
} catch(IOException ex){
|
||||
System.out.println("Could not make new file named "+filename);
|
||||
System.err.println(ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
105
src/main/java/GraphModificationFunctions.java
Normal file
105
src/main/java/GraphModificationFunctions.java
Normal file
@@ -0,0 +1,105 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public interface GraphModificationFunctions {
|
||||
|
||||
//remove over- and under-weight edges
|
||||
static Map<Vertex[], Integer> filterByOverlapThresholds(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
int low, int high, boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
//List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
|
||||
if(saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, weight);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy
|
||||
static Map<Vertex[], Integer> filterByRelativeOccupancy(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Integer maxOccupancyDifference, boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
|
||||
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
if (saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, weight);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||
static Map<Vertex[], Integer> filterByOverlapPercent(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Integer minOverlapPercent,
|
||||
boolean saveEdges) {
|
||||
Map<Vertex[], Integer> removedEdges = new HashMap<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = graph.getEdgeSource(e).getOccupancy();
|
||||
Integer betaOcc = graph.getEdgeTarget(e).getOccupancy();
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
if (saveEdges) {
|
||||
Vertex source = graph.getEdgeSource(e);
|
||||
Vertex target = graph.getEdgeTarget(e);
|
||||
Integer intWeight = (int) graph.getEdgeWeight(e);
|
||||
Vertex[] edge = {source, target};
|
||||
removedEdges.put(edge, intWeight);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
static void addRemovedEdges(SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph,
|
||||
Map<Vertex[], Integer> removedEdges) {
|
||||
for (Vertex[] edge : removedEdges.keySet()) {
|
||||
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
|
||||
graph.setEdgeWeight(e, removedEdges.get(edge));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
108
src/main/java/GraphWithMapData.java
Normal file
108
src/main/java/GraphWithMapData.java
Normal file
@@ -0,0 +1,108 @@
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
|
||||
//Can't just write the graph, because I need the occupancy data too.
|
||||
//Makes most sense to serialize object and write that to a file.
|
||||
//Which means there's no reason to split map data and graph data up.
|
||||
//Custom vertex class means a lot of the map data can now be encoded in the graph itself
|
||||
public class GraphWithMapData implements java.io.Serializable {
|
||||
|
||||
private String sourceFilename;
|
||||
private final SimpleWeightedGraph graph;
|
||||
private Integer numWells;
|
||||
private Integer[] wellPopulations;
|
||||
private Integer alphaCount;
|
||||
private Integer betaCount;
|
||||
private final Map<Integer, Integer> distCellsMapAlphaKey;
|
||||
// private final Map<Integer, Integer> plateVtoAMap;
|
||||
// private final Map<Integer, Integer> plateVtoBMap;
|
||||
// private final Map<Integer, Integer> plateAtoVMap;
|
||||
// private final Map<Integer, Integer> plateBtoVMap;
|
||||
// private final Map<Integer, Integer> alphaWellCounts;
|
||||
// private final Map<Integer, Integer> betaWellCounts;
|
||||
private final Duration time;
|
||||
|
||||
public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
|
||||
Map<Integer, Integer> distCellsMapAlphaKey, Duration time){
|
||||
|
||||
// Map<Integer, Integer> plateVtoAMap, Integer alphaCount, Integer betaCount,
|
||||
// Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
|
||||
// Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
|
||||
// Map<Integer, Integer> betaWellCounts,) {
|
||||
this.graph = graph;
|
||||
this.numWells = numWells;
|
||||
this.wellPopulations = wellConcentrations;
|
||||
this.alphaCount = alphaCount;
|
||||
this.betaCount = betaCount;
|
||||
this.distCellsMapAlphaKey = distCellsMapAlphaKey;
|
||||
// this.plateVtoAMap = plateVtoAMap;
|
||||
// this.plateVtoBMap = plateVtoBMap;
|
||||
// this.plateAtoVMap = plateAtoVMap;
|
||||
// this.plateBtoVMap = plateBtoVMap;
|
||||
// this.alphaWellCounts = alphaWellCounts;
|
||||
// this.betaWellCounts = betaWellCounts;
|
||||
this.time = time;
|
||||
}
|
||||
|
||||
public SimpleWeightedGraph getGraph() {
|
||||
return graph;
|
||||
}
|
||||
|
||||
public Integer getNumWells() {
|
||||
return numWells;
|
||||
}
|
||||
|
||||
public Integer[] getWellPopulations() {
|
||||
return wellPopulations;
|
||||
}
|
||||
|
||||
// public Integer getAlphaCount() {
|
||||
// return alphaCount;
|
||||
// }
|
||||
//
|
||||
// public Integer getBetaCount() {
|
||||
// return betaCount;
|
||||
// }
|
||||
|
||||
public Map<Integer, Integer> getDistCellsMapAlphaKey() {
|
||||
return distCellsMapAlphaKey;
|
||||
}
|
||||
|
||||
// public Map<Integer, Integer> getPlateVtoAMap() {
|
||||
// return plateVtoAMap;
|
||||
// }
|
||||
//
|
||||
// public Map<Integer, Integer> getPlateVtoBMap() {
|
||||
// return plateVtoBMap;
|
||||
// }
|
||||
//
|
||||
// public Map<Integer, Integer> getPlateAtoVMap() {
|
||||
// return plateAtoVMap;
|
||||
// }
|
||||
//
|
||||
// public Map<Integer, Integer> getPlateBtoVMap() {
|
||||
// return plateBtoVMap;
|
||||
// }
|
||||
//
|
||||
// public Map<Integer, Integer> getAlphaWellCounts() {
|
||||
// return alphaWellCounts;
|
||||
// }
|
||||
//
|
||||
// public Map<Integer, Integer> getBetaWellCounts() {
|
||||
// return betaWellCounts;
|
||||
// }
|
||||
|
||||
public Duration getTime() {
|
||||
return time;
|
||||
}
|
||||
|
||||
public void setSourceFilename(String filename) {
|
||||
this.sourceFilename = filename;
|
||||
}
|
||||
|
||||
public String getSourceFilename() {
|
||||
return sourceFilename;
|
||||
}
|
||||
}
|
||||
588
src/main/java/InteractiveInterface.java
Normal file
588
src/main/java/InteractiveInterface.java
Normal file
@@ -0,0 +1,588 @@
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
//
|
||||
public class InteractiveInterface {
|
||||
|
||||
private static final Random rand = BiGpairSEQ.getRand();
|
||||
private static final Scanner sc = new Scanner(System.in);
|
||||
private static int input;
|
||||
private static boolean quit = false;
|
||||
|
||||
public static void startInteractive() {
|
||||
|
||||
while (!quit) {
|
||||
System.out.println();
|
||||
System.out.println("--------BiGPairSEQ SIMULATOR--------");
|
||||
System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
|
||||
System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
|
||||
System.out.println("------------------------------------");
|
||||
System.out.println("Please select an option:");
|
||||
System.out.println("1) Generate a population of distinct cells");
|
||||
System.out.println("2) Generate a sample plate of T cells");
|
||||
System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
|
||||
System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
|
||||
//Need to re-do the CDR3/CDR1 matching to correspond to new pattern
|
||||
//System.out.println("5) Generate CDR3/CDR1 occupancy graph");
|
||||
//System.out.println("6) Simulate CDR3/CDR1 T cell matching");
|
||||
System.out.println("8) Options");
|
||||
System.out.println("9) About/Acknowledgments");
|
||||
System.out.println("0) Exit");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> makeCells();
|
||||
case 2 -> makePlate();
|
||||
case 3 -> makeCDR3Graph();
|
||||
case 4 -> matchCDR3s();
|
||||
//case 6 -> matchCellsCDR1();
|
||||
case 8 -> mainOptions();
|
||||
case 9 -> acknowledge();
|
||||
case 0 -> quit = true;
|
||||
default -> System.out.println("Invalid input.");
|
||||
}
|
||||
} catch (InputMismatchException | IOException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
sc.close();
|
||||
}
|
||||
|
||||
private static void makeCells() {
|
||||
String filename = null;
|
||||
Integer numCells = 0;
|
||||
Integer cdr1Freq = 1;
|
||||
try {
|
||||
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
|
||||
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
|
||||
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
|
||||
System.out.println("\nThe cells will be written to a CSV file.");
|
||||
System.out.print("Please enter a file name: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
|
||||
System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
|
||||
cdr1Freq = sc.nextInt();
|
||||
System.out.print("\nPlease enter the number of T-cells to generate: ");
|
||||
numCells = sc.nextInt();
|
||||
if(numCells <= 0){
|
||||
throw new InputMismatchException("Number of cells must be a positive integer.");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellSample sample = new CellSample(numCells, cdr1Freq);
|
||||
assert filename != null;
|
||||
System.out.println("Writing cells to file");
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
System.out.println("Cell sample written to: " + filename);
|
||||
if(BiGpairSEQ.cacheCells()) {
|
||||
BiGpairSEQ.setCellSampleInMemory(sample, filename);
|
||||
}
|
||||
}
|
||||
|
||||
//Output a CSV of sample plate
|
||||
private static void makePlate() {
|
||||
String cellFile = null;
|
||||
String filename = null;
|
||||
Double stdDev = 0.0;
|
||||
Integer numWells = 0;
|
||||
Integer numSections;
|
||||
Integer[] populations = {1};
|
||||
Double dropOutRate = 0.0;
|
||||
boolean poisson = false;
|
||||
boolean exponential = false;
|
||||
double lambda = 1.5;
|
||||
try {
|
||||
System.out.println("\nSimulated sample plates consist of:");
|
||||
System.out.println("* a number of wells");
|
||||
System.out.println(" * separated into one or more sections");
|
||||
System.out.println(" * each of which has a set quantity of cells per well");
|
||||
System.out.println(" * selected from a statistical distribution of distinct cells");
|
||||
System.out.println(" * with a set dropout rate for individual sequences within a cell");
|
||||
System.out.println("\nMaking a sample plate requires a population of distinct cells");
|
||||
System.out.print("Please enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.println("\nThe sample plate will be written to a CSV file");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nSelect T-cell frequency distribution function");
|
||||
System.out.println("1) Poisson");
|
||||
System.out.println("2) Gaussian");
|
||||
System.out.println("3) Exponential");
|
||||
System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
|
||||
System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
System.out.print("Enter selection value: ");
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> poisson = true;
|
||||
case 2 -> {
|
||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
stdDev = sc.nextDouble();
|
||||
if (stdDev <= 0.0) {
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
}
|
||||
case 3 -> {
|
||||
exponential = true;
|
||||
System.out.print("Please enter lambda value for exponential distribution: ");
|
||||
lambda = sc.nextDouble();
|
||||
if (lambda <= 0.0) {
|
||||
lambda = 0.6;
|
||||
System.out.println("Value must be positive. Defaulting to 0.6.");
|
||||
}
|
||||
}
|
||||
default -> {
|
||||
System.out.println("Invalid input. Defaulting to exponential.");
|
||||
exponential = true;
|
||||
}
|
||||
}
|
||||
System.out.print("\nNumber of wells on plate: ");
|
||||
numWells = sc.nextInt();
|
||||
if(numWells < 1){
|
||||
throw new InputMismatchException("No wells on plate");
|
||||
}
|
||||
//choose whether to make T cell population/well random
|
||||
boolean randomWellPopulations;
|
||||
System.out.println("Randomize number of T cells in each well? (y/n)");
|
||||
String ans = sc.next();
|
||||
Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = pattern.matcher(ans);
|
||||
if(matcher.matches()){
|
||||
randomWellPopulations = true;
|
||||
}
|
||||
else{
|
||||
randomWellPopulations = false;
|
||||
}
|
||||
if(randomWellPopulations) { //if T cell population/well is random
|
||||
numSections = numWells;
|
||||
Integer minPop;
|
||||
Integer maxPop;
|
||||
System.out.print("Please enter minimum number of T cells in a well: ");
|
||||
minPop = sc.nextInt();
|
||||
if(minPop < 1) {
|
||||
throw new InputMismatchException("Minimum well population must be positive");
|
||||
}
|
||||
System.out.println("Please enter maximum number of T cells in a well: ");
|
||||
maxPop = sc.nextInt();
|
||||
if(maxPop < minPop) {
|
||||
throw new InputMismatchException("Max well population must be greater than min well population");
|
||||
}
|
||||
//maximum should be inclusive, so need to add one to max of randomly generated values
|
||||
populations = rand.ints(minPop, maxPop + 1)
|
||||
.limit(numSections)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
System.out.print("Populations: ");
|
||||
System.out.println(Arrays.toString(populations));
|
||||
}
|
||||
else{ //if T cell population/well is not random
|
||||
System.out.println("\nThe plate can be evenly sectioned to allow different numbers of T cells per well.");
|
||||
System.out.println("How many sections would you like to make (minimum 1)?");
|
||||
numSections = sc.nextInt();
|
||||
if (numSections < 1) {
|
||||
throw new InputMismatchException("Too few sections.");
|
||||
} else if (numSections > numWells) {
|
||||
throw new InputMismatchException("Cannot have more sections than wells.");
|
||||
}
|
||||
int i = 1;
|
||||
populations = new Integer[numSections];
|
||||
while (numSections > 0) {
|
||||
System.out.print("Enter number of T cells per well in section " + i + ": ");
|
||||
populations[i - 1] = sc.nextInt();
|
||||
i++;
|
||||
numSections--;
|
||||
}
|
||||
}
|
||||
System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
|
||||
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
|
||||
dropOutRate = sc.nextDouble();
|
||||
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
|
||||
throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]");
|
||||
}
|
||||
}catch(InputMismatchException ex){
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
assert cellFile != null;
|
||||
CellSample cells;
|
||||
if (cellFile.equals(BiGpairSEQ.getCellFilename())){
|
||||
cells = BiGpairSEQ.getCellSampleInMemory();
|
||||
}
|
||||
else {
|
||||
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
cells = cellReader.getCellSample();
|
||||
if(BiGpairSEQ.cacheCells()) {
|
||||
BiGpairSEQ.setCellSampleInMemory(cells, cellFile);
|
||||
}
|
||||
}
|
||||
assert filename != null;
|
||||
Plate samplePlate;
|
||||
PlateFileWriter writer;
|
||||
if(exponential){
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
else {
|
||||
if (poisson) {
|
||||
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
}
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
System.out.println("Writing Sample Plate to file");
|
||||
writer.writePlateFile();
|
||||
System.out.println("Sample Plate written to file: " + filename);
|
||||
if(BiGpairSEQ.cachePlate()) {
|
||||
BiGpairSEQ.setPlateInMemory(samplePlate, filename);
|
||||
}
|
||||
}
|
||||
|
||||
//Output serialized binary of GraphAndMapData object
|
||||
private static void makeCDR3Graph() {
|
||||
String filename = null;
|
||||
String cellFile = null;
|
||||
String plateFile = null;
|
||||
try {
|
||||
String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
|
||||
str = str.concat("\nrequires a cell sample file and a sample plate file.");
|
||||
System.out.println(str);
|
||||
System.out.print("\nPlease enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.print("\nPlease enter name of an existing sample plate file: ");
|
||||
plateFile = sc.next();
|
||||
System.out.println("\nThe graph and occupancy data will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
|
||||
assert cellFile != null;
|
||||
CellSample cellSample;
|
||||
//check if cells are already in memory
|
||||
if(cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
|
||||
cellSample = BiGpairSEQ.getCellSampleInMemory();
|
||||
}
|
||||
else {
|
||||
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
cellSample = cellReader.getCellSample();
|
||||
if(BiGpairSEQ.cacheCells()) {
|
||||
BiGpairSEQ.setCellSampleInMemory(cellSample, cellFile);
|
||||
}
|
||||
}
|
||||
|
||||
assert plateFile != null;
|
||||
Plate plate;
|
||||
//check if plate is already in memory
|
||||
if(plateFile.equals(BiGpairSEQ.getPlateFilename())){
|
||||
plate = BiGpairSEQ.getPlateInMemory();
|
||||
}
|
||||
else {
|
||||
System.out.println("Reading Sample Plate file: " + plateFile);
|
||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
plate = plateReader.getSamplePlate();
|
||||
if(BiGpairSEQ.cachePlate()) {
|
||||
BiGpairSEQ.setPlateInMemory(plate, plateFile);
|
||||
}
|
||||
}
|
||||
if (cellSample.getCells().size() == 0){
|
||||
System.out.println("No cell sample found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else if(plate.getWells().size() == 0 || plate.getPopulations().length == 0){
|
||||
System.out.println("No sample plate found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else{
|
||||
GraphWithMapData data = Simulator.makeGraph(cellSample, plate, true);
|
||||
assert filename != null;
|
||||
if(BiGpairSEQ.outputBinary()) {
|
||||
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
|
||||
dataWriter.writeDataToFile();
|
||||
System.out.println("Serialized binary graph/data file written to: " + filename);
|
||||
}
|
||||
if(BiGpairSEQ.outputGraphML()) {
|
||||
GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data);
|
||||
graphMLWriter.writeGraphToFile();
|
||||
System.out.println("GraphML file written to: " + filename);
|
||||
}
|
||||
if(BiGpairSEQ.cacheGraph()) {
|
||||
BiGpairSEQ.setGraphInMemory(data, filename);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Simulate matching and output CSV file of results
|
||||
private static void matchCDR3s() throws IOException {
|
||||
String filename = null;
|
||||
String graphFilename = null;
|
||||
Integer lowThreshold = 0;
|
||||
Integer highThreshold = Integer.MAX_VALUE;
|
||||
Integer maxOccupancyDiff = Integer.MAX_VALUE;
|
||||
Integer minOverlapPercent = 0;
|
||||
try {
|
||||
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
|
||||
System.out.println("Please enter name of an existing graph and occupancy data file: ");
|
||||
graphFilename = sc.next();
|
||||
System.out.println("The matching results will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
lowThreshold = sc.nextInt();
|
||||
if(lowThreshold < 1){
|
||||
lowThreshold = 1;
|
||||
System.out.println("Value for low occupancy overlap threshold must be positive");
|
||||
System.out.println("Value for low occupancy overlap threshold set to 1");
|
||||
}
|
||||
System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
highThreshold = sc.nextInt();
|
||||
if(highThreshold < lowThreshold) {
|
||||
highThreshold = lowThreshold;
|
||||
System.out.println("Value for high occupancy overlap threshold must be >= low overlap threshold");
|
||||
System.out.println("Value for high occupancy overlap threshold set to " + lowThreshold);
|
||||
}
|
||||
System.out.println("What is the minimum percentage of a sequence's wells in alpha/beta overlap to attempt matching? (0 - 100)");
|
||||
minOverlapPercent = sc.nextInt();
|
||||
if (minOverlapPercent < 0 || minOverlapPercent > 100) {
|
||||
System.out.println("Value outside range. Minimum occupancy overlap percentage set to 0");
|
||||
}
|
||||
System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
|
||||
maxOccupancyDiff = sc.nextInt();
|
||||
if (maxOccupancyDiff < 0) {
|
||||
maxOccupancyDiff = 0;
|
||||
System.out.println("Maximum allowable difference in alpha/beta occupancy must be nonnegative");
|
||||
System.out.println("Maximum allowable difference in alpha/beta occupancy set to 0");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
assert graphFilename != null;
|
||||
//check if this is the same graph we already have in memory.
|
||||
GraphWithMapData data;
|
||||
if(graphFilename.equals(BiGpairSEQ.getGraphFilename())) {
|
||||
data = BiGpairSEQ.getGraphInMemory();
|
||||
}
|
||||
else {
|
||||
GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename, true);
|
||||
data = dataReader.getData();
|
||||
if(BiGpairSEQ.cacheGraph()) {
|
||||
BiGpairSEQ.setGraphInMemory(data, graphFilename);
|
||||
}
|
||||
}
|
||||
//simulate matching
|
||||
MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
|
||||
minOverlapPercent, true);
|
||||
//write results to file
|
||||
assert filename != null;
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
|
||||
System.out.println("Writing results to file");
|
||||
writer.writeResultsToFile();
|
||||
System.out.println("Results written to file: " + filename);
|
||||
}
|
||||
|
||||
///////
|
||||
//TODO: Rewrite matchCellsCDR1() (commented out below) to fit the new matchCDR3s method with file I/O
|
||||
///////
|
||||
// public static void matchCellsCDR1(){
|
||||
// /*
|
||||
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
|
||||
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
|
||||
// match
|
||||
// */
|
||||
// String filename = null;
|
||||
// String preliminaryResultsFilename = null;
|
||||
// String cellFile = null;
|
||||
// String plateFile = null;
|
||||
// Integer lowThresholdCDR3 = 0;
|
||||
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
||||
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
|
||||
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
|
||||
// Integer lowThresholdCDR1 = 0;
|
||||
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
||||
// boolean outputCDR3Matches = false;
|
||||
// try {
|
||||
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||
// System.out.print("Please enter name of an existing cell sample file: ");
|
||||
// cellFile = sc.next();
|
||||
// System.out.print("Please enter name of an existing sample plate file: ");
|
||||
// plateFile = sc.next();
|
||||
// System.out.println("The matching results will be written to a file.");
|
||||
// System.out.print("Please enter a name for the output file: ");
|
||||
// filename = sc.next();
|
||||
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
// lowThresholdCDR3 = sc.nextInt();
|
||||
// if(lowThresholdCDR3 < 1){
|
||||
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
// }
|
||||
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
// highThresholdCDR3 = sc.nextInt();
|
||||
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
|
||||
// maxOccupancyDiffCDR3 = sc.nextInt();
|
||||
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
|
||||
// minOverlapPercentCDR3 = sc.nextInt();
|
||||
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
|
||||
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||
// }
|
||||
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
// lowThresholdCDR1 = sc.nextInt();
|
||||
// if(lowThresholdCDR1 < 1){
|
||||
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
// }
|
||||
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
// highThresholdCDR1 = sc.nextInt();
|
||||
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
|
||||
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
|
||||
// System.out.print("Please enter y/n: ");
|
||||
// String ans = sc.next();
|
||||
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
|
||||
// Matcher matcher = pattern.matcher(ans);
|
||||
// if(matcher.matches()){
|
||||
// outputCDR3Matches = true;
|
||||
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
|
||||
// preliminaryResultsFilename = sc.next();
|
||||
// System.out.println("CDR3 alpha/beta matches will be output to file");
|
||||
// }
|
||||
// else{
|
||||
// System.out.println("CDR3 alpha/beta matches will not be output to file");
|
||||
// }
|
||||
// } catch (InputMismatchException ex) {
|
||||
// System.out.println(ex);
|
||||
// sc.next();
|
||||
// }
|
||||
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
// PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||
// if (cellReader.getCells().size() == 0){
|
||||
// System.out.println("No cell sample found.");
|
||||
// System.out.println("Returning to main menu.");
|
||||
// }
|
||||
// else if(plate.getWells().size() == 0){
|
||||
// System.out.println("No sample plate found.");
|
||||
// System.out.println("Returning to main menu.");
|
||||
//
|
||||
// }
|
||||
// else{
|
||||
// if(highThresholdCDR3 >= plate.getSize()){
|
||||
// highThresholdCDR3 = plate.getSize() - 1;
|
||||
// }
|
||||
// if(highThresholdCDR1 >= plate.getSize()){
|
||||
// highThresholdCDR1 = plate.getSize() - 1;
|
||||
// }
|
||||
// List<Integer[]> cells = cellReader.getCells();
|
||||
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
|
||||
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
|
||||
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
||||
// highThresholdCDR1, preliminaryResults);
|
||||
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
|
||||
// writer.writeResultsToFile();
|
||||
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
|
||||
// writer.writeResultsToFile();
|
||||
// if(outputCDR3Matches){
|
||||
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
|
||||
// writer.writeResultsToFile();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
private static void mainOptions(){
|
||||
boolean backToMain = false;
|
||||
while(!backToMain) {
|
||||
System.out.println("\n--------------OPTIONS---------------");
|
||||
System.out.println("1) Turn " + getOnOff(!BiGpairSEQ.cacheCells()) + " cell sample file caching");
|
||||
System.out.println("2) Turn " + getOnOff(!BiGpairSEQ.cachePlate()) + " plate file caching");
|
||||
System.out.println("3) Turn " + getOnOff(!BiGpairSEQ.cacheGraph()) + " graph/data file caching");
|
||||
System.out.println("4) Turn " + getOnOff(!BiGpairSEQ.outputBinary()) + " serialized binary graph output");
|
||||
System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output (for data portability to other programs)");
|
||||
System.out.println("6) Maximum weight matching algorithm options");
|
||||
System.out.println("0) Return to main menu");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> BiGpairSEQ.setCacheCells(!BiGpairSEQ.cacheCells());
|
||||
case 2 -> BiGpairSEQ.setCachePlate(!BiGpairSEQ.cachePlate());
|
||||
case 3 -> BiGpairSEQ.setCacheGraph(!BiGpairSEQ.cacheGraph());
|
||||
case 4 -> BiGpairSEQ.setOutputBinary(!BiGpairSEQ.outputBinary());
|
||||
case 5 -> BiGpairSEQ.setOutputGraphML(!BiGpairSEQ.outputGraphML());
|
||||
case 6 -> algorithmOptions();
|
||||
case 0 -> backToMain = true;
|
||||
default -> System.out.println("Invalid input");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for printing menu items in mainOptions(). Returns a string based on the value of parameter.
|
||||
*
|
||||
* @param b - a boolean value
|
||||
* @return String "on" if b is true, "off" if b is false
|
||||
*/
|
||||
private static String getOnOff(boolean b) {
|
||||
if (b) { return "on";}
|
||||
else { return "off"; }
|
||||
}
|
||||
|
||||
private static void algorithmOptions(){
|
||||
boolean backToOptions = false;
|
||||
while(!backToOptions) {
|
||||
System.out.println("\n---------ALGORITHM OPTIONS----------");
|
||||
System.out.println("1) Use scaling algorithm by Duan and Su.");
|
||||
System.out.println("2) Use LEDA book algorithm with Fibonacci heap priority queue");
|
||||
System.out.println("3) Use LEDA book algorithm with pairing heap priority queue");
|
||||
System.out.println("0) Return to Options menu");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> System.out.println("This option is not yet implemented. Choose another.");
|
||||
case 2 -> {
|
||||
BiGpairSEQ.setFibonacciHeap();
|
||||
System.out.println("MWM algorithm set to LEDA with Fibonacci heap");
|
||||
backToOptions = true;
|
||||
}
|
||||
case 3 -> {
|
||||
BiGpairSEQ.setPairingHeap();
|
||||
System.out.println("MWM algorithm set to LEDA with pairing heap");
|
||||
backToOptions = true;
|
||||
}
|
||||
case 0 -> backToOptions = true;
|
||||
default -> System.out.println("Invalid input");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void acknowledge(){
|
||||
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||
System.out.println();
|
||||
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
||||
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
||||
System.out.println();
|
||||
System.out.println("For full documentation, view readme.md file distributed with this code");
|
||||
System.out.println("or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.");
|
||||
System.out.println();
|
||||
System.out.println("pairSEQ citation:");
|
||||
System.out.println("Howie, B., Sherwood, A. M., et. al.");
|
||||
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
|
||||
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
|
||||
System.out.println();
|
||||
System.out.println("BiGpairSEQ_Sim by Eugene Fischer, 2021-2022");
|
||||
}
|
||||
}
|
||||
3
src/main/java/META-INF/MANIFEST.MF
Normal file
3
src/main/java/META-INF/MANIFEST.MF
Normal file
@@ -0,0 +1,3 @@
|
||||
Manifest-Version: 1.0
|
||||
Main-Class: BiGpairSEQ
|
||||
|
||||
@@ -8,24 +8,30 @@ import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class MatchingFileWriter {
|
||||
|
||||
private String filename;
|
||||
private List<String> comments;
|
||||
private List<String> headers;
|
||||
private List<List<String>> results;
|
||||
private List<List<String>> allResults;
|
||||
|
||||
public MatchingFileWriter(String filename, List<String> comments, List<String> headers, List<List<String>> results){
|
||||
public MatchingFileWriter(String filename, MatchingResult result){
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
filename = filename + ".csv";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.comments = comments;
|
||||
this.headers = headers;
|
||||
this.results = results;
|
||||
this.comments = result.getComments();
|
||||
this.headers = result.getHeaders();
|
||||
this.allResults = result.getAllResults();
|
||||
}
|
||||
|
||||
public void writeErrorRateToTerminal(){
|
||||
for(String s: comments){
|
||||
if(s.matches("(Pairing error rate: )(\\d*.\\d+)")){
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
public void writeResultsToFile(){
|
||||
String[] headerStrings = new String[headers.size()];
|
||||
for(int i = 0; i < headers.size(); i++){
|
||||
@@ -41,8 +47,8 @@ public class MatchingFileWriter {
|
||||
for(String comment: comments){
|
||||
printer.printComment(comment);
|
||||
}
|
||||
results.add(0, headers);
|
||||
printer.printRecords(results);
|
||||
allResults.add(0, headers);
|
||||
printer.printRecords(allResults);
|
||||
|
||||
} catch(IOException ex){
|
||||
System.out.println("Could not make new file named "+filename);
|
||||
|
||||
@@ -1,16 +1,41 @@
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class MatchingResult {
|
||||
private List<String> comments;
|
||||
private List<String> headers;
|
||||
private List<List<String>> allResults;
|
||||
private Map<Integer, Integer> matchMap;
|
||||
private Duration time;
|
||||
|
||||
public MatchingResult(List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
||||
this.comments = comments;
|
||||
private final Map<String, String> metadata;
|
||||
private final List<String> comments;
|
||||
private final List<String> headers;
|
||||
private final List<List<String>> allResults;
|
||||
private final Map<Integer, Integer> matchMap;
|
||||
private final Duration time;
|
||||
|
||||
public MatchingResult(Map<String, String> metadata, List<String> headers,
|
||||
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
||||
/*
|
||||
* POSSIBLE KEYS FOR METADATA MAP ARE:
|
||||
* sample plate filename *
|
||||
* graph filename *
|
||||
* well populations *
|
||||
* total alphas found *
|
||||
* total betas found *
|
||||
* high overlap threshold *
|
||||
* low overlap threshold *
|
||||
* maximum occupancy difference *
|
||||
* minimum overlap percent *
|
||||
* pairing attempt rate *
|
||||
* correct pairing count *
|
||||
* incorrect pairing count *
|
||||
* pairing error rate *
|
||||
* simulation time (seconds)
|
||||
*/
|
||||
this.metadata = metadata;
|
||||
this.comments = new ArrayList<>();
|
||||
for (String key : metadata.keySet()) {
|
||||
comments.add(key +": " + metadata.get(key));
|
||||
}
|
||||
this.headers = headers;
|
||||
this.allResults = allResults;
|
||||
this.matchMap = matchMap;
|
||||
@@ -18,6 +43,8 @@ public class MatchingResult {
|
||||
|
||||
}
|
||||
|
||||
/**
 * @return the metadata map this result was constructed with (the same instance, not a copy)
 */
public Map<String, String> getMetadata() {
    return metadata;
}
|
||||
|
||||
public List<String> getComments() {
|
||||
return comments;
|
||||
}
|
||||
@@ -37,4 +64,49 @@ public class MatchingResult {
|
||||
public Duration getTime() {
|
||||
return time;
|
||||
}
|
||||
|
||||
public String getPlateFilename() {
|
||||
return metadata.get("sample plate filename");
|
||||
}
|
||||
|
||||
public String getGraphFilename() {
|
||||
return metadata.get("graph filename");
|
||||
}
|
||||
|
||||
public Integer[] getWellPopulations() {
|
||||
List<Integer> wellPopulations = new ArrayList<>();
|
||||
String popString = metadata.get("well populations");
|
||||
for (String p : popString.split(", ")) {
|
||||
wellPopulations.add(Integer.parseInt(p));
|
||||
}
|
||||
Integer[] popArray = new Integer[wellPopulations.size()];
|
||||
return wellPopulations.toArray(popArray);
|
||||
}
|
||||
|
||||
public Integer getAlphaCount() {
|
||||
return Integer.parseInt(metadata.get("total alpha count"));
|
||||
}
|
||||
|
||||
public Integer getBetaCount() {
|
||||
return Integer.parseInt(metadata.get("total beta count"));
|
||||
}
|
||||
|
||||
/**
 * @return the "high overlap threshold" metadata entry parsed as an integer
 * @throws NumberFormatException if the entry is missing or not an integer
 */
public Integer getHighOverlapThreshold() {
    String raw = metadata.get("high overlap threshold");
    return Integer.parseInt(raw);
}
|
||||
|
||||
/**
 * @return the "low overlap threshold" metadata entry parsed as an integer
 * @throws NumberFormatException if the entry is missing or not an integer
 */
public Integer getLowOverlapThreshold() {
    String raw = metadata.get("low overlap threshold");
    return Integer.parseInt(raw);
}
|
||||
|
||||
/**
 * @return the "maximum occupancy difference" metadata entry parsed as an integer
 * @throws NumberFormatException if the entry is missing or not an integer
 */
public Integer getMaxOccupancyDifference() {
    String raw = metadata.get("maximum occupancy difference");
    return Integer.parseInt(raw);
}
|
||||
|
||||
/**
 * @return the "minimum overlap percent" metadata entry parsed as an integer
 * @throws NumberFormatException if the entry is missing or not an integer
 */
public Integer getMinOverlapPercent() {
    String raw = metadata.get("minimum overlap percent");
    return Integer.parseInt(raw);
}
|
||||
|
||||
/**
 * @return the "pairing attempt rate" metadata entry parsed as a double
 * @throws NullPointerException if the entry is missing
 * @throws NumberFormatException if the entry is not a parsable number
 */
public Double getPairingAttemptRate() {
    String raw = metadata.get("pairing attempt rate");
    return Double.parseDouble(raw);
}
|
||||
|
||||
/**
 * @return the "correct pairing count" metadata entry parsed as an integer
 * @throws NumberFormatException if the entry is missing or not an integer
 */
public Integer getCorrectPairingCount() {
    String raw = metadata.get("correct pairing count");
    return Integer.parseInt(raw);
}
|
||||
|
||||
/**
 * @return the "incorrect pairing count" metadata entry parsed as an integer
 * @throws NumberFormatException if the entry is missing or not an integer
 */
public Integer getIncorrectPairingCount() {
    String raw = metadata.get("incorrect pairing count");
    return Integer.parseInt(raw);
}
|
||||
|
||||
/**
 * @return the "pairing error rate" metadata entry parsed as a double
 * @throws NullPointerException if the entry is missing
 * @throws NumberFormatException if the entry is not a parsable number
 */
public Double getPairingErrorRate() {
    String raw = metadata.get("pairing error rate");
    return Double.parseDouble(raw);
}
|
||||
|
||||
/**
 * @return the "simulation time (seconds)" metadata entry as stored (unparsed), or null if absent
 */
public String getSimulationTime() {
    final String key = "simulation time (seconds)";
    return metadata.get(key);
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,46 +1,89 @@
|
||||
|
||||
|
||||
/*
|
||||
TODO: Implement exponential distribution using inversion method - DONE
|
||||
TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
|
||||
//Need to write function to output plate to a file that I can read in.
|
||||
|
||||
public class Plate {
|
||||
private CellSample cells;
|
||||
private String sourceFile;
|
||||
private String filename;
|
||||
private List<List<Integer[]>> wells;
|
||||
private Random rand = new Random();
|
||||
private final Random rand = BiGpairSEQ.getRand();
|
||||
private int size;
|
||||
private double error;
|
||||
private Integer[] concentrations;
|
||||
private Integer[] populations;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
boolean exponential = false;
|
||||
|
||||
public Plate (int size, double error, Integer[] concentrations, double stdDev) {
|
||||
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
|
||||
double dropoutRate, double stdDev_or_lambda, boolean exponential){
|
||||
this.cells = cells;
|
||||
this.sourceFile = cellFilename;
|
||||
this.size = numWells;
|
||||
this.wells = new ArrayList<>();
|
||||
this.error = dropoutRate;
|
||||
this.populations = populations;
|
||||
this.exponential = exponential;
|
||||
if (this.exponential) {
|
||||
this.lambda = stdDev_or_lambda;
|
||||
fillWellsExponential(cells.getCells(), this.lambda);
|
||||
}
|
||||
else {
|
||||
this.stdDev = stdDev_or_lambda;
|
||||
fillWells(cells.getCells(), this.stdDev);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Plate(int size, double error, Integer[] populations) {
|
||||
this.size = size;
|
||||
this.error = error;
|
||||
this.concentrations = concentrations;
|
||||
this.stdDev = stdDev;
|
||||
this.populations = populations;
|
||||
wells = new ArrayList<>();
|
||||
}
|
||||
|
||||
public Plate(List<List<Integer[]>> wells){
|
||||
//constructor for returning a Plate from a PlateFileReader
|
||||
public Plate(String filename, List<List<Integer[]>> wells) {
|
||||
this.filename = filename;
|
||||
this.wells = wells;
|
||||
this.size = wells.size();
|
||||
|
||||
List<Integer> concentrations = new ArrayList<>();
|
||||
for (List<Integer[]> w: wells) {
|
||||
if(!concentrations.contains(w.size())){
|
||||
concentrations.add(w.size());
|
||||
}
|
||||
}
|
||||
this.populations = new Integer[concentrations.size()];
|
||||
for (int i = 0; i < this.populations.length; i++) {
|
||||
this.populations[i] = concentrations.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
public void fillWells(List<Integer[]> cells) {
|
||||
int numSections = concentrations.length;
|
||||
private void fillWellsExponential(List<Integer[]> cells, double lambda){
|
||||
this.lambda = lambda;
|
||||
exponential = true;
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
double m;
|
||||
int n;
|
||||
//testing
|
||||
//System.out.println("Cell size: " + cells.get(0).length);
|
||||
while (section < numSections){
|
||||
for (int i = 0; i < (size / numSections); i++) {
|
||||
List<Integer[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < concentrations[section]; j++) {
|
||||
for (int j = 0; j < populations[section]; j++) {
|
||||
do {
|
||||
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
|
||||
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
|
||||
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
|
||||
} while (m >= cells.size() || m < 0);
|
||||
n = (int) Math.floor(m);
|
||||
Integer[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce
|
||||
cellToAdd[k] = -1;
|
||||
}
|
||||
}
|
||||
@@ -52,13 +95,36 @@ public class Plate {
|
||||
}
|
||||
}
|
||||
|
||||
public void writePlateToFile(String filename) {
|
||||
|
||||
|
||||
private void fillWells( List<Integer[]> cells, double stdDev) {
|
||||
this.stdDev = stdDev;
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
double m;
|
||||
int n;
|
||||
while (section < numSections){
|
||||
for (int i = 0; i < (size / numSections); i++) {
|
||||
List<Integer[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < populations[section]; j++) {
|
||||
do {
|
||||
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
|
||||
} while (m >= cells.size() || m < 0);
|
||||
n = (int) Math.floor(m);
|
||||
Integer[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
|
||||
cellToAdd[k] = -1;
|
||||
}
|
||||
}
|
||||
well.add(cellToAdd);
|
||||
}
|
||||
wells.add(well);
|
||||
}
|
||||
section++;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer[] getConcentrations(){
|
||||
return concentrations;
|
||||
public Integer[] getPopulations(){
|
||||
return populations;
|
||||
}
|
||||
|
||||
public int getSize(){
|
||||
@@ -69,6 +135,10 @@ public class Plate {
|
||||
return stdDev;
|
||||
}
|
||||
|
||||
/**
 * @return the value of this plate's exponential flag
 */
public boolean isExponential() {
    return exponential;
}
|
||||
|
||||
/**
 * @return the lambda value stored on this plate
 */
public double getLambda() {
    return lambda;
}
|
||||
|
||||
public double getError() {
|
||||
return error;
|
||||
}
|
||||
@@ -77,142 +147,38 @@ public class Plate {
|
||||
return wells;
|
||||
}
|
||||
|
||||
|
||||
//returns a map of counts of all the CDR3s (alphas and betas) in all wells
|
||||
public Map<Integer, Integer>assayWellsCDR3(){
|
||||
return this.assayWellsCDR3(0, size);
|
||||
}
|
||||
//returns a map of counts of all the CDR3 alphas in all wells
|
||||
public Map<Integer, Integer> assayWellsCDR3Alpha() {
|
||||
return this.assayWellsCDR3Alpha(0, size);
|
||||
}
|
||||
//returns a map of counts of all the CDR3 betas in all wells
|
||||
public Map<Integer, Integer> assayWellsCDR3Beta() {
|
||||
return this.assayWellsCDR3Beta(0, size);
|
||||
}
|
||||
//returns a map of counts of all CDR1s (alphas and betas) in all wells
|
||||
public Map<Integer, Integer> assayWellsCDR1(){
|
||||
return this.assayWellsCDR1(0, size);
|
||||
}
|
||||
//returns a map of counts of all the CDR1 alphas in all wells
|
||||
public Map<Integer, Integer> assayWellsCDR1Alpha() {
|
||||
return this.assayWellsCDR1Alpha(0, size);
|
||||
}
|
||||
//returns a map of counts of all the CDR1 betas in all wells
|
||||
public Map<Integer, Integer> assayWellsCDR1Beta() {
|
||||
return this.assayWellsCDR1Beta(0, size);
|
||||
//returns a map of the counts of the sequence at cell index sIndex, in all wells
|
||||
public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
|
||||
return this.assayWellsSequenceS(0, size, sIndices);
|
||||
}
|
||||
|
||||
//returns a map of counts of the CDR3s (alphas and betas) in a specific well
|
||||
public Map<Integer, Integer>assayWellsCDR3(int n){
|
||||
return this.assayWellsCDR3(n, n+1);
|
||||
}
|
||||
//returns a map of counts of the CDR1s (alphas and betas) in a specific well
|
||||
public Map<Integer, Integer> assayWellsCDR1(int n){
|
||||
return this.assayWellsCDR1(n, n+1);
|
||||
}
|
||||
//returns a map of counts of the CDR3 alphas in a specific well
|
||||
public Map<Integer, Integer> assayWellsCDR3Alpha(int n) {
|
||||
return this.assayWellsCDR3Alpha(n, n+1);
|
||||
}
|
||||
//returns a map of counts of the CDR3 betas in a specific well
|
||||
public Map<Integer, Integer> assayWellsCDR3Beta(int n) {
|
||||
return this.assayWellsCDR3Beta(n, n+1);
|
||||
}
|
||||
//returns a map of counts of the CDR1 alphas in a specific well
|
||||
public Map<Integer, Integer> assayWellsCDR1Alpha(int n) {
|
||||
return this.assayWellsCDR1Alpha(n, n+1);
|
||||
}
|
||||
//returns a map of counts of the CDR1 betas in a specific well
|
||||
public Map<Integer, Integer> assayWellsCDR1Beta(int n) {
|
||||
return this.assayWellsCDR1Beta(n, n+1);
|
||||
}
|
||||
//returns a map of the counts of the sequence at cell index sIndex, in a specific well
|
||||
public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
|
||||
|
||||
|
||||
//returns a map of the counts of the CDR3s (alphas and betas) in a range of wells
|
||||
public Map<Integer, Integer>assayWellsCDR3(int start, int end){
|
||||
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
|
||||
public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
|
||||
Map<Integer,Integer> assay = new HashMap<>();
|
||||
for(int pIndex: sIndices){
|
||||
for(int i = start; i < end; i++){
|
||||
countCDR3Alphas(assay, wells.get(i));
|
||||
countCDR3Betas(assay,wells.get(i));
|
||||
countSequences(assay, wells.get(i), pIndex);
|
||||
}
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
//returns a map of the counts of the CDR1s (alphas and betas) in a range of wells
|
||||
public Map<Integer, Integer>assayWellsCDR1(int start, int end){
|
||||
Map<Integer,Integer> assay = new HashMap<>();
|
||||
for(int i = start; i < end; i++){
|
||||
countCDR1Alphas(assay, wells.get(i));
|
||||
countCDR1Betas(assay,wells.get(i));
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
//returns a map of the counts of the CDR3 alphas in a range of wells
|
||||
public Map<Integer, Integer> assayWellsCDR3Alpha(int start, int end) {
|
||||
Map<Integer, Integer> assay = new HashMap<>();
|
||||
for(int i = start; i < end; i++){
|
||||
countCDR3Alphas(assay, wells.get(i));
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
//returns a map of the counts of the CDR3 betas in a range of wells
|
||||
public Map<Integer, Integer> assayWellsCDR3Beta(int start, int end) {
|
||||
Map<Integer, Integer> assay = new HashMap<>();
|
||||
for(int i = start; i < end; i++){
|
||||
countCDR3Betas(assay, wells.get(i));
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
//returns a map of the counts of the CDR1 alphas in a range of wells
|
||||
public Map<Integer, Integer> assayWellsCDR1Alpha(int start, int end) {
|
||||
Map<Integer, Integer> assay = new HashMap<>();
|
||||
for(int i = start; i < end; i++){
|
||||
countCDR1Alphas(assay, wells.get(i));
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
//returns a map of the counts of the CDR1 betas in a range of wells
|
||||
public Map<Integer, Integer> assayWellsCDR1Beta(int start, int end) {
|
||||
Map<Integer, Integer> assay = new HashMap<>();
|
||||
for(int i = start; i < end; i++){
|
||||
countCDR1Betas(assay, wells.get(i));
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
|
||||
|
||||
//given a map, counts distinct CDR3 alphas in a well
|
||||
private void countCDR3Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
|
||||
//For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
|
||||
private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
|
||||
for(Integer[] cell : well) {
|
||||
if(cell[0] != -1){
|
||||
//keys are alphas, value is how many of them have been assayed
|
||||
wellMap.merge(cell[0], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
for(int sIndex: sIndices){
|
||||
if(cell[sIndex] != -1){
|
||||
wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
//given a map, counts distinct CDR3 betas in a well
|
||||
private void countCDR3Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
|
||||
for(Integer[] cell : well) {
|
||||
if(cell[1] != -1){
|
||||
wellMap.merge(cell[1], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
//given a map, counts distinct CDR1 alphas in a well
|
||||
private void countCDR1Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
|
||||
for(Integer[] cell: well){
|
||||
if(cell[2] != -1){
|
||||
wellMap.merge(cell[2], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
//given a map, counts distinct CDR1 betas in a well
|
||||
private void countCDR1Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
|
||||
for(Integer[] cell: well){
|
||||
if(cell[3] != -1){
|
||||
wellMap.merge(cell[3], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getSourceFileName() {
|
||||
return sourceFile;
|
||||
}
|
||||
|
||||
/**
 * @return the filename stored on this plate
 */
public String getFilename() {
    return filename;
}
|
||||
}
|
||||
|
||||
@@ -14,12 +14,14 @@ import java.util.regex.Pattern;
|
||||
public class PlateFileReader {
|
||||
|
||||
private List<List<Integer[]>> wells = new ArrayList<>();
|
||||
private String filename;
|
||||
|
||||
public PlateFileReader(String filename){
|
||||
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
filename = filename + ".csv";
|
||||
}
|
||||
this.filename = filename;
|
||||
|
||||
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
||||
.setCommentMarker('#')
|
||||
@@ -54,8 +56,8 @@ public class PlateFileReader {
|
||||
|
||||
}
|
||||
|
||||
public List<List<Integer[]>> getWells() {
|
||||
return wells;
|
||||
public Plate getSamplePlate() {
|
||||
return new Plate(filename, wells);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -7,35 +7,39 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class PlateFileWriter {
|
||||
private int size;
|
||||
private List<List<Integer[]>> wells;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
private Double error;
|
||||
private String filename;
|
||||
private String[] headers;
|
||||
private List<Integer> concentrations;
|
||||
private String sourceFileName;
|
||||
private Integer[] populations;
|
||||
private boolean isExponential = false;
|
||||
|
||||
public PlateFileWriter(String filename, Plate plate) {
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
filename = filename + ".csv";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.sourceFileName = plate.getSourceFileName();
|
||||
this.size = plate.getSize();
|
||||
this.isExponential = plate.isExponential();
|
||||
if(isExponential) {
|
||||
this.lambda = plate.getLambda();
|
||||
}
|
||||
else{
|
||||
this.stdDev = plate.getStdDev();
|
||||
}
|
||||
this.error = plate.getError();
|
||||
this.wells = plate.getWells();
|
||||
this.concentrations = Arrays.asList(plate.getConcentrations());
|
||||
concentrations.sort(Comparator.reverseOrder());
|
||||
this.populations = plate.getPopulations();
|
||||
Arrays.sort(populations);
|
||||
}
|
||||
|
||||
public void writePlateFile(){
|
||||
//works as is, but too many columns in csv, need to make them all rows.
|
||||
|
||||
//will now redo it so that every column is a well, with well names as headers
|
||||
//need to give plate error, sample pop size, stdDev, num sections, concentration per section as comments
|
||||
Comparator<List<Integer[]>> listLengthDescending = Comparator.comparingInt(List::size);
|
||||
wells.sort(listLengthDescending.reversed());
|
||||
int maxLength = wells.get(0).size();
|
||||
@@ -53,37 +57,50 @@ public class PlateFileWriter {
|
||||
}
|
||||
}
|
||||
|
||||
//this took forever
|
||||
List<List<String>> rows = new ArrayList<>();
|
||||
List<String> tmp = new ArrayList<>();
|
||||
for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
|
||||
tmp.add("well " + (i+1));
|
||||
}
|
||||
rows.add(tmp);
|
||||
for(int row = 0; row < maxLength; row++){
|
||||
tmp = new ArrayList<>();
|
||||
for(List<String> c: wellsAsStrings){
|
||||
tmp.add(c.get(row));
|
||||
}
|
||||
rows.add(tmp);
|
||||
}
|
||||
StringBuilder concen = new StringBuilder();
|
||||
for(Integer i: concentrations){
|
||||
concen.append(i.toString());
|
||||
concen.append(" ");
|
||||
}
|
||||
String concenString = concen.toString();
|
||||
// //this took forever and I don't use it
|
||||
// //if I wanted to use it, I'd replace printer.printRecords(wellsAsStrings) with printer.printRecords(rows)
|
||||
// List<List<String>> rows = new ArrayList<>();
|
||||
// List<String> tmp = new ArrayList<>();
|
||||
// for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
|
||||
// tmp.add("well " + (i+1));
|
||||
// }
|
||||
// rows.add(tmp);
|
||||
// for(int row = 0; row < maxLength; row++){
|
||||
// tmp = new ArrayList<>();
|
||||
// for(List<String> c: wellsAsStrings){
|
||||
// tmp.add(c.get(row));
|
||||
// }
|
||||
// rows.add(tmp);
|
||||
// }
|
||||
|
||||
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build();
|
||||
//make string out of populations array
|
||||
StringBuilder populationsStringBuilder = new StringBuilder();
|
||||
populationsStringBuilder.append(populations[0].toString());
|
||||
for(int i = 1; i < populations.length; i++){
|
||||
populationsStringBuilder.append(", ");
|
||||
populationsStringBuilder.append(populations[i].toString());
|
||||
}
|
||||
String wellPopulationsString = populationsStringBuilder.toString();
|
||||
|
||||
//set CSV format
|
||||
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
||||
.setCommentMarker('#')
|
||||
.build();
|
||||
|
||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
|
||||
){
|
||||
printer.printComment("Cell source file name: " + sourceFileName);
|
||||
printer.printComment("Each row represents one well on the plate.");
|
||||
printer.printComment("Plate size: " + size);
|
||||
printer.printComment("Error rate: " + error);
|
||||
printer.printComment("Concentrations: " + concenString);
|
||||
printer.printComment("Well populations: " + wellPopulationsString);
|
||||
if(isExponential){
|
||||
printer.printComment("Lambda: " + lambda);
|
||||
}
|
||||
else {
|
||||
printer.printComment("Std. dev.: " + stdDev);
|
||||
}
|
||||
printer.printRecords(wellsAsStrings);
|
||||
} catch(IOException ex){
|
||||
System.out.println("Could not make new file named "+filename);
|
||||
|
||||
8
src/main/java/SequenceType.java
Normal file
8
src/main/java/SequenceType.java
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Tags the kind of sequence a value represents.
 *
 * <p>The constants are declared in the order the sequences appear in a cell
 * array, so {@link #ordinal()} of a constant is that sequence's index in the
 * array. Do not reorder them.
 */
public enum SequenceType {
    /** CDR3 alpha sequence (cell array index 0). */
    CDR3_ALPHA,
    /** CDR3 beta sequence (cell array index 1). */
    CDR3_BETA,
    /** CDR1 alpha sequence (cell array index 2). */
    CDR1_ALPHA,
    /** CDR1 beta sequence (cell array index 3). */
    CDR1_BETA
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,279 +0,0 @@
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
import java.util.InputMismatchException;
|
||||
|
||||
//
|
||||
public class UserInterface {
|
||||
|
||||
final static Scanner sc = new Scanner(System.in);
|
||||
static int input;
|
||||
static boolean quit = false;
|
||||
|
||||
public static void main(String args[]) {
|
||||
while(!quit) {
|
||||
System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
|
||||
System.out.println("Please select an option:");
|
||||
System.out.println("1) Generate a population of distinct cells");
|
||||
System.out.println("2) Generate a sample plate of T cells");
|
||||
System.out.println("3) Simulate CDR3 alpha/beta T cell matching");
|
||||
System.out.println("4) Simulate CDR3/CDR1 T cell matching");
|
||||
System.out.println("5) Acknowledgements");
|
||||
System.out.println("0) Exit");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch(input){
|
||||
case 1 -> makeCells();
|
||||
case 2 -> makePlate();
|
||||
case 3 -> matchCells();
|
||||
case 4 -> matchCellsExpanded();
|
||||
case 5 -> acknowledge();
|
||||
case 0 -> quit = true;
|
||||
default -> throw new InputMismatchException("Invalid input.");
|
||||
}
|
||||
}catch(InputMismatchException ex){
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
sc.close();
|
||||
}
|
||||
|
||||
private static void makeCells() {
|
||||
String filename = null;
|
||||
Integer numCells = 0;
|
||||
try {
|
||||
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
|
||||
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
|
||||
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
|
||||
System.out.println("\nThe cells will be written to a file.");
|
||||
System.out.print("Please enter a file name: ");
|
||||
filename = sc.next();
|
||||
System.out.print("Please enter the number of T-cells to generate: ");
|
||||
numCells = sc.nextInt();
|
||||
if(numCells <= 0){
|
||||
throw new InputMismatchException("Number of cells must be a positive integer.");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellSample sample = Simulator.generateExpandedCellSample(numCells);
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
}
|
||||
|
||||
//method to output a CSV of
|
||||
private static void makePlate() {
|
||||
String cellFile = null;
|
||||
String filename = null;
|
||||
Double stdDev = 0.0;
|
||||
Integer numWells = 0;
|
||||
Integer numSections = 0;
|
||||
Integer[] concentrations = {1};
|
||||
Double dropOutRate = 0.0;
|
||||
boolean poisson = false;
|
||||
try {
|
||||
System.out.println("\nMaking a sample plate requires a population of distinct cells");
|
||||
System.out.println("Please enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.println("\nThe sample plate will be written to file");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("Select T-cell frequency distribution function");
|
||||
System.out.println("1) Poisson");
|
||||
System.out.println("2) Gaussian");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
System.out.print("Enter selection value: ");
|
||||
input = sc.nextInt();
|
||||
switch(input) {
|
||||
case 1:
|
||||
poisson = true;
|
||||
break;
|
||||
case 2:
|
||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
stdDev = sc.nextDouble();
|
||||
if(stdDev <= 0.0){
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
System.out.println("Invalid input. Defaulting to Poisson.");
|
||||
poisson = true;
|
||||
}
|
||||
System.out.print("Number of wells on plate: ");
|
||||
numWells = sc.nextInt();
|
||||
if(numWells < 1){
|
||||
throw new InputMismatchException("No wells on plate");
|
||||
}
|
||||
System.out.println("The plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
|
||||
System.out.println("How many sections would you like to make (minimum 1)?");
|
||||
numSections = sc.nextInt();
|
||||
if(numSections < 1) {
|
||||
throw new InputMismatchException("Too few sections.");
|
||||
}
|
||||
else if (numSections > numWells) {
|
||||
throw new InputMismatchException("Cannot have more sections than wells.");
|
||||
}
|
||||
int i = 1;
|
||||
concentrations = new Integer[numSections];
|
||||
while(numSections > 0) {
|
||||
System.out.print("Enter number of T-cells per well in section " + i +": ");
|
||||
concentrations[i - 1] = sc.nextInt();
|
||||
i++;
|
||||
numSections--;
|
||||
}
|
||||
System.out.println("Errors in amplification can induce a well dropout rate for peptides");
|
||||
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
|
||||
dropOutRate = sc.nextDouble();
|
||||
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
|
||||
throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]");
|
||||
}
|
||||
}catch(InputMismatchException ex){
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
if(poisson) {
|
||||
stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
}
|
||||
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
|
||||
samplePlate.fillWells(cellReader.getCells());
|
||||
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||
writer.writePlateFile();
|
||||
}
|
||||
|
||||
private static void matchCells() {
|
||||
String filename = null;
|
||||
String cellFile = null;
|
||||
String plateFile = null;
|
||||
Integer lowThreshold = 0;
|
||||
Integer highThreshold = Integer.MAX_VALUE;
|
||||
try {
|
||||
System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||
System.out.print("Please enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.print("Please enter name of an existing sample plate file: ");
|
||||
plateFile = sc.next();
|
||||
System.out.println("The matching results will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("What is the minimum number of alpha/beta overlap wells to attempt matching?");
|
||||
lowThreshold = sc.nextInt();
|
||||
if(lowThreshold < 1){
|
||||
throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
}
|
||||
System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
|
||||
highThreshold = sc.nextInt();
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
Plate plate = new Plate(plateReader.getWells());
|
||||
if (cellReader.getCells().size() == 0){
|
||||
System.out.println("No cell sample found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else if(plate.getWells().size() == 0){
|
||||
System.out.println("No sample plate found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
|
||||
}
|
||||
else{
|
||||
if(highThreshold >= plate.getSize()){
|
||||
highThreshold = plate.getSize() - 1;
|
||||
}
|
||||
List<Integer[]> cells = cellReader.getCells();
|
||||
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold);
|
||||
//result writer
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename, results.getComments(),
|
||||
results.getHeaders(), results.getAllResults());
|
||||
writer.writeResultsToFile();
|
||||
}
|
||||
}
|
||||
|
||||
public static void matchCellsExpanded(){
|
||||
/*
|
||||
The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
|
||||
looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
|
||||
match
|
||||
*/
|
||||
String filename = null;
|
||||
String cellFile = null;
|
||||
String plateFile = null;
|
||||
Integer lowThresholdCDR3 = 0;
|
||||
Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
||||
Integer lowThresholdCDR1 = 0;
|
||||
Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
||||
try {
|
||||
System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||
System.out.print("Please enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.print("Please enter name of an existing sample plate file: ");
|
||||
plateFile = sc.next();
|
||||
System.out.println("The matching results will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
lowThresholdCDR3 = sc.nextInt();
|
||||
if(lowThresholdCDR3 < 1){
|
||||
throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
}
|
||||
System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
highThresholdCDR3 = sc.nextInt();
|
||||
System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
lowThresholdCDR1 = sc.nextInt();
|
||||
if(lowThresholdCDR1 < 1){
|
||||
throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
}
|
||||
System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
highThresholdCDR1 = sc.nextInt();
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
Plate plate = new Plate(plateReader.getWells());
|
||||
if (cellReader.getCells().size() == 0){
|
||||
System.out.println("No cell sample found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else if(plate.getWells().size() == 0){
|
||||
System.out.println("No sample plate found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
|
||||
}
|
||||
else{
|
||||
if(highThresholdCDR3 >= plate.getSize()){
|
||||
highThresholdCDR3 = plate.getSize() - 1;
|
||||
}
|
||||
if(highThresholdCDR1 >= plate.getSize()){
|
||||
highThresholdCDR1 = plate.getSize() - 1;
|
||||
}
|
||||
List<Integer[]> cells = cellReader.getCells();
|
||||
MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3);
|
||||
MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
||||
highThresholdCDR1, preliminaryResults.getMatchMap(), preliminaryResults.getTime());
|
||||
|
||||
//result writer
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename + "First", results[0].getComments(),
|
||||
results[0].getHeaders(), results[0].getAllResults());
|
||||
writer.writeResultsToFile();
|
||||
writer = new MatchingFileWriter(filename + "Dual", results[1].getComments(),
|
||||
results[1].getHeaders(), results[1].getAllResults());
|
||||
writer.writeResultsToFile();
|
||||
}
|
||||
}
|
||||
|
||||
private static void acknowledge(){
|
||||
System.out.println("Simulation based on:");
|
||||
System.out.println("Howie, B., Sherwood, A. M., et. al.");
|
||||
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
|
||||
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
|
||||
System.out.println("");
|
||||
System.out.println("Simulation by Eugene Fischer, 2021");
|
||||
}
|
||||
}
|
||||
92
src/main/java/Vertex.java
Normal file
92
src/main/java/Vertex.java
Normal file
@@ -0,0 +1,92 @@
|
||||
import java.io.Serializable;
|
||||
|
||||
public class Vertex implements Serializable {
|
||||
private SequenceType type;
|
||||
private Integer vertexLabel;
|
||||
private Integer sequence;
|
||||
private Integer occupancy;
|
||||
|
||||
public Vertex(Integer vertexLabel) {
|
||||
this.vertexLabel = vertexLabel;
|
||||
}
|
||||
public Vertex(String vertexLabel) {
|
||||
this.vertexLabel = Integer.parseInt((vertexLabel));
|
||||
}
|
||||
|
||||
public Vertex(SequenceType type, Integer sequence, Integer occupancy, Integer vertexLabel) {
|
||||
this.type = type;
|
||||
this.vertexLabel = vertexLabel;
|
||||
this.sequence = sequence;
|
||||
this.occupancy = occupancy;
|
||||
}
|
||||
|
||||
|
||||
public SequenceType getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = SequenceType.valueOf(type);
|
||||
}
|
||||
|
||||
public Integer getVertexLabel() {
|
||||
return vertexLabel;
|
||||
}
|
||||
|
||||
public void setVertexLabel(String label) {
|
||||
this.vertexLabel = Integer.parseInt(label);
|
||||
}
|
||||
|
||||
public Integer getSequence() {
|
||||
|
||||
return sequence;
|
||||
}
|
||||
|
||||
public void setSequence(String sequence) {
|
||||
this.sequence = Integer.parseInt(sequence);
|
||||
}
|
||||
|
||||
public Integer getOccupancy() {
|
||||
return occupancy;
|
||||
}
|
||||
|
||||
public void setOccupancy(String occupancy) {
|
||||
this.occupancy = Integer.parseInt(occupancy);
|
||||
}
|
||||
|
||||
@Override //adapted from JGraphT example code
|
||||
public int hashCode()
|
||||
{
|
||||
return (sequence == null) ? 0 : sequence.hashCode();
|
||||
}
|
||||
|
||||
@Override //adapted from JGraphT example code
|
||||
public boolean equals(Object obj)
|
||||
{
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
Vertex other = (Vertex) obj;
|
||||
if (sequence == null) {
|
||||
return other.sequence == null;
|
||||
} else {
|
||||
return sequence.equals(other.sequence);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override //adapted from JGraphT example code
|
||||
public String toString()
|
||||
{
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("(").append(vertexLabel)
|
||||
.append(", Type: ").append(type.name())
|
||||
.append(", Sequence: ").append(sequence)
|
||||
.append(", Occupancy: ").append(occupancy).append(")");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user