139 Commits

Author SHA1 Message Date
6f5afbc6ec Update readme with CLI arguments 2022-02-27 17:01:12 -06:00
fb4d22e7a4 Update readme with CLI arguments 2022-02-27 17:00:54 -06:00
e10350c214 Update readme with CLI arguments 2022-02-27 16:56:58 -06:00
b1155f8100 Format -help CLI option 2022-02-27 16:53:46 -06:00
12b003a69f Add -help CLI option 2022-02-27 16:45:30 -06:00
32c5bcaaff Deactivate file I/O announcement for CLI 2022-02-27 16:16:24 -06:00
2485ac4cf6 Add getters to MatchingResult 2022-02-27 16:15:26 -06:00
05556bce0c Add units to metadata 2022-02-27 16:08:59 -06:00
a822f69ea4 Control verbose output 2022-02-27 16:07:17 -06:00
3d1f8668ee Control verbose output 2022-02-27 16:03:57 -06:00
40c743308b Initialize wells 2022-02-27 15:54:47 -06:00
5246cc4a0c Re-implement command line options 2022-02-27 15:35:07 -06:00
a5f7c0641d Refactor for better encapsulation with CellSamples 2022-02-27 14:51:53 -06:00
8ebfc1469f Refactor plate to fill its own wells in its constructor 2022-02-27 14:25:53 -06:00
b53f5f1cc0 Refactor plate to fill its own wells in its constructor 2022-02-27 14:17:16 -06:00
974d2d650c Refactor plate to fill its own wells in its constructor 2022-02-27 14:17:11 -06:00
6b5837e6ce Add Vose's alias method to to-dos 2022-02-27 11:46:11 -06:00
b4cc240048 Update Readme 2022-02-26 11:03:31 -06:00
ff72c9b359 Update Readme 2022-02-26 11:02:23 -06:00
88eb8aca50 Update Readme 2022-02-26 11:01:44 -06:00
98bf452891 Update Readme 2022-02-26 11:01:20 -06:00
c2db4f87c1 Update Readme 2022-02-26 11:00:18 -06:00
8935407ade Get rid of GraphML reader, those files are larger than serialized files 2022-02-26 10:38:10 -06:00
9fcc20343d Fix GraphML writer 2022-02-26 10:36:00 -06:00
e4d094d796 Adding GraphML output to options menu 2022-02-24 17:22:07 -06:00
f385ebc31f Update vertex class 2022-02-24 16:25:01 -06:00
8745550e11 add MWM algorithm type to matching metadata 2022-02-24 16:24:48 -06:00
41805135b3 remove unused import 2022-02-24 16:04:30 -06:00
373a5e02f9 Refactor to make CellSample class more self-contained 2022-02-24 16:03:49 -06:00
7f18311054 fix typos 2022-02-24 15:55:32 -06:00
bcb816c3e6 Reformat TODO 2022-02-24 15:48:10 -06:00
dad0fd35fd Update readme to reflect wells with random population implemented 2022-02-24 15:47:08 -06:00
35d580cfcf Update readme to reflect wells with random population implemented 2022-02-24 15:45:03 -06:00
ab8d98ed81 Update readme to reflect new default caching behavior. 2022-02-24 15:39:15 -06:00
3d9890e16a Change GraphModificationFunctions to only save edges if graph data is cached 2022-02-24 15:32:27 -06:00
dd64ac2731 Change GraphModificationFunctions to interface 2022-02-24 15:18:09 -06:00
a5238624f1 Change default graph caching behavior to false 2022-02-24 15:14:28 -06:00
d8ba42b801 Fix Algorithm Options menu output 2022-02-24 14:59:08 -06:00
8edd89d784 Added heap type selection, fixed error handling 2022-02-24 14:48:19 -06:00
2829b88689 Update readme to reflect caching changes 2022-02-24 12:47:26 -06:00
108b0ec13f Improve options menu wording 2022-02-24 12:42:09 -06:00
a8b58d3f79 Output new setting when changing options 2022-02-24 12:38:15 -06:00
bf64d57731 implement option menu for file caching 2022-02-24 12:30:47 -06:00
c068c3db3c implement option menu for file caching 2022-02-23 20:35:31 -06:00
4bcda9b66c update readme 2022-02-23 13:22:04 -06:00
17ae763c6c Generate populations correctly 2022-02-23 10:37:40 -06:00
decdb147a9 Cache everything 2022-02-23 10:30:42 -06:00
74ffbfd8ac make everything use same random number generator 2022-02-23 09:29:21 -06:00
08699ce8ce Change output order to match interactive UI 2022-02-23 08:56:09 -06:00
69b0cc535c Error checking 2022-02-23 08:55:07 -06:00
e58f7b0a55 checking for possible divide by zero error. 2022-02-23 08:54:14 -06:00
dd2164c250 implement sample plates with random well populations 2022-02-23 08:14:17 -06:00
7323093bdc change "getRandomNumber" to "getRandomInt" for consistency. 2022-02-23 08:13:52 -06:00
f904cf6672 add more data caching code 2022-02-23 08:13:06 -06:00
3ccee9891b change "concentrations" to "populations" for consistency 2022-02-23 08:12:48 -06:00
40c2be1cfb create populations string correctly 2022-02-23 08:11:01 -06:00
4b597c4e5e remove old testing code 2022-02-23 08:10:35 -06:00
b2398531a3 Update readme 2022-02-23 05:11:36 +00:00
8e9a250890 Cache graph data on creation 2022-02-22 22:23:55 -06:00
e2a996c997 update readme 2022-02-22 22:23:40 -06:00
a5db89cb0b update readme 2022-02-22 22:13:01 -06:00
1630f9ccba Moved I/O alert to file reader 2022-02-22 22:11:50 -06:00
d785aa0da2 Moved I/O alert to file reader 2022-02-22 22:10:31 -06:00
a7afeb6119 bugfixes 2022-02-22 22:10:09 -06:00
f8167b0774 Add .jar manifest to repo 2022-02-22 21:45:46 -06:00
68ee9e4bb6 Implemented storing graphs in memory for multiple pairing experiments 2022-02-22 21:30:00 -06:00
fd2ec76b71 Realized how to store graph in memory 2022-02-22 19:42:35 -06:00
875f457a2d reimplement CLI (in progress) 2022-02-22 19:42:23 -06:00
906c06062f Added metadata to MatchingResult to enable CLI options 2022-02-22 18:36:30 -06:00
90ae2ff474 Re-implemeting CLI options (in progress) 2022-02-22 17:37:00 -06:00
7d983076f3 Add link to releases page for download 2022-02-22 16:34:24 -06:00
4b053e6ec4 Remove artifacts from tracking to stop repo bloat. 2022-02-22 16:14:50 -06:00
44784b7976 Remove artifacts from tracking to stop repo bloat. 2022-02-22 16:10:22 -06:00
7c19896dc9 update readme 2022-02-22 16:09:50 -06:00
aec7e3016f Typos in documentation 2022-02-21 11:19:54 -06:00
5c75c1ac09 Update readme.md 2022-02-21 06:53:30 +00:00
cb1f7adece Change "peptide" references in code to "sequence", adding comments 2022-02-21 00:29:34 -06:00
370de79546 Add performance section to readme 2022-02-21 00:02:49 -06:00
a803336f56 Add performance section to readme 2022-02-21 00:01:20 -06:00
94b54b3416 Add performance section to readme 2022-02-20 23:31:25 -06:00
601e141fd0 Update readme 2022-02-20 22:51:49 -06:00
8f9c6b7d33 Update readme TODO 2022-02-20 20:59:05 -06:00
e5ddc73723 Finish reverting back to wells-as-rows 2022-02-20 20:54:44 -06:00
9b18fac74f Invoke garbage collection 2022-02-20 20:47:12 -06:00
63ef6aa7a0 Revert attempt to switch plate output format. It worked, but introduced a bug in graph filtering I don't want to chase down 2022-02-20 20:45:35 -06:00
7558455f39 Correct errors in output and documentation 2022-02-20 20:13:38 -06:00
410f0ae547 Remove testing code, add garbage collection calls 2022-02-20 20:06:45 -06:00
1bc6a11545 Change plate reader/writer to use columns as wells 2022-02-20 19:58:24 -06:00
2b13e10e95 Change plate reader/writer to use columns as wells 2022-02-20 19:48:09 -06:00
4fd5baeb98 Change plate reader/writer to use columns as wells 2022-02-20 19:41:06 -06:00
b2a4e9a42b Change plate reader/writer to use columns as wells 2022-02-20 19:17:56 -06:00
d1bb49b482 Change plate reader/writer to use columns as wells 2022-02-20 19:12:11 -06:00
9adb7dffb8 Change plate reader/writer to use columns as wells 2022-02-20 19:08:04 -06:00
2023bb9d7e Cleanup file output, add UI verbosity 2022-02-20 18:31:31 -06:00
405fbf17ff improve documentation 2022-02-20 17:11:39 -06:00
24519f4a52 improve documentation 2022-02-20 17:04:25 -06:00
2afd01eeef improve documentation 2022-02-20 15:48:11 -06:00
10d0b711bf improve documentation 2022-02-20 15:38:40 -06:00
8f98baf44e improve documentation 2022-02-20 15:37:39 -06:00
d6c7c40c96 improve documentation 2022-02-20 13:23:15 -06:00
61c14b2ecf improve documentation 2022-02-20 13:20:47 -06:00
22fc4aedfe improve documentation 2022-02-20 13:18:49 -06:00
5d24dc6f70 improve documentation 2022-02-20 13:15:32 -06:00
2c01a0211c move readme 2022-02-20 12:02:27 -06:00
f2b5d9e1b7 Rename and update readme 2022-02-20 11:58:12 -06:00
74c8cafd81 scan for filename 2022-02-20 03:08:31 -06:00
d1c37b5ccd Relocate overlap threshold filters 2022-02-20 03:05:56 -06:00
cb2c5a6024 Add plate well concentrations to output data 2022-02-20 02:29:42 -06:00
284a5b3a40 Add plate well concentrations to output data 2022-02-20 02:23:31 -06:00
52afb1edc2 Add plate well concentrations to output data 2022-02-20 02:17:36 -06:00
9c52bc878a Add plate well concentrations to output data 2022-02-20 02:13:13 -06:00
248fe4d662 Add plate well concentrations to output data 2022-02-20 02:09:22 -06:00
5d0e60708c Add plate well concentrations to output data 2022-02-20 01:53:34 -06:00
c96b7237e9 Add plate well concentrations to output data 2022-02-20 01:40:01 -06:00
0b28259800 Add plate well concentrations to output data 2022-02-20 01:13:22 -06:00
837ef7bfe4 UI cleanup, some code cleanup 2022-02-20 01:05:28 -06:00
0bebbc7602 Add missing filtering code 2022-02-19 22:56:38 -06:00
84f7ddb696 Fix interactive output 2022-02-19 22:49:50 -06:00
c4633da9eb Correct propogation of peptide counts 2022-02-19 22:33:38 -06:00
5b2ed165d0 Clean up interactive text, bugfix 2022-02-19 22:21:09 -06:00
0026d8cdfe Use buffered input/output streams 2022-02-19 22:04:41 -06:00
13fb7168bf Refactor to read/write files of graph and map data 2022-02-19 21:46:01 -06:00
568a6be3c7 Refactoring to allow graphs from file 2022-02-19 17:23:55 -06:00
cfa473c7ce Adding parameters to filter by occupancy difference and percent overlap 2022-02-19 14:06:11 -06:00
6faacd9a82 Adding parameters to filter by occupancy difference and percent overlap 2022-02-19 14:05:26 -06:00
ce88e170c1 Update readme with max memory flag 2022-02-18 17:48:25 -06:00
47e23addfa Do new filtering before matching 2022-02-18 17:42:05 -06:00
b9ee31b64c Do new filtering before matching 2022-02-18 17:28:24 -06:00
bf32a55e4b Implement matching using jheaps PairingHeap 2022-02-18 16:09:23 -06:00
acff88475b Command line arguments working, need better documentation and error handling 2021-11-23 12:24:48 -06:00
32593308df Command line arguments for CDR3 matching implemented 2021-11-20 10:43:57 -06:00
981e24011d First attempt at command line arguments 2021-11-19 17:31:18 -06:00
3d0a843cea Adding command line argument parsing library 2021-11-18 16:19:11 -06:00
c09ef27822 Adding command line argument parsing library 2021-11-18 16:15:05 -06:00
2ab93dd4b7 Recording source file names in output files, allowing output of intermediate results 2021-11-18 15:38:29 -06:00
09aa5961f3 New executable build after code refactor. 2021-11-18 14:16:42 -06:00
34e96d3b3d Refactor to reduce code repetition 2021-11-18 14:11:04 -06:00
2064d7e9fc Implemented parameter for CDR1 frequency 2021-11-18 11:55:54 -06:00
4157cfb556 Usage instructions for executable .jar 2021-11-16 21:27:54 -06:00
29 changed files with 2845 additions and 1280 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/out/

View File

@@ -1,15 +1,16 @@
<component name="ArtifactManager"> <component name="ArtifactManager">
<artifact type="jar" name="TCellSim:jar"> <artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path> <output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
<root id="archive" name="TCellSim.jar"> <root id="archive" name="BiGpairSEQ_Sim.jar">
<element id="directory" name="META-INF"> <element id="directory" name="META-INF">
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" /> <element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
</element> </element>
<element id="module-output" name="TCellSim" /> <element id="module-output" name="BigPairSEQ" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" /> <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" /> <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
</root> </root>
</artifact> </artifact>
</component> </component>

2
.idea/compiler.xml generated
View File

@@ -6,7 +6,7 @@
<sourceOutputDir name="target/generated-sources/annotations" /> <sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" /> <outputRelativeToContentRoot value="true" />
<module name="TCellSim" /> <module name="BigPairSEQ" />
</profile> </profile>
</annotationProcessing> </annotationProcessing>
</component> </component>

10
.idea/libraries/commons_cli.xml generated Normal file
View File

@@ -0,0 +1,10 @@
<component name="libraryTable">
<library name="commons.cli" type="repository">
<properties maven-id="commons-cli:commons-cli:1.5.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

15
.idea/libraries/jgrapht_io.xml generated Normal file
View File

@@ -0,0 +1,15 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

10
.idea/libraries/jheaps.xml generated Normal file
View File

@@ -0,0 +1,10 @@
<component name="libraryTable">
<library name="jheaps" type="repository">
<properties maven-id="org.jheaps:jheaps:0.14" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

324
readme.md Normal file
View File

@@ -0,0 +1,324 @@
# BiGpairSEQ SIMULATOR
## ABOUT
This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
## THEORY
Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
against a null distribution, BiGpairSEQ does not do any statistical calculations
directly.
BiGpairSEQ creates a [weighted bipartite graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that shares a well
is connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
This is a well-studied combinatorial optimization problem, with many known solutions.
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Mehlhorn and Näher (1999).
## USAGE
### RUNNING THE PROGRAM
[Download the current version of BiGpairSEQ_Sim.](https://gitea.ejsf.synology.me/efischer/BiGpairSEQ/releases)
BiGpairSEQ_Sim is an executable .jar file. Requires Java 14 or higher. [OpenJDK 17](https://jdk.java.net/17/)
recommended.
Run with the command:
`java -jar BiGpairSEQ_Sim.jar`
Processing sample plates with tens of thousands of sequences may require large amounts
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
For example, to run the program with 32 gigabytes of memory, use the command:
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
use the -help flag:
`java -jar BiGpairSEQ_Sim.jar -help`
If no command line arguments are given, BiGpairSEQ_Sim will launch with an interactive, menu-driven CLI for
generating files and simulating TCR pairing. The main menu looks like this:
```
--------BiGPairSEQ SIMULATOR--------
ALPHA/BETA T CELL RECEPTOR MATCHING
USING WEIGHTED BIPARTITE GRAPHS
------------------------------------
Please select an option:
1) Generate a population of distinct cells
2) Generate a sample plate of T cells
3) Generate CDR3 alpha/beta occupancy data and overlap graph
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
8) Options
9) About/Acknowledgments
0) Exit
```
By default, the Options menu looks like this:
```
--------------OPTIONS---------------
1) Turn on cell sample file caching
2) Turn on plate file caching
3) Turn on graph/data file caching
4) Turn off serialized binary graph output
5) Turn on GraphML graph output
6) Maximum weight matching algorithm options
0) Return to main menu
```
### INPUT/OUTPUT
To run the simulation, the program reads and writes 4 kinds of files:
* Cell Sample files in CSV format
* Sample Plate files in CSV format
* Graph/Data files in binary object serialization format
* Matching Results files in CSV format
These files are often generated in sequence. When entering filenames, it is not necessary to include the file extension
(.csv or .ser). When reading or writing files, the program will automatically add the correct extension to any filename
without one.
To save file I/O time, the most recent instance of each of these four
files either generated or read from disk can be cached in program memory. When caching is active, subsequent uses of the
same data file won't need to be read in again until another file of that type is used or generated,
or caching is turned off for that file type. The program checks whether it needs to update its cached data by comparing
filenames as entered by the user. On encountering a new filename, the program flushes its cache and reads in the new file.
(Note that cached Graph/Data files must be transformed back into their original state after a matching experiment, which
may take some time. Whether file I/O or graph transformation takes longer for graph/data files is likely to be
device-specific.)
The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
The program can optionally output Graph/Data files in .GraphML format (.graphml) for data portability. This can be
turned on in the Options menu. By default, GraphML output is OFF.
---
#### Cell Sample Files
Cell Sample files consist of any number of distinct "T cells." Every cell contains
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
are not necessarily unique; the relative diversity can be set when making the file.
(Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
Options when making a Cell Sample file:
* Number of T cells to generate
* Factor by which CDR3s are more diverse than CDR1s
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
Comments are preceded by `#`
Structure:
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|---|---|---|---|
|unique number|unique number|number|number|
---
#### Sample Plate Files
Sample Plate files consist of any number of "wells" containing any number of T cells (as
described above). The wells are filled randomly from a Cell Sample file, according to a selected
frequency distribution. Additionally, every individual sequence within each cell may, with some
given dropout probability, be omitted from the file; this simulates the effect of amplification errors
prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a
different concentration of T cells per well.
Options when making a Sample Plate file:
* Cell Sample file to use
* Statistical distribution to apply to Cell Sample file
* Poisson
* Gaussian
* Standard deviation size
* Exponential
* Lambda value
* *(Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was approximately exponential with a lambda ~0.6. (Howie, et al. 2015))*
* Total number of wells on the plate
* Well populations random or fixed
* If random, minimum and maximum population sizes
* If fixed
* Number of sections on plate
* Number of T cells per well
* per section, if more than one section
* Dropout rate
Files are in CSV format. There are no header labels. Every row represents a well.
Every value represents an individual cell, containing four sequences, depicted as an array string:
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
`[525902, 791533, -1, 866282]`
Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
Structure:
```
# Cell source file name:
# Each row represents one well on the plate
# Plate size:
# Concentrations:
# Lambda -or- StdDev:
```
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|---|---|---|---|
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
| **...** | **...** | **...** | **...** |
---
#### Graph/Data Files
Graph/Data files are serialized binaries of a Java object containing the weighted bipartite graph representation of a
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
Sample Plate file (to construct the associated occupancy graph).
These files can be several gigabytes in size. Writing them to a file lets us generate a graph and its metadata once,
then use it for multiple different BiGpairSEQ simulations.
Options for creating a Graph/Data file:
* The Cell Sample file to use
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
These files do not have a human-readable structure, and are not portable to other programs.
(For portability to other software, turn on GraphML output in the Options menu. This will produce a .graphml file
for the weighted graph, with vertex attributes sequence, type, and occupancy data.)
---
#### Matching Results Files
Matching results files consist of the results of a BiGpairSEQ matching simulation. Making them requires a serialized
binary Graph/Data file (.ser). (Because .graphML files are larger than .ser files, BiGpairSEQ_Sim supports .graphML
output only. Graph/data input must use a serialized binary.)
Matching results files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
* The minimum number of alpha/beta overlap wells to attempt to match
* (must be >= 1)
* The maximum number of alpha/beta overlap wells to attempt to match
* (must be <= the number of wells on the plate - 1)
* The maximum difference in alpha/beta occupancy to attempt to match
* (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
* The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as value in range 0 - 100.
* (Optional. To skip using this filter, enter 0)
Example output:
```
# Source Sample Plate file: 4MilCellsPlate.csv
# Source Graph and Data file: 4MilCellsPlateGraph.ser
# T cell counts in sample plate wells: 30000
# Total alphas found: 11813
# Total betas found: 11808
# High overlap threshold: 94
# Low overlap threshold: 3
# Minimum overlap percent: 0
# Maximum occupancy difference: 96
# Pairing attempt rate: 0.438
# Correct pairings: 5151
# Incorrect pairings: 18
# Pairing error rate: 0.00348
# Simulation time: 862 seconds
```
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|---|---|---|---|---|---|---|
|5242972|17|1571520|18|17|true|1.41E-18|
|5161027|18|2072219|18|18|true|7.31E-20|
|4145198|33|1064455|30|29|true|2.65E-21|
|7700582|18|112748|18|18|true|7.31E-20|
|...|...|...|...|...|...|...|
---
**NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all.
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
### PERFORMANCE
Performance details of the example excerpted above:
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
## TODO
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
* Might have figured out a way to do it, by taking edges out and then putting them back into the graph. This may actually be possible.
* It is possible, though the modifications to the graph incur their own performance penalties. Need testing to see which option is best.
* ~~Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm~~ DONE
* ~~in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage~~
* ~~Add controllable heap-type parameter?~~
* Parameter implemented. Fibonacci heap the current default.
* ~~Implement sample plates with random numbers of T cells per well.~~ DONE
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
* ~~Problem is variable number of cells in a well~~
* ~~Apache Commons CSV library writes entries a row at a time~~
* Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily the tidyverse transposes CSV matrices in R, might not even be worth fixing.
* ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE
* ~~Custom vertex type with attribute for sequence occupancy?~~ ABANDONED
* Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.
* ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE
* Re-implement CDR1 matching method
* Implement Duan and Su's maximum weight matching algorithm
* Add controllable algorithm-type parameter?
* This would be fun and valuable, but probably take more time than I have for a hobby project.
* Implement Vose's alias method for arbitrary statistical distributions of cells
## CITATIONS
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
* Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
* Mehlhorn, K., Näher, St. [The LEDA Platform of Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
* Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596-615 (1987)
## EXTERNAL LIBRARIES USED
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
* [JHeaps](https://www.jheaps.org) -- For pairing heap priority queue used in maximum weight matching algorithm
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting. (Run with the `-help` flag for the list of options.)
## ACKNOWLEDGEMENTS
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
## AUTHOR
BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.

View File

@@ -0,0 +1,176 @@
import java.util.Random;
//Main class. Picks the interface (interactive or command line) and holds the
//program-wide settings and in-memory caches used by the rest of the code.
public class BiGpairSEQ {

    //shared random number generator
    private static final Random rand = new Random();

    //cached objects and the names of the files they came from (null when nothing is cached)
    private static CellSample cellSampleInMemory = null;
    private static String cellFilename = null;
    private static Plate plateInMemory = null;
    private static String plateFilename = null;
    private static GraphWithMapData graphInMemory = null;
    private static String graphFilename = null;

    //caching switches
    private static boolean cacheCells = false;
    private static boolean cachePlate = false;
    private static boolean cacheGraph = false;

    //priority queue heap type, either "FIBONACCI" or "PAIRING"
    private static String priorityQueueHeapType = "FIBONACCI";

    //graph output format switches
    private static boolean outputBinary = true;
    private static boolean outputGraphML = false;

    //Any argument at all selects the command line interface; none starts the interactive one.
    public static void main(String[] args) {
        if (args.length > 0) {
            CommandLineInterface.startCLI(args);
        }
        else {
            InteractiveInterface.startInteractive();
        }
    }

    public static Random getRand() {
        return rand;
    }

    //---- cell sample cache ----

    public static CellSample getCellSampleInMemory() {
        return cellSampleInMemory;
    }

    //Replaces the cached cell sample, clearing any previous one first.
    public static void setCellSampleInMemory(CellSample cellSample, String filename) {
        boolean somethingCached = cellSampleInMemory != null;
        if (somethingCached) {
            clearCellSampleInMemory();
        }
        cellSampleInMemory = cellSample;
        cellFilename = filename;
        System.out.println("Cell sample file " + filename + " cached.");
    }

    //Drops the cached cell sample and its filename, then requests a garbage collection.
    public static void clearCellSampleInMemory() {
        cellSampleInMemory = null;
        cellFilename = null;
        System.gc();
        System.out.println("Cell sample file cache cleared.");
    }

    public static String getCellFilename() {
        return cellFilename;
    }

    //---- sample plate cache ----

    public static Plate getPlateInMemory() {
        return plateInMemory;
    }

    //Replaces the cached plate, clearing any previous one first.
    public static void setPlateInMemory(Plate plate, String filename) {
        boolean somethingCached = plateInMemory != null;
        if (somethingCached) {
            clearPlateInMemory();
        }
        plateInMemory = plate;
        plateFilename = filename;
        System.out.println("Sample plate file " + filename + " cached.");
    }

    //Drops the cached plate and its filename, then requests a garbage collection.
    public static void clearPlateInMemory() {
        plateInMemory = null;
        plateFilename = null;
        System.gc();
        System.out.println("Sample plate file cache cleared.");
    }

    public static String getPlateFilename() {
        return plateFilename;
    }

    //---- graph/data cache ----

    public static GraphWithMapData getGraphInMemory() {
        return graphInMemory;
    }

    //Replaces the cached graph, clearing any previous one first.
    public static void setGraphInMemory(GraphWithMapData g, String filename) {
        boolean somethingCached = graphInMemory != null;
        if (somethingCached) {
            clearGraphInMemory();
        }
        graphInMemory = g;
        graphFilename = filename;
        System.out.println("Graph and data file " + filename + " cached.");
    }

    //Drops the cached graph and its filename, then requests a garbage collection.
    public static void clearGraphInMemory() {
        graphInMemory = null;
        graphFilename = null;
        System.gc();
        System.out.println("Graph and data file cache cleared.");
    }

    public static String getGraphFilename() {
        return graphFilename;
    }

    //---- caching switches ----

    public static boolean cacheCells() {
        return cacheCells;
    }

    //Turning caching off also discards whatever cell sample is currently cached.
    public static void setCacheCells(boolean cacheCells) {
        if (cacheCells) {
            System.out.println("Cell sample file caching: ON.");
        }
        else {
            BiGpairSEQ.clearCellSampleInMemory();
            System.out.println("Cell sample file caching: OFF.");
        }
        BiGpairSEQ.cacheCells = cacheCells;
    }

    public static boolean cachePlate() {
        return cachePlate;
    }

    //Turning caching off also discards whatever plate is currently cached.
    public static void setCachePlate(boolean cachePlate) {
        if (cachePlate) {
            System.out.println("Sample plate file caching: ON.");
        }
        else {
            BiGpairSEQ.clearPlateInMemory();
            System.out.println("Sample plate file caching: OFF.");
        }
        BiGpairSEQ.cachePlate = cachePlate;
    }

    public static boolean cacheGraph() {
        return cacheGraph;
    }

    //Turning caching off also discards whatever graph is currently cached.
    public static void setCacheGraph(boolean cacheGraph) {
        if (cacheGraph) {
            System.out.println("Graph/data file caching: ON.");
        }
        else {
            BiGpairSEQ.clearGraphInMemory();
            System.out.println("Graph/data file caching: OFF.");
        }
        BiGpairSEQ.cacheGraph = cacheGraph;
    }

    //---- algorithm and output settings ----

    public static String getPriorityQueueHeapType() {
        return priorityQueueHeapType;
    }

    public static void setPairingHeap() {
        priorityQueueHeapType = "PAIRING";
    }

    public static void setFibonacciHeap() {
        priorityQueueHeapType = "FIBONACCI";
    }

    public static boolean outputBinary() {
        return outputBinary;
    }

    public static void setOutputBinary(boolean b) {
        outputBinary = b;
    }

    public static boolean outputGraphML() {
        return outputGraphML;
    }

    public static void setOutputGraphML(boolean b) {
        outputGraphML = b;
    }
}

View File

@@ -11,17 +11,20 @@ import java.util.List;
public class CellFileReader { public class CellFileReader {
private String filename;
private List<Integer[]> distinctCells = new ArrayList<>(); private List<Integer[]> distinctCells = new ArrayList<>();
private Integer cdr1Freq;
public CellFileReader(String filename) { public CellFileReader(String filename) {
if(!filename.matches(".*\\.csv")){ if(!filename.matches(".*\\.csv")){
filename = filename + ".csv"; filename = filename + ".csv";
} }
this.filename = filename;
CSVFormat cellFileFormat = CSVFormat.Builder.create() CSVFormat cellFileFormat = CSVFormat.Builder.create()
.setHeader("Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1") .setHeader("Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1")
.setSkipHeaderRecord(true) .setSkipHeaderRecord(true)
.setCommentMarker('#')
.build(); .build();
try(//don't need to close reader bc of try-with-resources auto-closing try(//don't need to close reader bc of try-with-resources auto-closing
@@ -36,17 +39,37 @@ public class CellFileReader {
cell[3] = Integer.valueOf(record.get("Beta CDR1")); cell[3] = Integer.valueOf(record.get("Beta CDR1"));
distinctCells.add(cell); distinctCells.add(cell);
} }
} catch(IOException ex){ } catch(IOException ex){
System.out.println("cell file " + filename + " not found."); System.out.println("cell file " + filename + " not found.");
System.err.println(ex); System.err.println(ex);
} }
//get CDR1 frequency
ArrayList<Integer> cdr1Alphas = new ArrayList<>();
for (Integer[] cell : distinctCells) {
cdr1Alphas.add(cell[3]);
}
double count = cdr1Alphas.stream().distinct().count();
count = Math.ceil(distinctCells.size() / count);
cdr1Freq = (int) count;
} }
public List<Integer[]> getCells(){ public CellSample getCellSample() {
return new CellSample(distinctCells, cdr1Freq);
}
public String getFilename() { return filename;}
//Refactor everything that uses this to have access to a Cell Sample and get the cells there instead.
public List<Integer[]> getListOfDistinctCellsDEPRECATED(){
return distinctCells; return distinctCells;
} }
public Integer getCellCount() { public Integer getCellCountDEPRECATED() {
//Refactor everything that uses this to have access to a Cell Sample and get the count there instead.
return distinctCells.size(); return distinctCells.size();
} }
} }

View File

@@ -13,6 +13,7 @@ public class CellFileWriter {
private String[] headers = {"Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1"}; private String[] headers = {"Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1"};
List<Integer[]> cells; List<Integer[]> cells;
String filename; String filename;
Integer cdr1Freq;
public CellFileWriter(String filename, CellSample cells) { public CellFileWriter(String filename, CellSample cells) {
if(!filename.matches(".*\\.csv")){ if(!filename.matches(".*\\.csv")){
@@ -20,15 +21,18 @@ public class CellFileWriter {
} }
this.filename = filename; this.filename = filename;
this.cells = cells.getCells(); this.cells = cells.getCells();
this.cdr1Freq = cells.getCdr1Freq();
} }
public void writeCellsToFile() { public void writeCellsToFile() {
CSVFormat cellFileFormat = CSVFormat.Builder.create() CSVFormat cellFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#')
.setHeader(headers) .setHeader(headers)
.build(); .build();
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
CSVPrinter printer = new CSVPrinter(writer, cellFileFormat); CSVPrinter printer = new CSVPrinter(writer, cellFileFormat);
){ ){
printer.printComment("Sample contains 1 unique CDR1 for every " + cdr1Freq + "unique CDR3s.");
printer.printRecords(cells); printer.printRecords(cells);
} catch(IOException ex){ } catch(IOException ex){
System.out.println("Could not make new file named "+filename); System.out.println("Could not make new file named "+filename);

View File

@@ -1,18 +1,51 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.stream.IntStream;
public class CellSample { public class CellSample {
private List<Integer[]> cells; private List<Integer[]> cells;
private Integer cdr1Freq;
public CellSample(List<Integer[]> cells){ public CellSample(Integer numDistinctCells, Integer cdr1Freq){
this.cdr1Freq = cdr1Freq;
List<Integer> numbersCDR3 = new ArrayList<>();
List<Integer> numbersCDR1 = new ArrayList<>();
Integer numDistCDR3s = 2 * numDistinctCells + 1;
IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
Collections.shuffle(numbersCDR3);
Collections.shuffle(numbersCDR1);
//Each cell represented by 4 values
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
Integer tmpCDR3a = numbersCDR3.get(i);
Integer tmpCDR3b = numbersCDR3.get(i+1);
Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
distinctCells.add(tmp);
}
this.cells = distinctCells;
}
public CellSample(List<Integer[]> cells, Integer cdr1Freq){
this.cells = cells; this.cells = cells;
this.cdr1Freq = cdr1Freq;
} }
public List<Integer[]> getCells(){ public List<Integer[]> getCells(){
return cells; return cells;
} }
public Integer population(){ public Integer getCdr1Freq() {
return cdr1Freq;
}
public Integer getCellCount(){
return cells.size(); return cells.size();
} }

View File

@@ -0,0 +1,427 @@
import org.apache.commons.cli.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Stream;
/*
* Class for parsing options passed to program from command line
*
* Top-level flags (exactly one is required, and it must be the first argument):
* -help : print the help menus for all modes
* -cells : to make a cell sample file
* -plate : to make a sample plate file
* -graph : to make a graph and data file
* -match : to do a cdr3 matching (currently always writes a results file; matching WITHOUT
* making a results file, just printing a summary for piping, is still to be added)
*
* Cell flags:
* -n : number of distinct cells to generate
* -d : diversity factor, the factor by which CDR3s are more diverse than CDR1s
* -o : name of the output file
*
* Plate flags:
* -c : name of the cell sample file to use as input
* -w : the number of wells on the plate
* -poisson | -gaussian | -exponential : the statistical distribution to use
* -lambda : (if exponential) the lambda value of the exponential distribution
* -stddev : (if gaussian) the standard deviation of the gaussian distribution
* -random : randomize well populations, takes a minimum argument and a maximum argument
* -pop : number of t cells per well per section (number of arguments determines number of sections)
* -err : plate dropout rate, double from 0.0 to 1.0
* -o : name of the output file
*
* Graph flags:
* -c : name of the cell sample file to use as input
* -p : name of the sample plate file to use as input
* -o : name of the output file
* -graphml : output a graphml file
* -nb : do not output a serialized binary object file (it is written by default)
*
* Match flags:
* -g : name of graph and data file to use as input
* -min : minimum number of overlap wells to attempt a matching
* -max : the maximum number of overlap wells to attempt a matching
* -maxdiff : (optional) the maximum difference in occupancy to attempt a matching
* -minpct : (optional) the minimum percent overlap to attempt a matching.
* -o : the filename to write results to (currently required)
* (planned) flags choosing the values to print to System.out for piping
*
*/
public class CommandLineInterface {
//Parses the command line arguments and runs the requested mode.
//The first argument must be one of the top-level flags (-help, -cells, -plate, -graph, -match);
//every following argument is parsed against the option set of that mode.
//Problems with the arguments (unknown or missing options, malformed numbers, an unreadable
//graph file) are reported on System.err and end the run instead of surfacing as
//uncaught runtime exceptions.
public static void startCLI(String[] args) {
    //nothing to parse: report it instead of failing inside the parser
    if (args == null || args.length == 0) {
        System.err.println("Parsing failed. Reason: no command line arguments given.");
        return;
    }
    //Options sets for the different modes
    Options mainOptions = buildMainOptions();
    Options cellOptions = buildCellOptions();
    Options plateOptions = buildPlateOptions();
    Options graphOptions = buildGraphOptions();
    Options matchOptions = buildMatchCDR3options();
    CommandLineParser parser = new DefaultParser();
    //everything after the mode flag belongs to the chosen mode
    String[] modeArgs = Arrays.copyOfRange(args, 1, args.length);
    try {
        //only the first argument is used to choose the mode
        CommandLine line = parser.parse(mainOptions, Arrays.copyOfRange(args, 0, 1));
        if (line.hasOption("help")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("BiGpairSEQ_Sim", mainOptions);
            System.out.println();
            formatter.printHelp("BiGpairSEQ_SIM -cells", cellOptions);
            System.out.println();
            formatter.printHelp("BiGpairSEQ_Sim -plate", plateOptions);
            System.out.println();
            formatter.printHelp("BiGpairSEQ_Sim -graph", graphOptions);
            System.out.println();
            formatter.printHelp("BiGpairSEQ_Sim -match", matchOptions);
        }
        else if (line.hasOption("cells")) { //Making a cell sample file
            line = parser.parse(cellOptions, modeArgs);
            Integer number = Integer.valueOf(line.getOptionValue("n"));
            Integer diversity = Integer.valueOf(line.getOptionValue("d"));
            String filename = line.getOptionValue("o");
            makeCells(filename, number, diversity);
        }
        else if (line.hasOption("plate")) { //Making a sample plate file
            line = parser.parse(plateOptions, modeArgs);
            //get the cells
            String cellFilename = line.getOptionValue("c");
            CellSample cells = getCells(cellFilename);
            //get the rest of the parameters
            Integer[] populations;
            String outputFilename = line.getOptionValue("o");
            Integer numWells = Integer.parseInt(line.getOptionValue("w"));
            Double dropoutRate = Double.parseDouble(line.getOptionValue("err"));
            if (line.hasOption("random")) {
                //Array holding values of minimum and maximum populations
                Integer[] min_max = Stream.of(line.getOptionValues("random"))
                        .mapToInt(Integer::parseInt)
                        .boxed()
                        .toArray(Integer[]::new);
                //one random population per well, drawn from [minimum, maximum]
                populations = BiGpairSEQ.getRand().ints(min_max[0], min_max[1] + 1)
                        .limit(numWells)
                        .boxed()
                        .toArray(Integer[]::new);
            }
            else if (line.hasOption("pop")) {
                populations = Stream.of(line.getOptionValues("pop"))
                        .mapToInt(Integer::parseInt)
                        .boxed()
                        .toArray(Integer[]::new);
            }
            else {
                //fallback: a single section with one cell per well
                populations = new Integer[1];
                populations[0] = 1;
            }
            //make the plate
            Plate plate;
            if (line.hasOption("poisson")) {
                //NOTE(review): the standard deviation is derived from the number of wells here --
                //confirm this is the intended quantity.
                Double stdDev = Math.sqrt(numWells);
                plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
            }
            else if (line.hasOption("gaussian")) {
                //without this check a missing -stddev would reach parseDouble as null
                if (!line.hasOption("stddev")) {
                    throw new ParseException("Missing required option for -gaussian: stddev");
                }
                Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
                plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
            }
            else {
                //the distribution option group is required, so this must be -exponential
                assert line.hasOption("exponential");
                //without this check a missing -lambda would reach parseDouble as null
                if (!line.hasOption("lambda")) {
                    throw new ParseException("Missing required option for -exponential: lambda");
                }
                Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
                plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
            }
            PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
            writer.writePlateFile();
        }
        else if (line.hasOption("graph")) { //Making a graph
            line = parser.parse(graphOptions, modeArgs);
            String cellFilename = line.getOptionValue("c");
            String plateFilename = line.getOptionValue("p");
            String outputFilename = line.getOptionValue("o");
            //get cells
            CellSample cells = getCells(cellFilename);
            //get plate
            Plate plate = getPlate(plateFilename);
            GraphWithMapData graph = Simulator.makeGraph(cells, plate, false);
            if (!line.hasOption("no-binary")) { //output binary file unless told not to
                GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph, false);
                writer.writeDataToFile();
            }
            if (line.hasOption("graphml")) { //if told to, output graphml file
                GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph);
                gmlwriter.writeGraphToFile();
            }
        }
        else if (line.hasOption("match")) { //can add a flag for which match type in future, split this in two
            line = parser.parse(matchOptions, modeArgs);
            String graphFilename = line.getOptionValue("g");
            String outputFilename = line.getOptionValue("o");
            Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
            Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
            //optional filters: the defaults below make them accept everything
            Integer minOverlapPct = 0;
            if (line.hasOption("minpct")) { //see if this filter is being used
                minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
            }
            Integer maxOccupancyDiff = Integer.MAX_VALUE;
            if (line.hasOption("maxdiff")) { //see if this filter is being used
                maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
            }
            GraphWithMapData graph = getGraph(graphFilename);
            if (graph == null) {
                //getGraph hands back null when the file could not be read; there is nothing to match
                System.err.println("Could not read graph/data file " + graphFilename);
                return;
            }
            MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
                    maxOccupancyDiff, minOverlapPct, false);
            MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
            writer.writeResultsToFile();
            //can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
            //after I put those flags in the matchOptions
        }
    }
    catch (ParseException exp) {
        System.err.println("Parsing failed. Reason: " + exp.getMessage());
    }
    catch (NumberFormatException exp) {
        //a numeric option value could not be parsed as a number
        System.err.println("Parsing failed. Reason: invalid numeric argument. " + exp.getMessage());
    }
}
//Builds the required -o / --output-file option, which takes the output filename
//as its single argument.
private static Option outputFileOption() {
    return Option.builder("o")
            .longOpt("output-file")
            .desc("Name of output file")
            .argName("filename")
            .hasArg()
            .required()
            .build();
}
//Builds the top-level options: a required group of mutually exclusive mode flags
//(-help, -cells, -plate, -graph, -match).
private static Options buildMainOptions() {
    OptionGroup modes = new OptionGroup();
    modes.addOption(Option.builder("help")
            .desc("Displays this help menu")
            .build());
    modes.addOption(Option.builder("cells")
            .longOpt("make-cells")
            .desc("Makes a cell sample file of distinct T cells")
            .build());
    modes.addOption(Option.builder("plate")
            .longOpt("make-plate")
            .desc("Makes a sample plate file. Requires a cell sample file.")
            .build());
    modes.addOption(Option.builder("graph")
            .longOpt("make-graph")
            .desc("Makes a graph/data file. Requires a cell sample file and a sample plate file")
            .build());
    modes.addOption(Option.builder("match")
            .longOpt("match-cdr3")
            .desc("Matches CDR3s. Requires a graph/data file.")
            .build());
    //one of the modes has to be given
    modes.setRequired(true);

    Options mainOptions = new Options();
    mainOptions.addOptionGroup(modes);
    return mainOptions;
}
//Builds the options of the -cells mode: number of cells, diversity factor and output file,
//all required.
private static Options buildCellOptions() {
    Option cellCount = Option.builder("n")
            .longOpt("num-cells")
            .desc("The number of distinct cells to generate")
            .hasArg()
            .argName("number")
            .required()
            .build();
    Option diversityFactor = Option.builder("d")
            .longOpt("diversity-factor")
            .desc("The factor by which unique CDR3s outnumber unique CDR1s")
            .hasArg()
            .argName("factor")
            .required()
            .build();

    Options cellOptions = new Options();
    cellOptions.addOption(cellCount);
    cellOptions.addOption(diversityFactor);
    cellOptions.addOption(outputFileOption());
    return cellOptions;
}
//Builds the options of the -plate mode: input cell file, number of wells, a required choice
//of distribution with its optional parameter, a required choice of how the wells are
//populated, the dropout rate and the output file.
private static Options buildPlateOptions() {
    Option cellFile = Option.builder("c")
            .longOpt("cell-file")
            .desc("The cell sample file to use")
            .hasArg()
            .argName("filename")
            .required()
            .build();
    Option wellCount = Option.builder("w")
            .longOpt("wells")
            .desc("The number of wells on the sample plate")
            .hasArg()
            .argName("number")
            .required()
            .build();

    //group for choosing which distribution to use; exactly one is required
    OptionGroup distributions = new OptionGroup();
    distributions.setRequired(true);
    distributions.addOption(Option.builder("poisson")
            .desc("Use a Poisson distribution for cell sample")
            .build());
    distributions.addOption(Option.builder("gaussian")
            .desc("Use a Gaussian distribution for cell sample")
            .build());
    distributions.addOption(Option.builder("exponential")
            .desc("Use an exponential distribution for cell sample")
            .build());

    //group for the parameter of the chosen distribution; not marked required
    OptionGroup statParams = new OptionGroup();
    statParams.addOption(Option.builder("stddev")
            .desc("If using -gaussian flag, standard deviation for distrbution")
            .hasArg()
            .argName("value")
            .build());
    statParams.addOption(Option.builder("lambda")
            .desc("If using -exponential flag, lambda value for distribution")
            .hasArg()
            .argName("value")
            .build());

    //group for random or explicitly given well populations; one is required
    OptionGroup wellPopOptions = new OptionGroup();
    wellPopOptions.setRequired(true);
    wellPopOptions.addOption(Option.builder("random")
            .desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
            .hasArgs()
            .numberOfArgs(2)
            .argName("minimum maximum")
            .build());
    wellPopOptions.addOption(Option.builder("pop")
            .desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
            .hasArgs()
            .argName("number [number]...")
            .build());

    Option dropoutRate = Option.builder("err")
            .hasArg()
            .desc("The sequence dropout rate due to amplification error. (0.0 - 1.0)")
            .argName("rate")
            .required()
            .build();

    Options plateOptions = new Options();
    plateOptions.addOption(cellFile);
    plateOptions.addOption(wellCount);
    plateOptions.addOptionGroup(distributions);
    plateOptions.addOptionGroup(statParams);
    plateOptions.addOptionGroup(wellPopOptions);
    plateOptions.addOption(dropoutRate);
    plateOptions.addOption(outputFileOption());
    return plateOptions;
}
//Builds the options of the -graph mode: the required cell file, plate file and output file,
//plus the optional -graphml and -nb / --no-binary output switches.
private static Options buildGraphOptions() {
    Option cellFile = Option.builder("c")
            .longOpt("cell-file")
            .desc("Cell sample file to use for checking accuracy")
            .hasArg()
            .argName("filename")
            .required()
            .build();
    Option plateFile = Option.builder("p")
            .longOpt("plate-filename")
            .desc("Sample plate file (made from given cell sample file) to construct graph from")
            .hasArg()
            .argName("filename")
            .required()
            .build();
    Option graphML = Option.builder("graphml")
            .desc("Output GraphML file")
            .build();
    Option noBinary = Option.builder("nb")
            .longOpt("no-binary")
            .desc("Don't output serialized binary file")
            .build();

    Options graphOptions = new Options();
    graphOptions.addOption(cellFile);
    graphOptions.addOption(plateFile);
    graphOptions.addOption(outputFileOption());
    graphOptions.addOption(graphML);
    graphOptions.addOption(noBinary);
    return graphOptions;
}
//Builds the options of the -match mode: the required graph file, overlap thresholds and
//output file, plus the two optional filters (-minpct, -maxdiff).
private static Options buildMatchCDR3options() {
    Option graphFile = Option.builder("g")
            .longOpt("graph-file")
            .desc("The graph/data file to use")
            .hasArg()
            .argName("filename")
            .required()
            .build();
    Option minOverlap = Option.builder("min")
            .desc("The minimum number of shared wells to attempt to match a sequence pair")
            .hasArg()
            .argName("number")
            .required()
            .build();
    Option maxOverlap = Option.builder("max")
            .desc("The maximum number of shared wells to attempt to match a sequence pair")
            .hasArg()
            .argName("number")
            .required()
            .build();
    Option minPercent = Option.builder("minpct")
            .desc("(Optional) The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching. (0 - 100) ")
            .hasArg()
            .argName("percent")
            .build();
    Option maxDifference = Option.builder("maxdiff")
            .desc("(Optional) The maximum difference in total occupancy between two sequences to attempt matching.")
            .hasArg()
            .argName("number")
            .build();

    Options matchCDR3options = new Options();
    matchCDR3options.addOption(graphFile);
    matchCDR3options.addOption(minOverlap);
    matchCDR3options.addOption(maxOverlap);
    matchCDR3options.addOption(minPercent);
    matchCDR3options.addOption(maxDifference);
    matchCDR3options.addOption(outputFileOption());
    //options for output to System.out are still to be added here
    return matchCDR3options;
}
//Reads the named cell sample file and returns the cell sample built from it.
private static CellSample getCells(String cellFilename) {
    assert cellFilename != null;
    return new CellFileReader(cellFilename).getCellSample();
}
//Reads the named sample plate file and returns the plate built from it.
private static Plate getPlate(String plateFilename) {
    assert plateFilename != null;
    return new PlateFileReader(plateFilename).getSamplePlate();
}
//Reads the named graph/data file without the file I/O announcement.
//Returns null (after printing the stack trace) if reading throws an IOException.
private static GraphWithMapData getGraph(String graphFilename) {
    assert graphFilename != null;
    GraphWithMapData graph = null;
    try {
        graph = new GraphDataObjectReader(graphFilename, false).getData();
    }
    catch (IOException ex) {
        ex.printStackTrace();
    }
    return graph;
}
//For calling from the command line: generates a cell sample with the given number of
//distinct cells and CDR1 frequency, and writes it to the named file.
public static void makeCells(String filename, Integer numCells, Integer cdr1Freq) {
    new CellFileWriter(filename, new CellSample(numCells, cdr1Freq)).writeCellsToFile();
}
}

View File

@@ -4,6 +4,9 @@ import java.math.MathContext;
public abstract class Equations { public abstract class Equations {
//pValue calculation as described in original pairSEQ paper.
//Included for comparison with original results.
//Not used by BiGpairSEQ for matching.
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) { public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
int w_ab = (int) w_ab_d; int w_ab = (int) w_ab_d;
double pv = 0.0; double pv = 0.0;
@@ -14,6 +17,9 @@ public abstract class Equations {
return pv; return pv;
} }
//Implementation of the (corrected) probability equation from pairSEQ paper.
//Included for comparison with original results.
//Not used by BiGpairSEQ for matching.
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){ private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
BigInteger numer1 = choose(w, w_ab); BigInteger numer1 = choose(w, w_ab);
BigInteger numer2 = choose(w - w_ab, w_a - w_ab); BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
@@ -26,10 +32,9 @@ public abstract class Equations {
return prob.doubleValue(); return prob.doubleValue();
} }
/*
* This works because nC(k+1) = nCk * (n-k)/(k+1) //This works because nC(k+1) = nCk * (n-k)/(k+1)
* Since nC0 = 1, can start there and generate all the rest. //Since nC0 = 1, can start there and generate all the rest.
*/
public static BigInteger choose(final int N, final int K) { public static BigInteger choose(final int N, final int K) {
BigInteger nCk = BigInteger.ONE; BigInteger nCk = BigInteger.ONE;
for (int k = 0; k < K; k++) { for (int k = 0; k < K; k++) {

View File

@@ -0,0 +1,33 @@
import java.io.*;
//Reads a serialized graph-and-data object (a .ser file) back into memory.
public class GraphDataObjectReader {

    private GraphWithMapData data;
    private String filename;
    private boolean verbose = true;

    //Reads the object stored in the named file; ".ser" is appended if the name lacks it.
    //verbose controls whether the (possibly slow) read is announced on System.out.
    //A missing file or an unknown class is reported with a stack trace and leaves getData()
    //returning null; any other I/O failure propagates as an IOException.
    public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
        //honour the caller's choice; previously the argument was ignored and the
        //announcement was always printed
        this.verbose = verbose;
        if (!filename.matches(".*\\.ser")) {
            filename = filename + ".ser";
        }
        this.filename = filename;
        //NOTE: this is native Java deserialization; only read files written by this program.
        try (//don't need to close these because of try-with-resources
             BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
             ObjectInputStream in = new ObjectInputStream(fileIn))
        {
            if (verbose) {
                System.out.println("Reading graph data from file. This may take some time");
                System.out.println("File I/O time is not included in results");
            }
            data = (GraphWithMapData) in.readObject();
        } catch (FileNotFoundException | ClassNotFoundException ex) {
            ex.printStackTrace();
        }
    }

    //The object that was read, or null if reading failed.
    public GraphWithMapData getData() {
        return data;
    }

    //The name of the file that was read, including the ".ser" extension.
    public String getFilename() {
        return filename;
    }
}

View File

@@ -0,0 +1,45 @@
import org.jgrapht.Graph;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
//Writes a graph-and-data object to a .ser file using Java serialization.
public class GraphDataObjectWriter {

    private GraphWithMapData data;
    private String filename;
    private boolean verbose = true;

    //Creates a writer that announces the write on System.out.
    public GraphDataObjectWriter(String filename, GraphWithMapData data) {
        this(filename, data, true);
    }

    //Creates a writer for the named file; ".ser" is appended if the name lacks it.
    //verbose controls whether the write is announced on System.out.
    public GraphDataObjectWriter(String filename, GraphWithMapData data, boolean verbose) {
        this.verbose = verbose;
        boolean hasExtension = filename.matches(".*\\.ser");
        this.filename = hasExtension ? filename : filename + ".ser";
        this.data = data;
    }

    //Serializes the data to the file. I/O failures are reported with a stack trace.
    public void writeDataToFile() {
        try (BufferedOutputStream fileOut = new BufferedOutputStream(new FileOutputStream(filename));
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
            if (verbose) {
                System.out.println("Writing graph and occupancy data to file. This may take some time.");
                System.out.println("File I/O time is not included in results.");
            }
            objectOut.writeObject(data);
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

View File

@@ -0,0 +1,84 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.Attribute;
import org.jgrapht.nio.AttributeType;
import org.jgrapht.nio.DefaultAttribute;
import org.jgrapht.nio.dot.DOTExporter;
import org.jgrapht.nio.graphml.GraphMLExporter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
//Writes the graph held by a GraphWithMapData object to a GraphML file, with the
//type, sequence and occupancy of every vertex attached as node attributes.
public class GraphMLFileWriter {

    String filename;
    GraphWithMapData data;

    //Creates a writer for the named file; ".graphml" is appended if the name lacks it.
    public GraphMLFileWriter(String filename, GraphWithMapData data) {
        if(!filename.matches(".*\\.graphml")){
            filename = filename + ".graphml";
        }
        this.filename = filename;
        this.data = data;
    }

    //Exports the graph, including edge weights, to the file.
    //The file is opened with CREATE_NEW, so an already existing file is not overwritten;
    //that and any other I/O failure is reported on the console.
    public void writeGraphToFile() {
        SimpleWeightedGraph graph = data.getGraph();
        Map<Integer, Integer> vertexToAlphaMap = data.getPlateVtoAMap();
        Map<Integer, Integer> vertexToBetaMap = data.getPlateVtoBMap();
        Map<Integer, Integer> alphaOccs = data.getAlphaWellCounts();
        Map<Integer, Integer> betaOccs = data.getBetaWellCounts();
        try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
        ){
            //create exporter. Let the vertex labels be the unique ids for the vertices.
            //Vertices are Integers and edges are DefaultWeightedEdges (the edge type
            //argument used to name the graph class by mistake).
            GraphMLExporter<Integer, DefaultWeightedEdge> exporter = new GraphMLExporter<>(v -> v.toString());
            //set to export weights
            exporter.setExportEdgeWeights(true);
            //set type, sequence, and occupancy attributes for each vertex
            exporter.setVertexAttributeProvider( v -> {
                Map<String, Attribute> attributes = new HashMap<>();
                if(vertexToAlphaMap.containsKey(v)) {
                    attributes.put("type", DefaultAttribute.createAttribute("CDR3 Alpha"));
                    attributes.put("sequence", DefaultAttribute.createAttribute(vertexToAlphaMap.get(v)));
                    attributes.put("occupancy", DefaultAttribute.createAttribute(
                            alphaOccs.get(vertexToAlphaMap.get(v))));
                }
                else if(vertexToBetaMap.containsKey(v)) {
                    attributes.put("type", DefaultAttribute.createAttribute("CDR3 Beta"));
                    attributes.put("sequence", DefaultAttribute.createAttribute(vertexToBetaMap.get(v)));
                    attributes.put("occupancy", DefaultAttribute.createAttribute(
                            betaOccs.get(vertexToBetaMap.get(v))));
                }
                //a vertex found in neither map gets no attributes
                return attributes;
            });
            //register the attributes
            //NOTE(review): sequence and occupancy are created from Integers but registered as
            //STRING -- confirm whether an integer attribute type is wanted in the output.
            exporter.registerAttribute("type", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
            exporter.registerAttribute("sequence", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
            exporter.registerAttribute("occupancy", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
            //export the graph
            exporter.exportGraph(graph, writer);
        } catch(IOException ex){
            System.out.println("Could not make new file named "+filename);
            System.err.println(ex);
        }
    }
}

View File

@@ -0,0 +1,111 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
/**
 * Static helpers that filter edges out of a weighted graph and put them back afterwards.
 *
 * <p>Every filter shares the same contract:
 * <ul>
 *   <li>when {@code saveEdges} is {@code true}, each filtered edge is removed from the graph and
 *       returned as a {@code {source, target, weight}} triple (weight truncated to an int);</li>
 *   <li>when {@code saveEdges} is {@code false}, each filtered edge stays in the graph with its
 *       weight set to {@code 0.0}, and the returned list is empty.</li>
 * </ul>
 */
public interface GraphModificationFunctions {

    /**
     * Filters edges whose weight is greater than {@code high} or less than {@code low}.
     *
     * @param graph     graph to modify in place
     * @param low       smallest weight that is kept
     * @param high      largest weight that is kept
     * @param saveEdges whether to remove and return the filtered edges (see interface contract)
     * @return the removed edges as {@code {source, target, weight}} triples; empty if
     *         {@code saveEdges} is {@code false}
     */
    static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                     int low, int high, boolean saveEdges) {
        return filterEdges(graph, e -> {
            double weight = graph.getEdgeWeight(e);
            return (weight > high) || (weight < low);
        }, saveEdges);
    }

    /**
     * Filters edges whose endpoints differ too much in occupancy. The source vertex is looked up
     * through {@code plateVtoAMap} and {@code alphaWellCounts}, the target vertex through
     * {@code plateVtoBMap} and {@code betaWellCounts}; an edge is filtered when the absolute
     * difference of the two counts is greater than or equal to {@code maxOccupancyDifference}.
     *
     * @param graph                  graph to modify in place
     * @param alphaWellCounts        count looked up for the source side of each edge
     * @param betaWellCounts         count looked up for the target side of each edge
     * @param plateVtoAMap           maps a source vertex to its key in {@code alphaWellCounts}
     * @param plateVtoBMap           maps a target vertex to its key in {@code betaWellCounts}
     * @param maxOccupancyDifference difference at or above which an edge is filtered
     * @param saveEdges              whether to remove and return the filtered edges
     * @return the removed edges as {@code {source, target, weight}} triples; empty if
     *         {@code saveEdges} is {@code false}
     * @throws NullPointerException if a lookup for an edge endpoint yields no count
     */
    static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                     Map<Integer, Integer> alphaWellCounts,
                                                     Map<Integer, Integer> betaWellCounts,
                                                     Map<Integer, Integer> plateVtoAMap,
                                                     Map<Integer, Integer> plateVtoBMap,
                                                     Integer maxOccupancyDifference, boolean saveEdges) {
        return filterEdges(graph, e -> {
            Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
            Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
            return Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference;
        }, saveEdges);
    }

    /**
     * Filters edges whose weight is a small fraction of either endpoint's occupancy. An edge is
     * filtered when {@code weight / occupancy} is below {@code minOverlapPercent / 100.0} for the
     * source side or for the target side.
     *
     * @param graph             graph to modify in place
     * @param alphaWellCounts   count looked up for the source side of each edge
     * @param betaWellCounts    count looked up for the target side of each edge
     * @param plateVtoAMap      maps a source vertex to its key in {@code alphaWellCounts}
     * @param plateVtoBMap      maps a target vertex to its key in {@code betaWellCounts}
     * @param minOverlapPercent minimum overlap, as a percentage, that an edge needs on both sides
     * @param saveEdges         whether to remove and return the filtered edges
     * @return the removed edges as {@code {source, target, weight}} triples; empty if
     *         {@code saveEdges} is {@code false}
     * @throws NullPointerException if a lookup for an edge endpoint yields no count
     */
    static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                  Map<Integer, Integer> alphaWellCounts,
                                                  Map<Integer, Integer> betaWellCounts,
                                                  Map<Integer, Integer> plateVtoAMap,
                                                  Map<Integer, Integer> plateVtoBMap,
                                                  Integer minOverlapPercent,
                                                  boolean saveEdges) {
        return filterEdges(graph, e -> {
            Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
            Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
            double weight = graph.getEdgeWeight(e);
            double min = minOverlapPercent / 100.0;
            return (weight / alphaOcc < min) || (weight / betaOcc < min);
        }, saveEdges);
    }

    /**
     * Re-inserts edges previously returned by one of the filters, restoring their saved weights.
     *
     * @param graph        graph to add the edges to
     * @param removedEdges {@code {source, target, weight}} triples to restore
     */
    static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                List<Integer[]> removedEdges) {
        for (Integer[] edge : removedEdges) {
            DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
            graph.setEdgeWeight(e, (double) edge[2]);
        }
    }

    /**
     * Shared implementation of the filters: visits every edge once and applies the
     * remove-or-zero policy to those matching {@code shouldFilter}.
     */
    private static List<Integer[]> filterEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                               Predicate<DefaultWeightedEdge> shouldFilter,
                                               boolean saveEdges) {
        List<Integer[]> removedEdges = new ArrayList<>();
        for (DefaultWeightedEdge e : graph.edgeSet()) {
            if (!shouldFilter.test(e)) {
                continue;
            }
            if (saveEdges) {
                Integer source = graph.getEdgeSource(e);
                Integer target = graph.getEdgeTarget(e);
                Integer weight = (int) graph.getEdgeWeight(e);
                removedEdges.add(new Integer[]{source, target, weight});
            } else {
                graph.setEdgeWeight(e, 0.0);
            }
        }
        // Removal is deferred until iteration over edgeSet() is finished.
        // The list is empty when saveEdges is false, so this loop is then a no-op.
        for (Integer[] edge : removedEdges) {
            graph.removeEdge(edge[0], edge[1]);
        }
        return removedEdges;
    }
}

View File

@@ -0,0 +1,106 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import java.time.Duration;
import java.util.Map;
/**
 * Serializable bundle of a graph together with the lookup maps and plate statistics that
 * accompany it.
 *
 * <p>The graph alone is not enough because the occupancy data is needed too, so the whole object
 * is serialized and written to a file; there is therefore no reason to split the map data and
 * the graph data up.
 *
 * <p>NOTE(review): no explicit serialVersionUID is declared, so field names, field modifiers and
 * non-private member signatures should be left as they are to stay compatible with files that
 * were already written - confirm before changing any of them.
 */
public class GraphWithMapData implements java.io.Serializable {

    // Not set by the constructor; assigned later through setSourceFilename().
    private String sourceFilename;

    private final SimpleWeightedGraph graph;
    private Integer numWells;
    private Integer[] wellPopulations;
    private Integer alphaCount;
    private Integer betaCount;
    private final Map<Integer, Integer> distCellsMapAlphaKey;
    private final Map<Integer, Integer> plateVtoAMap;
    private final Map<Integer, Integer> plateVtoBMap;
    private final Map<Integer, Integer> plateAtoVMap;
    private final Map<Integer, Integer> plateBtoVMap;
    private final Map<Integer, Integer> alphaWellCounts;
    private final Map<Integer, Integer> betaWellCounts;
    private final Duration time;

    /**
     * Stores every argument as given; no copies are made and nothing is validated.
     *
     * @param graph                the graph to bundle
     * @param numWells             number of wells
     * @param wellConcentrations   stored as the well populations array
     * @param alphaCount           alpha count
     * @param betaCount            beta count
     * @param distCellsMapAlphaKey map returned by {@link #getDistCellsMapAlphaKey()}
     * @param plateVtoAMap         map returned by {@link #getPlateVtoAMap()}
     * @param plateVtoBMap         map returned by {@link #getPlateVtoBMap()}
     * @param plateAtoVMap         map returned by {@link #getPlateAtoVMap()}
     * @param plateBtoVMap         map returned by {@link #getPlateBtoVMap()}
     * @param alphaWellCounts      map returned by {@link #getAlphaWellCounts()}
     * @param betaWellCounts       map returned by {@link #getBetaWellCounts()}
     * @param time                 duration returned by {@link #getTime()}
     */
    public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
                            Integer alphaCount, Integer betaCount,
                            Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
                            Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
                            Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
                            Map<Integer, Integer> betaWellCounts, Duration time) {
        // graph and plate-level numbers
        this.graph = graph;
        this.numWells = numWells;
        this.wellPopulations = wellConcentrations;
        this.alphaCount = alphaCount;
        this.betaCount = betaCount;

        // lookup maps
        this.distCellsMapAlphaKey = distCellsMapAlphaKey;
        this.plateVtoAMap = plateVtoAMap;
        this.plateVtoBMap = plateVtoBMap;
        this.plateAtoVMap = plateAtoVMap;
        this.plateBtoVMap = plateBtoVMap;
        this.alphaWellCounts = alphaWellCounts;
        this.betaWellCounts = betaWellCounts;

        this.time = time;
    }

    /** @return the bundled graph (same instance that was passed to the constructor) */
    public SimpleWeightedGraph getGraph() {
        return this.graph;
    }

    /** @return the number of wells */
    public Integer getNumWells() {
        return this.numWells;
    }

    /** @return the well populations array (not copied) */
    public Integer[] getWellPopulations() {
        return this.wellPopulations;
    }

    /** @return the alpha count */
    public Integer getAlphaCount() {
        return this.alphaCount;
    }

    /** @return the beta count */
    public Integer getBetaCount() {
        return this.betaCount;
    }

    /** @return the distCellsMapAlphaKey map (not copied) */
    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return this.distCellsMapAlphaKey;
    }

    /** @return the plateVtoAMap map (not copied) */
    public Map<Integer, Integer> getPlateVtoAMap() {
        return this.plateVtoAMap;
    }

    /** @return the plateVtoBMap map (not copied) */
    public Map<Integer, Integer> getPlateVtoBMap() {
        return this.plateVtoBMap;
    }

    /** @return the plateAtoVMap map (not copied) */
    public Map<Integer, Integer> getPlateAtoVMap() {
        return this.plateAtoVMap;
    }

    /** @return the plateBtoVMap map (not copied) */
    public Map<Integer, Integer> getPlateBtoVMap() {
        return this.plateBtoVMap;
    }

    /** @return the alpha well counts map (not copied) */
    public Map<Integer, Integer> getAlphaWellCounts() {
        return this.alphaWellCounts;
    }

    /** @return the beta well counts map (not copied) */
    public Map<Integer, Integer> getBetaWellCounts() {
        return this.betaWellCounts;
    }

    /** @return the duration supplied at construction */
    public Duration getTime() {
        return this.time;
    }

    /** @param filename the source filename to record for this data */
    public void setSourceFilename(String filename) {
        this.sourceFilename = filename;
    }

    /** @return the recorded source filename, or {@code null} if none was set */
    public String getSourceFilename() {
        return this.sourceFilename;
    }
}

View File

@@ -0,0 +1,586 @@
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//
public class InteractiveInterface {
//Random number generator obtained from BiGpairSEQ; used by makePlate() to draw random well populations
private static final Random rand = BiGpairSEQ.getRand();
//Single scanner over standard input shared by every menu; closed when startInteractive() finishes
private static final Scanner sc = new Scanner(System.in);
//Most recent integer selection read from the scanner by a menu
private static int input;
//Set to true when the user picks "0) Exit" in the main menu, which ends the loop in startInteractive()
private static boolean quit = false;
/**
 * Runs the main menu until the user chooses to exit, then closes the shared scanner.
 *
 * <p>Each pass prints the menu, reads one integer and dispatches to the matching action.
 * A selection that is not an integer is reported and discarded; an {@link IOException} raised
 * by an action is reported and the menu is shown again.
 */
public static void startInteractive() {
    while (!quit) {
        System.out.println();
        System.out.println("--------BiGPairSEQ SIMULATOR--------");
        System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
        System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
        System.out.println("------------------------------------");
        System.out.println("Please select an option:");
        System.out.println("1) Generate a population of distinct cells");
        System.out.println("2) Generate a sample plate of T cells");
        System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
        System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
        //Need to re-do the CDR3/CDR1 matching to correspond to new pattern
        //System.out.println("5) Generate CDR3/CDR1 occupancy graph");
        //System.out.println("6) Simulate CDR3/CDR1 T cell matching");
        System.out.println("8) Options");
        System.out.println("9) About/Acknowledgments");
        System.out.println("0) Exit");
        try {
            input = sc.nextInt();
            switch (input) {
                case 1 -> makeCells();
                case 2 -> makePlate();
                case 3 -> makeCDR3Graph();
                case 4 -> matchCDR3s();
                //case 6 -> matchCellsCDR1();
                case 8 -> mainOptions();
                case 9 -> acknowledge();
                case 0 -> quit = true;
                default -> System.out.println("Invalid input.");
            }
        } catch (InputMismatchException ex) {
            System.out.println(ex);
            //the unparsable token is still in the scanner; discard it so the next read can succeed
            sc.next();
        } catch (IOException ex) {
            //an I/O failure leaves no bad token behind, so nothing must be consumed here;
            //calling sc.next() would block or swallow the user's next menu selection
            System.out.println(ex);
        }
    }
    sc.close();
}
/**
 * Prompts for an output file name, the CDR3/CDR1 diversity factor and a cell count, then
 * generates a cell sample and writes it to a CSV file. The sample is also kept in memory when
 * cell caching is enabled.
 *
 * <p>If any answer is invalid, the problem is reported and control returns to the main menu
 * without generating or writing anything.
 */
private static void makeCells() {
    String filename = null;
    Integer numCells = 0;
    Integer cdr1Freq = 1;
    try {
        System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
        System.out.println("\nThe cells will be written to a CSV file.");
        System.out.print("Please enter a file name: ");
        filename = sc.next();
        System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
        System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
        cdr1Freq = sc.nextInt();
        System.out.print("\nPlease enter the number of T-cells to generate: ");
        numCells = sc.nextInt();
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //discard the token that could not be parsed as an integer
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    }
    //range check happens outside the try block: the value was parsed successfully, so there is
    //no bad token to discard and consuming one would swallow the user's next input
    if (numCells <= 0) {
        System.out.println("Number of cells must be a positive integer.");
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample sample = new CellSample(numCells, cdr1Freq);
    System.out.println("Writing cells to file");
    CellFileWriter writer = new CellFileWriter(filename, sample);
    writer.writeCellsToFile();
    System.out.println("Cell sample written to: " + filename);
    if (BiGpairSEQ.cacheCells()) {
        BiGpairSEQ.setCellSampleInMemory(sample, filename);
    }
}
/**
 * Interactively builds a sample plate and writes it to a CSV file.
 *
 * <p>Prompts for the cell sample file, the output file, the frequency distribution (Poisson,
 * Gaussian or exponential), the number of wells, the per-well populations (random within a
 * range, or fixed per section) and the dropout rate. The plate is also kept in memory when
 * plate caching is enabled.
 *
 * <p>If any answer cannot be parsed or is out of range, the problem is reported and control
 * returns to the main menu without building or writing a plate.
 */
private static void makePlate() {
    String cellFile = null;
    String filename = null;
    Double stdDev = 0.0;
    Integer numWells = 0;
    Integer numSections;
    Integer[] populations = {1};
    Double dropOutRate = 0.0;
    boolean poisson = false;
    boolean exponential = false;
    double lambda = 1.5;
    try {
        System.out.println("\nSimulated sample plates consist of:");
        System.out.println("* a number of wells");
        System.out.println(" * separated into one or more sections");
        System.out.println(" * each of which has a set quantity of cells per well");
        System.out.println(" * selected from a statistical distribution of distinct cells");
        System.out.println(" * with a set dropout rate for individual sequences within a cell");
        System.out.println("\nMaking a sample plate requires a population of distinct cells");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.println("\nThe sample plate will be written to a CSV file");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("\nSelect T-cell frequency distribution function");
        System.out.println("1) Poisson");
        System.out.println("2) Gaussian");
        System.out.println("3) Exponential");
        System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
        System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
        System.out.println("(Note: wider distributions are more memory intensive to match)");
        System.out.print("Enter selection value: ");
        input = sc.nextInt();
        switch (input) {
            case 1 -> poisson = true;
            case 2 -> {
                //Gaussian: neither flag is set, the entered standard deviation is used directly
                System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                System.out.println("(Note: wider distributions are more memory intensive to match)");
                stdDev = sc.nextDouble();
                if (stdDev <= 0.0) {
                    throw new IllegalArgumentException("Value must be positive.");
                }
            }
            case 3 -> {
                exponential = true;
                System.out.print("Please enter lambda value for exponential distribution: ");
                lambda = sc.nextDouble();
                if (lambda <= 0.0) {
                    lambda = 0.6;
                    System.out.println("Value must be positive. Defaulting to 0.6.");
                }
            }
            default -> {
                System.out.println("Invalid input. Defaulting to exponential.");
                exponential = true;
            }
        }
        System.out.print("\nNumber of wells on plate: ");
        numWells = sc.nextInt();
        if (numWells < 1) {
            throw new IllegalArgumentException("No wells on plate");
        }
        //choose whether to make T cell population/well random
        System.out.println("Randomize number of T cells in each well? (y/n)");
        String ans = sc.next();
        boolean randomWellPopulations = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE)
                .matcher(ans)
                .matches();
        if (randomWellPopulations) { //if T cell population/well is random
            //every well is its own section
            numSections = numWells;
            System.out.print("Please enter minimum number of T cells in a well: ");
            Integer minPop = sc.nextInt();
            if (minPop < 1) {
                throw new IllegalArgumentException("Minimum well population must be positive");
            }
            System.out.println("Please enter maximum number of T cells in a well: ");
            Integer maxPop = sc.nextInt();
            if (maxPop < minPop) {
                throw new IllegalArgumentException("Max well population must be greater than min well population");
            }
            //maximum should be inclusive, so need to add one to max of randomly generated values
            populations = rand.ints(minPop, maxPop + 1)
                    .limit(numSections)
                    .boxed()
                    .toArray(Integer[]::new);
            System.out.print("Populations: ");
            System.out.println(Arrays.toString(populations));
        } else { //if T cell population/well is not random
            System.out.println("\nThe plate can be evenly sectioned to allow different numbers of T cells per well.");
            System.out.println("How many sections would you like to make (minimum 1)?");
            numSections = sc.nextInt();
            if (numSections < 1) {
                throw new IllegalArgumentException("Too few sections.");
            } else if (numSections > numWells) {
                throw new IllegalArgumentException("Cannot have more sections than wells.");
            }
            populations = new Integer[numSections];
            for (int section = 1; section <= numSections; section++) {
                System.out.print("Enter number of T cells per well in section " + section + ": ");
                populations[section - 1] = sc.nextInt();
            }
        }
        System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
        System.out.print("Enter well dropout rate (0.0 to 1.0): ");
        dropOutRate = sc.nextDouble();
        if (dropOutRate < 0.0 || dropOutRate > 1.0) {
            throw new IllegalArgumentException("The well dropout rate must be in the range [0.0, 1.0]");
        }
    } catch (InputMismatchException ex) {
        //the scanner could not parse a token: report it and discard that token
        System.out.println(ex);
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    } catch (IllegalArgumentException ex) {
        //a value parsed fine but was out of range: nothing to discard from the scanner
        System.out.println(ex.getMessage());
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample cells;
    //reuse the cached cell sample only if it is for this file and is actually present
    if (cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
        cells = BiGpairSEQ.getCellSampleInMemory();
    } else {
        System.out.println("Reading Cell Sample file: " + cellFile);
        CellFileReader cellReader = new CellFileReader(cellFile);
        cells = cellReader.getCellSample();
        if (BiGpairSEQ.cacheCells()) {
            BiGpairSEQ.setCellSampleInMemory(cells, cellFile);
        }
    }
    Plate samplePlate;
    if (exponential) {
        samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
    } else {
        if (poisson) {
            stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
        }
        samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
    }
    PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
    System.out.println("Writing Sample Plate to file");
    writer.writePlateFile();
    System.out.println("Sample Plate written to file: " + filename);
    if (BiGpairSEQ.cachePlate()) {
        BiGpairSEQ.setPlateInMemory(samplePlate, filename);
    }
}
/**
 * Builds the occupancy overlap graph from a cell sample file and a sample plate file, then
 * writes it out as a serialized binary file and/or a GraphML file depending on the current
 * output options. The result is also kept in memory when graph caching is enabled.
 *
 * <p>Cell sample and plate are taken from memory when the requested file name matches the cached
 * one and a cached object is present; otherwise they are read from file. If either turns out to
 * be empty, a message is printed and nothing is generated.
 */
private static void makeCDR3Graph() {
    //Only whole tokens are read below; Scanner.next() never throws InputMismatchException,
    //so no recovery block is needed around these prompts.
    String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
    str = str.concat("\nrequires a cell sample file and a sample plate file.");
    System.out.println(str);
    System.out.print("\nPlease enter name of an existing cell sample file: ");
    String cellFile = sc.next();
    System.out.print("\nPlease enter name of an existing sample plate file: ");
    String plateFile = sc.next();
    System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
    System.out.print("Please enter a name for the output file: ");
    String filename = sc.next();

    CellSample cellSample;
    //check if cells are already in memory
    if (cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
        cellSample = BiGpairSEQ.getCellSampleInMemory();
    } else {
        System.out.println("Reading Cell Sample file: " + cellFile);
        CellFileReader cellReader = new CellFileReader(cellFile);
        cellSample = cellReader.getCellSample();
        if (BiGpairSEQ.cacheCells()) {
            BiGpairSEQ.setCellSampleInMemory(cellSample, cellFile);
        }
    }

    Plate plate;
    //check if plate is already in memory (same guard as for the cell sample above)
    if (plateFile.equals(BiGpairSEQ.getPlateFilename()) && BiGpairSEQ.getPlateInMemory() != null) {
        plate = BiGpairSEQ.getPlateInMemory();
    } else {
        System.out.println("Reading Sample Plate file: " + plateFile);
        PlateFileReader plateReader = new PlateFileReader(plateFile);
        plate = plateReader.getSamplePlate();
        if (BiGpairSEQ.cachePlate()) {
            BiGpairSEQ.setPlateInMemory(plate, plateFile);
        }
    }

    if (cellSample.getCells().size() == 0) {
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
    } else if (plate.getWells().size() == 0 || plate.getPopulations().length == 0) {
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
    } else {
        GraphWithMapData data = Simulator.makeGraph(cellSample, plate, true);
        if (BiGpairSEQ.outputBinary()) {
            GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
            dataWriter.writeDataToFile();
            System.out.println("Serialized binary graph/data file written to: " + filename);
        }
        if (BiGpairSEQ.outputGraphML()) {
            GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data);
            graphMLWriter.writeGraphToFile();
            System.out.println("GraphML file written to: " + filename);
        }
        if (BiGpairSEQ.cacheGraph()) {
            BiGpairSEQ.setGraphInMemory(data, filename);
        }
    }
}
/**
 * Simulates CDR3 alpha/beta matching on a graph/data file and writes the results to a file.
 *
 * <p>Prompts for the graph file, the output file and four filter settings. Out-of-range
 * settings are clamped to the nearest permitted value and the adjustment is announced. If a
 * setting cannot be parsed as an integer, it and all later settings keep their initial
 * "no filtering" defaults and matching still proceeds.
 *
 * @throws IOException declared for callers; NOTE(review): presumably raised while reading the
 *                     graph file or writing results - confirm against the reader/writer classes
 */
private static void matchCDR3s() throws IOException {
    String filename = null;
    String graphFilename = null;
    Integer lowThreshold = 0;
    Integer highThreshold = Integer.MAX_VALUE;
    Integer maxOccupancyDiff = Integer.MAX_VALUE;
    Integer minOverlapPercent = 0;
    try {
        System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
        System.out.println("Please enter name of an existing graph and occupancy data file: ");
        graphFilename = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
        lowThreshold = sc.nextInt();
        if (lowThreshold < 1) {
            lowThreshold = 1;
            System.out.println("Value for low occupancy overlap threshold must be positive");
            System.out.println("Value for low occupancy overlap threshold set to 1");
        }
        System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
        highThreshold = sc.nextInt();
        if (highThreshold < lowThreshold) {
            highThreshold = lowThreshold;
            System.out.println("Value for high occupancy overlap threshold must be >= low overlap threshold");
            System.out.println("Value for high occupancy overlap threshold set to " + lowThreshold);
        }
        System.out.println("What is the minimum percentage of a sequence's wells in alpha/beta overlap to attempt matching? (0 - 100)");
        minOverlapPercent = sc.nextInt();
        if (minOverlapPercent < 0 || minOverlapPercent > 100) {
            //actually apply the reset that the message announces
            minOverlapPercent = 0;
            System.out.println("Value outside range. Minimum occupancy overlap percentage set to 0");
        }
        System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
        maxOccupancyDiff = sc.nextInt();
        if (maxOccupancyDiff < 0) {
            maxOccupancyDiff = 0;
            System.out.println("Maximum allowable difference in alpha/beta occupancy must be nonnegative");
            System.out.println("Maximum allowable difference in alpha/beta occupancy set to 0");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //discard the token that could not be parsed as an integer
        sc.next();
    }
    assert graphFilename != null;
    //check if this is the same graph we already have in memory.
    GraphWithMapData data;
    if (graphFilename.equals(BiGpairSEQ.getGraphFilename()) && BiGpairSEQ.getGraphInMemory() != null) {
        data = BiGpairSEQ.getGraphInMemory();
    } else {
        GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename, true);
        data = dataReader.getData();
        if (BiGpairSEQ.cacheGraph()) {
            BiGpairSEQ.setGraphInMemory(data, graphFilename);
        }
    }
    //simulate matching
    MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
            minOverlapPercent, true);
    //write results to file
    assert filename != null;
    MatchingFileWriter writer = new MatchingFileWriter(filename, results);
    System.out.println("Writing results to file");
    writer.writeResultsToFile();
    System.out.println("Results written to file: " + filename);
}
///////
//Rewrite this to fit new matchCDR3 method with file I/O
///////
// public static void matchCellsCDR1(){
// /*
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
// match
// */
// String filename = null;
// String preliminaryResultsFilename = null;
// String cellFile = null;
// String plateFile = null;
// Integer lowThresholdCDR3 = 0;
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
// Integer lowThresholdCDR1 = 0;
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
// boolean outputCDR3Matches = false;
// try {
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
// System.out.print("Please enter name of an existing cell sample file: ");
// cellFile = sc.next();
// System.out.print("Please enter name of an existing sample plate file: ");
// plateFile = sc.next();
// System.out.println("The matching results will be written to a file.");
// System.out.print("Please enter a name for the output file: ");
// filename = sc.next();
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
// lowThresholdCDR3 = sc.nextInt();
// if(lowThresholdCDR3 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
// highThresholdCDR3 = sc.nextInt();
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
// maxOccupancyDiffCDR3 = sc.nextInt();
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
// minOverlapPercentCDR3 = sc.nextInt();
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
// }
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
// lowThresholdCDR1 = sc.nextInt();
// if(lowThresholdCDR1 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
// highThresholdCDR1 = sc.nextInt();
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
// System.out.print("Please enter y/n: ");
// String ans = sc.next();
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
// Matcher matcher = pattern.matcher(ans);
// if(matcher.matches()){
// outputCDR3Matches = true;
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
// preliminaryResultsFilename = sc.next();
// System.out.println("CDR3 alpha/beta matches will be output to file");
// }
// else{
// System.out.println("CDR3 alpha/beta matches will not be output to file");
// }
// } catch (InputMismatchException ex) {
// System.out.println(ex);
// sc.next();
// }
// CellFileReader cellReader = new CellFileReader(cellFile);
// PlateFileReader plateReader = new PlateFileReader(plateFile);
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
// if (cellReader.getCells().size() == 0){
// System.out.println("No cell sample found.");
// System.out.println("Returning to main menu.");
// }
// else if(plate.getWells().size() == 0){
// System.out.println("No sample plate found.");
// System.out.println("Returning to main menu.");
//
// }
// else{
// if(highThresholdCDR3 >= plate.getSize()){
// highThresholdCDR3 = plate.getSize() - 1;
// }
// if(highThresholdCDR1 >= plate.getSize()){
// highThresholdCDR1 = plate.getSize() - 1;
// }
// List<Integer[]> cells = cellReader.getCells();
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
// highThresholdCDR1, preliminaryResults);
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
// writer.writeResultsToFile();
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
// writer.writeResultsToFile();
// if(outputCDR3Matches){
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
// writer.writeResultsToFile();
// }
// }
// }
/**
 * Shows the options menu until the user returns to the main menu. Entries 1-5 flip the
 * corresponding BiGpairSEQ setting, entry 6 opens the algorithm options, entry 0 leaves.
 * Each "Turn on/off" label shows the state the setting would change to.
 */
private static void mainOptions() {
    boolean leaveMenu = false;
    do {
        System.out.println("\n--------------OPTIONS---------------");
        System.out.println("1) Turn " + getOnOff(!BiGpairSEQ.cacheCells()) + " cell sample file caching");
        System.out.println("2) Turn " + getOnOff(!BiGpairSEQ.cachePlate()) + " plate file caching");
        System.out.println("3) Turn " + getOnOff(!BiGpairSEQ.cacheGraph()) + " graph/data file caching");
        System.out.println("4) Turn " + getOnOff(!BiGpairSEQ.outputBinary()) + " serialized binary graph output");
        System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output");
        System.out.println("6) Maximum weight matching algorithm options");
        System.out.println("0) Return to main menu");
        try {
            input = sc.nextInt();
            switch (input) {
                case 1:
                    BiGpairSEQ.setCacheCells(!BiGpairSEQ.cacheCells());
                    break;
                case 2:
                    BiGpairSEQ.setCachePlate(!BiGpairSEQ.cachePlate());
                    break;
                case 3:
                    BiGpairSEQ.setCacheGraph(!BiGpairSEQ.cacheGraph());
                    break;
                case 4:
                    BiGpairSEQ.setOutputBinary(!BiGpairSEQ.outputBinary());
                    break;
                case 5:
                    BiGpairSEQ.setOutputGraphML(!BiGpairSEQ.outputGraphML());
                    break;
                case 6:
                    algorithmOptions();
                    break;
                case 0:
                    leaveMenu = true;
                    break;
                default:
                    System.out.println("Invalid input");
                    break;
            }
        } catch (InputMismatchException notAnInteger) {
            System.out.println(notAnInteger);
            //drop the token that could not be parsed
            sc.next();
        }
    } while (!leaveMenu);
}
/**
 * Converts a flag into the word used in the mainOptions() menu entries.
 *
 * @param b the flag to describe
 * @return "on" when b is true, otherwise "off"
 */
private static String getOnOff(boolean b) {
    return b ? "on" : "off";
}
/**
 * Shows the maximum weight matching algorithm menu. Choosing an implemented algorithm applies
 * it and returns to the options menu; entry 1 is not implemented and invalid entries repeat
 * the menu; entry 0 returns without changing anything.
 */
private static void algorithmOptions() {
    boolean finished = false;
    do {
        System.out.println("\n---------ALGORITHM OPTIONS----------");
        System.out.println("1) Use scaling algorithm by Duan and Su.");
        System.out.println("2) Use LEDA book algorithm with Fibonacci heap priority queue");
        System.out.println("3) Use LEDA book algorithm with pairing heap priority queue");
        System.out.println("0) Return to Options menu");
        try {
            input = sc.nextInt();
            switch (input) {
                case 1:
                    System.out.println("This option is not yet implemented. Choose another.");
                    break;
                case 2:
                    BiGpairSEQ.setFibonacciHeap();
                    System.out.println("MWM algorithm set to LEDA with Fibonacci heap");
                    finished = true;
                    break;
                case 3:
                    BiGpairSEQ.setPairingHeap();
                    System.out.println("MWM algorithm set to LEDA with pairing heap");
                    finished = true;
                    break;
                case 0:
                    finished = true;
                    break;
                default:
                    System.out.println("Invalid input");
                    break;
            }
        } catch (InputMismatchException notAnInteger) {
            System.out.println(notAnInteger);
            //drop the token that could not be parsed
            sc.next();
        }
    } while (!finished);
}
/**
 * Prints the about/acknowledgments text: a short description of the program, where to find
 * the documentation, the pairSEQ citation and the author credit.
 */
private static void acknowledge() {
    //an empty entry produces a blank line, exactly like a bare println()
    String[] aboutLines = {
            "This program simulates BiGpairSEQ, a graph theory based adaptation",
            "of the pairSEQ algorithm for pairing T cell receptor sequences.",
            "",
            "For full documentation, view readme.md file distributed with this code",
            "or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.",
            "",
            "pairSEQ citation:",
            "Howie, B., Sherwood, A. M., et. al.",
            "High-throughput pairing of T cell receptor alpha and beta sequences.",
            "Sci. Transl. Med. 7, 301ra131 (2015)",
            "",
            "BiGpairSEQ_Sim by Eugene Fischer, 2021-2022"
    };
    for (String line : aboutLines) {
        System.out.println(line);
    }
}
}

View File

@@ -0,0 +1,3 @@
Manifest-Version: 1.0
Main-Class: BiGpairSEQ

View File

@@ -8,24 +8,30 @@ import java.nio.file.Path;
import java.nio.file.StandardOpenOption; import java.nio.file.StandardOpenOption;
import java.util.List; import java.util.List;
public class MatchingFileWriter { public class MatchingFileWriter {
private String filename; private String filename;
private List<String> comments; private List<String> comments;
private List<String> headers; private List<String> headers;
private List<List<String>> results; private List<List<String>> allResults;
public MatchingFileWriter(String filename, List<String> comments, List<String> headers, List<List<String>> results){ public MatchingFileWriter(String filename, MatchingResult result){
if(!filename.matches(".*\\.csv")){ if(!filename.matches(".*\\.csv")){
filename = filename + ".csv"; filename = filename + ".csv";
} }
this.filename = filename; this.filename = filename;
this.comments = comments; this.comments = result.getComments();
this.headers = headers; this.headers = result.getHeaders();
this.results = results; this.allResults = result.getAllResults();
} }
public void writeErrorRateToTerminal(){
for(String s: comments){
if(s.matches("(Pairing error rate: )(\\d*.\\d+)")){
System.out.println(s);
}
}
}
public void writeResultsToFile(){ public void writeResultsToFile(){
String[] headerStrings = new String[headers.size()]; String[] headerStrings = new String[headers.size()];
for(int i = 0; i < headers.size(); i++){ for(int i = 0; i < headers.size(); i++){
@@ -41,8 +47,8 @@ public class MatchingFileWriter {
for(String comment: comments){ for(String comment: comments){
printer.printComment(comment); printer.printComment(comment);
} }
results.add(0, headers); allResults.add(0, headers);
printer.printRecords(results); printer.printRecords(allResults);
} catch(IOException ex){ } catch(IOException ex){
System.out.println("Could not make new file named "+filename); System.out.println("Could not make new file named "+filename);

View File

@@ -1,16 +1,41 @@
import java.time.Duration; import java.time.Duration;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
public class MatchingResult { public class MatchingResult {
private List<String> comments;
private List<String> headers;
private List<List<String>> allResults;
private Map<Integer, Integer> matchMap;
private Duration time;
public MatchingResult(List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){ private final Map<String, String> metadata;
this.comments = comments; private final List<String> comments;
private final List<String> headers;
private final List<List<String>> allResults;
private final Map<Integer, Integer> matchMap;
private final Duration time;
public MatchingResult(Map<String, String> metadata, List<String> headers,
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
/*
* POSSIBLE KEYS FOR METADATA MAP ARE:
* sample plate filename *
* graph filename *
* well populations *
* total alphas found *
* total betas found *
* high overlap threshold *
* low overlap threshold *
* maximum occupancy difference *
* minimum overlap percent *
* pairing attempt rate *
* correct pairing count *
* incorrect pairing count *
* pairing error rate *
* simulation time (seconds)
*/
this.metadata = metadata;
this.comments = new ArrayList<>();
for (String key : metadata.keySet()) {
comments.add(key +": " + metadata.get(key));
}
this.headers = headers; this.headers = headers;
this.allResults = allResults; this.allResults = allResults;
this.matchMap = matchMap; this.matchMap = matchMap;
@@ -18,6 +43,8 @@ public class MatchingResult {
} }
public Map<String, String> getMetadata() {return metadata;}
public List<String> getComments() { public List<String> getComments() {
return comments; return comments;
} }
@@ -37,4 +64,49 @@ public class MatchingResult {
public Duration getTime() { public Duration getTime() {
return time; return time;
} }
public String getPlateFilename() {
return metadata.get("sample plate filename");
}
public String getGraphFilename() {
return metadata.get("graph filename");
}
/**
 * Parses the "well populations" metadata entry into an array of integers.
 *
 * The entry is expected to be a comma-separated list such as "10, 100, 1000".
 * Whitespace around the separators and at either end of the entry is ignored.
 *
 * @return the parsed populations in the order they appear in the metadata, or
 *         an empty array when the entry is missing or blank
 * @throws NumberFormatException if any element is not a valid integer
 */
public Integer[] getWellPopulations() {
    String popString = metadata.get("well populations");
    // A result without this entry yields an empty array rather than a NullPointerException.
    if (popString == null || popString.isBlank()) {
        return new Integer[0];
    }
    String[] tokens = popString.trim().split("\\s*,\\s*");
    Integer[] popArray = new Integer[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
        popArray[i] = Integer.parseInt(tokens[i]);
    }
    return popArray;
}
/**
 * Returns the metadata value stored under "total alpha count", parsed as an integer.
 *
 * NOTE(review): the key list documented in the MatchingResult constructor names
 * "total alphas found", not "total alpha count" - confirm which key the code that
 * builds the metadata map actually writes.
 *
 * @throws NumberFormatException if the entry is absent or is not a valid integer
 */
public Integer getAlphaCount() {
    final String rawCount = metadata.get("total alpha count");
    return Integer.parseInt(rawCount);
}
/**
 * Returns the metadata value stored under "total beta count", parsed as an integer.
 *
 * NOTE(review): the key list documented in the MatchingResult constructor names
 * "total betas found", not "total beta count" - confirm which key the code that
 * builds the metadata map actually writes.
 *
 * @throws NumberFormatException if the entry is absent or is not a valid integer
 */
public Integer getBetaCount() {
    final String rawCount = metadata.get("total beta count");
    return Integer.parseInt(rawCount);
}
public Integer getHighOverlapThreshold() { return Integer.parseInt(metadata.get("high overlap threshold"));}
public Integer getLowOverlapThreshold() { return Integer.parseInt(metadata.get("low overlap threshold"));}
public Integer getMaxOccupancyDifference() { return Integer.parseInt(metadata.get("maximum occupancy difference"));}
public Integer getMinOverlapPercent() { return Integer.parseInt(metadata.get("minimum overlap percent"));}
public Double getPairingAttemptRate() { return Double.parseDouble(metadata.get("pairing attempt rate"));}
public Integer getCorrectPairingCount() { return Integer.parseInt(metadata.get("correct pairing count"));}
public Integer getIncorrectPairingCount() { return Integer.parseInt(metadata.get("incorrect pairing count"));}
public Double getPairingErrorRate() { return Double.parseDouble(metadata.get("pairing error rate"));}
public String getSimulationTime() { return metadata.get("simulation time (seconds)"); }
} }

View File

@@ -1,46 +1,89 @@
/*
TODO: Implement exponential distribution using inversion method - DONE
TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
import java.util.*; import java.util.*;
//Need to write function to output plate to a file that I can read in.
public class Plate { public class Plate {
private CellSample cells;
private String sourceFile;
private String filename;
private List<List<Integer[]>> wells; private List<List<Integer[]>> wells;
private Random rand = new Random(); private final Random rand = BiGpairSEQ.getRand();
private int size; private int size;
private double error; private double error;
private Integer[] concentrations; private Integer[] populations;
private double stdDev; private double stdDev;
private double lambda;
boolean exponential = false;
public Plate (int size, double error, Integer[] concentrations, double stdDev) { public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
double dropoutRate, double stdDev_or_lambda, boolean exponential){
this.cells = cells;
this.sourceFile = cellFilename;
this.size = numWells;
this.wells = new ArrayList<>();
this.error = dropoutRate;
this.populations = populations;
this.exponential = exponential;
if (this.exponential) {
this.lambda = stdDev_or_lambda;
fillWellsExponential(cells.getCells(), this.lambda);
}
else {
this.stdDev = stdDev_or_lambda;
fillWells(cells.getCells(), this.stdDev);
}
}
public Plate(int size, double error, Integer[] populations) {
this.size = size; this.size = size;
this.error = error; this.error = error;
this.concentrations = concentrations; this.populations = populations;
this.stdDev = stdDev;
wells = new ArrayList<>(); wells = new ArrayList<>();
} }
public Plate(List<List<Integer[]>> wells){ //constructor for returning a Plate from a PlateFileReader
public Plate(String filename, List<List<Integer[]>> wells) {
this.filename = filename;
this.wells = wells; this.wells = wells;
this.size = wells.size(); this.size = wells.size();
List<Integer> concentrations = new ArrayList<>();
for (List<Integer[]> w: wells) {
if(!concentrations.contains(w.size())){
concentrations.add(w.size());
}
}
this.populations = new Integer[concentrations.size()];
for (int i = 0; i < this.populations.length; i++) {
this.populations[i] = concentrations.get(i);
}
} }
public void fillWells(List<Integer[]> cells) { private void fillWellsExponential(List<Integer[]> cells, double lambda){
int numSections = concentrations.length; this.lambda = lambda;
exponential = true;
int numSections = populations.length;
int section = 0; int section = 0;
double m; double m;
int n; int n;
//testing
//System.out.println("Cell size: " + cells.get(0).length);
while (section < numSections){ while (section < numSections){
for (int i = 0; i < (size / numSections); i++) { for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>(); List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < concentrations[section]; j++) { for (int j = 0; j < populations[section]; j++) {
do { do {
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2); //inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
} while (m >= cells.size() || m < 0); } while (m >= cells.size() || m < 0);
n = (int) Math.floor(m); n = (int) Math.floor(m);
Integer[] cellToAdd = cells.get(n).clone(); Integer[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){ for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
cellToAdd[k] = -1; cellToAdd[k] = -1;
} }
} }
@@ -52,13 +95,36 @@ public class Plate {
} }
} }
public void writePlateToFile(String filename) { private void fillWells( List<Integer[]> cells, double stdDev) {
this.stdDev = stdDev;
int numSections = populations.length;
int section = 0;
double m;
int n;
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < populations[section]; j++) {
do {
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
} while (m >= cells.size() || m < 0);
n = (int) Math.floor(m);
Integer[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
cellToAdd[k] = -1;
}
}
well.add(cellToAdd);
}
wells.add(well);
}
section++;
}
} }
public Integer[] getConcentrations(){ public Integer[] getPopulations(){
return concentrations; return populations;
} }
public int getSize(){ public int getSize(){
@@ -69,6 +135,10 @@ public class Plate {
return stdDev; return stdDev;
} }
public boolean isExponential(){return exponential;}
public double getLambda(){return lambda;}
public double getError() { public double getError() {
return error; return error;
} }
@@ -77,142 +147,38 @@ public class Plate {
return wells; return wells;
} }
//returns a map of the counts of the sequence at cell index sIndex, in all wells
//returns a map of counts of all the CDR3s (alphas and betas) in all wells public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
public Map<Integer, Integer>assayWellsCDR3(){ return this.assayWellsSequenceS(0, size, sIndices);
return this.assayWellsCDR3(0, size);
}
//returns a map of counts of all the CDR3 alphas in all wells
public Map<Integer, Integer> assayWellsCDR3Alpha() {
return this.assayWellsCDR3Alpha(0, size);
}
//returns a map of counts of all the CDR3 betas in all wells
public Map<Integer, Integer> assayWellsCDR3Beta() {
return this.assayWellsCDR3Beta(0, size);
}
//returns a map of counts of all CDR1s (alphas and betas) in all wells
public Map<Integer, Integer> assayWellsCDR1(){
return this.assayWellsCDR1(0, size);
}
//returns a map of counts of all the CDR1 alphas in all wells
public Map<Integer, Integer> assayWellsCDR1Alpha() {
return this.assayWellsCDR1Alpha(0, size);
}
//returns a map of counts of all the CDR1 betas in all wells
public Map<Integer, Integer> assayWellsCDR1Beta() {
return this.assayWellsCDR1Beta(0, size);
} }
//returns a map of counts of the CDR3s (alphas and betas) in a specific well //returns a map of the counts of the sequence at cell index sIndex, in a specific well
public Map<Integer, Integer>assayWellsCDR3(int n){ public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
return this.assayWellsCDR3(n, n+1);
}
//returns a map of counts of the CDR1s (alphas and betas) in a specific well
public Map<Integer, Integer> assayWellsCDR1(int n){
return this.assayWellsCDR1(n, n+1);
}
//returns a map of counts of the CDR3 alphas in a specific well
public Map<Integer, Integer> assayWellsCDR3Alpha(int n) {
return this.assayWellsCDR3Alpha(n, n+1);
}
//returns a map of counts of the CDR3 betas in a specific well
public Map<Integer, Integer> assayWellsCDR3Beta(int n) {
return this.assayWellsCDR3Beta(n, n+1);
}
//returns a map of counts of the CDR1 alphas in a specific well
public Map<Integer, Integer> assayWellsCDR1Alpha(int n) {
return this.assayWellsCDR1Alpha(n, n+1);
}
//returns a map of counts of the CDR1 betas in a specific well
public Map<Integer, Integer> assayWellsCDR1Beta(int n) {
return this.assayWellsCDR1Beta(n, n+1);
}
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
//returns a map of the counts of the CDR3s (alphas and betas) in a range of wells public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
public Map<Integer, Integer>assayWellsCDR3(int start, int end){
Map<Integer,Integer> assay = new HashMap<>(); Map<Integer,Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){ for(int pIndex: sIndices){
countCDR3Alphas(assay, wells.get(i)); for(int i = start; i < end; i++){
countCDR3Betas(assay,wells.get(i)); countSequences(assay, wells.get(i), pIndex);
}
} }
return assay; return assay;
} }
//returns a map of the counts of the CDR1s (alphas and betas) in a range of wells //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
public Map<Integer, Integer>assayWellsCDR1(int start, int end){ private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
Map<Integer,Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR1Alphas(assay, wells.get(i));
countCDR1Betas(assay,wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR3 alphas in a range of wells
public Map<Integer, Integer> assayWellsCDR3Alpha(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR3Alphas(assay, wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR3 betas in a range of wells
public Map<Integer, Integer> assayWellsCDR3Beta(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR3Betas(assay, wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR1 alphas in a range of wells
public Map<Integer, Integer> assayWellsCDR1Alpha(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR1Alphas(assay, wells.get(i));
}
return assay;
}
//returns a map of the counts of the CDR1 betas in a range of wells
public Map<Integer, Integer> assayWellsCDR1Beta(int start, int end) {
Map<Integer, Integer> assay = new HashMap<>();
for(int i = start; i < end; i++){
countCDR1Betas(assay, wells.get(i));
}
return assay;
}
//given a map, counts distinct CDR3 alphas in a well
private void countCDR3Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell : well) { for(Integer[] cell : well) {
if(cell[0] != -1){ for(int sIndex: sIndices){
//keys are alphas, value is how many of them have been assayed if(cell[sIndex] != -1){
wellMap.merge(cell[0], 1, (oldValue, newValue) -> oldValue + newValue); wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
} }
}
}
//given a map, counts distinct CDR3 betas in a well
private void countCDR3Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell : well) {
if(cell[1] != -1){
wellMap.merge(cell[1], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}
//given a map, counts distinct CDR1 alphas in a well
private void countCDR1Alphas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell: well){
if(cell[2] != -1){
wellMap.merge(cell[2], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}
//given a map, counts distinct CDR1 betas in a well
private void countCDR1Betas(Map<Integer, Integer> wellMap, List<Integer[]> well){
for(Integer[] cell: well){
if(cell[3] != -1){
wellMap.merge(cell[3], 1, (oldValue, newValue) -> oldValue + newValue);
} }
} }
} }
public String getSourceFileName() {
return sourceFile;
}
public String getFilename() { return filename; }
} }

View File

@@ -14,12 +14,14 @@ import java.util.regex.Pattern;
public class PlateFileReader { public class PlateFileReader {
private List<List<Integer[]>> wells = new ArrayList<>(); private List<List<Integer[]>> wells = new ArrayList<>();
private String filename;
public PlateFileReader(String filename){ public PlateFileReader(String filename){
if(!filename.matches(".*\\.csv")){ if(!filename.matches(".*\\.csv")){
filename = filename + ".csv"; filename = filename + ".csv";
} }
this.filename = filename;
CSVFormat plateFileFormat = CSVFormat.Builder.create() CSVFormat plateFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#') .setCommentMarker('#')
@@ -54,8 +56,8 @@ public class PlateFileReader {
} }
public List<List<Integer[]>> getWells() { public Plate getSamplePlate() {
return wells; return new Plate(filename, wells);
} }
} }

View File

@@ -7,35 +7,39 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.StandardOpenOption; import java.nio.file.StandardOpenOption;
import java.util.*; import java.util.*;
import java.util.regex.Pattern;
public class PlateFileWriter { public class PlateFileWriter {
private int size; private int size;
private List<List<Integer[]>> wells; private List<List<Integer[]>> wells;
private double stdDev; private double stdDev;
private double lambda;
private Double error; private Double error;
private String filename; private String filename;
private String[] headers; private String sourceFileName;
private List<Integer> concentrations; private Integer[] populations;
private boolean isExponential = false;
public PlateFileWriter(String filename, Plate plate) { public PlateFileWriter(String filename, Plate plate) {
if(!filename.matches(".*\\.csv")){ if(!filename.matches(".*\\.csv")){
filename = filename + ".csv"; filename = filename + ".csv";
} }
this.filename = filename; this.filename = filename;
this.sourceFileName = plate.getSourceFileName();
this.size = plate.getSize(); this.size = plate.getSize();
this.stdDev = plate.getStdDev(); this.isExponential = plate.isExponential();
if(isExponential) {
this.lambda = plate.getLambda();
}
else{
this.stdDev = plate.getStdDev();
}
this.error = plate.getError(); this.error = plate.getError();
this.wells = plate.getWells(); this.wells = plate.getWells();
this.concentrations = Arrays.asList(plate.getConcentrations()); this.populations = plate.getPopulations();
concentrations.sort(Comparator.reverseOrder()); Arrays.sort(populations);
} }
public void writePlateFile(){ public void writePlateFile(){
//works as is, but too many columns in csv, need to make them all rows.
//will now redo it so that every column is a well, with well names as headers
//need to give plate error, sample pop size, stdDev, num sections, concentration per section as comments
Comparator<List<Integer[]>> listLengthDescending = Comparator.comparingInt(List::size); Comparator<List<Integer[]>> listLengthDescending = Comparator.comparingInt(List::size);
wells.sort(listLengthDescending.reversed()); wells.sort(listLengthDescending.reversed());
int maxLength = wells.get(0).size(); int maxLength = wells.get(0).size();
@@ -53,37 +57,50 @@ public class PlateFileWriter {
} }
} }
//this took forever // //this took forever and I don't use it
List<List<String>> rows = new ArrayList<>(); // //if I wanted to use it, I'd replace printer.printRecords(wellsAsStrings) with printer.printRecords(rows)
List<String> tmp = new ArrayList<>(); // List<List<String>> rows = new ArrayList<>();
for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){ // List<String> tmp = new ArrayList<>();
tmp.add("well " + (i+1)); // for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
} // tmp.add("well " + (i+1));
rows.add(tmp); // }
for(int row = 0; row < maxLength; row++){ // rows.add(tmp);
tmp = new ArrayList<>(); // for(int row = 0; row < maxLength; row++){
for(List<String> c: wellsAsStrings){ // tmp = new ArrayList<>();
tmp.add(c.get(row)); // for(List<String> c: wellsAsStrings){
} // tmp.add(c.get(row));
rows.add(tmp); // }
} // rows.add(tmp);
StringBuilder concen = new StringBuilder(); // }
for(Integer i: concentrations){
concen.append(i.toString());
concen.append(" ");
}
String concenString = concen.toString();
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build(); //make string out of populations array
StringBuilder populationsStringBuilder = new StringBuilder();
populationsStringBuilder.append(populations[0].toString());
for(int i = 1; i < populations.length; i++){
populationsStringBuilder.append(", ");
populationsStringBuilder.append(populations[i].toString());
}
String wellPopulationsString = populationsStringBuilder.toString();
//set CSV format
CSVFormat plateFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#')
.build();
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat); CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
){ ){
printer.printComment("Cell source file name: " + sourceFileName);
printer.printComment("Each row represents one well on the plate."); printer.printComment("Each row represents one well on the plate.");
printer.printComment("Plate size: " + size); printer.printComment("Plate size: " + size);
printer.printComment("Error rate: " + error); printer.printComment("Error rate: " + error);
printer.printComment("Concentrations: " + concenString); printer.printComment("Well populations: " + wellPopulationsString);
printer.printComment("Std. dev.: " + stdDev); if(isExponential){
printer.printComment("Lambda: " + lambda);
}
else {
printer.printComment("Std. dev.: " + stdDev);
}
printer.printRecords(wellsAsStrings); printer.printRecords(wellsAsStrings);
} catch(IOException ex){ } catch(IOException ex){
System.out.println("Could not make new file named "+filename); System.out.println("Could not make new file named "+filename);

File diff suppressed because it is too large Load Diff

View File

@@ -1,279 +0,0 @@
import java.util.List;
import java.util.Scanner;
import java.util.InputMismatchException;
//
public class UserInterface {
final static Scanner sc = new Scanner(System.in);
static int input;
static boolean quit = false;
/**
 * Program entry point: shows the main menu in a loop and dispatches to the
 * selected action until the user chooses "0) Exit" or the input ends.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args) {
    while(!quit) {
        System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
        System.out.println("Please select an option:");
        System.out.println("1) Generate a population of distinct cells");
        System.out.println("2) Generate a sample plate of T cells");
        System.out.println("3) Simulate CDR3 alpha/beta T cell matching");
        System.out.println("4) Simulate CDR3/CDR1 T cell matching");
        System.out.println("5) Acknowledgements");
        System.out.println("0) Exit");
        // End of input (e.g. piped stdin ran out): leave the loop cleanly instead
        // of letting nextInt() fail with an uncaught NoSuchElementException.
        if(!sc.hasNext()) {
            quit = true;
            break;
        }
        try {
            input = sc.nextInt();
            switch(input){
                case 1 -> makeCells();
                case 2 -> makePlate();
                case 3 -> matchCells();
                case 4 -> matchCellsExpanded();
                case 5 -> acknowledge();
                case 0 -> quit = true;
                default -> throw new InputMismatchException("Invalid input.");
            }
        }catch(InputMismatchException ex){
            System.out.println(ex);
            // Discard the remainder of the offending line. This drops an unparsable
            // token, but does not wait for (and swallow) a fresh token when the
            // number was read successfully and merely out of range.
            if(sc.hasNextLine()) {
                sc.nextLine();
            }
        }
    }
    sc.close();
}
/**
 * Prompts for an output file name and a cell count, generates a cell sample of
 * that size and writes it to the named file.
 *
 * If the input is invalid, the problem is reported and the method returns
 * without generating or writing anything.
 */
private static void makeCells() {
    String filename = null;
    Integer numCells = 0;
    try {
        System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
        System.out.println("\nThe cells will be written to a file.");
        System.out.print("Please enter a file name: ");
        filename = sc.next();
        System.out.print("Please enter the number of T-cells to generate: ");
        numCells = sc.nextInt();
        if(numCells <= 0){
            throw new InputMismatchException("Number of cells must be a positive integer.");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        // Drop the rest of the offending line; unlike next(), this does not block
        // waiting for a new token when the bad value was already consumed.
        if(sc.hasNextLine()) {
            sc.nextLine();
        }
        // Do not fall through and write a file from missing or invalid parameters.
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample sample = Simulator.generateExpandedCellSample(numCells);
    CellFileWriter writer = new CellFileWriter(filename, sample);
    writer.writeCellsToFile();
}
/**
 * Prompts for the parameters of a sample plate (source cell file, output file,
 * frequency distribution, number of wells, sections and their per-well cell
 * counts, dropout rate), fills a plate from the cell file and writes it out
 * through a PlateFileWriter.
 *
 * If any input is invalid, the problem is reported and the method returns
 * without reading the cell file or writing a plate.
 */
private static void makePlate() {
    String cellFile = null;
    String filename = null;
    Double stdDev = 0.0;
    Integer numWells = 0;
    Integer numSections = 0;
    Integer[] concentrations = {1};
    Double dropOutRate = 0.0;
    boolean poisson = false;
    try {
        System.out.println("\nMaking a sample plate requires a population of distinct cells");
        System.out.println("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.println("\nThe sample plate will be written to file");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("Select T-cell frequency distribution function");
        System.out.println("1) Poisson");
        System.out.println("2) Gaussian");
        System.out.println("(Note: wider distributions are more memory intensive to match)");
        System.out.print("Enter selection value: ");
        input = sc.nextInt();
        switch(input) {
            case 1 -> poisson = true;
            case 2 -> {
                System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                System.out.println("(Note: wider distributions are more memory intensive to match)");
                stdDev = sc.nextDouble();
                if(stdDev <= 0.0){
                    throw new InputMismatchException("Value must be positive.");
                }
            }
            default -> {
                System.out.println("Invalid input. Defaulting to Poisson.");
                poisson = true;
            }
        }
        System.out.print("Number of wells on plate: ");
        numWells = sc.nextInt();
        if(numWells < 1){
            throw new InputMismatchException("No wells on plate");
        }
        System.out.println("The plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
        System.out.println("How many sections would you like to make (minimum 1)?");
        numSections = sc.nextInt();
        if(numSections < 1) {
            throw new InputMismatchException("Too few sections.");
        }
        else if (numSections > numWells) {
            throw new InputMismatchException("Cannot have more sections than wells.");
        }
        concentrations = new Integer[numSections];
        for(int i = 1; i <= numSections; i++) {
            System.out.print("Enter number of T-cells per well in section " + i +": ");
            concentrations[i - 1] = sc.nextInt();
        }
        System.out.println("Errors in amplification can induce a well dropout rate for peptides");
        System.out.print("Enter well dropout rate (0.0 to 1.0): ");
        dropOutRate = sc.nextDouble();
        if(dropOutRate < 0.0 || dropOutRate > 1.0) {
            throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]");
        }
    }catch(InputMismatchException ex){
        System.out.println(ex);
        // Drop the rest of the offending line; unlike next(), this does not block
        // waiting for a new token when the bad value was already consumed.
        if(sc.hasNextLine()) {
            sc.nextLine();
        }
        // Do not fall through and build a plate from default or partial parameters.
        System.out.println("Returning to main menu.");
        return;
    }
    CellFileReader cellReader = new CellFileReader(cellFile);
    if(poisson) {
        stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
    }
    Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
    samplePlate.fillWells(cellReader.getCells());
    PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
    writer.writePlateFile();
}
/**
 * Prompts for a cell sample file, a sample plate file, an output file name and
 * the low/high overlap thresholds, runs Simulator.matchCDR3s and writes the
 * result through a MatchingFileWriter.
 *
 * If any input is invalid, the problem is reported and the method returns
 * without reading any file or running the simulation. A high threshold at or
 * above the plate size is reduced to plate size - 1.
 */
private static void matchCells() {
    String filename = null;
    String cellFile = null;
    String plateFile = null;
    Integer lowThreshold = 0;
    Integer highThreshold = Integer.MAX_VALUE;
    try {
        System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.print("Please enter name of an existing sample plate file: ");
        plateFile = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("What is the minimum number of alpha/beta overlap wells to attempt matching?");
        lowThreshold = sc.nextInt();
        if(lowThreshold < 1){
            throw new InputMismatchException("Minimum value for low threshold is 1");
        }
        System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
        highThreshold = sc.nextInt();
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        // Drop the rest of the offending line; unlike next(), this does not block
        // waiting for a new token when the bad value was already consumed.
        if(sc.hasNextLine()) {
            sc.nextLine();
        }
        // Do not fall through and run a simulation from default or partial parameters.
        System.out.println("Returning to main menu.");
        return;
    }
    CellFileReader cellReader = new CellFileReader(cellFile);
    PlateFileReader plateReader = new PlateFileReader(plateFile);
    Plate plate = new Plate(plateReader.getWells());
    if (cellReader.getCells().size() == 0){
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
    }
    else if(plate.getWells().size() == 0){
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
    }
    else{
        if(highThreshold >= plate.getSize()){
            highThreshold = plate.getSize() - 1;
        }
        List<Integer[]> cells = cellReader.getCells();
        MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold);
        //result writer
        MatchingFileWriter writer = new MatchingFileWriter(filename, results.getComments(),
                results.getHeaders(), results.getAllResults());
        writer.writeResultsToFile();
    }
}
/**
 * Prompts for a cell sample file, a sample plate file, an output file name and
 * the CDR3 and CDR1 overlap thresholds, runs Simulator.matchCDR3s followed by
 * Simulator.matchCDR1s, and writes the two returned results to files named
 * with the suffixes "First" and "Dual".
 *
 * If any input is invalid, the problem is reported and the method returns
 * without reading any file or running the simulation. High thresholds at or
 * above the plate size are reduced to plate size - 1.
 */
public static void matchCellsExpanded(){
    /*
    The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
    looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
    match
     */
    String filename = null;
    String cellFile = null;
    String plateFile = null;
    Integer lowThresholdCDR3 = 0;
    Integer highThresholdCDR3 = Integer.MAX_VALUE;
    Integer lowThresholdCDR1 = 0;
    Integer highThresholdCDR1 = Integer.MAX_VALUE;
    try {
        System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.print("Please enter name of an existing sample plate file: ");
        plateFile = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
        lowThresholdCDR3 = sc.nextInt();
        if(lowThresholdCDR3 < 1){
            throw new InputMismatchException("Minimum value for low threshold is 1");
        }
        System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
        highThresholdCDR3 = sc.nextInt();
        System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
        lowThresholdCDR1 = sc.nextInt();
        if(lowThresholdCDR1 < 1){
            throw new InputMismatchException("Minimum value for low threshold is 1");
        }
        System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
        highThresholdCDR1 = sc.nextInt();
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        // Drop the rest of the offending line; unlike next(), this does not block
        // waiting for a new token when the bad value was already consumed.
        if(sc.hasNextLine()) {
            sc.nextLine();
        }
        // Do not fall through and run a simulation from default or partial parameters.
        System.out.println("Returning to main menu.");
        return;
    }
    CellFileReader cellReader = new CellFileReader(cellFile);
    PlateFileReader plateReader = new PlateFileReader(plateFile);
    Plate plate = new Plate(plateReader.getWells());
    if (cellReader.getCells().size() == 0){
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
    }
    else if(plate.getWells().size() == 0){
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
    }
    else{
        if(highThresholdCDR3 >= plate.getSize()){
            highThresholdCDR3 = plate.getSize() - 1;
        }
        if(highThresholdCDR1 >= plate.getSize()){
            highThresholdCDR1 = plate.getSize() - 1;
        }
        List<Integer[]> cells = cellReader.getCells();
        MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3);
        MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
                highThresholdCDR1, preliminaryResults.getMatchMap(), preliminaryResults.getTime());
        //result writer
        MatchingFileWriter writer = new MatchingFileWriter(filename + "First", results[0].getComments(),
                results[0].getHeaders(), results[0].getAllResults());
        writer.writeResultsToFile();
        writer = new MatchingFileWriter(filename + "Dual", results[1].getComments(),
                results[1].getHeaders(), results[1].getAllResults());
        writer.writeResultsToFile();
    }
}
/**
 * Prints the literature citation the simulation is based on, followed by the
 * simulation author credit, one line at a time to standard output.
 */
private static void acknowledge(){
    final String[] creditLines = {
            "Simulation based on:",
            "Howie, B., Sherwood, A. M., et. al.",
            "High-throughput pairing of T cell receptor alpha and beta sequences.",
            "Sci. Transl. Med. 7, 301ra131 (2015)",
            "",
            "Simulation by Eugene Fischer, 2021"
    };
    for(String creditLine : creditLines){
        System.out.println(creditLine);
    }
}
}

23
src/main/java/Vertex.java Normal file
View File

@@ -0,0 +1,23 @@
/**
 * Immutable holder for three Integer values describing a vertex: its label,
 * a sequence value and an occupancy value. The values are stored as given
 * (including null) and returned unchanged by the getters.
 */
public class Vertex {

    private final Integer vertexLabel;
    private final Integer sequence;
    private final Integer occupancy;

    /**
     * Creates a vertex from the three supplied values.
     *
     * @param vertexLabel value returned by {@link #getVertexLabel()}
     * @param sequence    value returned by {@link #getSequence()}
     * @param occupancy   value returned by {@link #getOccupancy()}
     */
    public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) {
        this.occupancy = occupancy;
        this.sequence = sequence;
        this.vertexLabel = vertexLabel;
    }

    /** @return the label passed to the constructor */
    public Integer getVertexLabel() {
        return this.vertexLabel;
    }

    /** @return the sequence value passed to the constructor */
    public Integer getSequence() {
        return this.sequence;
    }

    /** @return the occupancy value passed to the constructor */
    public Integer getOccupancy() {
        return this.occupancy;
    }
}