86 Commits

Author SHA1 Message Date
4bcda9b66c update readme 2022-02-23 13:22:04 -06:00
17ae763c6c Generate populations correctly 2022-02-23 10:37:40 -06:00
decdb147a9 Cache everything 2022-02-23 10:30:42 -06:00
74ffbfd8ac make everything use same random number generator 2022-02-23 09:29:21 -06:00
08699ce8ce Change output order to match interactive UI 2022-02-23 08:56:09 -06:00
69b0cc535c Error checking 2022-02-23 08:55:07 -06:00
e58f7b0a55 checking for possible divide by zero error. 2022-02-23 08:54:14 -06:00
dd2164c250 implement sample plates with random well populations 2022-02-23 08:14:17 -06:00
7323093bdc change "getRandomNumber" to "getRandomInt" for consistency. 2022-02-23 08:13:52 -06:00
f904cf6672 add more data caching code 2022-02-23 08:13:06 -06:00
3ccee9891b change "concentrations" to "populations" for consistency 2022-02-23 08:12:48 -06:00
40c2be1cfb create populations string correctly 2022-02-23 08:11:01 -06:00
4b597c4e5e remove old testing code 2022-02-23 08:10:35 -06:00
b2398531a3 Update readme 2022-02-23 05:11:36 +00:00
8e9a250890 Cache graph data on creation 2022-02-22 22:23:55 -06:00
e2a996c997 update readme 2022-02-22 22:23:40 -06:00
a5db89cb0b update readme 2022-02-22 22:13:01 -06:00
1630f9ccba Moved I/O alert to file reader 2022-02-22 22:11:50 -06:00
d785aa0da2 Moved I/O alert to file reader 2022-02-22 22:10:31 -06:00
a7afeb6119 bugfixes 2022-02-22 22:10:09 -06:00
f8167b0774 Add .jar manifest to repo 2022-02-22 21:45:46 -06:00
68ee9e4bb6 Implemented storing graphs in memory for multiple pairing experiments 2022-02-22 21:30:00 -06:00
fd2ec76b71 Realized how to store graph in memory 2022-02-22 19:42:35 -06:00
875f457a2d reimplement CLI (in progress) 2022-02-22 19:42:23 -06:00
906c06062f Added metadata to MatchingResult to enable CLI options 2022-02-22 18:36:30 -06:00
90ae2ff474 Re-implemeting CLI options (in progress) 2022-02-22 17:37:00 -06:00
7d983076f3 Add link to releases page for download 2022-02-22 16:34:24 -06:00
4b053e6ec4 Remove artifacts from tracking to stop repo bloat. 2022-02-22 16:14:50 -06:00
44784b7976 Remove artifacts from tracking to stop repo bloat. 2022-02-22 16:10:22 -06:00
7c19896dc9 update readme 2022-02-22 16:09:50 -06:00
aec7e3016f Typos in documentation 2022-02-21 11:19:54 -06:00
5c75c1ac09 Update readme.md 2022-02-21 06:53:30 +00:00
cb1f7adece Change "peptide" references in code to "sequence", adding comments 2022-02-21 00:29:34 -06:00
370de79546 Add performance section to readme 2022-02-21 00:02:49 -06:00
a803336f56 Add performance section to readme 2022-02-21 00:01:20 -06:00
94b54b3416 Add performance section to readme 2022-02-20 23:31:25 -06:00
601e141fd0 Update readme 2022-02-20 22:51:49 -06:00
8f9c6b7d33 Update readme TODO 2022-02-20 20:59:05 -06:00
e5ddc73723 Finish reverting back to wells-as-rows 2022-02-20 20:54:44 -06:00
9b18fac74f Invoke garbage collection 2022-02-20 20:47:12 -06:00
63ef6aa7a0 Revert attempt to switch plate output format. It worked, but introduced a bug in graph filtering I don't want to chase down 2022-02-20 20:45:35 -06:00
7558455f39 Correct errors in output and documentation 2022-02-20 20:13:38 -06:00
410f0ae547 Remove testing code, add garbage collection calls 2022-02-20 20:06:45 -06:00
1bc6a11545 Change plate reader/writer to use columns as wells 2022-02-20 19:58:24 -06:00
2b13e10e95 Change plate reader/writer to use columns as wells 2022-02-20 19:48:09 -06:00
4fd5baeb98 Change plate reader/writer to use columns as wells 2022-02-20 19:41:06 -06:00
b2a4e9a42b Change plate reader/writer to use columns as wells 2022-02-20 19:17:56 -06:00
d1bb49b482 Change plate reader/writer to use columns as wells 2022-02-20 19:12:11 -06:00
9adb7dffb8 Change plate reader/writer to use columns as wells 2022-02-20 19:08:04 -06:00
2023bb9d7e Cleanup file output, add UI verbosity 2022-02-20 18:31:31 -06:00
405fbf17ff improve documentation 2022-02-20 17:11:39 -06:00
24519f4a52 improve documentation 2022-02-20 17:04:25 -06:00
2afd01eeef improve documentation 2022-02-20 15:48:11 -06:00
10d0b711bf improve documentation 2022-02-20 15:38:40 -06:00
8f98baf44e improve documentation 2022-02-20 15:37:39 -06:00
d6c7c40c96 improve documentation 2022-02-20 13:23:15 -06:00
61c14b2ecf improve documentation 2022-02-20 13:20:47 -06:00
22fc4aedfe improve documentation 2022-02-20 13:18:49 -06:00
5d24dc6f70 improve documentation 2022-02-20 13:15:32 -06:00
2c01a0211c move readme 2022-02-20 12:02:27 -06:00
f2b5d9e1b7 Rename and update readme 2022-02-20 11:58:12 -06:00
74c8cafd81 scan for filename 2022-02-20 03:08:31 -06:00
d1c37b5ccd Relocate overlap threshold filters 2022-02-20 03:05:56 -06:00
cb2c5a6024 Add plate well concentrations to output data 2022-02-20 02:29:42 -06:00
284a5b3a40 Add plate well concentrations to output data 2022-02-20 02:23:31 -06:00
52afb1edc2 Add plate well concentrations to output data 2022-02-20 02:17:36 -06:00
9c52bc878a Add plate well concentrations to output data 2022-02-20 02:13:13 -06:00
248fe4d662 Add plate well concentrations to output data 2022-02-20 02:09:22 -06:00
5d0e60708c Add plate well concentrations to output data 2022-02-20 01:53:34 -06:00
c96b7237e9 Add plate well concentrations to output data 2022-02-20 01:40:01 -06:00
0b28259800 Add plate well concentrations to output data 2022-02-20 01:13:22 -06:00
837ef7bfe4 UI cleanup, some code cleanup 2022-02-20 01:05:28 -06:00
0bebbc7602 Add missing filtering code 2022-02-19 22:56:38 -06:00
84f7ddb696 Fix interactive output 2022-02-19 22:49:50 -06:00
c4633da9eb Correct propogation of peptide counts 2022-02-19 22:33:38 -06:00
5b2ed165d0 Clean up interactive text, bugfix 2022-02-19 22:21:09 -06:00
0026d8cdfe Use buffered input/output streams 2022-02-19 22:04:41 -06:00
13fb7168bf Refactor to read/write files of graph and map data 2022-02-19 21:46:01 -06:00
568a6be3c7 Refactoring to allow graphs from file 2022-02-19 17:23:55 -06:00
cfa473c7ce Adding parameters to filter by occupancy difference and percent overlap 2022-02-19 14:06:11 -06:00
6faacd9a82 Adding parameters to filter by occupancy difference and percent overlap 2022-02-19 14:05:26 -06:00
ce88e170c1 Update readme with max memory flag 2022-02-18 17:48:25 -06:00
47e23addfa Do new filtering before matching 2022-02-18 17:42:05 -06:00
b9ee31b64c Do new filtering before matching 2022-02-18 17:28:24 -06:00
bf32a55e4b Implement matching using jheaps PairingHeap 2022-02-18 16:09:23 -06:00
acff88475b Command line arguments working, need better documentation and error handling 2021-11-23 12:24:48 -06:00
28 changed files with 2329 additions and 919 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/out/

View File

@@ -1,11 +1,11 @@
<component name="ArtifactManager">
<artifact type="jar" name="TCellSim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path>
<root id="archive" name="TCellSim.jar">
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
<root id="archive" name="BiGpairSEQ_Sim.jar">
<element id="directory" name="META-INF">
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
</element>
<element id="module-output" name="TCellSim" />
<element id="module-output" name="BigPairSEQ" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />

2
.idea/compiler.xml generated
View File

@@ -6,7 +6,7 @@
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="TCellSim" />
<module name="BigPairSEQ" />
</profile>
</annotationProcessing>
</component>

15
.idea/libraries/jgrapht_io.xml generated Normal file
View File

@@ -0,0 +1,15 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

10
.idea/libraries/jheaps.xml generated Normal file
View File

@@ -0,0 +1,10 @@
<component name="libraryTable">
<library name="jheaps" type="repository">
<properties maven-id="org.jheaps:jheaps:0.14" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@@ -1,4 +0,0 @@
Executable .jar file with interactive, command line UI
Usage: java -jar TCellSim.jar
Requires Java11 or higher (Openjdk-17 recommended)

293
readme.md Normal file
View File

@@ -0,0 +1,293 @@
# BiGpairSEQ SIMULATOR
## ABOUT
This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
## THEORY
Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
against a null distribution, BiGpairSEQ does not do any statistical calculations
directly.
BiGpairSEQ creates a [weighted bipartite graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that shares a well
is connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
This is a well-studied combinatorial optimization problem, with many known solutions.
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Mehlhorn and Näher (1999).
The current version of the program uses a pairing heap instead of a Fibonacci heap for its priority queue,
which has lower theoretical efficiency but also lower complexity overhead, and is often equivalently performant
in practice.
## USAGE
### RUNNING THE PROGRAM
[Download the current version of BiGpairSEQ_Sim.](https://gitea.ejsf.synology.me/efischer/BiGpairSEQ/releases)
BiGpairSEQ_Sim is an executable .jar file. Requires Java 11 or higher. [OpenJDK 17](https://jdk.java.net/17/)
recommended.
Run with the command:
`java -jar BiGpairSEQ_Sim.jar`
Processing sample plates with tens of thousands of sequences may require large amounts
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
For example, to run the program with 32 gigabytes of memory, use the command:
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
Once running, BiGpairSEQ_Sim has an interactive, menu-driven CLI for generating files and simulating TCR pairing. The
main menu looks like this:
```
--------BiGPairSEQ SIMULATOR--------
ALPHA/BETA T CELL RECEPTOR MATCHING
USING WEIGHTED BIPARTITE GRAPHS
------------------------------------
Please select an option:
1) Generate a population of distinct cells
2) Generate a sample plate of T cells
3) Generate CDR3 alpha/beta occupancy data and overlap graph
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
9) About/Acknowledgments
0) Exit
```
### INPUT/OUTPUT
To run the simulation, the program reads and writes 4 kinds of files:
* Cell Sample files in CSV format
* Sample Plate files in CSV format
* Graph/Data files in binary object serialization format
* Matching Results files in CSV format
These files are often generated in sequence. To save file I/O time, the most recent instance of each of these four
files either generated or read from disk is cached in program memory. This is especially important for Graph/Data files,
which can be several gigabytes in size. Since some simulations may require running multiple,
differently-configured BiGpairSEQ matchings on the same graph, keeping the most recent graph cached drastically reduces
execution time.
Subsequent uses of the same data file won't need to be read in again until another file of that type is used or generated.
The program checks whether it needs to update its cached data by comparing filenames as entered by the user. On
encountering a new filename, the program flushes its cache and reads in the new file.
When entering filenames, it is not necessary to include the file extension (.csv or .ser). When reading or
writing files, the program will automatically add the correct extension to any filename without one.
#### Cell Sample Files
Cell Sample files consist of any number of distinct "T cells." Every cell contains
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
are not necessarily unique; the relative diversity can be set when making the file.
(Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
Options when making a Cell Sample file:
* Number of T cells to generate
* Factor by which CDR3s are more diverse than CDR1s
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
Comments are preceded by `#`
Structure:
---
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|---|---|---|---|
|unique number|unique number|number|number|
---
#### Sample Plate Files
Sample Plate files consist of any number of "wells" containing any number of T cells (as
described above). The wells are filled randomly from a Cell Sample file, according to a selected
frequency distribution. Additionally, every individual sequence within each cell may, with some
given dropout probability, be omitted from the file; this simulates the effect of amplification errors
prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a
different concentration of T cells per well.
Options when making a Sample Plate file:
* Cell Sample file to use
* Statistical distribution to apply to Cell Sample file
* Poisson
* Gaussian
* Standard deviation size
* Exponential
* Lambda value
* *(Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was exponential with a lambda of approximately 0.6. (Howie, et al. 2015))*
* Total number of wells on the plate
* Number of sections on plate
* Number of T cells per well
* per section, if more than one section
* Dropout rate
Files are in CSV format. There are no header labels. Every row represents a well.
Every value represents an individual cell, containing four sequences, depicted as an array string:
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
`[525902, 791533, -1, 866282]`
Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
Structure:
---
```
# Cell source file name:
# Each row represents one well on the plate
# Plate size:
# Concentrations:
# Lambda -or- StdDev:
```
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|---|---|---|---|
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
| **...** | **...** | **...** | **...** |
---
#### Graph/Data Files
Graph/Data files are serialized binaries of a Java object containing the weighted bipartite graph representation of a
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
Sample Plate file (to construct the associated occupancy graph).
These files can be several gigabytes in size. Writing them to a file lets us generate a graph and its metadata once,
then use it for multiple different BiGpairSEQ simulations.
Options for creating a Graph/Data file:
* The Cell Sample file to use
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
These files do not have a human-readable structure, and are not portable to other programs. (Export of graphs in a
portable data format may be implemented in the future. The tricky part is encoding the necessary metadata.)
---
#### Matching Results Files
Matching results files consist of the results of a BiGpairSEQ matching simulation. Making them requires a Graph and
Data file. Matching results files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
* The minimum number of alpha/beta overlap wells to attempt to match
* (must be >= 1)
* The maximum number of alpha/beta overlap wells to attempt to match
* (must be <= the number of wells on the plate - 1)
* The maximum difference in alpha/beta occupancy to attempt to match
* (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
* The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as value in range 0 - 100.
* (Optional. To skip using this filter, enter 0)
Example output:
---
```
# Source Sample Plate file: 4MilCellsPlate.csv
# Source Graph and Data file: 4MilCellsPlateGraph.ser
# T cell counts in sample plate wells: 30000
# Total alphas found: 11813
# Total betas found: 11808
# High overlap threshold: 94
# Low overlap threshold: 3
# Minimum overlap percent: 0
# Maximum occupancy difference: 96
# Pairing attempt rate: 0.438
# Correct pairings: 5151
# Incorrect pairings: 18
# Pairing error rate: 0.00348
# Simulation time: 862 seconds
```
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|---|---|---|---|---|---|---|
|5242972|17|1571520|18|17|true|1.41E-18|
|5161027|18|2072219|18|18|true|7.31E-20|
|4145198|33|1064455|30|29|true|2.65E-21|
|7700582|18|112748|18|18|true|7.31E-20|
|...|...|...|...|...|...|...|
---
**NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all.
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
### PERFORMANCE
Performance details of the example excerpted above:
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
## TODO
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* Hold graph data in memory until another graph is read-in? ~~ABANDONED~~ ~~UNABANDONED~~ DONE
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
* Might have figured out a way to do it, by taking edges out and then putting them back into the graph. This may actually be possible. If so, awesome.
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
* ~~Problem is variable number of cells in a well~~
* ~~Apache Commons CSV library writes entries a row at a time~~
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Tidyverse transposes CSV matrices in R, might not even be worth fixing._
* Re-implement command line arguments, to enable scripting and statistical simulation studies
* Implement sample plates with random numbers of T cells per well.
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
* Enable GraphML output in addition to serialized object binaries, for data portability
* Custom vertex type with attribute for sequence occupancy?
* Re-implement CDR1 matching method
* Implement Duan and Su's maximum weight matching algorithm
* Add controllable algorithm-type parameter?
* Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm
* in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage
* Add controllable heap-type parameter?
## CITATIONS
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
* Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
* Mehlhorn, K., Näher, St. [The LEDA Platform of Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
* Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596-615 (1987)
## EXTERNAL LIBRARIES USED
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
* [JHeaps](https://www.jheaps.org) -- For pairing heap priority queue used in maximum weight matching algorithm
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting. (**Awaiting re-implementation**.)
## ACKNOWLEDGEMENTS
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
## AUTHOR
BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.

View File

@@ -0,0 +1,95 @@
import java.util.Random;
/**
 * Main class. Chooses the interface type and caches file data.
 *
 * <p>Holds, as static state, one cell sample, one sample plate and one graph
 * (each with an associated filename), plus the random number generator handed
 * out by {@link #getRand()}.
 */
public class BiGpairSEQ {

    // Random number generator returned by getRand().
    private static final Random rand = new Random();

    // Cached cell sample and the filename stored alongside it.
    private static CellSample cellSampleInMemory = null;
    private static String cellFilename = null;

    // Cached sample plate and the filename stored alongside it.
    private static Plate plateInMemory = null;
    private static String plateFilename = null;

    // Cached graph/data object and the filename stored alongside it.
    private static GraphWithMapData graphInMemory = null;
    private static String graphFilename = null;

    /**
     * Program entry point. With no arguments the interactive interface is started;
     * with any arguments a notice is printed and nothing else happens.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        if (args.length != 0) {
            //This will be uncommented when command line arguments are re-implemented.
            //CommandLineInterface.startCLI(args);
            System.out.println("Command line arguments are still being re-implemented.");
            return;
        }
        InteractiveInterface.startInteractive();
    }

    /** @return the single {@link Random} instance owned by this class */
    public static Random getRand() {
        return rand;
    }

    // ---- cell sample cache ----

    /** @return the cached cell sample, or {@code null} if none is stored */
    public static CellSample getCellSampleInMemory() {
        return cellSampleInMemory;
    }

    /** Stores the given cell sample as the cached one. */
    public static void setCellSampleInMemory(CellSample cellSampleInMemory) {
        BiGpairSEQ.cellSampleInMemory = cellSampleInMemory;
    }

    /** Drops the cached cell sample, then requests a garbage collection. */
    public static void clearCellSampleInMemory() {
        cellSampleInMemory = null;
        System.gc();
    }

    /** @return the stored cell sample filename, or {@code null} if none is stored */
    public static String getCellFilename() {
        return cellFilename;
    }

    /** Stores the cell sample filename. */
    public static void setCellFilename(String cellFilename) {
        BiGpairSEQ.cellFilename = cellFilename;
    }

    // ---- sample plate cache ----

    /** @return the cached plate, or {@code null} if none is stored */
    public static Plate getPlateInMemory() {
        return plateInMemory;
    }

    /** Stores the given plate as the cached one. */
    public static void setPlateInMemory(Plate plateInMemory) {
        BiGpairSEQ.plateInMemory = plateInMemory;
    }

    /** Drops the cached plate, then requests a garbage collection. */
    public static void clearPlateInMemory() {
        plateInMemory = null;
        System.gc();
    }

    /** @return the stored plate filename, or {@code null} if none is stored */
    public static String getPlateFilename() {
        return plateFilename;
    }

    /** Stores the plate filename. */
    public static void setPlateFilename(String plateFilename) {
        BiGpairSEQ.plateFilename = plateFilename;
    }

    // ---- graph cache ----

    /** @return the cached graph/data object, or {@code null} if none is stored */
    public static GraphWithMapData getGraphInMemory() {
        return graphInMemory;
    }

    /**
     * Stores the given graph as the cached one. If a graph is already cached it is
     * dropped first and a garbage collection is requested before the new reference
     * is stored.
     *
     * @param g the graph/data object to cache
     */
    public static void setGraphInMemory(GraphWithMapData g) {
        if (graphInMemory != null) {
            // Release the old graph before holding on to the new one.
            graphInMemory = null;
            System.gc();
        }
        graphInMemory = g;
    }

    /** Drops the cached graph, then requests a garbage collection. */
    public static void clearGraphInMemory() {
        graphInMemory = null;
        System.gc();
    }

    /** @return the stored graph filename, or {@code null} if none is stored */
    public static String getGraphFilename() {
        return graphFilename;
    }

    /** Stores the graph filename. */
    public static void setGraphFilename(String filename) {
        graphFilename = filename;
    }
}

View File

@@ -13,6 +13,7 @@ public class CellFileReader {
private String filename;
private List<Integer[]> distinctCells = new ArrayList<>();
private Integer cdr1Freq;
public CellFileReader(String filename) {
if(!filename.matches(".*\\.csv")){
@@ -38,19 +39,37 @@ public class CellFileReader {
cell[3] = Integer.valueOf(record.get("Beta CDR1"));
distinctCells.add(cell);
}
} catch(IOException ex){
System.out.println("cell file " + filename + " not found.");
System.err.println(ex);
}
//get CDR1 frequency
ArrayList<Integer> cdr1Alphas = new ArrayList<>();
for (Integer[] cell : distinctCells) {
cdr1Alphas.add(cell[3]);
}
double count = cdr1Alphas.stream().distinct().count();
count = Math.ceil(distinctCells.size() / count);
cdr1Freq = (int) count;
}
public CellSample getCellSample() {
return new CellSample(distinctCells, cdr1Freq);
}
public String getFilename() { return filename;}
public List<Integer[]> getCells(){
//Refactor everything that uses this to have access to a Cell Sample and get the cells there instead.
public List<Integer[]> getListOfDistinctCellsDEPRECATED(){
return distinctCells;
}
public Integer getCellCount() {
public Integer getCellCountDEPRECATED() {
//Refactor everything that uses this to have access to a Cell Sample and get the count there instead.
return distinctCells.size();
}
}

View File

@@ -18,7 +18,7 @@ public class CellSample {
return cdr1Freq;
}
public Integer population(){
public Integer getCellCount(){
return cells.size();
}

View File

@@ -0,0 +1,328 @@
import org.apache.commons.cli.*;
/*
* Class for parsing options passed to program from command line
*
* Top-level flags:
* cells : to make a cell sample file
* plate : to make a sample plate file
* graph : to make a graph and data file
* match : to do a cdr3 matching (WITH OR WITHOUT MAKING A RESULTS FILE. May just want to print summary for piping.)
*
* Cell flags:
* count : number of cells to generate
* diversity factor : factor by which CDR3s are more diverse than CDR1s
* output : name of the output file
*
* Plate flags:
* cellfile : name of the cell sample file to use as input
* wells : the number of wells on the plate
* dist : the statistical distribution to use
* (if exponential) lambda : the lambda value of the exponential distribution
* (if gaussian) stddev : the standard deviation of the gaussian distribution
* rand : randomize well populations, take a minimum argument and a maximum argument
* populations : number of t cells per well per section (number of arguments determines number of sections)
* dropout : plate dropout rate, double from 0.0 to 1.0
* output : name of the output file
*
* Graph flags:
* cellfile : name of the cell sample file to use as input
* platefile : name of the sample plate file to use as input
* output : name of the output file
*
* Match flags:
* graphFile : name of graph and data file to use as input
* min : minimum number of overlap wells to attempt a matching
* max : the maximum number of overlap wells to attempt a matching
* maxdiff : (optional) the maximum difference in occupancy to attempt a matching
* minpercent : (optional) the minimum percent overlap to attempt a matching.
* writefile : (optional) the filename to write results to
* output : the values to print to System.out for piping
*
*/
//Non-interactive entry point: builds the option set, parses the arguments, and runs the selected mode.
public class CommandLineInterface {

    //Parses args and runs the mode selected by exactly one of -cells, -plates, -graph, -match.
    //Parse failures, missing values, and malformed numbers are all reported on System.err
    //through the same "Parsing failed" path; nothing is thrown to the caller for those cases.
    //NOTE(review): -graph is accepted by the parser but no handler exists for it yet.
    public static void startCLI(String[] args) {
        //These command line options are a big mess
        //Really, I don't think command line tools are expected to work in this many different modes
        //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
        //should be three separate programs.
        //There might be a way to do it with option parameters?
        //main options set
        Options mainOptions = new Options();
        Option makeCells = Option.builder("cells")
                .longOpt("make-cells")
                .desc("Makes a file of distinct cells")
                .build();
        Option makePlate = Option.builder("plates")
                .longOpt("make-plates")
                .desc("Makes a sample plate file")
                .build();
        Option makeGraph = Option.builder("graph")
                .longOpt("make-graph")
                .desc("Makes a graph and data file")
                .build();
        Option matchCDR3 = Option.builder("match")
                .longOpt("match-cdr3")
                .desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
                .build();
        //Exactly one mode must be chosen
        OptionGroup mainGroup = new OptionGroup();
        mainGroup.addOption(makeCells);
        mainGroup.addOption(makePlate);
        mainGroup.addOption(makeGraph);
        mainGroup.addOption(matchCDR3);
        mainGroup.setRequired(true);
        mainOptions.addOptionGroup(mainGroup);
        //Reuse clones of this for other options groups, rather than making it lots of times
        Option outputFile = Option.builder("o")
                .longOpt("output-file")
                .hasArg()
                .argName("filename")
                .desc("Name of output file")
                .build();
        mainOptions.addOption(outputFile);
        //Options cellOptions = new Options();
        Option numCells = Option.builder("nc")
                .longOpt("num-cells")
                .desc("The number of distinct cells to generate")
                .hasArg()
                .argName("number")
                .build();
        mainOptions.addOption(numCells);
        Option cdr1Freq = Option.builder("d")
                .longOpt("peptide-diversity-factor")
                .hasArg()
                .argName("number")
                .desc("Number of distinct CDR3s for every CDR1")
                .build();
        mainOptions.addOption(cdr1Freq);
        //Option cellOutput = (Option) outputFile.clone();
        //cellOutput.setRequired(true);
        //mainOptions.addOption(cellOutput);
        //Options plateOptions = new Options();
        Option inputCells = Option.builder("c")
                .longOpt("cell-file")
                .hasArg()
                .argName("file")
                .desc("The cell sample file used for filling wells")
                .build();
        mainOptions.addOption(inputCells);
        Option numWells = Option.builder("w")
                .longOpt("num-wells")
                .hasArg()
                .argName("number")
                .desc("The number of wells on each plate")
                .build();
        mainOptions.addOption(numWells);
        Option numPlates = Option.builder("np")
                .longOpt("num-plates")
                .hasArg()
                .argName("number")
                .desc("The number of plate files to output")
                .build();
        mainOptions.addOption(numPlates);
        //Option plateOutput = (Option) outputFile.clone();
        //plateOutput.setRequired(true);
        //plateOutput.setDescription("Prefix for plate output filenames");
        //mainOptions.addOption(plateOutput);
        Option plateErr = Option.builder("err")
                .longOpt("drop-out-rate")
                .hasArg()
                .argName("number")
                .desc("Well drop-out rate. (Probability between 0 and 1)")
                .build();
        mainOptions.addOption(plateErr);
        Option plateConcentrations = Option.builder("t")
                .longOpt("t-cells-per-well")
                .hasArgs()
                .argName("number 1, number 2, ...")
                .desc("Number of T cells per well for each plate section")
                .build();
        mainOptions.addOption(plateConcentrations);
        //different distributions, mutually exclusive
        OptionGroup plateDistributions = new OptionGroup();
        Option plateExp = Option.builder("exponential")
                .desc("Sample from distinct cells with exponential frequency distribution")
                .build();
        plateDistributions.addOption(plateExp);
        Option plateGaussian = Option.builder("gaussian")
                .desc("Sample from distinct cells with gaussian frequency distribution")
                .build();
        plateDistributions.addOption(plateGaussian);
        Option platePoisson = Option.builder("poisson")
                .desc("Sample from distinct cells with poisson frequency distribution")
                .build();
        plateDistributions.addOption(platePoisson);
        mainOptions.addOptionGroup(plateDistributions);
        Option plateStdDev = Option.builder("stddev")
                .desc("Standard deviation for gaussian distribution")
                .hasArg()
                .argName("number")
                .build();
        mainOptions.addOption(plateStdDev);
        Option plateLambda = Option.builder("lambda")
                .desc("Lambda for exponential distribution")
                .hasArg()
                .argName("number")
                .build();
        mainOptions.addOption(plateLambda);
        //
        // String cellFile, String filename, Double stdDev,
        // Integer numWells, Integer numSections,
        // Integer[] concentrations, Double dropOutRate
        //
        //Options matchOptions = new Options();
        inputCells.setDescription("The cell sample file to be used for matching.");
        mainOptions.addOption(inputCells);
        //The match branch reads "g"; it has to be registered or the parser rejects it as unrecognized
        Option inputGraph = Option.builder("g")
                .longOpt("graph-file")
                .hasArg()
                .argName("file")
                .desc("The graph and data file to be used for matching")
                .build();
        mainOptions.addOption(inputGraph);
        Option lowThresh = Option.builder("low")
                .longOpt("low-threshold")
                .hasArg()
                .argName("number")
                .desc("Sets the minimum occupancy overlap to attempt matching")
                .build();
        mainOptions.addOption(lowThresh);
        Option highThresh = Option.builder("high")
                .longOpt("high-threshold")
                .hasArg()
                .argName("number")
                .desc("Sets the maximum occupancy overlap to attempt matching")
                .build();
        mainOptions.addOption(highThresh);
        Option occDiff = Option.builder("occdiff")
                .longOpt("occupancy-difference")
                .hasArg()
                .argName("Number")
                .desc("Maximum difference in alpha/beta occupancy to attempt matching")
                .build();
        mainOptions.addOption(occDiff);
        Option overlapPer = Option.builder("ovper")
                .longOpt("overlap-percent")
                .hasArg()
                .argName("Percent")
                .desc("Minimum overlap percent to attempt matching (0 -100)")
                .build();
        mainOptions.addOption(overlapPer);
        Option inputPlates = Option.builder("p")
                .longOpt("plate-files")
                .hasArgs()
                .desc("Plate files to match")
                .build();
        mainOptions.addOption(inputPlates);
        CommandLineParser parser = new DefaultParser();
        try {
            CommandLine line = parser.parse(mainOptions, args);
            if (line.hasOption("match")) {
                String graphFile = requiredValue(line, "g");
                Integer lowThreshold = requiredInt(line, "low");
                Integer highThreshold = requiredInt(line, "high");
                //Optional filters: when absent, use the same "no filtering" defaults as the interactive UI
                Integer occupancyDifference = toInt("occdiff",
                        line.getOptionValue("occdiff", String.valueOf(Integer.MAX_VALUE)));
                Integer overlapPercent = toInt("ovper", line.getOptionValue("ovper", "0"));
                //Matching works from the graph file, so it is run once; the -p values were never
                //passed to the matcher, and looping over them failed when -p was not supplied.
                matchCDR3s(graphFile, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
            }
            else if (line.hasOption("cells")) {
                String filename = requiredValue(line, "o");
                Integer numDistCells = requiredInt(line, "nc");
                Integer freq = requiredInt(line, "d");
                makeCells(filename, numDistCells, freq);
            }
            else if (line.hasOption("plates")) {
                String cellFile = requiredValue(line, "c");
                String filenamePrefix = requiredValue(line, "o");
                Integer numWellsOnPlate = requiredInt(line, "w");
                Integer numPlatesToMake = requiredInt(line, "np");
                String[] concentrationsToUseString = line.getOptionValues("t");
                if (concentrationsToUseString == null) {
                    throw new ParseException("Missing required option: t");
                }
                Integer[] concentrationsToUse = new Integer[concentrationsToUseString.length];
                for (int i = 0; i < concentrationsToUseString.length; i++) {
                    concentrationsToUse[i] = toInt("t", concentrationsToUseString[i]);
                }
                Double dropOutRate = requiredDouble(line, "err");
                //Output files are numbered from 1: <prefix>1, <prefix>2, ...
                if (line.hasOption("exponential")) {
                    Double lambda = requiredDouble(line, "lambda");
                    for (int i = 1; i <= numPlatesToMake; i++) {
                        makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
                                concentrationsToUse, dropOutRate);
                    }
                }
                else if (line.hasOption("gaussian")) {
                    //The option is registered as "stddev"; the old lookup of "std-dev" always returned null
                    Double stdDev = requiredDouble(line, "stddev");
                    for (int i = 1; i <= numPlatesToMake; i++) {
                        makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
                                concentrationsToUse, dropOutRate);
                    }
                }
                else if (line.hasOption("poisson")) {
                    for (int i = 1; i <= numPlatesToMake; i++) {
                        makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
                                concentrationsToUse, dropOutRate);
                    }
                }
                else {
                    //Previously this case silently produced no output at all
                    throw new ParseException(
                            "No frequency distribution selected: use -exponential, -gaussian, or -poisson");
                }
            }
        }
        catch (ParseException exp) {
            System.err.println("Parsing failed. Reason: " + exp.getMessage());
        }
    }

    //Returns the value of an option that must be present; a missing value becomes a ParseException
    //so it is reported like any other command line error.
    private static String requiredValue(CommandLine line, String opt) throws ParseException {
        String value = line.getOptionValue(opt);
        if (value == null) {
            throw new ParseException("Missing required option: " + opt);
        }
        return value;
    }

    //Converts an option value to an Integer, reporting malformed input as a ParseException
    private static Integer toInt(String opt, String value) throws ParseException {
        try {
            return Integer.valueOf(value);
        } catch (NumberFormatException ex) {
            throw new ParseException("Option " + opt + " expects an integer but got: " + value);
        }
    }

    //Reads a mandatory integer option
    private static Integer requiredInt(CommandLine line, String opt) throws ParseException {
        return toInt(opt, requiredValue(line, opt));
    }

    //Reads a mandatory floating point option
    private static Double requiredDouble(CommandLine line, String opt) throws ParseException {
        String value = requiredValue(line, opt);
        try {
            return Double.valueOf(value);
        } catch (NumberFormatException ex) {
            throw new ParseException("Option " + opt + " expects a number but got: " + value);
        }
    }

    //for calling from command line
    //Generates a cell sample and writes it to the named file
    public static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
        CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
        CellFileWriter writer = new CellFileWriter(filename, sample);
        writer.writeCellsToFile();
    }

    //Fills one plate from the cell file using the exponential fill and writes it out
    public static void makePlateExp(String cellFile, String filename, Double lambda,
                                    Integer numWells, Integer[] concentrations, Double dropOutRate){
        CellFileReader cellReader = new CellFileReader(cellFile);
        Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
        samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getListOfDistinctCellsDEPRECATED(), lambda);
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        writer.writePlateFile();
    }

    //Fills one plate using the gaussian fill with stdDev = sqrt(cell count), then writes it out
    private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
                                         Integer[] concentrations, Double dropOutRate){
        CellFileReader cellReader = new CellFileReader(cellFile);
        Double stdDev = Math.sqrt(cellReader.getCellCountDEPRECATED());
        Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
        samplePlate.fillWells(cellReader.getFilename(), cellReader.getListOfDistinctCellsDEPRECATED(), stdDev);
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        writer.writePlateFile();
    }

    //Fills one plate using the gaussian fill with the given standard deviation, then writes it out
    private static void makePlate(String cellFile, String filename, Double stdDev,
                                  Integer numWells, Integer[] concentrations, Double dropOutRate){
        CellFileReader cellReader = new CellFileReader(cellFile);
        Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
        samplePlate.fillWells(cellReader.getFilename(), cellReader.getListOfDistinctCellsDEPRECATED(), stdDev);
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        writer.writePlateFile();
    }

    //Placeholder: command line matching is not implemented yet, so this does nothing
    private static void matchCDR3s(String graphFile, Integer lowThreshold, Integer highThreshold,
                                   Integer occupancyDifference, Integer overlapPercent) {
    }
}

View File

@@ -4,6 +4,9 @@ import java.math.MathContext;
public abstract class Equations {
//pValue calculation as described in original pairSEQ paper.
//Included for comparison with original results.
//Not used by BiGpairSEQ for matching.
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
int w_ab = (int) w_ab_d;
double pv = 0.0;
@@ -14,6 +17,9 @@ public abstract class Equations {
return pv;
}
//Implementation of the (corrected) probability equation from pairSEQ paper.
//Included for comparison with original results.
//Not used by BiGpairSEQ for matching.
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
BigInteger numer1 = choose(w, w_ab);
BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
@@ -26,10 +32,9 @@ public abstract class Equations {
return prob.doubleValue();
}
/*
* This works because nC(k+1) = nCk * (n-k)/(k+1)
* Since nC0 = 1, can start there and generate all the rest.
*/
//This works because nC(k+1) = nCk * (n-k)/(k+1)
//Since nC0 = 1, can start there and generate all the rest.
public static BigInteger choose(final int N, final int K) {
BigInteger nCk = BigInteger.ONE;
for (int k = 0; k < K; k++) {

View File

@@ -0,0 +1,31 @@
import java.io.*;
//Loads a serialized GraphWithMapData object from disk when constructed.
//NOTE(review): this uses native Java deserialization; only point it at files this program wrote.
public class GraphDataObjectReader {

    private GraphWithMapData data;
    private String filename;

    //Reads the object stored in the named file, appending ".ser" when the name lacks it.
    //A missing file or an unknown class is reported with a stack trace and leaves the data null;
    //any other IOException is passed on to the caller.
    public GraphDataObjectReader(String filename) throws IOException {
        String serializedPath = filename.matches(".*\\.ser") ? filename : filename + ".ser";
        this.filename = serializedPath;
        //both streams are declared as resources so each one is closed automatically
        try (BufferedInputStream buffered = new BufferedInputStream(new FileInputStream(serializedPath));
             ObjectInputStream objectStream = new ObjectInputStream(buffered)) {
            System.out.println("Reading graph data from file. This may take some time");
            System.out.println("File I/O time is not included in results");
            Object loaded = objectStream.readObject();
            data = (GraphWithMapData) loaded;
        } catch (FileNotFoundException | ClassNotFoundException problem) {
            problem.printStackTrace();
        }
    }

    //The filename actually used for reading, including the ".ser" extension
    public String getFilename() {
        return filename;
    }

    //The deserialized data, or null when reading failed in the constructor
    public GraphWithMapData getData() {
        return data;
    }
}

View File

@@ -0,0 +1,31 @@
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
//Serializes a GraphWithMapData object to a ".ser" file.
public class GraphDataObjectWriter {

    private GraphWithMapData data;
    private String filename;

    //Remembers what to write and where; ".ser" is appended when the name lacks it.
    //Nothing is written until writeDataToFile() is called.
    public GraphDataObjectWriter(String filename, GraphWithMapData data) {
        this.data = data;
        this.filename = filename.matches(".*\\.ser") ? filename : filename + ".ser";
    }

    //Writes the object through a buffered stream. I/O failures are reported with a stack trace
    //rather than thrown.
    public void writeDataToFile() {
        try (BufferedOutputStream buffered = new BufferedOutputStream(new FileOutputStream(filename));
             ObjectOutputStream objectStream = new ObjectOutputStream(buffered)) {
            System.out.println("Writing graph and occupancy data to file. This may take some time.");
            System.out.println("File I/O time is not included in results.");
            objectStream.writeObject(data);
        } catch (IOException problem) {
            problem.printStackTrace();
        }
    }
}

View File

@@ -0,0 +1,35 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.graphml.GraphMLImporter;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
//Imports a GraphML file into a caller-supplied graph when constructed.
public class GraphMLFileReader {

    private String filename;
    private SimpleWeightedGraph graph;

    //Reads the named file (".graphml" is appended when missing) into the given graph.
    //An IOException is reported on the console instead of being thrown, in which case
    //the graph is left as the importer left it.
    public GraphMLFileReader(String filename, SimpleWeightedGraph graph) {
        String graphmlPath = filename.matches(".*\\.graphml") ? filename : filename + ".graphml";
        this.filename = graphmlPath;
        this.graph = graph;
        //the reader is a try-with-resources resource, so it is closed automatically
        try (BufferedReader input = Files.newBufferedReader(Path.of(graphmlPath))) {
            GraphMLImporter<SimpleWeightedGraph, BufferedReader> graphImporter = new GraphMLImporter<>();
            graphImporter.importGraph(graph, input);
        } catch (IOException problem) {
            System.out.println("Graph file " + graphmlPath + " not found.");
            System.err.println(problem);
        }
    }

    //The graph instance that was passed to the constructor
    public SimpleWeightedGraph getGraph() {
        return graph;
    }
}

View File

@@ -0,0 +1,35 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.dot.DOTExporter;
import org.jgrapht.nio.graphml.GraphMLExporter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
//Exports a graph to a new GraphML file.
public class GraphMLFileWriter {

    String filename;
    SimpleWeightedGraph graph;

    //Remembers the graph and the target name; ".graphml" is appended when the name lacks it.
    //Nothing is written until writeGraphToFile() is called.
    public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) {
        this.graph = graph;
        this.filename = filename.matches(".*\\.graphml") ? filename : filename + ".graphml";
    }

    //Writes the graph. The file is opened with CREATE_NEW, so an existing file of the same
    //name is an error; any IOException is reported on the console rather than thrown.
    public void writeGraphToFile() {
        try (BufferedWriter output = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW)) {
            GraphMLExporter<SimpleWeightedGraph, BufferedWriter> graphExporter = new GraphMLExporter<>();
            graphExporter.exportGraph(graph, output);
        } catch (IOException problem) {
            System.out.println("Could not make new file named "+filename);
            System.err.println(problem);
        }
    }
}

View File

@@ -0,0 +1,90 @@
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
//Static helpers that temporarily strip edges from the overlap graph and put them back later.
//Every filter returns the removed edges as {source, target, weight} triples so that
//addRemovedEdges can restore them.
public abstract class GraphModificationFunctions {

    //Decides, for a single edge, whether a filter should take it out of the graph
    private interface EdgeCondition {
        boolean shouldRemove(DefaultWeightedEdge edge);
    }

    //Shared two-pass removal: first record every edge the condition selects, then delete them.
    //Deletion happens in a second pass because the edge set cannot be modified while it is iterated.
    private static List<Integer[]> removeEdgesWhere(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                    EdgeCondition condition) {
        List<Integer[]> removed = new ArrayList<>();
        for (DefaultWeightedEdge candidate : graph.edgeSet()) {
            if (condition.shouldRemove(candidate)) {
                //the weight is truncated to an int for storage
                Integer[] triple = {
                        graph.getEdgeSource(candidate),
                        graph.getEdgeTarget(candidate),
                        (int) graph.getEdgeWeight(candidate)
                };
                removed.add(triple);
            }
        }
        for (Integer[] triple : removed) {
            graph.removeEdge(triple[0], triple[1]);
        }
        return removed;
    }

    //remove over- and under-weight edges
    //Keeps only edges whose weight lies in [low, high]; returns the ones that were removed
    public static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                            int low, int high) {
        return removeEdgesWhere(graph, edge -> {
            double overlap = graph.getEdgeWeight(edge);
            return overlap > high || overlap < low;
        });
    }

    //Remove edges for pairs with large occupancy discrepancy
    //The occupancy of each endpoint is looked up through the vertex-to-sequence maps.
    //An edge is removed when |alpha occupancy - beta occupancy| >= maxOccupancyDifference.
    //NOTE(review): because the comparison is >=, a maxOccupancyDifference of 0 removes every
    //edge; confirm whether the limit was meant to be inclusive.
    public static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                            Map<Integer, Integer> alphaWellCounts,
                                                            Map<Integer, Integer> betaWellCounts,
                                                            Map<Integer, Integer> plateVtoAMap,
                                                            Map<Integer, Integer> plateVtoBMap,
                                                            Integer maxOccupancyDifference) {
        return removeEdgesWhere(graph, edge -> {
            Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(edge)));
            Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(edge)));
            return Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference;
        });
    }

    //Remove edges for pairs where overlap size is significantly lower than the well occupancy
    //An edge is removed when the overlap, as a fraction of either endpoint's occupancy,
    //falls below minOverlapPercent / 100.
    public static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                                         Map<Integer, Integer> alphaWellCounts,
                                                         Map<Integer, Integer> betaWellCounts,
                                                         Map<Integer, Integer> plateVtoAMap,
                                                         Map<Integer, Integer> plateVtoBMap,
                                                         Integer minOverlapPercent) {
        return removeEdgesWhere(graph, edge -> {
            Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(edge)));
            Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(edge)));
            double overlap = graph.getEdgeWeight(edge);
            double minFraction = minOverlapPercent / 100.0;
            boolean alphaTooLow = overlap / alphaOcc < minFraction;
            return alphaTooLow || (overlap / betaOcc < minFraction);
        });
    }

    //Puts previously removed edges back, restoring each stored weight
    public static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
                                       List<Integer[]> removedEdges) {
        for (Integer[] triple : removedEdges) {
            Integer source = triple[0];
            Integer target = triple[1];
            DefaultWeightedEdge restored = graph.addEdge(source, target);
            graph.setEdgeWeight(restored, (double) triple[2]);
        }
    }
}

View File

@@ -0,0 +1,106 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import java.time.Duration;
import java.util.Map;
//Can't just write the graph, because I need the occupancy data too.
//Makes most sense to serialize object and write that to a file.
//Which means there's no reason to split map data and graph data up.
//Serializable bundle of the overlap graph plus every lookup table stored alongside it.
//Everything except the source filename is fixed at construction.
public class GraphWithMapData implements java.io.Serializable {

    //name of the file this object was read from or written to; set after construction
    private String sourceFilename;

    //the graph itself
    private final SimpleWeightedGraph graph;

    //plate description and sequence counts
    private Integer numWells;
    private Integer[] wellPopulations;
    private Integer alphaCount;
    private Integer betaCount;

    //lookup tables
    private final Map<Integer, Integer> distCellsMapAlphaKey;
    private final Map<Integer, Integer> plateVtoAMap;
    private final Map<Integer, Integer> plateVtoBMap;
    private final Map<Integer, Integer> plateAtoVMap;
    private final Map<Integer, Integer> plateBtoVMap;
    private final Map<Integer, Integer> alphaWellCounts;
    private final Map<Integer, Integer> betaWellCounts;

    //duration recorded by whoever built the graph
    private final Duration time;

    //Stores every argument as given; no copies are made, so the caller's array and maps are shared
    public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
                            Integer alphaCount, Integer betaCount,
                            Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
                            Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
                            Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
                            Map<Integer, Integer> betaWellCounts, Duration time) {
        this.graph = graph;
        this.time = time;

        this.numWells = numWells;
        this.wellPopulations = wellConcentrations;
        this.alphaCount = alphaCount;
        this.betaCount = betaCount;

        this.distCellsMapAlphaKey = distCellsMapAlphaKey;
        this.plateVtoAMap = plateVtoAMap;
        this.plateVtoBMap = plateVtoBMap;
        this.plateAtoVMap = plateAtoVMap;
        this.plateBtoVMap = plateBtoVMap;
        this.alphaWellCounts = alphaWellCounts;
        this.betaWellCounts = betaWellCounts;
    }

    //---- source file bookkeeping ----

    public String getSourceFilename() {
        return sourceFilename;
    }

    public void setSourceFilename(String filename) {
        this.sourceFilename = filename;
    }

    //---- graph and timing ----

    public SimpleWeightedGraph getGraph() {
        return graph;
    }

    public Duration getTime() {
        return time;
    }

    //---- plate description and counts ----

    public Integer getNumWells() {
        return numWells;
    }

    //returns the stored array itself, not a copy
    public Integer[] getWellPopulations() {
        return wellPopulations;
    }

    public Integer getAlphaCount() {
        return alphaCount;
    }

    public Integer getBetaCount() {
        return betaCount;
    }

    //---- lookup tables (the stored maps themselves, not copies) ----

    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return distCellsMapAlphaKey;
    }

    public Map<Integer, Integer> getPlateVtoAMap() {
        return plateVtoAMap;
    }

    public Map<Integer, Integer> getPlateVtoBMap() {
        return plateVtoBMap;
    }

    public Map<Integer, Integer> getPlateAtoVMap() {
        return plateAtoVMap;
    }

    public Map<Integer, Integer> getPlateBtoVMap() {
        return plateBtoVMap;
    }

    public Map<Integer, Integer> getAlphaWellCounts() {
        return alphaWellCounts;
    }

    public Map<Integer, Integer> getBetaWellCounts() {
        return betaWellCounts;
    }
}

View File

@@ -0,0 +1,510 @@
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//
public class InteractiveInterface {
private static final Random rand = BiGpairSEQ.getRand();
private static final Scanner sc = new Scanner(System.in);
private static int input;
private static boolean quit = false;
//Main menu loop: prints the menu, reads a numeric selection, and dispatches to the chosen
//action until the user selects 0. The shared Scanner is closed on exit.
public static void startInteractive() {
    while (!quit) {
        System.out.println();
        System.out.println("--------BiGPairSEQ SIMULATOR--------");
        System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
        System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
        System.out.println("------------------------------------");
        System.out.println("Please select an option:");
        System.out.println("1) Generate a population of distinct cells");
        System.out.println("2) Generate a sample plate of T cells");
        System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
        System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
        //Need to re-do the CDR3/CDR1 matching to correspond to new pattern
        //System.out.println("5) Generate CDR3/CDR1 occupancy graph");
        //System.out.println("6) Simulate CDR3/CDR1 T cell matching");
        System.out.println("9) About/Acknowledgments");
        System.out.println("0) Exit");
        try {
            input = sc.nextInt();
            switch (input) {
                case 1 -> makeCells();
                case 2 -> makePlate();
                case 3 -> makeCDR3Graph();
                case 4 -> matchCDR3s();
                //case 6 -> matchCellsCDR1();
                case 9 -> acknowledge();
                case 0 -> quit = true;
                default -> throw new InputMismatchException("Invalid input.");
            }
        } catch (InputMismatchException | IOException ex) {
            System.out.println(ex);
            //Drop only what is left of the current input line. A non-numeric token is still in the
            //scanner and gets discarded here; an out-of-range number or an IOException leaves just
            //the line break. Calling sc.next() in those cases would block and then swallow the
            //user's next, valid, entry.
            if (sc.hasNextLine()) {
                sc.nextLine();
            }
        }
    }
    sc.close();
}
//Prompts for a file name, the CDR3/CDR1 diversity factor and a cell count, then generates a
//cell sample, writes it to the file, and caches it in BiGpairSEQ.
//Invalid input aborts the operation and returns to the main menu; nothing is generated or written.
private static void makeCells() {
    String filename = null;
    Integer numCells = 0;
    Integer cdr1Freq = 1;
    try {
        System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
        System.out.println("\nThe cells will be written to a CSV file.");
        System.out.print("Please enter a file name: ");
        filename = sc.next();
        System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
        System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
        //NOTE(review): this factor is not range-checked here; confirm what the generator
        //does with zero or negative values.
        cdr1Freq = sc.nextInt();
        System.out.print("\nPlease enter the number of T-cells to generate: ");
        numCells = sc.nextInt();
        if(numCells <= 0){
            throw new InputMismatchException("Number of cells must be a positive integer.");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //Discard the rest of the offending line only; sc.next() would block waiting for a new
        //token whenever the rejected value had already been consumed.
        if (sc.hasNextLine()) {
            sc.nextLine();
        }
        //Do not carry on with placeholder values (zero cells)
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
    System.out.println("Writing cells to file");
    CellFileWriter writer = new CellFileWriter(filename, sample);
    writer.writeCellsToFile();
    System.out.println("Cell sample written to: " + filename);
    //Replace whatever sample was cached before with the one just generated
    if(BiGpairSEQ.getCellSampleInMemory() != null) {
        BiGpairSEQ.clearCellSampleInMemory();
    }
    BiGpairSEQ.setCellSampleInMemory(sample);
    BiGpairSEQ.setCellFilename(filename);
}
//Output a CSV of sample plate
//Interactively collects everything needed for a sample plate (cell file, output file,
//frequency distribution, well count, per-well populations, dropout rate), fills the plate,
//writes it to a file, and caches the plate in BiGpairSEQ.
//Invalid input aborts the operation and returns to the main menu; no plate is built or written.
private static void makePlate() {
    String cellFile = null;
    String filename = null;
    Double stdDev = 0.0;
    Integer numWells = 0;
    Integer numSections;
    Integer[] populations = {1};
    Double dropOutRate = 0.0;
    boolean poisson = false;
    boolean exponential = false;
    double lambda = 1.5;
    try {
        System.out.println("\nSimulated sample plates consist of:");
        System.out.println("* a number of wells");
        System.out.println(" * separated into one or more sections");
        System.out.println(" * each of which has a set quantity of cells per well");
        System.out.println(" * selected from a statistical distribution of distinct cells");
        System.out.println(" * with a set dropout rate for individual sequences within a cell");
        System.out.println("\nMaking a sample plate requires a population of distinct cells");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.println("\nThe sample plate will be written to a CSV file");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("\nSelect T-cell frequency distribution function");
        System.out.println("1) Poisson");
        System.out.println("2) Gaussian");
        System.out.println("3) Exponential");
        System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
        System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
        System.out.println("(Note: wider distributions are more memory intensive to match)");
        System.out.print("Enter selection value: ");
        input = sc.nextInt();
        switch (input) {
            case 1 -> poisson = true;
            case 2 -> {
                //Gaussian: neither flag is set, the entered stdDev is used as is
                System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                System.out.println("(Note: wider distributions are more memory intensive to match)");
                stdDev = sc.nextDouble();
                if (stdDev <= 0.0) {
                    throw new InputMismatchException("Value must be positive.");
                }
            }
            case 3 -> {
                exponential = true;
                System.out.print("Please enter lambda value for exponential distribution: ");
                lambda = sc.nextDouble();
                if (lambda <= 0.0) {
                    lambda = 0.6;
                    System.out.println("Value must be positive. Defaulting to 0.6.");
                }
            }
            default -> {
                System.out.println("Invalid input. Defaulting to exponential.");
                exponential = true;
            }
        }
        System.out.print("\nNumber of wells on plate: ");
        numWells = sc.nextInt();
        if(numWells < 1){
            throw new InputMismatchException("No wells on plate");
        }
        //choose whether to make T cell population/well random
        System.out.println("Randomize number of T cells in each well? (y/n)");
        String ans = sc.next();
        Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(ans);
        boolean randomWellPopulations = matcher.matches();
        if(randomWellPopulations) { //if T cell population/well is random
            //every well is its own section with its own randomly drawn population
            numSections = numWells;
            Integer minPop;
            Integer maxPop;
            System.out.print("Please enter minimum number of T cells in a well: ");
            minPop = sc.nextInt();
            if(minPop < 1) {
                throw new InputMismatchException("Minimum well population must be positive");
            }
            System.out.println("Please enter maximum number of T cells in a well: ");
            maxPop = sc.nextInt();
            if(maxPop < minPop) {
                throw new InputMismatchException("Max well population must be greater than min well population");
            }
            //maximum should be inclusive, so need to add one to max of randomly generated values
            populations = rand.ints(minPop, maxPop + 1)
                    .limit(numSections)
                    .boxed()
                    .toArray(Integer[]::new);
            System.out.print("Populations: ");
            System.out.println(Arrays.toString(populations));
        }
        else{ //if T cell population/well is not random
            System.out.println("\nThe plate can be evenly sectioned to allow different numbers of T cells per well.");
            System.out.println("How many sections would you like to make (minimum 1)?");
            numSections = sc.nextInt();
            if (numSections < 1) {
                throw new InputMismatchException("Too few sections.");
            } else if (numSections > numWells) {
                throw new InputMismatchException("Cannot have more sections than wells.");
            }
            populations = new Integer[numSections];
            for (int section = 1; section <= numSections; section++) {
                System.out.print("Enter number of T cells per well in section " + section + ": ");
                populations[section - 1] = sc.nextInt();
            }
        }
        System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
        System.out.print("Enter well dropout rate (0.0 to 1.0): ");
        dropOutRate = sc.nextDouble();
        if(dropOutRate < 0.0 || dropOutRate > 1.0) {
            throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]");
        }
    }catch(InputMismatchException ex){
        System.out.println(ex);
        //Discard the rest of the offending line only; sc.next() would block waiting for a new
        //token whenever the rejected value had already been consumed.
        if (sc.hasNextLine()) {
            sc.nextLine();
        }
        //Do not build and write a plate from half-entered placeholder values (e.g. zero wells)
        System.out.println("Returning to main menu.");
        return;
    }
    //Reuse the cached cell sample when it came from the same file, otherwise read and cache it
    CellSample cells;
    if (cellFile.equals(BiGpairSEQ.getCellFilename())){
        cells = BiGpairSEQ.getCellSampleInMemory();
    }
    else {
        System.out.println("Reading Cell Sample file: " + cellFile);
        CellFileReader cellReader = new CellFileReader(cellFile);
        cells = cellReader.getCellSample();
        BiGpairSEQ.clearCellSampleInMemory();
        BiGpairSEQ.setCellSampleInMemory(cells);
        BiGpairSEQ.setCellFilename(cellFile);
    }
    if (!exponential && poisson) {
        stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
    }
    Plate samplePlate = new Plate(numWells, dropOutRate, populations);
    if(exponential){
        samplePlate.fillWellsExponential(cellFile, cells.getCells(), lambda);
    }
    else {
        samplePlate.fillWells(cellFile, cells.getCells(), stdDev);
    }
    PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
    System.out.println("Writing Sample Plate to file");
    writer.writePlateFile();
    System.out.println("Sample Plate written to file: " + filename);
    BiGpairSEQ.setPlateInMemory(samplePlate);
    BiGpairSEQ.setPlateFilename(filename);
}
//Output serialized binary of GraphAndMapData object
//Prompts for a cell sample file, a sample plate file and an output name, builds the graph
//and occupancy data from them, writes the result to the output file, and caches it in BiGpairSEQ.
//Cell samples and plates already cached under the same filename are reused instead of re-read.
private static void makeCDR3Graph() {
    String filename = null;
    String cellFile = null;
    String plateFile = null;
    try {
        System.out.println("\nGenerating bipartite weighted graph encoding occupancy overlap data "
                + "\nrequires a cell sample file and a sample plate file.");
        System.out.print("\nPlease enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.print("\nPlease enter name of an existing sample plate file: ");
        plateFile = sc.next();
        System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        sc.next();
    }

    //cell sample: take it from the cache when the filename matches, otherwise load and cache it
    assert cellFile != null;
    CellSample cellSample;
    boolean cellsCached = cellFile.equals(BiGpairSEQ.getCellFilename());
    if (cellsCached) {
        cellSample = BiGpairSEQ.getCellSampleInMemory();
    } else {
        BiGpairSEQ.clearCellSampleInMemory();
        System.out.println("Reading Cell Sample file: " + cellFile);
        cellSample = new CellFileReader(cellFile).getCellSample();
        BiGpairSEQ.setCellSampleInMemory(cellSample);
        BiGpairSEQ.setCellFilename(cellFile);
    }

    //sample plate: same caching rule as for the cell sample
    assert plateFile != null;
    Plate plate;
    boolean plateCached = plateFile.equals(BiGpairSEQ.getPlateFilename());
    if (plateCached) {
        plate = BiGpairSEQ.getPlateInMemory();
    } else {
        BiGpairSEQ.clearPlateInMemory();
        System.out.println("Reading Sample Plate file: " + plateFile);
        PlateFileReader plateReader = new PlateFileReader(plateFile);
        plate = new Plate(plateReader.getFilename(), plateReader.getWells());
        BiGpairSEQ.setPlateInMemory(plate);
        BiGpairSEQ.setPlateFilename(plateFile);
    }

    //nothing to build from: report which input is empty and go back to the menu
    if (cellSample.getCells().size() == 0) {
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
        return;
    }
    if (plate.getWells().size() == 0 || plate.getPopulations().length == 0) {
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
        return;
    }

    List<Integer[]> distinctCells = cellSample.getCells();
    GraphWithMapData graphData = Simulator.makeGraph(distinctCells, plate, true);
    assert filename != null;
    GraphDataObjectWriter graphWriter = new GraphDataObjectWriter(filename, graphData);
    graphWriter.writeDataToFile();
    System.out.println("Graph and Data file written to: " + filename);
    BiGpairSEQ.setGraphInMemory(graphData);
    BiGpairSEQ.setGraphFilename(filename);
    System.out.println("Graph and Data file " + filename + " cached.");
}
/**
 * Interactively simulates CDR3 alpha/beta matching and writes the results to a file.
 * <p>
 * Prompts on the shared scanner for an existing graph/occupancy data file, an output filename and
 * the four matching filters. Each filter value that fails its range check is replaced with a
 * fallback value, and the replacement is reported to the user. If a non-integer token is entered
 * where a number is expected, the exception is printed, the offending token is discarded, and any
 * filter not yet read keeps its permissive default.
 * <p>
 * The graph is only read from disk when its filename differs from the one cached in
 * {@code BiGpairSEQ} (or when nothing is cached); the freshly read graph then replaces the cache.
 *
 * @throws IOException declared by the signature; presumably raised by the file reader/writer
 *                     helpers (not visible from this method - TODO confirm)
 */
private static void matchCDR3s() throws IOException {
    String filename = null;
    String graphFilename = null;
    Integer lowThreshold = 0;
    Integer highThreshold = Integer.MAX_VALUE;
    Integer maxOccupancyDiff = Integer.MAX_VALUE;
    Integer minOverlapPercent = 0;
    try {
        System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
        System.out.println("Please enter name of an existing graph and occupancy data file: ");
        graphFilename = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
        lowThreshold = sc.nextInt();
        if (lowThreshold < 1) {
            lowThreshold = 1;
            System.out.println("Value for low occupancy overlap threshold must be positive");
            System.out.println("Value for low occupancy overlap threshold set to 1");
        }
        System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
        highThreshold = sc.nextInt();
        if (highThreshold < lowThreshold) {
            highThreshold = lowThreshold;
            System.out.println("Value for high occupancy overlap threshold must be >= low overlap threshold");
            System.out.println("Value for high occupancy overlap threshold set to " + lowThreshold);
        }
        System.out.println("What is the minimum percentage of a sequence's wells in alpha/beta overlap to attempt matching? (0 - 100)");
        minOverlapPercent = sc.nextInt();
        if (minOverlapPercent < 0 || minOverlapPercent > 100) {
            //actually apply the fallback announced by the message below; previously the
            //out-of-range value was passed on to the simulator unchanged
            minOverlapPercent = 0;
            System.out.println("Value outside range. Minimum occupancy overlap percentage set to 0");
        }
        System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
        maxOccupancyDiff = sc.nextInt();
        if (maxOccupancyDiff < 0) {
            maxOccupancyDiff = 0;
            System.out.println("Maximum allowable difference in alpha/beta occupancy must be nonnegative");
            System.out.println("Maximum allowable difference in alpha/beta occupancy set to 0");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //the token that failed to parse is still in the scanner; discard it
        sc.next();
    }
    assert graphFilename != null;
    //check if this is the same graph we already have in memory.
    GraphWithMapData data;
    if (!(graphFilename.equals(BiGpairSEQ.getGraphFilename())) || BiGpairSEQ.getGraphInMemory() == null) {
        BiGpairSEQ.clearGraphInMemory();
        //read object data from file
        GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename);
        data = dataReader.getData();
        //set new graph in memory and new filename
        BiGpairSEQ.setGraphInMemory(data);
        BiGpairSEQ.setGraphFilename(graphFilename);
    }
    else {
        data = BiGpairSEQ.getGraphInMemory();
    }
    //simulate matching
    MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
            minOverlapPercent, true);
    //write results to file
    assert filename != null;
    MatchingFileWriter writer = new MatchingFileWriter(filename, results);
    System.out.println("Writing results to file");
    writer.writeResultsToFile();
    System.out.println("Results written to file: " + filename);
}
///////
//Rewrite this to fit new matchCDR3 method with file I/O
///////
// public static void matchCellsCDR1(){
// /*
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
// match
// */
// String filename = null;
// String preliminaryResultsFilename = null;
// String cellFile = null;
// String plateFile = null;
// Integer lowThresholdCDR3 = 0;
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
// Integer lowThresholdCDR1 = 0;
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
// boolean outputCDR3Matches = false;
// try {
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
// System.out.print("Please enter name of an existing cell sample file: ");
// cellFile = sc.next();
// System.out.print("Please enter name of an existing sample plate file: ");
// plateFile = sc.next();
// System.out.println("The matching results will be written to a file.");
// System.out.print("Please enter a name for the output file: ");
// filename = sc.next();
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
// lowThresholdCDR3 = sc.nextInt();
// if(lowThresholdCDR3 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
// highThresholdCDR3 = sc.nextInt();
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
// maxOccupancyDiffCDR3 = sc.nextInt();
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
// minOverlapPercentCDR3 = sc.nextInt();
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
// }
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
// lowThresholdCDR1 = sc.nextInt();
// if(lowThresholdCDR1 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
// highThresholdCDR1 = sc.nextInt();
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
// System.out.print("Please enter y/n: ");
// String ans = sc.next();
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
// Matcher matcher = pattern.matcher(ans);
// if(matcher.matches()){
// outputCDR3Matches = true;
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
// preliminaryResultsFilename = sc.next();
// System.out.println("CDR3 alpha/beta matches will be output to file");
// }
// else{
// System.out.println("CDR3 alpha/beta matches will not be output to file");
// }
// } catch (InputMismatchException ex) {
// System.out.println(ex);
// sc.next();
// }
// CellFileReader cellReader = new CellFileReader(cellFile);
// PlateFileReader plateReader = new PlateFileReader(plateFile);
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
// if (cellReader.getCells().size() == 0){
// System.out.println("No cell sample found.");
// System.out.println("Returning to main menu.");
// }
// else if(plate.getWells().size() == 0){
// System.out.println("No sample plate found.");
// System.out.println("Returning to main menu.");
//
// }
// else{
// if(highThresholdCDR3 >= plate.getSize()){
// highThresholdCDR3 = plate.getSize() - 1;
// }
// if(highThresholdCDR1 >= plate.getSize()){
// highThresholdCDR1 = plate.getSize() - 1;
// }
// List<Integer[]> cells = cellReader.getCells();
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
// highThresholdCDR1, preliminaryResults);
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
// writer.writeResultsToFile();
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
// writer.writeResultsToFile();
// if(outputCDR3Matches){
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
// writer.writeResultsToFile();
// }
// }
// }
/**
 * Prints the program description, documentation pointers, the pairSEQ citation and the
 * author credit to standard output, one line at a time.
 */
private static void acknowledge(){
    //an empty entry produces a blank separator line
    final String[] acknowledgementLines = {
        "This program simulates BiGpairSEQ, a graph theory based adaptation",
        "of the pairSEQ algorithm for pairing T cell receptor sequences.",
        "",
        "For full documentation, view readme.md file distributed with this code",
        "or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.",
        "",
        "pairSEQ citation:",
        "Howie, B., Sherwood, A. M., et. al.",
        "High-throughput pairing of T cell receptor alpha and beta sequences.",
        "Sci. Transl. Med. 7, 301ra131 (2015)",
        "",
        "BiGpairSEQ_Sim by Eugene Fischer, 2021-2022"
    };
    for (String text : acknowledgementLines) {
        System.out.println(text);
    }
}
}

View File

@@ -0,0 +1,3 @@
Manifest-Version: 1.0
Main-Class: BiGpairSEQ

View File

@@ -7,13 +7,10 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.regex.Pattern;
public class MatchingFileWriter {
private String filename;
private String sourceFileName;
private List<String> comments;
private List<String> headers;
private List<List<String>> allResults;
@@ -23,7 +20,6 @@ public class MatchingFileWriter {
filename = filename + ".csv";
}
this.filename = filename;
this.sourceFileName = result.getSourceFileName();
this.comments = result.getComments();
this.headers = result.getHeaders();
this.allResults = result.getAllResults();

View File

@@ -1,18 +1,41 @@
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class MatchingResult {
private String sourceFile;
private List<String> comments;
private List<String> headers;
private List<List<String>> allResults;
private Map<Integer, Integer> matchMap;
private Duration time;
public MatchingResult(String sourceFileName, List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
this.sourceFile = sourceFileName;
this.comments = comments;
private final Map<String, String> metadata;
private final List<String> comments;
private final List<String> headers;
private final List<List<String>> allResults;
private final Map<Integer, Integer> matchMap;
private final Duration time;
public MatchingResult(Map<String, String> metadata, List<String> headers,
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
/*
* POSSIBLE KEYS FOR METADATA MAP ARE:
* sample plate filename *
* graph filename *
* well populations *
* total alphas found *
* total betas found *
* high overlap threshold
* low overlap threshold
* maximum occupancy difference
* minimum overlap percent
* pairing attempt rate
* correct pairing count
* incorrect pairing count
* pairing error rate
* simulation time
*/
this.metadata = metadata;
this.comments = new ArrayList<>();
for (String key : metadata.keySet()) {
comments.add(key +": " + metadata.get(key));
}
this.headers = headers;
this.allResults = allResults;
this.matchMap = matchMap;
@@ -20,6 +43,8 @@ public class MatchingResult {
}
public Map<String, String> getMetadata() {return metadata;}
public List<String> getComments() {
return comments;
}
@@ -40,7 +65,32 @@ public class MatchingResult {
return time;
}
public String getSourceFileName() {
return sourceFile;
/**
 * Returns the value stored in the metadata map under the key "sample plate filename".
 *
 * @return the mapped value, or whatever the map returns for a missing key (null for standard maps)
 */
public String getPlateFilename() {
return metadata.get("sample plate filename");
}
/**
 * Returns the value stored in the metadata map under the key "graph filename".
 *
 * @return the mapped value, or whatever the map returns for a missing key (null for standard maps)
 */
public String getGraphFilename() {
return metadata.get("graph filename");
}
/**
 * Parses the comma-separated "well populations" metadata entry into an array of integers.
 * <p>
 * Whitespace around each entry is ignored, so both "10, 20" and "10,20" are accepted.
 *
 * @return the populations in the order they appear in the metadata string; an empty array when
 *         the entry is missing or blank
 * @throws NumberFormatException if an entry is not a valid integer
 */
public Integer[] getWellPopulations() {
    String popString = metadata.get("well populations");
    //a result without this entry has no populations to report; avoid the NullPointerException
    //(missing key) or NumberFormatException (blank value) the unguarded parse would raise
    if (popString == null || popString.isBlank()) {
        return new Integer[0];
    }
    String[] entries = popString.split(",");
    Integer[] popArray = new Integer[entries.length];
    for (int i = 0; i < entries.length; i++) {
        popArray[i] = Integer.valueOf(entries[i].trim());
    }
    return popArray;
}
/**
 * Returns the number of alpha sequences recorded in the metadata.
 * <p>
 * The key list documented in the constructor names this entry "total alphas found", so that
 * key is tried first; the previously used "total alpha count" key is kept as a fallback.
 *
 * @return the parsed count
 * @throws NumberFormatException if neither key is present or the value is not an integer
 */
public Integer getAlphaCount() {
    String count = metadata.get("total alphas found");
    if (count == null) {
        count = metadata.get("total alpha count");
    }
    return Integer.parseInt(count);
}
/**
 * Returns the number of beta sequences recorded in the metadata.
 * <p>
 * The key list documented in the constructor names this entry "total betas found", so that
 * key is tried first; the previously used "total beta count" key is kept as a fallback.
 *
 * @return the parsed count
 * @throws NumberFormatException if neither key is present or the value is not an integer
 */
public Integer getBetaCount() {
    String count = metadata.get("total betas found");
    if (count == null) {
        count = metadata.get("total beta count");
    }
    return Integer.parseInt(count);
}
//put in the rest of these methods following the same pattern
}

View File

@@ -1,24 +1,28 @@
import java.util.*;
/*
TODO: Implement exponential distribution using inversion method - DONE
TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
import java.util.*;
public class Plate {
private String sourceFile;
private List<List<Integer[]>> wells;
private Random rand = new Random();
private final Random rand = BiGpairSEQ.getRand();
private int size;
private double error;
private Integer[] concentrations;
private Integer[] populations;
private double stdDev;
private double lambda;
boolean exponential = false;
public Plate (int size, double error, Integer[] concentrations, double stdDev) {
public Plate(int size, double error, Integer[] populations) {
this.size = size;
this.error = error;
this.concentrations = concentrations;
this.stdDev = stdDev;
this.populations = populations;
wells = new ArrayList<>();
}
@@ -26,25 +30,39 @@ public class Plate {
this.sourceFile = sourceFileName;
this.wells = wells;
this.size = wells.size();
List<Integer> concentrations = new ArrayList<>();
for (List<Integer[]> w: wells) {
if(!concentrations.contains(w.size())){
concentrations.add(w.size());
}
}
this.populations = new Integer[concentrations.size()];
for (int i = 0; i < this.populations.length; i++) {
this.populations[i] = concentrations.get(i);
}
}
public void fillWells(String sourceFileName, List<Integer[]> cells) {
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){
this.lambda = lambda;
exponential = true;
sourceFile = sourceFileName;
int numSections = concentrations.length;
int numSections = populations.length;
int section = 0;
double m;
int n;
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < concentrations[section]; j++) {
for (int j = 0; j < populations[section]; j++) {
do {
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
} while (m >= cells.size() || m < 0);
n = (int) Math.floor(m);
Integer[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce
cellToAdd[k] = -1;
}
}
@@ -56,8 +74,37 @@ public class Plate {
}
}
public Integer[] getConcentrations(){
return concentrations;
/**
 * Fills the plate's wells with cells drawn from {@code cells} using a Gaussian distribution over
 * the cell indices.
 * <p>
 * The plate is divided into {@code populations.length} sections of {@code size / populations.length}
 * wells each (integer division, so any remainder wells are not created). Every well in section
 * {@code s} receives {@code populations[s]} cells. Each cell index is drawn from a Gaussian
 * centred on {@code cells.size() / 2} with the given standard deviation; draws outside
 * {@code [0, cells.size())} are rejected and redrawn. Each selected cell is copied, and every
 * sequence in the copy is independently replaced by -1 with probability {@code error}.
 *
 * @param sourceFileName name recorded as this plate's source file
 * @param cells          the cell sample to draw from
 * @param stdDev         standard deviation of the Gaussian used to pick cell indices
 * @throws IllegalArgumentException if a cell has to be drawn but {@code cells} is empty
 */
public void fillWells(String sourceFileName, List<Integer[]> cells, double stdDev) {
    this.stdDev = stdDev;
    sourceFile = sourceFileName;
    int numSections = populations.length;
    for (int section = 0; section < numSections; section++) {
        for (int i = 0; i < (size / numSections); i++) {
            List<Integer[]> well = new ArrayList<>();
            for (int j = 0; j < populations[section]; j++) {
                //with no cells every draw is rejected, so the sampling loop below would never end
                if (cells.isEmpty()) {
                    throw new IllegalArgumentException("Cannot fill wells from an empty cell sample.");
                }
                double m;
                do {
                    m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
                } while (m >= cells.size() || m < 0);
                int n = (int) Math.floor(m);
                //copy the cell so that dropout in this well does not alter the source sample
                Integer[] cellToAdd = cells.get(n).clone();
                for (int k = 0; k < cellToAdd.length; k++) {
                    //nextDouble() is never negative, so no absolute value is needed
                    if (rand.nextDouble() < error) {//error applied to each sequence
                        cellToAdd[k] = -1;
                    }
                }
                well.add(cellToAdd);
            }
            wells.add(well);
        }
    }
}
public Integer[] getPopulations(){
return populations;
}
public int getSize(){
@@ -68,6 +115,10 @@ public class Plate {
return stdDev;
}
public boolean isExponential(){return exponential;}
public double getLambda(){return lambda;}
public double getError() {
return error;
}
@@ -76,30 +127,30 @@ public class Plate {
return wells;
}
//returns a map of the counts of the peptide at cell index pIndex, in all wells
public Map<Integer, Integer> assayWellsPeptideP(int... pIndices){
return this.assayWellsPeptideP(0, size, pIndices);
//returns a map of the counts of the sequence at cell index sIndex, in all wells
public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
return this.assayWellsSequenceS(0, size, sIndices);
}
//returns a map of the counts of the peptide at cell index pIndex, in a specific well
public Map<Integer, Integer> assayWellsPeptideP(int n, int... pIndices) { return this.assayWellsPeptideP(n, n+1, pIndices);}
//returns a map of the counts of the sequence at cell index sIndex, in a specific well
public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
//returns a map of the counts of the peptide at cell index pIndex, in a range of wells
public Map<Integer, Integer> assayWellsPeptideP(int start, int end, int... pIndices) {
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
Map<Integer,Integer> assay = new HashMap<>();
for(int pIndex: pIndices){
for(int pIndex: sIndices){
for(int i = start; i < end; i++){
countPeptides(assay, wells.get(i), pIndex);
countSequences(assay, wells.get(i), pIndex);
}
}
return assay;
}
//For the peptides at cell indices pIndices, counts number of unique peptides in the given well into the given map
private void countPeptides(Map<Integer, Integer> wellMap, List<Integer[]> well, int... pIndices) {
//For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
for(Integer[] cell : well) {
for(int pIndex: pIndices){
if(cell[pIndex] != -1){
wellMap.merge(cell[pIndex], 1, (oldValue, newValue) -> oldValue + newValue);
for(int sIndex: sIndices){
if(cell[sIndex] != -1){
wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
}
}
}

View File

@@ -7,17 +7,17 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.regex.Pattern;
public class PlateFileWriter {
private int size;
private List<List<Integer[]>> wells;
private double stdDev;
private double lambda;
private Double error;
private String filename;
private String sourceFileName;
private String[] headers;
private List<Integer> concentrations;
private Integer[] populations;
private boolean isExponential = false;
public PlateFileWriter(String filename, Plate plate) {
if(!filename.matches(".*\\.csv")){
@@ -26,11 +26,17 @@ public class PlateFileWriter {
this.filename = filename;
this.sourceFileName = plate.getSourceFileName();
this.size = plate.getSize();
this.isExponential = plate.isExponential();
if(isExponential) {
this.lambda = plate.getLambda();
}
else{
this.stdDev = plate.getStdDev();
}
this.error = plate.getError();
this.wells = plate.getWells();
this.concentrations = Arrays.asList(plate.getConcentrations());
concentrations.sort(Comparator.reverseOrder());
this.populations = plate.getPopulations();
Arrays.sort(populations);
}
public void writePlateFile(){
@@ -51,28 +57,35 @@ public class PlateFileWriter {
}
}
//this took forever
List<List<String>> rows = new ArrayList<>();
List<String> tmp = new ArrayList<>();
for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
tmp.add("well " + (i+1));
}
rows.add(tmp);
for(int row = 0; row < maxLength; row++){
tmp = new ArrayList<>();
for(List<String> c: wellsAsStrings){
tmp.add(c.get(row));
}
rows.add(tmp);
}
StringBuilder concen = new StringBuilder();
for(Integer i: concentrations){
concen.append(i.toString());
concen.append(" ");
}
String concenString = concen.toString();
// //this took forever and I don't use it
// //if I wanted to use it, I'd replace printer.printRecords(wellsAsStrings) with printer.printRecords(rows)
// List<List<String>> rows = new ArrayList<>();
// List<String> tmp = new ArrayList<>();
// for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
// tmp.add("well " + (i+1));
// }
// rows.add(tmp);
// for(int row = 0; row < maxLength; row++){
// tmp = new ArrayList<>();
// for(List<String> c: wellsAsStrings){
// tmp.add(c.get(row));
// }
// rows.add(tmp);
// }
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build();
//make string out of populations array
StringBuilder populationsStringBuilder = new StringBuilder();
populationsStringBuilder.append(populations[0].toString());
for(int i = 1; i < populations.length; i++){
populationsStringBuilder.append(", ");
populationsStringBuilder.append(populations[i].toString());
}
String wellPopulationsString = populationsStringBuilder.toString();
//set CSV format
CSVFormat plateFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#')
.build();
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
@@ -81,8 +94,13 @@ public class PlateFileWriter {
printer.printComment("Each row represents one well on the plate.");
printer.printComment("Plate size: " + size);
printer.printComment("Error rate: " + error);
printer.printComment("Concentrations: " + concenString);
printer.printComment("Well populations: " + wellPopulationsString);
if(isExponential){
printer.printComment("Lambda: " + lambda);
}
else {
printer.printComment("Std. dev.: " + stdDev);
}
printer.printRecords(wellsAsStrings);
} catch(IOException ex){
System.out.println("Could not make new file named "+filename);

File diff suppressed because it is too large Load Diff

View File

@@ -1,395 +0,0 @@
import org.apache.commons.cli.*;
import java.util.List;
import java.util.Scanner;
import java.util.InputMismatchException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//
public class UserInterface {
final static Scanner sc = new Scanner(System.in);
static int input;
static boolean quit = false;
/**
 * Program entry point.
 * <p>
 * With command line arguments, the options are parsed with Commons CLI and, when {@code -m} is
 * present, CDR3 matching is run once per plate file given with {@code -p}. Parse failures,
 * including non-numeric {@code -low}/{@code -high} values, are reported on standard error.
 * <p>
 * Without arguments, an interactive menu is shown repeatedly until the user selects 0.
 *
 * @param args command line arguments; empty for interactive mode
 */
public static void main(String[] args) {
    if (args.length != 0) {
        Options options = new Options();
        Option matchCDR3 = Option.builder("m")
                .longOpt("match")
                .desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
                .build();
        options.addOption(matchCDR3);
        Option inputCells = Option.builder("c")
                .longOpt("cellfile")
                .hasArg()
                .argName("file")
                .desc("The cell sample file used for matching")
                .required().build();
        options.addOption(inputCells);
        Option lowThresh = Option.builder("low")
                .hasArg()
                .argName("number")
                .desc("Sets the minimum occupancy overlap to attempt matching")
                .required().build();
        options.addOption(lowThresh);
        Option highThresh = Option.builder("high")
                .hasArg()
                .argName("number")
                .desc("Sets the maximum occupancy overlap to attempt matching")
                .required().build();
        options.addOption(highThresh);
        Option inputPlates = Option.builder("p")
                .longOpt("platefiles")
                .hasArgs()
                .desc("Plate files to match")
                .required().build();
        options.addOption(inputPlates);
        CommandLineParser parser = new DefaultParser();
        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("m")) {
                String cellFile = line.getOptionValue("c");
                Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
                Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
                for (String plate : line.getOptionValues("p")) {
                    matchCDR3s(cellFile, plate, lowThreshold, highThreshold);
                }
            }
        }
        //a non-numeric threshold is a bad command line too; report it instead of crashing
        catch (ParseException | NumberFormatException exp) {
            System.err.println("Parsing failed. Reason: " + exp.getMessage());
        }
    }
    else {
        while (!quit) {
            System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
            System.out.println("Please select an option:");
            System.out.println("1) Generate a population of distinct cells");
            System.out.println("2) Generate a sample plate of T cells");
            System.out.println("3) Simulate CDR3 alpha/beta T cell matching");
            System.out.println("4) Simulate CDR3/CDR1 T cell matching");
            System.out.println("5) Acknowledgements");
            System.out.println("0) Exit");
            try {
                input = sc.nextInt();
                switch (input) {
                    case 1 -> makeCells();
                    case 2 -> makePlate();
                    case 3 -> matchCells();
                    case 4 -> matchCellsCDR1();
                    case 5 -> acknowledge();
                    case 0 -> quit = true;
                    //the number was already consumed, so it must not go through the handler
                    //below, which would discard the user's next entry
                    default -> System.out.println("Invalid input.");
                }
            } catch (InputMismatchException ex) {
                System.out.println(ex);
                //discard the token that could not be read as an integer
                sc.next();
            }
        }
        sc.close();
    }
}
/**
 * Interactively generates a cell sample and writes it to a file.
 * <p>
 * Prompts for the output filename, the factor by which distinct CDR3s outnumber CDR1s, and the
 * number of cells. If any entry cannot be read as a number, or the number of cells is not
 * positive, the problem is reported and the method returns without generating or writing
 * anything.
 */
private static void makeCells() {
    String filename = null;
    Integer numCells = 0;
    Integer cdr1Freq = 1;
    try {
        System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
        System.out.println("\nThe cells will be written to a file.");
        System.out.print("Please enter a file name: ");
        filename = sc.next();
        System.out.println("CDR3 sequences are more diverse than CDR1 sequences.");
        System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
        cdr1Freq = sc.nextInt();
        System.out.print("Please enter the number of T-cells to generate: ");
        numCells = sc.nextInt();
        if (numCells <= 0) {
            throw new IllegalArgumentException("Number of cells must be a positive integer.");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //the unreadable token is still in the scanner; discard it
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    } catch (IllegalArgumentException ex) {
        //the value was read successfully, so there is no token to discard here
        System.out.println(ex.getMessage());
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
    CellFileWriter writer = new CellFileWriter(filename, sample);
    writer.writeCellsToFile();
}
/**
 * Generates a cell sample with the given parameters and writes it to the named file.
 *
 * @param filename name of the output file
 * @param numCells number of cells to generate
 * @param cdr1Freq passed to the simulator together with {@code numCells}
 */
private static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
    final CellSample generated = Simulator.generateCellSample(numCells, cdr1Freq);
    new CellFileWriter(filename, generated).writeCellsToFile();
}
/**
 * Builds a sample plate from an existing cell sample file and writes it to a plate file.
 *
 * @param cellFile       name of the cell sample file to read
 * @param filename       name of the plate file to write
 * @param stdDev         standard deviation handed to the plate
 * @param numWells       number of wells on the plate
 * @param numSections    not used by this method
 * @param concentrations cells per well for each plate section
 * @param dropOutRate    dropout (error) rate handed to the plate
 */
private static void makePlate(String cellFile, String filename, Double stdDev,
                              Integer numWells, Integer numSections,
                              Integer[] concentrations, Double dropOutRate){
    final CellFileReader reader = new CellFileReader(cellFile);
    final Plate plate = new Plate(numWells, dropOutRate, concentrations, stdDev);
    plate.fillWells(reader.getFilename(), reader.getCells());
    new PlateFileWriter(filename, plate).writePlateFile();
}
/**
 * Interactively builds a sample plate from an existing cell sample file and writes it to a
 * plate file.
 * <p>
 * Prompts for the cell sample file, the output filename, the frequency distribution (Poisson,
 * approximated by a Gaussian whose standard deviation is the square root of the cell count, or
 * a Gaussian with a user-supplied standard deviation), the number of wells, the number of plate
 * sections with the cells per well for each, and the dropout rate. If an entry cannot be read
 * as a number or fails its range check, the problem is reported and the method returns without
 * reading or writing any file.
 */
private static void makePlate() {
    String cellFile = null;
    String filename = null;
    Double stdDev = 0.0;
    Integer numWells = 0;
    Integer numSections = 0;
    Integer[] concentrations = {1};
    Double dropOutRate = 0.0;
    boolean poisson = false;
    try {
        System.out.println("\nMaking a sample plate requires a population of distinct cells");
        System.out.println("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.println("\nThe sample plate will be written to file");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("Select T-cell frequency distribution function");
        System.out.println("1) Poisson");
        System.out.println("2) Gaussian");
        System.out.println("(Note: wider distributions are more memory intensive to match)");
        System.out.print("Enter selection value: ");
        input = sc.nextInt();
        switch (input) {
            case 1 -> poisson = true;
            case 2 -> {
                System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                System.out.println("(Note: wider distributions are more memory intensive to match)");
                stdDev = sc.nextDouble();
                if (stdDev <= 0.0) {
                    throw new IllegalArgumentException("Value must be positive.");
                }
            }
            default -> {
                System.out.println("Invalid input. Defaulting to Poisson.");
                poisson = true;
            }
        }
        System.out.print("Number of wells on plate: ");
        numWells = sc.nextInt();
        if (numWells < 1) {
            throw new IllegalArgumentException("No wells on plate");
        }
        System.out.println("The plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
        System.out.println("How many sections would you like to make (minimum 1)?");
        numSections = sc.nextInt();
        if (numSections < 1) {
            throw new IllegalArgumentException("Too few sections.");
        }
        else if (numSections > numWells) {
            throw new IllegalArgumentException("Cannot have more sections than wells.");
        }
        concentrations = new Integer[numSections];
        for (int section = 0; section < numSections; section++) {
            System.out.print("Enter number of T-cells per well in section " + (section + 1) + ": ");
            concentrations[section] = sc.nextInt();
        }
        System.out.println("Errors in amplification can induce a well dropout rate for peptides");
        System.out.print("Enter well dropout rate (0.0 to 1.0): ");
        dropOutRate = sc.nextDouble();
        if (dropOutRate < 0.0 || dropOutRate > 1.0) {
            throw new IllegalArgumentException("The well dropout rate must be in the range [0.0, 1.0]");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //the unreadable token is still in the scanner; discard it
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    } catch (IllegalArgumentException ex) {
        //the value was read successfully, so there is no token to discard here
        System.out.println(ex.getMessage());
        System.out.println("Returning to main menu.");
        return;
    }
    CellFileReader cellReader = new CellFileReader(cellFile);
    if (poisson) {
        stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
    }
    Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
    samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells());
    PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
    writer.writePlateFile();
}
/**
 * Non-interactive CDR3 matching for one cell sample file and one plate file; the error rate is
 * written to the terminal rather than to a results file.
 * <p>
 * The process exits with status 0 when the cell sample or the plate is empty. A high threshold
 * at or above the plate size is capped at plate size minus one.
 *
 * @param cellFile      name of the cell sample file
 * @param plateFile     name of the sample plate file
 * @param lowThreshold  minimum occupancy overlap to attempt matching
 * @param highThreshold maximum occupancy overlap to attempt matching
 */
private static void matchCDR3s(String cellFile, String plateFile, Integer lowThreshold, Integer highThreshold){
    final CellFileReader cellSource = new CellFileReader(cellFile);
    final PlateFileReader plateSource = new PlateFileReader(plateFile);
    final Plate samplePlate = new Plate(plateSource.getFilename(), plateSource.getWells());
    //nothing to match without both cells and wells
    if (cellSource.getCells().isEmpty() || samplePlate.getWells().isEmpty()) {
        System.exit(0);
        return;
    }
    final Integer cappedHigh = Math.min(highThreshold, samplePlate.getSize() - 1);
    final List<Integer[]> cellList = cellSource.getCells();
    final MatchingResult outcome = Simulator.matchCDR3s(cellList, samplePlate, lowThreshold, cappedHigh, false);
    //result writer
    new MatchingFileWriter("", outcome).writeErrorRateToTerminal();
}
/**
 * Interactively simulates CDR3 alpha/beta matching from a cell sample file and a sample plate
 * file, and writes the results to a file.
 * <p>
 * Prompts for both input files, the output filename and the low/high overlap thresholds. If a
 * threshold cannot be read as an integer, or the low threshold is below 1, the problem is
 * reported and the method returns without reading or writing any file. A high threshold at or
 * above the plate size is capped at plate size minus one.
 */
private static void matchCells() {
    String filename = null;
    String cellFile = null;
    String plateFile = null;
    Integer lowThreshold = 0;
    Integer highThreshold = Integer.MAX_VALUE;
    try {
        System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.print("Please enter name of an existing sample plate file: ");
        plateFile = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("What is the minimum number of alpha/beta overlap wells to attempt matching?");
        lowThreshold = sc.nextInt();
        if (lowThreshold < 1) {
            throw new IllegalArgumentException("Minimum value for low threshold is 1");
        }
        System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
        highThreshold = sc.nextInt();
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        //the unreadable token is still in the scanner; discard it
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    } catch (IllegalArgumentException ex) {
        //the value was read successfully, so there is no token to discard here
        System.out.println(ex.getMessage());
        System.out.println("Returning to main menu.");
        return;
    }
    CellFileReader cellReader = new CellFileReader(cellFile);
    PlateFileReader plateReader = new PlateFileReader(plateFile);
    Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
    if (cellReader.getCells().size() == 0) {
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
    }
    else if (plate.getWells().size() == 0) {
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
    }
    else {
        if (highThreshold >= plate.getSize()) {
            highThreshold = plate.getSize() - 1;
        }
        List<Integer[]> cells = cellReader.getCells();
        MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold, true);
        //result writer
        MatchingFileWriter writer = new MatchingFileWriter(filename, results);
        writer.writeResultsToFile();
    }
}
/**
 * Interactively runs a two-stage matching experiment: CDR3 alpha/beta matching followed by
 * CDR3-to-CDR1 matching, writing the results to files.
 *
 * The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match
 * CDR3s to CDR1s by looking at the top two matches for each CDR3. If CDR3s in the same cell
 * simply swap CDR1s, we assume a correct match.
 *
 * Prompts (via the shared scanner {@code sc}) for the input files, an output file name, the
 * low/high thresholds for both stages, and whether the preliminary CDR3 results should also be
 * written. If any threshold entry is invalid the problem is reported and the method returns
 * without running the experiment. The two elements returned by {@code Simulator.matchCDR1s} are
 * written to {@code filename + "_FirstPass"} and {@code filename + "_SecondPass"}.
 */
public static void matchCellsCDR1(){
    String filename = null;
    String preliminaryResultsFilename = null;
    String cellFile = null;
    String plateFile = null;
    Integer lowThresholdCDR3 = 0;
    Integer highThresholdCDR3 = Integer.MAX_VALUE;
    Integer lowThresholdCDR1 = 0;
    Integer highThresholdCDR1 = Integer.MAX_VALUE;
    boolean outputCDR3Matches = false;
    try {
        System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.print("Please enter name of an existing sample plate file: ");
        plateFile = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
        lowThresholdCDR3 = sc.nextInt();
        if (lowThresholdCDR3 < 1) {
            // nextInt() has already consumed this token, so there is nothing to discard here;
            // calling sc.next() would wait for (and throw away) an unrelated extra token.
            System.out.println("Minimum value for low threshold is 1");
            System.out.println("Returning to main menu.");
            return;
        }
        System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
        highThresholdCDR3 = sc.nextInt();
        System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
        lowThresholdCDR1 = sc.nextInt();
        if (lowThresholdCDR1 < 1) {
            // Same reasoning as above: the rejected value is already consumed.
            System.out.println("Minimum value for low threshold is 1");
            System.out.println("Returning to main menu.");
            return;
        }
        System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
        highThresholdCDR1 = sc.nextInt();
        System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
        System.out.println("Output a file for CDR3 alpha/beta match results as well?");
        System.out.print("Please enter y/n: ");
        String ans = sc.next();
        // Accept "y" or "yes" in any letter case; anything else counts as "no".
        Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(ans);
        if (matcher.matches()) {
            outputCDR3Matches = true;
            System.out.println("Please enter filename for CDR3 alpha/beta match results");
            preliminaryResultsFilename = sc.next();
            System.out.println("CDR3 alpha/beta matches will be output to file");
        }
        else {
            System.out.println("CDR3 alpha/beta matches will not be output to file");
        }
    } catch (InputMismatchException ex) {
        System.out.println(ex);
        // A failed nextInt() leaves the offending token in the scanner; discard it so the
        // caller's next read does not see it.
        sc.next();
        // Do not run the experiment with thresholds the user never validly supplied.
        System.out.println("Returning to main menu.");
        return;
    }
    CellFileReader cellReader = new CellFileReader(cellFile);
    PlateFileReader plateReader = new PlateFileReader(plateFile);
    Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
    if (cellReader.getCells().size() == 0) {
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
    }
    else if (plate.getWells().size() == 0) {
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
    }
    else {
        // Both high thresholds are capped at one less than the plate size.
        if (highThresholdCDR3 >= plate.getSize()) {
            highThresholdCDR3 = plate.getSize() - 1;
        }
        if (highThresholdCDR1 >= plate.getSize()) {
            highThresholdCDR1 = plate.getSize() - 1;
        }
        List<Integer[]> cells = cellReader.getCells();
        // Stage one: CDR3 alpha/beta matches, which stage two takes as input.
        MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3, true);
        MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
                highThresholdCDR1, preliminaryResults);
        MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
        writer.writeResultsToFile();
        writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
        writer.writeResultsToFile();
        if (outputCDR3Matches) {
            writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
            writer.writeResultsToFile();
        }
    }
}
/**
 * Prints the citation for the paper this simulation is based on, followed by a blank line and
 * the simulation's author credit, to standard output.
 */
private static void acknowledge(){
    final String[] creditLines = {
        "Simulation based on:",
        "Howie, B., Sherwood, A. M., et. al.",
        "High-throughput pairing of T cell receptor alpha and beta sequences.",
        "Sci. Transl. Med. 7, 301ra131 (2015)",
        "",
        "Simulation by Eugene Fischer, 2021"
    };
    // One println per entry, in order, so the output matches line for line.
    for (String creditLine : creditLines) {
        System.out.println(creditLine);
    }
}
}

17
src/main/java/Vertex.java Normal file
View File

@@ -0,0 +1,17 @@
/**
 * Immutable holder pairing a peptide value with an occupancy value.
 *
 * Both values are stored exactly as passed to the constructor (including {@code null}) and are
 * never modified afterwards.
 * NOTE(review): presumably a graph vertex representing a peptide and the number of wells it
 * occupies; confirm against the code that builds the graph.
 */
public class Vertex {

    /** Occupancy value supplied at construction. */
    private final Integer occupancy;

    /** Peptide value supplied at construction. */
    private final Integer peptide;

    /**
     * Creates a vertex holding the given values.
     *
     * @param peptide   the peptide value to store
     * @param occupancy the occupancy value to store
     */
    public Vertex(Integer peptide, Integer occupancy) {
        this.occupancy = occupancy;
        this.peptide = peptide;
    }

    /**
     * @return the occupancy value given to the constructor
     */
    public Integer getOccupancy() {
        return this.occupancy;
    }

    /**
     * @return the peptide value given to the constructor
     */
    public Integer getPeptide() {
        return this.peptide;
    }
}