Compare commits
64 Commits
09aa5961f3
...
v1.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b053e6ec4 | |||
| 44784b7976 | |||
| 7c19896dc9 | |||
| aec7e3016f | |||
| 5c75c1ac09 | |||
| cb1f7adece | |||
| 370de79546 | |||
| a803336f56 | |||
| 94b54b3416 | |||
| 601e141fd0 | |||
| 8f9c6b7d33 | |||
| e5ddc73723 | |||
| 9b18fac74f | |||
| 63ef6aa7a0 | |||
| 7558455f39 | |||
| 410f0ae547 | |||
| 1bc6a11545 | |||
| 2b13e10e95 | |||
| 4fd5baeb98 | |||
| b2a4e9a42b | |||
| d1bb49b482 | |||
| 9adb7dffb8 | |||
| 2023bb9d7e | |||
| 405fbf17ff | |||
| 24519f4a52 | |||
| 2afd01eeef | |||
| 10d0b711bf | |||
| 8f98baf44e | |||
| d6c7c40c96 | |||
| 61c14b2ecf | |||
| 22fc4aedfe | |||
| 5d24dc6f70 | |||
| 2c01a0211c | |||
| f2b5d9e1b7 | |||
| 74c8cafd81 | |||
| d1c37b5ccd | |||
| cb2c5a6024 | |||
| 284a5b3a40 | |||
| 52afb1edc2 | |||
| 9c52bc878a | |||
| 248fe4d662 | |||
| 5d0e60708c | |||
| c96b7237e9 | |||
| 0b28259800 | |||
| 837ef7bfe4 | |||
| 0bebbc7602 | |||
| 84f7ddb696 | |||
| c4633da9eb | |||
| 5b2ed165d0 | |||
| 0026d8cdfe | |||
| 13fb7168bf | |||
| 568a6be3c7 | |||
| cfa473c7ce | |||
| 6faacd9a82 | |||
| ce88e170c1 | |||
| 47e23addfa | |||
| b9ee31b64c | |||
| bf32a55e4b | |||
| acff88475b | |||
| 32593308df | |||
| 981e24011d | |||
| 3d0a843cea | |||
| c09ef27822 | |||
| 2ab93dd4b7 |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
/out/
|
||||||
@@ -1,15 +1,16 @@
|
|||||||
<component name="ArtifactManager">
|
<component name="ArtifactManager">
|
||||||
<artifact type="jar" name="TCellSim:jar">
|
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
|
||||||
<output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path>
|
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
|
||||||
<root id="archive" name="TCellSim.jar">
|
<root id="archive" name="BiGpairSEQ_Sim.jar">
|
||||||
<element id="directory" name="META-INF">
|
<element id="directory" name="META-INF">
|
||||||
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
|
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
|
||||||
</element>
|
</element>
|
||||||
<element id="module-output" name="TCellSim" />
|
<element id="module-output" name="BigPairSEQ" />
|
||||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
|
|
||||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
|
|
||||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
|
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
|
||||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
|
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
|
||||||
|
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
|
||||||
|
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/commons/commons-csv/1.9.0/commons-csv-1.9.0.jar" path-in-jar="/" />
|
||||||
|
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jetbrains/annotations/23.0.0/annotations-23.0.0.jar" path-in-jar="/" />
|
||||||
</root>
|
</root>
|
||||||
</artifact>
|
</artifact>
|
||||||
</component>
|
</component>
|
||||||
2
.idea/compiler.xml
generated
2
.idea/compiler.xml
generated
@@ -6,7 +6,7 @@
|
|||||||
<sourceOutputDir name="target/generated-sources/annotations" />
|
<sourceOutputDir name="target/generated-sources/annotations" />
|
||||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
||||||
<outputRelativeToContentRoot value="true" />
|
<outputRelativeToContentRoot value="true" />
|
||||||
<module name="TCellSim" />
|
<module name="BigPairSEQ" />
|
||||||
</profile>
|
</profile>
|
||||||
</annotationProcessing>
|
</annotationProcessing>
|
||||||
</component>
|
</component>
|
||||||
|
|||||||
10
.idea/libraries/commons_cli.xml
generated
Normal file
10
.idea/libraries/commons_cli.xml
generated
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
<component name="libraryTable">
|
||||||
|
<library name="commons.cli" type="repository">
|
||||||
|
<properties maven-id="commons-cli:commons-cli:1.5.0" />
|
||||||
|
<CLASSES>
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar!/" />
|
||||||
|
</CLASSES>
|
||||||
|
<JAVADOC />
|
||||||
|
<SOURCES />
|
||||||
|
</library>
|
||||||
|
</component>
|
||||||
15
.idea/libraries/jgrapht_io.xml
generated
Normal file
15
.idea/libraries/jgrapht_io.xml
generated
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
<component name="libraryTable">
|
||||||
|
<library name="jgrapht.io" type="repository">
|
||||||
|
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
|
||||||
|
<CLASSES>
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
|
||||||
|
</CLASSES>
|
||||||
|
<JAVADOC />
|
||||||
|
<SOURCES />
|
||||||
|
</library>
|
||||||
|
</component>
|
||||||
10
.idea/libraries/jheaps.xml
generated
Normal file
10
.idea/libraries/jheaps.xml
generated
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
<component name="libraryTable">
|
||||||
|
<library name="jheaps" type="repository">
|
||||||
|
<properties maven-id="org.jheaps:jheaps:0.14" />
|
||||||
|
<CLASSES>
|
||||||
|
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.14/jheaps-0.14.jar!/" />
|
||||||
|
</CLASSES>
|
||||||
|
<JAVADOC />
|
||||||
|
<SOURCES />
|
||||||
|
</library>
|
||||||
|
</component>
|
||||||
Binary file not shown.
@@ -1,4 +0,0 @@
|
|||||||
Executable .jar file with interactive, command line UI
|
|
||||||
Usage: java -jar TCellSim.jar
|
|
||||||
|
|
||||||
Requires Java11 or higher (Openjdk-17 recommended)
|
|
||||||
278
readme.md
Normal file
278
readme.md
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
# BiGpairSEQ SIMULATOR
|
||||||
|
|
||||||
|
|
||||||
|
## ABOUT
|
||||||
|
|
||||||
|
This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
|
||||||
|
of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
|
||||||
|
|
||||||
|
## THEORY
|
||||||
|
|
||||||
|
Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
|
||||||
|
against a null distribution, BiGpairSEQ does not do any statistical calculations
|
||||||
|
directly.
|
||||||
|
|
||||||
|
BiGpairSEQ creates a [simple bipartite weighted graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
|
||||||
|
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that share a well
|
||||||
|
are connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
|
||||||
|
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
|
||||||
|
The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
|
||||||
|
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
||||||
|
|
||||||
|
This is a well-studied combinatorial optimization problem, with many known solutions.
|
||||||
|
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
|
||||||
|
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||||
|
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
||||||
|
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
||||||
|
|
||||||
|
Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
|
||||||
|
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
|
||||||
|
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Melhorn and Näher (1999).
|
||||||
|
|
||||||
|
The current version of the program uses a pairing heap instead of a Fibonacci heap for its priority queue,
|
||||||
|
which has lower theoretical efficiency but also lower complexity overhead, and is often equivalently performant
|
||||||
|
in practice.
|
||||||
|
|
||||||
|
## USAGE
|
||||||
|
|
||||||
|
### RUNNING THE PROGRAM
|
||||||
|
|
||||||
|
BiGpairSEQ_Sim is an executable .jar file. Requires Java 11 or higher. [OpenJDK 17](https://jdk.java.net/17/)
|
||||||
|
recommended.
|
||||||
|
|
||||||
|
Run with the command:
|
||||||
|
|
||||||
|
`java -jar BiGpairSEQ_Sim.jar`
|
||||||
|
|
||||||
|
Processing sample plates with tens of thousands of sequences may require large amounts
|
||||||
|
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
|
||||||
|
For example, to run the program with 32 gigabytes of memory, use the command:
|
||||||
|
|
||||||
|
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
||||||
|
|
||||||
|
Once running, BiGpairSEQ_Sim has an interactive, menu-driven CLI for generating files and simulating TCR pairing. The
|
||||||
|
main menu looks like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
--------BiGPairSEQ SIMULATOR--------
|
||||||
|
ALPHA/BETA T CELL RECEPTOR MATCHING
|
||||||
|
USING WEIGHTED BIPARTITE GRAPHS
|
||||||
|
------------------------------------
|
||||||
|
Please select an option:
|
||||||
|
1) Generate a population of distinct cells
|
||||||
|
2) Generate a sample plate of T cells
|
||||||
|
3) Generate CDR3 alpha/beta occupancy data and overlap graph
|
||||||
|
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
|
||||||
|
9) About/Acknowledgments
|
||||||
|
0) Exit
|
||||||
|
```
|
||||||
|
|
||||||
|
### OUTPUT
|
||||||
|
|
||||||
|
To run the simulation, the program reads and writes 4 kinds of files:
|
||||||
|
* Cell Sample files in CSV format
|
||||||
|
* Sample Plate files in CSV format
|
||||||
|
* Graph and Data files in binary object serialization format
|
||||||
|
* Matching Results files in CSV format
|
||||||
|
|
||||||
|
When entering filenames, it is not necessary to include the file extension (.csv or .ser). When reading or
|
||||||
|
writing files, the program will automatically add the correct extension to any filename without one.
|
||||||
|
|
||||||
|
#### Cell Sample Files
|
||||||
|
Cell Sample files consist of any number of distinct "T cells." Every cell contains
|
||||||
|
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
|
||||||
|
random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
|
||||||
|
are not necessarily unique; the relative diversity can be set when making the file.
|
||||||
|
|
||||||
|
(Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
|
||||||
|
|
||||||
|
Options when making a Cell Sample file:
|
||||||
|
* Number of T cells to generate
|
||||||
|
* Factor by which CDR3s are more diverse than CDR1s
|
||||||
|
|
||||||
|
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
|
||||||
|
Comments are preceded by `#`
|
||||||
|
|
||||||
|
Structure:
|
||||||
|
|
||||||
|
---
|
||||||
|
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
|
||||||
|
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|
||||||
|
|---|---|---|---|
|
||||||
|
|unique number|unique number|number|number|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### Sample Plate Files
|
||||||
|
Sample Plate files consist of any number of "wells" containing any number of T cells (as
|
||||||
|
described above). The wells are filled randomly from a Cell Sample file, according to a selected
|
||||||
|
frequency distribution. Additionally, every individual sequence within each cell may, with some
|
||||||
|
given dropout probability, be omitted from the file; this simulates the effect of amplification errors
|
||||||
|
prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a
|
||||||
|
different concentration of T cells per well.
|
||||||
|
|
||||||
|
Options when making a Sample Plate file:
|
||||||
|
* Cell Sample file to use
|
||||||
|
* Statistical distribution to apply to Cell Sample file
|
||||||
|
* Poisson
|
||||||
|
* Gaussian
|
||||||
|
* Standard deviation size
|
||||||
|
* Exponential
|
||||||
|
* Lambda value
|
||||||
|
* (Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was exponential with a lambda of approximately 0.6. (Howie, et al. 2015))
|
||||||
|
* Total number of wells on the plate
|
||||||
|
* Number of sections on plate
|
||||||
|
* Number of T cells per well
|
||||||
|
* per section, if more than one section
|
||||||
|
* Dropout rate
|
||||||
|
|
||||||
|
Files are in CSV format. There are no header labels. Every row represents a well.
|
||||||
|
Every column represents an individual cell, containing four sequences, depicted as an array string:
|
||||||
|
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
|
||||||
|
|
||||||
|
`[525902, 791533, -1, 866282]`
|
||||||
|
|
||||||
|
Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
|
||||||
|
Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
|
||||||
|
|
||||||
|
Structure:
|
||||||
|
|
||||||
|
---
|
||||||
|
```
|
||||||
|
# Cell source file name:
|
||||||
|
# Each row represents one well on the plate
|
||||||
|
# Plate size:
|
||||||
|
# Concentrations:
|
||||||
|
# Lambda -or- StdDev:
|
||||||
|
```
|
||||||
|
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
|
||||||
|
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
|
||||||
|
| **...** | **...** | **...** | **...** |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### Graph and Data Files
|
||||||
|
Graph and Data files are serialized binaries of a Java object containing the weigthed bipartite graph representation of a
|
||||||
|
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
|
||||||
|
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
|
||||||
|
Sample Plate file (to construct the associated occupancy graph). These files can be several gigabytes in size.
|
||||||
|
Writing them to a file lets us generate a graph and its metadata once, then use it for multiple different BiGpairSEQ simulations.
|
||||||
|
|
||||||
|
Options for creating a Graph and Data file:
|
||||||
|
* The Cell Sample file to use
|
||||||
|
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
|
||||||
|
|
||||||
|
These files do not have a human-readable structure, and are not portable to other programs. (Export of graphs in a
|
||||||
|
portable data format may be implemented in the future. The tricky part is encoding the necessary metadata.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### Matching Results Files
|
||||||
|
Matching results files consist of the results of a BiGpairSEQ matching simulation.
|
||||||
|
Files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
|
||||||
|
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
|
||||||
|
|
||||||
|
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
|
||||||
|
* The minimum number of alpha/beta overlap wells to attempt to match
|
||||||
|
* (must be >= 1)
|
||||||
|
* The maximum number of alpha/beta overlap wells to attempt to match
|
||||||
|
* (must be <= the number of wells on the plate - 1)
|
||||||
|
* The maximum difference in alpha/beta occupancy to attempt to match
|
||||||
|
* (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
|
||||||
|
* The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as value in range 0 - 100.
|
||||||
|
* (Optional. To skip using this filter, enter 0)
|
||||||
|
|
||||||
|
Example output:
|
||||||
|
|
||||||
|
---
|
||||||
|
```
|
||||||
|
# Source Sample Plate file: 4MilCellsPlate.csv
|
||||||
|
# Source Graph and Data file: 4MilCellsPlateGraph.ser
|
||||||
|
# T cell counts in sample plate wells: 30000
|
||||||
|
# Total alphas found: 11813
|
||||||
|
# Total betas found: 11808
|
||||||
|
# High overlap threshold: 94
|
||||||
|
# Low overlap threshold: 3
|
||||||
|
# Minimum overlap percent: 0
|
||||||
|
# Maximum occupancy difference: 96
|
||||||
|
# Pairing attempt rate: 0.438
|
||||||
|
# Correct pairings: 5151
|
||||||
|
# Incorrect pairings: 18
|
||||||
|
# Pairing error rate: 0.00348
|
||||||
|
# Simulation time: 862 seconds
|
||||||
|
```
|
||||||
|
|
||||||
|
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
|5242972|17|1571520|18|17|true|1.41E-18|
|
||||||
|
|5161027|18|2072219|18|18|true|7.31E-20|
|
||||||
|
|4145198|33|1064455|30|29|true|2.65E-21|
|
||||||
|
|7700582|18|112748|18|18|true|7.31E-20|
|
||||||
|
|...|...|...|...|...|...|...|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all.
|
||||||
|
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
|
||||||
|
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
|
||||||
|
|
||||||
|
### PERFORMANCE
|
||||||
|
Performance details of the example excerpted above:
|
||||||
|
|
||||||
|
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
|
||||||
|
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
|
||||||
|
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
|
||||||
|
|
||||||
|
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
|
||||||
|
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
|
||||||
|
|
||||||
|
The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
|
||||||
|
|
||||||
|
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
|
||||||
|
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
|
||||||
|
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
|
||||||
|
|
||||||
|
## TODO
|
||||||
|
|
||||||
|
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||||
|
* ~~Hold graph data in memory until another graph is read-in?~~ ABANDONED
|
||||||
|
* *No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*
|
||||||
|
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
|
||||||
|
* ~~Problem is variable number of cells in a well~~
|
||||||
|
* ~~Apache Commons CSV library writes entries a row at a time~~
|
||||||
|
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Thingiverse transposes CSV matrices in R, might not even be worth fixing._
|
||||||
|
* Re-implement command line arguments, to enable scripting and statistical simulation studies
|
||||||
|
* Implement sample plates with random numbers of T cells per well.
|
||||||
|
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
|
||||||
|
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
|
||||||
|
* Enable GraphML output in addition to serialized object binaries, for data portability
|
||||||
|
* Custom vertex type with attribute for sequence occupancy?
|
||||||
|
* Re-implement CDR1 matching method
|
||||||
|
* Implement Duan and Su's maximum weight matching algorithm
|
||||||
|
* Add controllable algorithm-type parameter?
|
||||||
|
* Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm
|
||||||
|
* in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage
|
||||||
|
* Add controllable heap-type parameter?
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## CITATIONS
|
||||||
|
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
|
||||||
|
* Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
|
||||||
|
* Melhorn, K., Näher, St. [The LEDA Platform of Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
|
||||||
|
* Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596–615 (1987))
|
||||||
|
|
||||||
|
## EXTERNAL LIBRARIES USED
|
||||||
|
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
|
||||||
|
* [JHeaps](https://www.jheaps.org) -- For pairing heap priority queue used in maximum weight matching algorithm
|
||||||
|
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
|
||||||
|
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting. (**Awaiting re-implementation**.)
|
||||||
|
|
||||||
|
## ACKNOWLEDGEMENTS
|
||||||
|
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
|
||||||
|
pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
|
||||||
|
|
||||||
|
## AUTHOR
|
||||||
|
BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.
|
||||||
@@ -11,17 +11,19 @@ import java.util.List;
|
|||||||
|
|
||||||
public class CellFileReader {
|
public class CellFileReader {
|
||||||
|
|
||||||
|
private String filename;
|
||||||
private List<Integer[]> distinctCells = new ArrayList<>();
|
private List<Integer[]> distinctCells = new ArrayList<>();
|
||||||
|
|
||||||
public CellFileReader(String filename) {
|
public CellFileReader(String filename) {
|
||||||
|
|
||||||
if(!filename.matches(".*\\.csv")){
|
if(!filename.matches(".*\\.csv")){
|
||||||
filename = filename + ".csv";
|
filename = filename + ".csv";
|
||||||
}
|
}
|
||||||
|
this.filename = filename;
|
||||||
|
|
||||||
CSVFormat cellFileFormat = CSVFormat.Builder.create()
|
CSVFormat cellFileFormat = CSVFormat.Builder.create()
|
||||||
.setHeader("Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1")
|
.setHeader("Alpha CDR3", "Beta CDR3", "Alpha CDR1", "Beta CDR1")
|
||||||
.setSkipHeaderRecord(true)
|
.setSkipHeaderRecord(true)
|
||||||
|
.setCommentMarker('#')
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
try(//don't need to close reader bc of try-with-resources auto-closing
|
try(//don't need to close reader bc of try-with-resources auto-closing
|
||||||
@@ -42,6 +44,8 @@ public class CellFileReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getFilename() { return filename;}
|
||||||
|
|
||||||
public List<Integer[]> getCells(){
|
public List<Integer[]> getCells(){
|
||||||
return distinctCells;
|
return distinctCells;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,13 @@ import java.math.MathContext;
|
|||||||
|
|
||||||
public abstract class Equations {
|
public abstract class Equations {
|
||||||
|
|
||||||
|
public static int getRandomNumber(int min, int max) {
|
||||||
|
return (int) ((Math.random() * (max - min)) + min);
|
||||||
|
}
|
||||||
|
|
||||||
|
//pValue calculation as described in original pairSEQ paper.
|
||||||
|
//Included for comparison with original results.
|
||||||
|
//Not used by BiGpairSEQ for matching.
|
||||||
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
|
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
|
||||||
int w_ab = (int) w_ab_d;
|
int w_ab = (int) w_ab_d;
|
||||||
double pv = 0.0;
|
double pv = 0.0;
|
||||||
@@ -14,6 +21,9 @@ public abstract class Equations {
|
|||||||
return pv;
|
return pv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Implementation of the (corrected) probability equation from pairSEQ paper.
|
||||||
|
//Included for comparison with original results.
|
||||||
|
//Not used by BiGpairSEQ for matching.
|
||||||
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
|
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
|
||||||
BigInteger numer1 = choose(w, w_ab);
|
BigInteger numer1 = choose(w, w_ab);
|
||||||
BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
|
BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
|
||||||
@@ -26,10 +36,9 @@ public abstract class Equations {
|
|||||||
return prob.doubleValue();
|
return prob.doubleValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This works because nC(k+1) = nCk * (n-k)/(k+1)
|
//This works because nC(k+1) = nCk * (n-k)/(k+1)
|
||||||
* Since nC0 = 1, can start there and generate all the rest.
|
//Since nC0 = 1, can start there and generate all the rest.
|
||||||
*/
|
|
||||||
public static BigInteger choose(final int N, final int K) {
|
public static BigInteger choose(final int N, final int K) {
|
||||||
BigInteger nCk = BigInteger.ONE;
|
BigInteger nCk = BigInteger.ONE;
|
||||||
for (int k = 0; k < K; k++) {
|
for (int k = 0; k < K; k++) {
|
||||||
|
|||||||
29
src/main/java/GraphDataObjectReader.java
Normal file
29
src/main/java/GraphDataObjectReader.java
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
public class GraphDataObjectReader {
|
||||||
|
private GraphWithMapData data;
|
||||||
|
private String filename;
|
||||||
|
|
||||||
|
public GraphDataObjectReader(String filename) throws IOException {
|
||||||
|
if(!filename.matches(".*\\.ser")){
|
||||||
|
filename = filename + ".ser";
|
||||||
|
}
|
||||||
|
this.filename = filename;
|
||||||
|
try(//don't need to close these because of try-with-resources
|
||||||
|
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
||||||
|
ObjectInputStream in = new ObjectInputStream(fileIn))
|
||||||
|
{
|
||||||
|
data = (GraphWithMapData) in.readObject();
|
||||||
|
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
||||||
|
ex.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public GraphWithMapData getData() {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFilename() {
|
||||||
|
return filename;
|
||||||
|
}
|
||||||
|
}
|
||||||
28
src/main/java/GraphDataObjectWriter.java
Normal file
28
src/main/java/GraphDataObjectWriter.java
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.ObjectOutputStream;
|
||||||
|
|
||||||
|
public class GraphDataObjectWriter {
|
||||||
|
|
||||||
|
private GraphWithMapData data;
|
||||||
|
private String filename;
|
||||||
|
|
||||||
|
public GraphDataObjectWriter(String filename, GraphWithMapData data) {
|
||||||
|
if(!filename.matches(".*\\.ser")){
|
||||||
|
filename = filename + ".ser";
|
||||||
|
}
|
||||||
|
this.filename = filename;
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeDataToFile() {
|
||||||
|
try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename));
|
||||||
|
ObjectOutputStream out = new ObjectOutputStream(bufferedOut);
|
||||||
|
){
|
||||||
|
out.writeObject(data);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
ex.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
35
src/main/java/GraphMLFileReader.java
Normal file
35
src/main/java/GraphMLFileReader.java
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||||
|
import org.jgrapht.nio.graphml.GraphMLImporter;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
public class GraphMLFileReader {
|
||||||
|
|
||||||
|
private String filename;
|
||||||
|
private SimpleWeightedGraph graph;
|
||||||
|
|
||||||
|
public GraphMLFileReader(String filename, SimpleWeightedGraph graph) {
|
||||||
|
if(!filename.matches(".*\\.graphml")){
|
||||||
|
filename = filename + ".graphml";
|
||||||
|
}
|
||||||
|
this.filename = filename;
|
||||||
|
this.graph = graph;
|
||||||
|
|
||||||
|
try(//don't need to close reader bc of try-with-resources auto-closing
|
||||||
|
BufferedReader reader = Files.newBufferedReader(Path.of(filename));
|
||||||
|
){
|
||||||
|
GraphMLImporter<SimpleWeightedGraph, BufferedReader> importer = new GraphMLImporter<>();
|
||||||
|
importer.importGraph(graph, reader);
|
||||||
|
}
|
||||||
|
catch (IOException ex) {
|
||||||
|
System.out.println("Graph file " + filename + " not found.");
|
||||||
|
System.err.println(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SimpleWeightedGraph getGraph() { return graph; }
|
||||||
|
|
||||||
|
}
|
||||||
35
src/main/java/GraphMLFileWriter.java
Normal file
35
src/main/java/GraphMLFileWriter.java
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||||
|
import org.jgrapht.nio.dot.DOTExporter;
|
||||||
|
import org.jgrapht.nio.graphml.GraphMLExporter;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
|
public class GraphMLFileWriter {
|
||||||
|
|
||||||
|
String filename;
|
||||||
|
SimpleWeightedGraph graph;
|
||||||
|
|
||||||
|
|
||||||
|
public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) {
|
||||||
|
if(!filename.matches(".*\\.graphml")){
|
||||||
|
filename = filename + ".graphml";
|
||||||
|
}
|
||||||
|
this.filename = filename;
|
||||||
|
this.graph = graph;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeGraphToFile() {
|
||||||
|
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||||
|
){
|
||||||
|
GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
|
||||||
|
exporter.exportGraph(graph, writer);
|
||||||
|
} catch(IOException ex){
|
||||||
|
System.out.println("Could not make new file named "+filename);
|
||||||
|
System.err.println(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
106
src/main/java/GraphWithMapData.java
Normal file
106
src/main/java/GraphWithMapData.java
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
//Can't just write the graph, because I need the occupancy data too.
|
||||||
|
//Makes most sense to serialize object and write that to a file.
|
||||||
|
//Which means there's no reason to split map data and graph data up.
|
||||||
|
public class GraphWithMapData implements java.io.Serializable {
|
||||||
|
|
||||||
|
private String sourceFilename;
|
||||||
|
private final SimpleWeightedGraph graph;
|
||||||
|
private Integer numWells;
|
||||||
|
private Integer[] wellConcentrations;
|
||||||
|
private Integer alphaCount;
|
||||||
|
private Integer betaCount;
|
||||||
|
private final Map<Integer, Integer> distCellsMapAlphaKey;
|
||||||
|
private final Map<Integer, Integer> plateVtoAMap;
|
||||||
|
private final Map<Integer, Integer> plateVtoBMap;
|
||||||
|
private final Map<Integer, Integer> plateAtoVMap;
|
||||||
|
private final Map<Integer, Integer> plateBtoVMap;
|
||||||
|
private final Map<Integer, Integer> alphaWellCounts;
|
||||||
|
private final Map<Integer, Integer> betaWellCounts;
|
||||||
|
private final Duration time;
|
||||||
|
|
||||||
|
public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
|
||||||
|
Integer alphaCount, Integer betaCount,
|
||||||
|
Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
|
||||||
|
Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
|
||||||
|
Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
|
||||||
|
Map<Integer, Integer> betaWellCounts, Duration time) {
|
||||||
|
this.graph = graph;
|
||||||
|
this.numWells = numWells;
|
||||||
|
this.wellConcentrations = wellConcentrations;
|
||||||
|
this.alphaCount = alphaCount;
|
||||||
|
this.betaCount = betaCount;
|
||||||
|
this.distCellsMapAlphaKey = distCellsMapAlphaKey;
|
||||||
|
this.plateVtoAMap = plateVtoAMap;
|
||||||
|
this.plateVtoBMap = plateVtoBMap;
|
||||||
|
this.plateAtoVMap = plateAtoVMap;
|
||||||
|
this.plateBtoVMap = plateBtoVMap;
|
||||||
|
this.alphaWellCounts = alphaWellCounts;
|
||||||
|
this.betaWellCounts = betaWellCounts;
|
||||||
|
this.time = time;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SimpleWeightedGraph getGraph() {
|
||||||
|
return graph;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getNumWells() {
|
||||||
|
return numWells;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer[] getWellConcentrations() {
|
||||||
|
return wellConcentrations;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getAlphaCount() {
|
||||||
|
return alphaCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getBetaCount() {
|
||||||
|
return betaCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getDistCellsMapAlphaKey() {
|
||||||
|
return distCellsMapAlphaKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getPlateVtoAMap() {
|
||||||
|
return plateVtoAMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getPlateVtoBMap() {
|
||||||
|
return plateVtoBMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getPlateAtoVMap() {
|
||||||
|
return plateAtoVMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getPlateBtoVMap() {
|
||||||
|
return plateBtoVMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getAlphaWellCounts() {
|
||||||
|
return alphaWellCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Integer> getBetaWellCounts() {
|
||||||
|
return betaWellCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Duration getTime() {
|
||||||
|
return time;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceFilename(String filename) {
|
||||||
|
this.sourceFilename = filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSourceFilename() {
|
||||||
|
return sourceFilename;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,25 +7,35 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
|
||||||
public class MatchingFileWriter {
|
public class MatchingFileWriter {
|
||||||
|
|
||||||
private String filename;
|
private String filename;
|
||||||
|
private String sourceFileName;
|
||||||
private List<String> comments;
|
private List<String> comments;
|
||||||
private List<String> headers;
|
private List<String> headers;
|
||||||
private List<List<String>> results;
|
private List<List<String>> allResults;
|
||||||
|
|
||||||
public MatchingFileWriter(String filename, List<String> comments, List<String> headers, List<List<String>> results){
|
public MatchingFileWriter(String filename, MatchingResult result){
|
||||||
if(!filename.matches(".*\\.csv")){
|
if(!filename.matches(".*\\.csv")){
|
||||||
filename = filename + ".csv";
|
filename = filename + ".csv";
|
||||||
}
|
}
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
this.comments = comments;
|
this.sourceFileName = result.getSourceFileName();
|
||||||
this.headers = headers;
|
this.comments = result.getComments();
|
||||||
this.results = results;
|
this.headers = result.getHeaders();
|
||||||
|
this.allResults = result.getAllResults();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void writeErrorRateToTerminal(){
|
||||||
|
for(String s: comments){
|
||||||
|
if(s.matches("(Pairing error rate: )(\\d*.\\d+)")){
|
||||||
|
System.out.println(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
public void writeResultsToFile(){
|
public void writeResultsToFile(){
|
||||||
String[] headerStrings = new String[headers.size()];
|
String[] headerStrings = new String[headers.size()];
|
||||||
for(int i = 0; i < headers.size(); i++){
|
for(int i = 0; i < headers.size(); i++){
|
||||||
@@ -41,8 +51,8 @@ public class MatchingFileWriter {
|
|||||||
for(String comment: comments){
|
for(String comment: comments){
|
||||||
printer.printComment(comment);
|
printer.printComment(comment);
|
||||||
}
|
}
|
||||||
results.add(0, headers);
|
allResults.add(0, headers);
|
||||||
printer.printRecords(results);
|
printer.printRecords(allResults);
|
||||||
|
|
||||||
} catch(IOException ex){
|
} catch(IOException ex){
|
||||||
System.out.println("Could not make new file named "+filename);
|
System.out.println("Could not make new file named "+filename);
|
||||||
|
|||||||
@@ -3,13 +3,15 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
public class MatchingResult {
|
public class MatchingResult {
|
||||||
|
private String sourceFile;
|
||||||
private List<String> comments;
|
private List<String> comments;
|
||||||
private List<String> headers;
|
private List<String> headers;
|
||||||
private List<List<String>> allResults;
|
private List<List<String>> allResults;
|
||||||
private Map<Integer, Integer> matchMap;
|
private Map<Integer, Integer> matchMap;
|
||||||
private Duration time;
|
private Duration time;
|
||||||
|
|
||||||
public MatchingResult(List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
public MatchingResult(String sourceFileName, List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
||||||
|
this.sourceFile = sourceFileName;
|
||||||
this.comments = comments;
|
this.comments = comments;
|
||||||
this.headers = headers;
|
this.headers = headers;
|
||||||
this.allResults = allResults;
|
this.allResults = allResults;
|
||||||
@@ -37,4 +39,8 @@ public class MatchingResult {
|
|||||||
public Duration getTime() {
|
public Duration getTime() {
|
||||||
return time;
|
return time;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getSourceFileName() {
|
||||||
|
return sourceFile;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,31 +1,89 @@
|
|||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
TODO: Implement exponential distribution using inversion method - DONE
|
||||||
TODO: Implement discrete frequency distributions using Vose's Alias Method
|
TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class Plate {
|
public class Plate {
|
||||||
|
private String sourceFile;
|
||||||
private List<List<Integer[]>> wells;
|
private List<List<Integer[]>> wells;
|
||||||
private Random rand = new Random();
|
private Random rand = new Random();
|
||||||
private int size;
|
private int size;
|
||||||
private double error;
|
private double error;
|
||||||
private Integer[] concentrations;
|
private Integer[] concentrations;
|
||||||
private double stdDev;
|
private double stdDev;
|
||||||
|
private double lambda;
|
||||||
|
boolean exponential = false;
|
||||||
|
|
||||||
public Plate (int size, double error, Integer[] concentrations, double stdDev) {
|
|
||||||
|
public Plate(int size, double error, Integer[] concentrations) {
|
||||||
this.size = size;
|
this.size = size;
|
||||||
this.error = error;
|
this.error = error;
|
||||||
this.concentrations = concentrations;
|
this.concentrations = concentrations;
|
||||||
this.stdDev = stdDev;
|
|
||||||
wells = new ArrayList<>();
|
wells = new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Plate(List<List<Integer[]>> wells){
|
public Plate(String sourceFileName, List<List<Integer[]>> wells) {
|
||||||
|
this.sourceFile = sourceFileName;
|
||||||
this.wells = wells;
|
this.wells = wells;
|
||||||
this.size = wells.size();
|
this.size = wells.size();
|
||||||
|
|
||||||
|
List<Integer> concentrations = new ArrayList<>();
|
||||||
|
for (List<Integer[]> w: wells) {
|
||||||
|
if(!concentrations.contains(w.size())){
|
||||||
|
concentrations.add(w.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.concentrations = new Integer[concentrations.size()];
|
||||||
|
for (int i = 0; i < this.concentrations.length; i++) {
|
||||||
|
this.concentrations[i] = concentrations.get(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void fillWells(List<Integer[]> cells) {
|
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){
|
||||||
|
this.lambda = lambda;
|
||||||
|
exponential = true;
|
||||||
|
sourceFile = sourceFileName;
|
||||||
|
int numSections = concentrations.length;
|
||||||
|
int section = 0;
|
||||||
|
double m;
|
||||||
|
int n;
|
||||||
|
int test=0;
|
||||||
|
while (section < numSections){
|
||||||
|
for (int i = 0; i < (size / numSections); i++) {
|
||||||
|
List<Integer[]> well = new ArrayList<>();
|
||||||
|
for (int j = 0; j < concentrations[section]; j++) {
|
||||||
|
do {
|
||||||
|
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
|
||||||
|
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
|
||||||
|
} while (m >= cells.size() || m < 0);
|
||||||
|
n = (int) Math.floor(m);
|
||||||
|
//n = Equations.getRandomNumber(0, cells.size());
|
||||||
|
// was testing generating the cell sample file with exponential dist, then sampling flat here
|
||||||
|
//that would be more realistic
|
||||||
|
//But would mess up other things in the simulation with how I've coded it.
|
||||||
|
if(n > test){
|
||||||
|
test = n;
|
||||||
|
}
|
||||||
|
Integer[] cellToAdd = cells.get(n).clone();
|
||||||
|
for(int k = 0; k < cellToAdd.length; k++){
|
||||||
|
if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce
|
||||||
|
cellToAdd[k] = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
well.add(cellToAdd);
|
||||||
|
}
|
||||||
|
wells.add(well);
|
||||||
|
}
|
||||||
|
section++;
|
||||||
|
}
|
||||||
|
System.out.println("Highest index: " +test);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void fillWells(String sourceFileName, List<Integer[]> cells, double stdDev) {
|
||||||
|
this.stdDev = stdDev;
|
||||||
|
sourceFile = sourceFileName;
|
||||||
int numSections = concentrations.length;
|
int numSections = concentrations.length;
|
||||||
int section = 0;
|
int section = 0;
|
||||||
double m;
|
double m;
|
||||||
@@ -40,7 +98,7 @@ public class Plate {
|
|||||||
n = (int) Math.floor(m);
|
n = (int) Math.floor(m);
|
||||||
Integer[] cellToAdd = cells.get(n).clone();
|
Integer[] cellToAdd = cells.get(n).clone();
|
||||||
for(int k = 0; k < cellToAdd.length; k++){
|
for(int k = 0; k < cellToAdd.length; k++){
|
||||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
|
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
|
||||||
cellToAdd[k] = -1;
|
cellToAdd[k] = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -64,6 +122,10 @@ public class Plate {
|
|||||||
return stdDev;
|
return stdDev;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isExponential(){return exponential;}
|
||||||
|
|
||||||
|
public double getLambda(){return lambda;}
|
||||||
|
|
||||||
public double getError() {
|
public double getError() {
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
@@ -72,32 +134,36 @@ public class Plate {
|
|||||||
return wells;
|
return wells;
|
||||||
}
|
}
|
||||||
|
|
||||||
//returns a map of the counts of the peptide at cell index pIndex, in all wells
|
//returns a map of the counts of the sequence at cell index sIndex, in all wells
|
||||||
public Map<Integer, Integer> assayWellsPeptideP(int... pIndices){
|
public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
|
||||||
return this.assayWellsPeptideP(0, size, pIndices);
|
return this.assayWellsSequenceS(0, size, sIndices);
|
||||||
}
|
}
|
||||||
|
|
||||||
//returns a map of the counts of the peptide at cell index pIndex, in a specific well
|
//returns a map of the counts of the sequence at cell index sIndex, in a specific well
|
||||||
public Map<Integer, Integer> assayWellsPeptideP(int n, int... pIndices) { return this.assayWellsPeptideP(n, n+1, pIndices);}
|
public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
|
||||||
|
|
||||||
//returns a map of the counts of the peptide at cell index pIndex, in a range of wells
|
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
|
||||||
public Map<Integer, Integer> assayWellsPeptideP(int start, int end, int... pIndices) {
|
public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
|
||||||
Map<Integer,Integer> assay = new HashMap<>();
|
Map<Integer,Integer> assay = new HashMap<>();
|
||||||
for(int pIndex: pIndices){
|
for(int pIndex: sIndices){
|
||||||
for(int i = start; i < end; i++){
|
for(int i = start; i < end; i++){
|
||||||
countPeptides(assay, wells.get(i), pIndex);
|
countSequences(assay, wells.get(i), pIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return assay;
|
return assay;
|
||||||
}
|
}
|
||||||
//For the peptides at cell indices pIndices, counts number of unique peptides in the given well into the given map
|
//For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
|
||||||
private void countPeptides(Map<Integer, Integer> wellMap, List<Integer[]> well, int... pIndices) {
|
private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
|
||||||
for(Integer[] cell : well) {
|
for(Integer[] cell : well) {
|
||||||
for(int pIndex: pIndices){
|
for(int sIndex: sIndices){
|
||||||
if(cell[pIndex] != -1){
|
if(cell[sIndex] != -1){
|
||||||
wellMap.merge(cell[pIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getSourceFileName() {
|
||||||
|
return sourceFile;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,12 +14,14 @@ import java.util.regex.Pattern;
|
|||||||
public class PlateFileReader {
|
public class PlateFileReader {
|
||||||
|
|
||||||
private List<List<Integer[]>> wells = new ArrayList<>();
|
private List<List<Integer[]>> wells = new ArrayList<>();
|
||||||
|
private String filename;
|
||||||
|
|
||||||
public PlateFileReader(String filename){
|
public PlateFileReader(String filename){
|
||||||
|
|
||||||
if(!filename.matches(".*\\.csv")){
|
if(!filename.matches(".*\\.csv")){
|
||||||
filename = filename + ".csv";
|
filename = filename + ".csv";
|
||||||
}
|
}
|
||||||
|
this.filename = filename;
|
||||||
|
|
||||||
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
||||||
.setCommentMarker('#')
|
.setCommentMarker('#')
|
||||||
@@ -58,4 +60,7 @@ public class PlateFileReader {
|
|||||||
return wells;
|
return wells;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getFilename() {
|
||||||
|
return filename;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -13,18 +13,28 @@ public class PlateFileWriter {
|
|||||||
private int size;
|
private int size;
|
||||||
private List<List<Integer[]>> wells;
|
private List<List<Integer[]>> wells;
|
||||||
private double stdDev;
|
private double stdDev;
|
||||||
|
private double lambda;
|
||||||
private Double error;
|
private Double error;
|
||||||
private String filename;
|
private String filename;
|
||||||
|
private String sourceFileName;
|
||||||
private String[] headers;
|
private String[] headers;
|
||||||
private List<Integer> concentrations;
|
private List<Integer> concentrations;
|
||||||
|
private boolean isExponential = false;
|
||||||
|
|
||||||
public PlateFileWriter(String filename, Plate plate) {
|
public PlateFileWriter(String filename, Plate plate) {
|
||||||
if(!filename.matches(".*\\.csv")){
|
if(!filename.matches(".*\\.csv")){
|
||||||
filename = filename + ".csv";
|
filename = filename + ".csv";
|
||||||
}
|
}
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
|
this.sourceFileName = plate.getSourceFileName();
|
||||||
this.size = plate.getSize();
|
this.size = plate.getSize();
|
||||||
this.stdDev = plate.getStdDev();
|
this.isExponential = plate.isExponential();
|
||||||
|
if(isExponential) {
|
||||||
|
this.lambda = plate.getLambda();
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
this.stdDev = plate.getStdDev();
|
||||||
|
}
|
||||||
this.error = plate.getError();
|
this.error = plate.getError();
|
||||||
this.wells = plate.getWells();
|
this.wells = plate.getWells();
|
||||||
this.concentrations = Arrays.asList(plate.getConcentrations());
|
this.concentrations = Arrays.asList(plate.getConcentrations());
|
||||||
@@ -32,10 +42,6 @@ public class PlateFileWriter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void writePlateFile(){
|
public void writePlateFile(){
|
||||||
//works as is, but too many columns in csv, need to make them all rows.
|
|
||||||
|
|
||||||
//will now redo it so that every column is a well, with well names as headers
|
|
||||||
//need to give plate error, sample pop size, stdDev, num sections, concentration per section as comments
|
|
||||||
Comparator<List<Integer[]>> listLengthDescending = Comparator.comparingInt(List::size);
|
Comparator<List<Integer[]>> listLengthDescending = Comparator.comparingInt(List::size);
|
||||||
wells.sort(listLengthDescending.reversed());
|
wells.sort(listLengthDescending.reversed());
|
||||||
int maxLength = wells.get(0).size();
|
int maxLength = wells.get(0).size();
|
||||||
@@ -67,6 +73,7 @@ public class PlateFileWriter {
|
|||||||
}
|
}
|
||||||
rows.add(tmp);
|
rows.add(tmp);
|
||||||
}
|
}
|
||||||
|
//build string of well concentrations
|
||||||
StringBuilder concen = new StringBuilder();
|
StringBuilder concen = new StringBuilder();
|
||||||
for(Integer i: concentrations){
|
for(Integer i: concentrations){
|
||||||
concen.append(i.toString());
|
concen.append(i.toString());
|
||||||
@@ -74,16 +81,24 @@ public class PlateFileWriter {
|
|||||||
}
|
}
|
||||||
String concenString = concen.toString();
|
String concenString = concen.toString();
|
||||||
|
|
||||||
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build();
|
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
||||||
|
.setCommentMarker('#')
|
||||||
|
.build();
|
||||||
|
|
||||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||||
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
|
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
|
||||||
){
|
){
|
||||||
|
printer.printComment("Cell source file name: " + sourceFileName);
|
||||||
printer.printComment("Each row represents one well on the plate.");
|
printer.printComment("Each row represents one well on the plate.");
|
||||||
printer.printComment("Plate size: " + size);
|
printer.printComment("Plate size: " + size);
|
||||||
printer.printComment("Error rate: " + error);
|
printer.printComment("Error rate: " + error);
|
||||||
printer.printComment("Concentrations: " + concenString);
|
printer.printComment("Concentrations: " + concenString);
|
||||||
printer.printComment("Std. dev.: " + stdDev);
|
if(isExponential){
|
||||||
|
printer.printComment("Lambda: " + lambda);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
printer.printComment("Std. dev.: " + stdDev);
|
||||||
|
}
|
||||||
printer.printRecords(wellsAsStrings);
|
printer.printRecords(wellsAsStrings);
|
||||||
} catch(IOException ex){
|
} catch(IOException ex){
|
||||||
System.out.println("Could not make new file named "+filename);
|
System.out.println("Could not make new file named "+filename);
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
//import org.jgrapht.Graph;
|
|
||||||
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
||||||
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||||
|
import org.jheaps.tree.PairingHeap;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
import java.math.MathContext;
|
import java.math.MathContext;
|
||||||
@@ -13,18 +13,15 @@ import java.time.Duration;
|
|||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
|
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
|
||||||
public class Simulator {
|
public class Simulator {
|
||||||
private static int cdr3AlphaIndex = 0;
|
private static final int cdr3AlphaIndex = 0;
|
||||||
private static int cdr3BetaIndex = 1;
|
private static final int cdr3BetaIndex = 1;
|
||||||
private static int cdr1AlphaIndex = 2;
|
private static final int cdr1AlphaIndex = 2;
|
||||||
private static int cdr1BetaIndex = 3;
|
private static final int cdr1BetaIndex = 3;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
|
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
|
||||||
//In real T cells, CDR1s have about one third the diversity of CDR3s
|
//In real T cells, CDR1s have about one third the diversity of CDR3s
|
||||||
//previous sim was only CDR3s
|
|
||||||
List<Integer> numbersCDR3 = new ArrayList<>();
|
List<Integer> numbersCDR3 = new ArrayList<>();
|
||||||
List<Integer> numbersCDR1 = new ArrayList<>();
|
List<Integer> numbersCDR1 = new ArrayList<>();
|
||||||
Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||||
@@ -34,7 +31,7 @@ public class Simulator {
|
|||||||
Collections.shuffle(numbersCDR1);
|
Collections.shuffle(numbersCDR1);
|
||||||
|
|
||||||
//Each cell represented by 4 values
|
//Each cell represented by 4 values
|
||||||
//two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
|
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
|
||||||
List<Integer[]> distinctCells = new ArrayList<>();
|
List<Integer[]> distinctCells = new ArrayList<>();
|
||||||
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||||
Integer tmpCDR3a = numbersCDR3.get(i);
|
Integer tmpCDR3a = numbersCDR3.get(i);
|
||||||
@@ -47,97 +44,143 @@ public class Simulator {
|
|||||||
return new CellSample(distinctCells, cdr1Freq);
|
return new CellSample(distinctCells, cdr1Freq);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
|
//Make the graph needed for matching CDR3s
|
||||||
Plate samplePlate, Integer lowThreshold, Integer highThreshold){
|
public static GraphWithMapData makeGraph(List<Integer[]> distinctCells, Plate samplePlate, boolean verbose) {
|
||||||
System.out.println("Cells: " + distinctCells.size());
|
|
||||||
|
|
||||||
Instant start = Instant.now();
|
Instant start = Instant.now();
|
||||||
int numWells = samplePlate.getSize();
|
|
||||||
int[] alphaIndex = {cdr3AlphaIndex};
|
int[] alphaIndex = {cdr3AlphaIndex};
|
||||||
int[] betaIndex = {cdr3BetaIndex};
|
int[] betaIndex = {cdr3BetaIndex};
|
||||||
|
int numWells = samplePlate.getSize();
|
||||||
|
|
||||||
System.out.println("Making cell maps");
|
if(verbose){System.out.println("Making cell maps");}
|
||||||
//HashMap keyed to Alphas, values Betas
|
//HashMap keyed to Alphas, values Betas
|
||||||
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
|
Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
|
||||||
System.out.println("Cell maps made");
|
if(verbose){System.out.println("Cell maps made");}
|
||||||
|
|
||||||
System.out.println("Making well maps");
|
if(verbose){System.out.println("Making well maps");}
|
||||||
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
|
Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndex);
|
||||||
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
|
Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndex);
|
||||||
int alphaCount = allAlphas.size();
|
int alphaCount = allAlphas.size();
|
||||||
System.out.println("all alphas count: " + alphaCount);
|
if(verbose){System.out.println("All alphas count: " + alphaCount);}
|
||||||
int betaCount = allBetas.size();
|
int betaCount = allBetas.size();
|
||||||
System.out.println("all betas count: " + betaCount);
|
if(verbose){System.out.println("All betas count: " + betaCount);}
|
||||||
|
|
||||||
System.out.println("Well maps made");
|
if(verbose){System.out.println("Well maps made");}
|
||||||
|
|
||||||
//Remove saturating-occupancy peptides because they have no signal value.
|
//Remove saturating-occupancy sequences because they have no signal value.
|
||||||
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
|
//Remove sequences with total occupancy below minimum pair overlap threshold
|
||||||
//peptide that's above the threshold.
|
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||||
System.out.println("Removing peptides present in all wells.");
|
//if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
|
||||||
System.out.println("Removing peptides with occupancy below the minimum overlap threshold");
|
filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
|
||||||
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
|
filterByOccupancyThreshold(allBetas, 1, numWells - 1);
|
||||||
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
|
if(verbose){System.out.println("Sequences removed");}
|
||||||
System.out.println("Peptides removed");
|
|
||||||
int pairableAlphaCount = allAlphas.size();
|
int pairableAlphaCount = allAlphas.size();
|
||||||
System.out.println("Remaining alpha count: " + pairableAlphaCount);
|
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
||||||
int pairableBetaCount = allBetas.size();
|
int pairableBetaCount = allBetas.size();
|
||||||
System.out.println("Remaining beta count: " + pairableBetaCount);
|
if(verbose){System.out.println("Remaining betas count: " + pairableBetaCount);}
|
||||||
|
|
||||||
System.out.println("Making vertex maps");
|
if(verbose){System.out.println("Making vertex maps");}
|
||||||
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||||
// distinct numbers associated with them. Since I'm using a 2D array, that means
|
//distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||||
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||||
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||||
// subtract the vertexStartValue from betas to use their vertex labels as array indices
|
//subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||||
Integer vertexStartValue = 0;
|
Integer vertexStartValue = 0;
|
||||||
//keys are sequential integer vertices, values are alphas
|
//keys are sequential integer vertices, values are alphas
|
||||||
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
|
Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue);
|
||||||
//New start value for vertex to beta map should be one more than final vertex value in alpha map
|
//new start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||||
vertexStartValue += plateVtoAMap.size();
|
vertexStartValue += plateVtoAMap.size();
|
||||||
//keys are sequential integers vertices, values are betas
|
//keys are sequential integers vertices, values are betas
|
||||||
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
|
Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue);
|
||||||
//keys are alphas, values are sequential integer vertices from previous map
|
//keys are alphas, values are sequential integer vertices from previous map
|
||||||
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
|
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
|
||||||
//keys are betas, values are sequential integer vertices from previous map
|
//keys are betas, values are sequential integer vertices from previous map
|
||||||
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
|
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
|
||||||
System.out.println("Vertex maps made");
|
if(verbose){System.out.println("Vertex maps made");}
|
||||||
|
|
||||||
System.out.println("Creating adjacency matrix");
|
//make adjacency matrix for bipartite graph generator
|
||||||
|
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
||||||
|
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
||||||
|
if(verbose){System.out.println("Creating adjacency matrix");}
|
||||||
//Count how many wells each alpha appears in
|
//Count how many wells each alpha appears in
|
||||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||||
//count how many wells each beta appears in
|
//count how many wells each beta appears in
|
||||||
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
||||||
|
//the adjacency matrix to be used by the graph generator
|
||||||
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
||||||
|
countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
|
||||||
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
|
|
||||||
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
|
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
|
||||||
System.out.println("matrix created");
|
if(verbose){System.out.println("Matrix created");}
|
||||||
|
|
||||||
System.out.println("creating graph");
|
//create bipartite graph
|
||||||
|
if(verbose){System.out.println("Creating graph");}
|
||||||
|
//the graph object
|
||||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
||||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||||
|
//the graph generator
|
||||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||||
List<Integer> alphaVertices = new ArrayList<>();
|
//the list of alpha vertices
|
||||||
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||||
graphGenerator.first(alphaVertices);
|
graphGenerator.first(alphaVertices);
|
||||||
List<Integer> betaVertices = new ArrayList<>();
|
//the list of beta vertices
|
||||||
betaVertices.addAll(plateVtoBMap.keySet());
|
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
|
||||||
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
||||||
|
//use adjacency matrix of weight created previously
|
||||||
graphGenerator.weights(weights);
|
graphGenerator.weights(weights);
|
||||||
graphGenerator.generateGraph(graph);
|
graphGenerator.generateGraph(graph);
|
||||||
System.out.println("Graph created");
|
if(verbose){System.out.println("Graph created");}
|
||||||
|
|
||||||
System.out.println("Eliminating edges with weights outside threshold values");
|
Instant stop = Instant.now();
|
||||||
|
Duration time = Duration.between(start, stop);
|
||||||
|
//return GraphWithMapData object
|
||||||
|
return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount,
|
||||||
|
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
|
||||||
|
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
|
||||||
|
}
|
||||||
|
|
||||||
|
//match CDR3s.
|
||||||
|
public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
|
||||||
|
Integer highThreshold, Integer maxOccupancyDifference,
|
||||||
|
Integer minOverlapPercent, boolean verbose) {
|
||||||
|
Instant start = Instant.now();
|
||||||
|
int numWells = data.getNumWells();
|
||||||
|
Integer alphaCount = data.getAlphaCount();
|
||||||
|
Integer betaCount = data.getBetaCount();
|
||||||
|
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
|
||||||
|
Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
|
||||||
|
Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
|
||||||
|
Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
|
||||||
|
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
|
||||||
|
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
|
||||||
|
|
||||||
|
//remove weights outside given overlap thresholds
|
||||||
|
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
||||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||||
System.out.println("Over- and under-weight edges set to 0.0");
|
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
|
||||||
|
|
||||||
|
//Filter by overlap size
|
||||||
|
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
|
||||||
|
" percent of vertex occupancy value.");}
|
||||||
|
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
|
||||||
|
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
|
||||||
|
|
||||||
System.out.println("Finding maximum weighted matching");
|
//Filter by relative occupancy
|
||||||
|
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
|
||||||
|
+ maxOccupancyDifference);}
|
||||||
|
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
|
||||||
|
maxOccupancyDifference);
|
||||||
|
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
||||||
|
"set to 0.0");}
|
||||||
|
//Find Maximum Weighted Matching
|
||||||
|
//using jheaps library class PairingHeap for improved efficiency
|
||||||
|
if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||||
|
//Attempting to use addressable heap to improve performance
|
||||||
MaximumWeightBipartiteMatching maxWeightMatching =
|
MaximumWeightBipartiteMatching maxWeightMatching =
|
||||||
new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
|
new MaximumWeightBipartiteMatching(graph,
|
||||||
|
plateVtoAMap.keySet(),
|
||||||
|
plateVtoBMap.keySet(),
|
||||||
|
i -> new PairingHeap(Comparator.naturalOrder()));
|
||||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||||
System.out.println("Matching completed");
|
if(verbose){System.out.println("Matching completed");}
|
||||||
Instant stop = Instant.now();
|
Instant stop = Instant.now();
|
||||||
|
|
||||||
//Header for CSV file
|
//Header for CSV file
|
||||||
@@ -150,16 +193,15 @@ public class Simulator {
|
|||||||
header.add("Matched correctly?");
|
header.add("Matched correctly?");
|
||||||
header.add("P-value");
|
header.add("P-value");
|
||||||
|
|
||||||
|
|
||||||
//Results for csv file
|
//Results for csv file
|
||||||
List<List<String>> allResults = new ArrayList<>();
|
List<List<String>> allResults = new ArrayList<>();
|
||||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||||
MathContext mc = new MathContext(3);
|
MathContext mc = new MathContext(3);
|
||||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||||
DefaultWeightedEdge e = null;
|
DefaultWeightedEdge e;
|
||||||
int trueCount = 0;
|
int trueCount = 0;
|
||||||
int falseCount = 0;
|
int falseCount = 0;
|
||||||
boolean check = false;
|
boolean check;
|
||||||
Map<Integer, Integer> matchMap = new HashMap<>();
|
Map<Integer, Integer> matchMap = new HashMap<>();
|
||||||
while(weightIter.hasNext()) {
|
while(weightIter.hasNext()) {
|
||||||
e = weightIter.next();
|
e = weightIter.next();
|
||||||
@@ -192,28 +234,44 @@ public class Simulator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Metadate comments for CSV file
|
//Metadate comments for CSV file
|
||||||
int min = alphaCount > betaCount ? betaCount : alphaCount;
|
int min = Math.min(alphaCount, betaCount);
|
||||||
double attemptRate = (double) (trueCount + falseCount) / min;
|
double attemptRate = (double) (trueCount + falseCount) / min;
|
||||||
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
||||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||||
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||||
|
//get list of well concentrations
|
||||||
|
List<Integer> wellConcentrations = Arrays.asList(data.getWellConcentrations());
|
||||||
|
//make string out of concentrations list
|
||||||
|
StringBuilder concentrationStringBuilder = new StringBuilder();
|
||||||
|
for(Integer i: wellConcentrations){
|
||||||
|
concentrationStringBuilder.append(i.toString());
|
||||||
|
concentrationStringBuilder.append(" ");
|
||||||
|
}
|
||||||
|
String concentrationString = concentrationStringBuilder.toString();
|
||||||
|
|
||||||
List<String> comments = new ArrayList<>();
|
List<String> comments = new ArrayList<>();
|
||||||
|
comments.add("Source Sample Plate filename: " + data.getSourceFilename());
|
||||||
|
comments.add("Source Graph and Data filename: " + dataFilename);
|
||||||
|
comments.add("T cell counts in sample plate wells: " + concentrationString);
|
||||||
comments.add("Total alphas found: " + alphaCount);
|
comments.add("Total alphas found: " + alphaCount);
|
||||||
comments.add("Total betas found: " + betaCount);
|
comments.add("Total betas found: " + betaCount);
|
||||||
comments.add("High overlap threshold: " + highThreshold);
|
comments.add("High overlap threshold: " + highThreshold);
|
||||||
comments.add("Low overlap threshold: " + lowThreshold);
|
comments.add("Low overlap threshold: " + lowThreshold);
|
||||||
|
comments.add("Minimum overlap percent: " + minOverlapPercent);
|
||||||
|
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
|
||||||
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
||||||
comments.add("Correct pairings: " + trueCount);
|
comments.add("Correct pairings: " + trueCount);
|
||||||
comments.add("Incorrect pairings: " + falseCount);
|
comments.add("Incorrect pairings: " + falseCount);
|
||||||
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
|
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
|
||||||
Duration time = Duration.between(start, stop);
|
Duration time = Duration.between(start, stop);
|
||||||
|
time = time.plus(data.getTime());
|
||||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||||
for(String s: comments){
|
if(verbose){
|
||||||
System.out.println(s);
|
for(String s: comments){
|
||||||
|
System.out.println(s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time);
|
||||||
return new MatchingResult(comments, header, allResults, matchMap, time);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
|
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
|
||||||
@@ -233,13 +291,13 @@ public class Simulator {
|
|||||||
System.out.println("Previous match maps made");
|
System.out.println("Previous match maps made");
|
||||||
|
|
||||||
System.out.println("Making cell maps");
|
System.out.println("Making cell maps");
|
||||||
Map<Integer, Integer> alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
||||||
Map<Integer, Integer> betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
||||||
System.out.println("Cell maps made");
|
System.out.println("Cell maps made");
|
||||||
|
|
||||||
System.out.println("Making well maps");
|
System.out.println("Making well maps");
|
||||||
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices);
|
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
|
||||||
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices);
|
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
|
||||||
int CDR3Count = allCDR3s.size();
|
int CDR3Count = allCDR3s.size();
|
||||||
System.out.println("all CDR3s count: " + CDR3Count);
|
System.out.println("all CDR3s count: " + CDR3Count);
|
||||||
int CDR1Count = allCDR1s.size();
|
int CDR1Count = allCDR1s.size();
|
||||||
@@ -273,11 +331,11 @@ public class Simulator {
|
|||||||
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
|
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
|
||||||
Integer vertexStartValue = 0;
|
Integer vertexStartValue = 0;
|
||||||
//keys are sequential integer vertices, values are CDR3s
|
//keys are sequential integer vertices, values are CDR3s
|
||||||
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue);
|
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
|
||||||
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
|
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
|
||||||
vertexStartValue += plateVtoCDR3Map.size();
|
vertexStartValue += plateVtoCDR3Map.size();
|
||||||
//keys are sequential integers vertices, values are CDR1s
|
//keys are sequential integers vertices, values are CDR1s
|
||||||
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue);
|
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
|
||||||
//keys are CDR3s, values are sequential integer vertices from previous map
|
//keys are CDR3s, values are sequential integer vertices from previous map
|
||||||
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
|
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
|
||||||
//keys are CDR1s, values are sequential integer vertices from previous map
|
//keys are CDR1s, values are sequential integer vertices from previous map
|
||||||
@@ -294,7 +352,7 @@ public class Simulator {
|
|||||||
Map<Integer, Integer> wellNCDR3s = null;
|
Map<Integer, Integer> wellNCDR3s = null;
|
||||||
Map<Integer, Integer> wellNCDR1s = null;
|
Map<Integer, Integer> wellNCDR1s = null;
|
||||||
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
|
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
|
||||||
countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
||||||
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
|
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
|
||||||
System.out.println("Matrix created");
|
System.out.println("Matrix created");
|
||||||
|
|
||||||
@@ -303,11 +361,9 @@ public class Simulator {
|
|||||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||||
|
|
||||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||||
List<Integer> cdr3Vertices = new ArrayList<>();
|
List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||||
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
|
||||||
graphGenerator.first(cdr3Vertices);
|
graphGenerator.first(cdr3Vertices);
|
||||||
List<Integer> cdr1Vertices = new ArrayList<>();
|
List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
|
||||||
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
|
|
||||||
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
||||||
graphGenerator.weights(weights);
|
graphGenerator.weights(weights);
|
||||||
graphGenerator.generateGraph(graph);
|
graphGenerator.generateGraph(graph);
|
||||||
@@ -327,7 +383,7 @@ public class Simulator {
|
|||||||
//first processing run
|
//first processing run
|
||||||
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
||||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||||
DefaultWeightedEdge e = null;
|
DefaultWeightedEdge e;
|
||||||
while(weightIter.hasNext()){
|
while(weightIter.hasNext()){
|
||||||
e = weightIter.next();
|
e = weightIter.next();
|
||||||
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||||
@@ -451,7 +507,7 @@ public class Simulator {
|
|||||||
comments.add("Correct matching rate: " + correctRate);
|
comments.add("Correct matching rate: " + correctRate);
|
||||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||||
Duration time = Duration.between(start, stop);
|
Duration time = Duration.between(start, stop);
|
||||||
time.plus(previousTime);
|
time = time.plus(previousTime);
|
||||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||||
for(String s: comments){
|
for(String s: comments){
|
||||||
System.out.println(s);
|
System.out.println(s);
|
||||||
@@ -466,7 +522,8 @@ public class Simulator {
|
|||||||
headers.add("second matched CDR1");
|
headers.add("second matched CDR1");
|
||||||
headers.add("Correct match?");
|
headers.add("Correct match?");
|
||||||
|
|
||||||
MatchingResult firstTest = new MatchingResult(comments, headers, allResults, dualMatchesMap, time);
|
MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
|
||||||
|
comments, headers, allResults, dualMatchesMap, time);
|
||||||
|
|
||||||
//results for dual map
|
//results for dual map
|
||||||
System.out.println("RESULTS FOR SECOND PASS MATCHING");
|
System.out.println("RESULTS FOR SECOND PASS MATCHING");
|
||||||
@@ -515,7 +572,8 @@ public class Simulator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||||
MatchingResult dualTest = new MatchingResult(comments, headers, allResults, dualMatchesMap, time);
|
MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
|
||||||
|
allResults, dualMatchesMap, time);
|
||||||
MatchingResult[] output = {firstTest, dualTest};
|
MatchingResult[] output = {firstTest, dualTest};
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
@@ -523,38 +581,38 @@ public class Simulator {
|
|||||||
|
|
||||||
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
||||||
//fills weights in the given 2D array
|
//fills weights in the given 2D array
|
||||||
private static void countPeptidesAndFillMatrix(Plate samplePlate,
|
private static void countSequencesAndFillMatrix(Plate samplePlate,
|
||||||
Map<Integer,Integer> allRowPeptides,
|
Map<Integer,Integer> allRowSequences,
|
||||||
Map<Integer,Integer> allColumnPeptides,
|
Map<Integer,Integer> allColumnSequences,
|
||||||
Map<Integer,Integer> rowPeptideToVertexMap,
|
Map<Integer,Integer> rowSequenceToVertexMap,
|
||||||
Map<Integer,Integer> columnPeptideToVertexMap,
|
Map<Integer,Integer> columnSequenceToVertexMap,
|
||||||
int[] rowPeptideIndices,
|
int[] rowSequenceIndices,
|
||||||
int[] colPeptideIndices,
|
int[] colSequenceIndices,
|
||||||
Map<Integer, Integer> rowPeptideCounts,
|
Map<Integer, Integer> rowSequenceCounts,
|
||||||
Map<Integer,Integer> columnPeptideCounts,
|
Map<Integer,Integer> columnSequenceCounts,
|
||||||
double[][] weights){
|
double[][] weights){
|
||||||
Map<Integer, Integer> wellNRowPeptides = null;
|
Map<Integer, Integer> wellNRowSequences = null;
|
||||||
Map<Integer, Integer> wellNColumnPeptides = null;
|
Map<Integer, Integer> wellNColumnSequences = null;
|
||||||
int vertexStartValue = rowPeptideToVertexMap.size();
|
int vertexStartValue = rowSequenceToVertexMap.size();
|
||||||
int numWells = samplePlate.getSize();
|
int numWells = samplePlate.getSize();
|
||||||
for (int n = 0; n < numWells; n++) {
|
for (int n = 0; n < numWells; n++) {
|
||||||
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices);
|
wellNRowSequences = samplePlate.assayWellsSequenceS(n, rowSequenceIndices);
|
||||||
for (Integer a : wellNRowPeptides.keySet()) {
|
for (Integer a : wellNRowSequences.keySet()) {
|
||||||
if(allRowPeptides.containsKey(a)){
|
if(allRowSequences.containsKey(a)){
|
||||||
rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
rowSequenceCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices);
|
wellNColumnSequences = samplePlate.assayWellsSequenceS(n, colSequenceIndices);
|
||||||
for (Integer b : wellNColumnPeptides.keySet()) {
|
for (Integer b : wellNColumnSequences.keySet()) {
|
||||||
if(allColumnPeptides.containsKey(b)){
|
if(allColumnSequences.containsKey(b)){
|
||||||
columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
columnSequenceCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (Integer i : wellNRowPeptides.keySet()) {
|
for (Integer i : wellNRowSequences.keySet()) {
|
||||||
if(allRowPeptides.containsKey(i)){
|
if(allRowSequences.containsKey(i)){
|
||||||
for (Integer j : wellNColumnPeptides.keySet()) {
|
for (Integer j : wellNColumnSequences.keySet()) {
|
||||||
if(allColumnPeptides.containsKey(j)){
|
if(allColumnSequences.containsKey(j)){
|
||||||
weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0;
|
weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -584,19 +642,54 @@ public class Simulator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<Integer, Integer> makePeptideToPeptideMap(List<Integer[]> cells, int keyPeptideIndex,
|
//Remove edges for pairs with large occupancy discrepancy
|
||||||
int valuePeptideIndex){
|
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||||
Map<Integer, Integer> keyPeptideToValuePeptideMap = new HashMap<>();
|
Map<Integer, Integer> alphaWellCounts,
|
||||||
for (Integer[] cell : cells) {
|
Map<Integer, Integer> betaWellCounts,
|
||||||
keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]);
|
Map<Integer, Integer> plateVtoAMap,
|
||||||
|
Map<Integer, Integer> plateVtoBMap,
|
||||||
|
Integer maxOccupancyDifference) {
|
||||||
|
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||||
|
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||||
|
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||||
|
//Adjust this to something cleverer later
|
||||||
|
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||||
|
graph.setEdgeWeight(e, 0.0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return keyPeptideToValuePeptideMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<Integer, Integer> makeVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
|
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||||
|
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||||
|
Map<Integer, Integer> alphaWellCounts,
|
||||||
|
Map<Integer, Integer> betaWellCounts,
|
||||||
|
Map<Integer, Integer> plateVtoAMap,
|
||||||
|
Map<Integer, Integer> plateVtoBMap,
|
||||||
|
Integer minOverlapPercent) {
|
||||||
|
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||||
|
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||||
|
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||||
|
double weight = graph.getEdgeWeight(e);
|
||||||
|
double min = minOverlapPercent / 100.0;
|
||||||
|
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||||
|
graph.setEdgeWeight(e, 0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
|
||||||
|
int valueSequenceIndex){
|
||||||
|
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
|
||||||
|
for (Integer[] cell : cells) {
|
||||||
|
keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]);
|
||||||
|
}
|
||||||
|
return keySequenceToValueSequenceMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
|
||||||
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
|
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
|
||||||
Integer index = startValue;
|
Integer index = startValue;
|
||||||
for (Integer k: peptides.keySet()) {
|
for (Integer k: sequences.keySet()) {
|
||||||
map.put(index, k);
|
map.put(index, k);
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
@@ -605,7 +698,7 @@ public class Simulator {
|
|||||||
|
|
||||||
private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
|
private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
|
||||||
Map<Integer, Integer> inverse = new HashMap<>();
|
Map<Integer, Integer> inverse = new HashMap<>();
|
||||||
for(Integer k: map.keySet()) {
|
for (Integer k : map.keySet()) {
|
||||||
inverse.put(map.get(k), k);
|
inverse.put(map.get(k), k);
|
||||||
}
|
}
|
||||||
return inverse;
|
return inverse;
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
|
import org.apache.commons.cli.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
import java.util.InputMismatchException;
|
import java.util.InputMismatchException;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
//
|
//
|
||||||
public class UserInterface {
|
public class UserInterface {
|
||||||
@@ -9,33 +14,282 @@ public class UserInterface {
|
|||||||
static int input;
|
static int input;
|
||||||
static boolean quit = false;
|
static boolean quit = false;
|
||||||
|
|
||||||
public static void main(String args[]) {
|
public static void main(String[] args) {
|
||||||
while(!quit) {
|
//for now, commenting out all the command line argument stuff.
|
||||||
System.out.println("\nALPHA/BETA T-CELL RECEPTOR MATCHING SIMULATOR");
|
// Refactoring to output files of graphs, so it would all need to change anyway.
|
||||||
System.out.println("Please select an option:");
|
|
||||||
System.out.println("1) Generate a population of distinct cells");
|
// if(args.length != 0){
|
||||||
System.out.println("2) Generate a sample plate of T cells");
|
// //These command line options are a big mess
|
||||||
System.out.println("3) Simulate CDR3 alpha/beta T cell matching");
|
// //Really, I don't think command line tools are expected to work in this many different modes
|
||||||
System.out.println("4) Simulate CDR3/CDR1 T cell matching");
|
// //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
|
||||||
System.out.println("5) Acknowledgements");
|
// //should be three separate programs.
|
||||||
System.out.println("0) Exit");
|
// //There might be a way to do it with option parameters?
|
||||||
try {
|
//
|
||||||
input = sc.nextInt();
|
// Options mainOptions = new Options();
|
||||||
switch(input){
|
// Option makeCells = Option.builder("cells")
|
||||||
case 1 -> makeCells();
|
// .longOpt("make-cells")
|
||||||
case 2 -> makePlate();
|
// .desc("Makes a file of distinct cells")
|
||||||
case 3 -> matchCells();
|
// .build();
|
||||||
case 4 -> matchCellsExpanded();
|
// Option makePlate = Option.builder("plates")
|
||||||
case 5 -> acknowledge();
|
// .longOpt("make-plates")
|
||||||
case 0 -> quit = true;
|
// .desc("Makes a sample plate file")
|
||||||
default -> throw new InputMismatchException("Invalid input.");
|
// .build();
|
||||||
|
// Option matchCDR3 = Option.builder("match")
|
||||||
|
// .longOpt("match-cdr3")
|
||||||
|
// .desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
|
||||||
|
// .build();
|
||||||
|
// OptionGroup mainGroup = new OptionGroup();
|
||||||
|
// mainGroup.addOption(makeCells);
|
||||||
|
// mainGroup.addOption(makePlate);
|
||||||
|
// mainGroup.addOption(matchCDR3);
|
||||||
|
// mainGroup.setRequired(true);
|
||||||
|
// mainOptions.addOptionGroup(mainGroup);
|
||||||
|
//
|
||||||
|
// //Reuse clones of this for other options groups, rather than making it lots of times
|
||||||
|
// Option outputFile = Option.builder("o")
|
||||||
|
// .longOpt("output-file")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("filename")
|
||||||
|
// .desc("Name of output file")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(outputFile);
|
||||||
|
//
|
||||||
|
// //Options cellOptions = new Options();
|
||||||
|
// Option numCells = Option.builder("nc")
|
||||||
|
// .longOpt("num-cells")
|
||||||
|
// .desc("The number of distinct cells to generate")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(numCells);
|
||||||
|
// Option cdr1Freq = Option.builder("d")
|
||||||
|
// .longOpt("peptide-diversity-factor")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .desc("Number of distinct CDR3s for every CDR1")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(cdr1Freq);
|
||||||
|
// //Option cellOutput = (Option) outputFile.clone();
|
||||||
|
// //cellOutput.setRequired(true);
|
||||||
|
// //mainOptions.addOption(cellOutput);
|
||||||
|
//
|
||||||
|
// //Options plateOptions = new Options();
|
||||||
|
// Option inputCells = Option.builder("c")
|
||||||
|
// .longOpt("cell-file")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("file")
|
||||||
|
// .desc("The cell sample file used for filling wells")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(inputCells);
|
||||||
|
// Option numWells = Option.builder("w")
|
||||||
|
// .longOpt("num-wells")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .desc("The number of wells on each plate")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(numWells);
|
||||||
|
// Option numPlates = Option.builder("np")
|
||||||
|
// .longOpt("num-plates")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .desc("The number of plate files to output")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(numPlates);
|
||||||
|
// //Option plateOutput = (Option) outputFile.clone();
|
||||||
|
// //plateOutput.setRequired(true);
|
||||||
|
// //plateOutput.setDescription("Prefix for plate output filenames");
|
||||||
|
// //mainOptions.addOption(plateOutput);
|
||||||
|
// Option plateErr = Option.builder("err")
|
||||||
|
// .longOpt("drop-out-rate")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .desc("Well drop-out rate. (Probability between 0 and 1)")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(plateErr);
|
||||||
|
// Option plateConcentrations = Option.builder("t")
|
||||||
|
// .longOpt("t-cells-per-well")
|
||||||
|
// .hasArgs()
|
||||||
|
// .argName("number 1, number 2, ...")
|
||||||
|
// .desc("Number of T cells per well for each plate section")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(plateConcentrations);
|
||||||
|
//
|
||||||
|
////different distributions, mutually exclusive
|
||||||
|
// OptionGroup plateDistributions = new OptionGroup();
|
||||||
|
// Option plateExp = Option.builder("exponential")
|
||||||
|
// .desc("Sample from distinct cells with exponential frequency distribution")
|
||||||
|
// .build();
|
||||||
|
// plateDistributions.addOption(plateExp);
|
||||||
|
// Option plateGaussian = Option.builder("gaussian")
|
||||||
|
// .desc("Sample from distinct cells with gaussain frequency distribution")
|
||||||
|
// .build();
|
||||||
|
// plateDistributions.addOption(plateGaussian);
|
||||||
|
// Option platePoisson = Option.builder("poisson")
|
||||||
|
// .desc("Sample from distinct cells with poisson frequency distribution")
|
||||||
|
// .build();
|
||||||
|
// plateDistributions.addOption(platePoisson);
|
||||||
|
// mainOptions.addOptionGroup(plateDistributions);
|
||||||
|
//
|
||||||
|
// Option plateStdDev = Option.builder("stddev")
|
||||||
|
// .desc("Standard deviation for gaussian distribution")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(plateStdDev);
|
||||||
|
//
|
||||||
|
// Option plateLambda = Option.builder("lambda")
|
||||||
|
// .desc("Lambda for exponential distribution")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(plateLambda);
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
////
|
||||||
|
//// String cellFile, String filename, Double stdDev,
|
||||||
|
//// Integer numWells, Integer numSections,
|
||||||
|
//// Integer[] concentrations, Double dropOutRate
|
||||||
|
////
|
||||||
|
//
|
||||||
|
// //Options matchOptions = new Options();
|
||||||
|
// inputCells.setDescription("The cell sample file to be used for matching.");
|
||||||
|
// mainOptions.addOption(inputCells);
|
||||||
|
// Option lowThresh = Option.builder("low")
|
||||||
|
// .longOpt("low-threshold")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .desc("Sets the minimum occupancy overlap to attempt matching")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(lowThresh);
|
||||||
|
// Option highThresh = Option.builder("high")
|
||||||
|
// .longOpt("high-threshold")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("number")
|
||||||
|
// .desc("Sets the maximum occupancy overlap to attempt matching")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(highThresh);
|
||||||
|
// Option occDiff = Option.builder("occdiff")
|
||||||
|
// .longOpt("occupancy-difference")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("Number")
|
||||||
|
// .desc("Maximum difference in alpha/beta occupancy to attempt matching")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(occDiff);
|
||||||
|
// Option overlapPer = Option.builder("ovper")
|
||||||
|
// .longOpt("overlap-percent")
|
||||||
|
// .hasArg()
|
||||||
|
// .argName("Percent")
|
||||||
|
// .desc("Minimum overlap percent to attempt matching (0 -100)")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(overlapPer);
|
||||||
|
// Option inputPlates = Option.builder("p")
|
||||||
|
// .longOpt("plate-files")
|
||||||
|
// .hasArgs()
|
||||||
|
// .desc("Plate files to match")
|
||||||
|
// .build();
|
||||||
|
// mainOptions.addOption(inputPlates);
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// CommandLineParser parser = new DefaultParser();
|
||||||
|
// try {
|
||||||
|
// CommandLine line = parser.parse(mainOptions, args);
|
||||||
|
// if(line.hasOption("match")){
|
||||||
|
// //line = parser.parse(mainOptions, args);
|
||||||
|
// String cellFile = line.getOptionValue("c");
|
||||||
|
// Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
|
||||||
|
// Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
|
||||||
|
// Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff));
|
||||||
|
// Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer));
|
||||||
|
// for(String plate: line.getOptionValues("p")) {
|
||||||
|
// matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else if(line.hasOption("cells")){
|
||||||
|
// //line = parser.parse(mainOptions, args);
|
||||||
|
// String filename = line.getOptionValue("o");
|
||||||
|
// Integer numDistCells = Integer.valueOf(line.getOptionValue("nc"));
|
||||||
|
// Integer freq = Integer.valueOf(line.getOptionValue("d"));
|
||||||
|
// makeCells(filename, numDistCells, freq);
|
||||||
|
// }
|
||||||
|
// else if(line.hasOption("plates")){
|
||||||
|
// //line = parser.parse(mainOptions, args);
|
||||||
|
// String cellFile = line.getOptionValue("c");
|
||||||
|
// String filenamePrefix = line.getOptionValue("o");
|
||||||
|
// Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
|
||||||
|
// Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
|
||||||
|
// String[] concentrationsToUseString = line.getOptionValues("t");
|
||||||
|
// Integer numSections = concentrationsToUseString.length;
|
||||||
|
//
|
||||||
|
// Integer[] concentrationsToUse = new Integer[numSections];
|
||||||
|
// for(int i = 0; i <numSections; i++){
|
||||||
|
// concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]);
|
||||||
|
// }
|
||||||
|
// Double dropOutRate = Double.valueOf(line.getOptionValue("err"));
|
||||||
|
// if(line.hasOption("exponential")){
|
||||||
|
// Double lambda = Double.valueOf(line.getOptionValue("lambda"));
|
||||||
|
// for(int i = 1; i <= numPlatesToMake; i++){
|
||||||
|
// makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
|
||||||
|
// concentrationsToUse,dropOutRate);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else if(line.hasOption("gaussian")){
|
||||||
|
// Double stdDev = Double.valueOf(line.getOptionValue("std-dev"));
|
||||||
|
// for(int i = 1; i <= numPlatesToMake; i++){
|
||||||
|
// makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
|
||||||
|
// concentrationsToUse,dropOutRate);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// else if(line.hasOption("poisson")){
|
||||||
|
// for(int i = 1; i <= numPlatesToMake; i++){
|
||||||
|
// makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
|
||||||
|
// concentrationsToUse,dropOutRate);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// catch (ParseException exp) {
|
||||||
|
// System.err.println("Parsing failed. Reason: " + exp.getMessage());
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
while (!quit) {
|
||||||
|
System.out.println();
|
||||||
|
System.out.println("--------BiGPairSEQ SIMULATOR--------");
|
||||||
|
System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
|
||||||
|
System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
|
||||||
|
System.out.println("------------------------------------");
|
||||||
|
System.out.println("Please select an option:");
|
||||||
|
System.out.println("1) Generate a population of distinct cells");
|
||||||
|
System.out.println("2) Generate a sample plate of T cells");
|
||||||
|
System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
|
||||||
|
System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
|
||||||
|
//Need to re-do the CDR3/CDR1 matching to correspond to new pattern
|
||||||
|
//System.out.println("5) Generate CDR3/CDR1 occupancy graph");
|
||||||
|
//System.out.println("6) Simulate CDR3/CDR1 T cell matching");
|
||||||
|
System.out.println("9) About/Acknowledgments");
|
||||||
|
System.out.println("0) Exit");
|
||||||
|
try {
|
||||||
|
input = sc.nextInt();
|
||||||
|
switch (input) {
|
||||||
|
case 1 -> makeCells();
|
||||||
|
case 2 -> makePlate();
|
||||||
|
case 3 -> makeCDR3Graph();
|
||||||
|
case 4 -> matchCDR3s();
|
||||||
|
//case 6 -> matchCellsCDR1();
|
||||||
|
case 9 -> acknowledge();
|
||||||
|
case 0 -> quit = true;
|
||||||
|
default -> throw new InputMismatchException("Invalid input.");
|
||||||
|
}
|
||||||
|
} catch (InputMismatchException | IOException ex) {
|
||||||
|
System.out.println(ex);
|
||||||
|
sc.next();
|
||||||
}
|
}
|
||||||
}catch(InputMismatchException ex){
|
|
||||||
System.out.println(ex);
|
|
||||||
sc.next();
|
|
||||||
}
|
}
|
||||||
}
|
sc.close();
|
||||||
sc.close();
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void makeCells() {
|
private static void makeCells() {
|
||||||
@@ -46,13 +300,13 @@ public class UserInterface {
|
|||||||
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
|
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
|
||||||
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
|
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
|
||||||
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
|
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
|
||||||
System.out.println("\nThe cells will be written to a file.");
|
System.out.println("\nThe cells will be written to a CSV file.");
|
||||||
System.out.print("Please enter a file name: ");
|
System.out.print("Please enter a file name: ");
|
||||||
filename = sc.next();
|
filename = sc.next();
|
||||||
System.out.println("CDR3 sequences are more diverse than CDR1 sequences.");
|
System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
|
||||||
System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
|
System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
|
||||||
cdr1Freq = sc.nextInt();
|
cdr1Freq = sc.nextInt();
|
||||||
System.out.print("Please enter the number of T-cells to generate: ");
|
System.out.print("\nPlease enter the number of T-cells to generate: ");
|
||||||
numCells = sc.nextInt();
|
numCells = sc.nextInt();
|
||||||
if(numCells <= 0){
|
if(numCells <= 0){
|
||||||
throw new InputMismatchException("Number of cells must be a positive integer.");
|
throw new InputMismatchException("Number of cells must be a positive integer.");
|
||||||
@@ -62,55 +316,110 @@ public class UserInterface {
|
|||||||
sc.next();
|
sc.next();
|
||||||
}
|
}
|
||||||
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
|
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
|
||||||
|
assert filename != null;
|
||||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||||
writer.writeCellsToFile();
|
writer.writeCellsToFile();
|
||||||
|
System.gc();
|
||||||
}
|
}
|
||||||
|
|
||||||
//method to output a CSV of
|
// //for calling from command line
|
||||||
|
// private static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
|
||||||
|
// CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
|
||||||
|
// CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||||
|
// writer.writeCellsToFile();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// private static void makePlateExp(String cellFile, String filename, Double lambda,
|
||||||
|
// Integer numWells, Integer[] concentrations, Double dropOutRate){
|
||||||
|
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||||
|
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||||
|
// samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
|
||||||
|
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||||
|
// writer.writePlateFile();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
|
||||||
|
// Integer[] concentrations, Double dropOutRate){
|
||||||
|
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||||
|
// Double stdDev = Math.sqrt(cellReader.getCellCount());
|
||||||
|
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||||
|
// samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
|
||||||
|
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||||
|
// writer.writePlateFile();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// private static void makePlate(String cellFile, String filename, Double stdDev,
|
||||||
|
// Integer numWells, Integer[] concentrations, Double dropOutRate){
|
||||||
|
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||||
|
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||||
|
// samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
|
||||||
|
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||||
|
// writer.writePlateFile();
|
||||||
|
// }
|
||||||
|
|
||||||
|
//Output a CSV of sample plate
|
||||||
private static void makePlate() {
|
private static void makePlate() {
|
||||||
String cellFile = null;
|
String cellFile = null;
|
||||||
String filename = null;
|
String filename = null;
|
||||||
Double stdDev = 0.0;
|
Double stdDev = 0.0;
|
||||||
Integer numWells = 0;
|
Integer numWells = 0;
|
||||||
Integer numSections = 0;
|
Integer numSections;
|
||||||
Integer[] concentrations = {1};
|
Integer[] concentrations = {1};
|
||||||
Double dropOutRate = 0.0;
|
Double dropOutRate = 0.0;
|
||||||
boolean poisson = false;
|
boolean poisson = false;
|
||||||
|
boolean exponential = false;
|
||||||
|
double lambda = 1.5;
|
||||||
try {
|
try {
|
||||||
|
System.out.println("\nSimulated sample plates consist of:");
|
||||||
|
System.out.println("* a number of wells");
|
||||||
|
System.out.println(" * separated into one or more sections");
|
||||||
|
System.out.println(" * each of which has a set quantity of cells per well");
|
||||||
|
System.out.println(" * selected from a statistical distribution of distinct cells");
|
||||||
|
System.out.println(" * with a set dropout rate for individual sequences within a cell");
|
||||||
System.out.println("\nMaking a sample plate requires a population of distinct cells");
|
System.out.println("\nMaking a sample plate requires a population of distinct cells");
|
||||||
System.out.println("Please enter name of an existing cell sample file: ");
|
System.out.print("Please enter name of an existing cell sample file: ");
|
||||||
cellFile = sc.next();
|
cellFile = sc.next();
|
||||||
System.out.println("\nThe sample plate will be written to file");
|
System.out.println("\nThe sample plate will be written to a CSV file");
|
||||||
System.out.print("Please enter a name for the output file: ");
|
System.out.print("Please enter a name for the output file: ");
|
||||||
filename = sc.next();
|
filename = sc.next();
|
||||||
System.out.println("Select T-cell frequency distribution function");
|
System.out.println("\nSelect T-cell frequency distribution function");
|
||||||
System.out.println("1) Poisson");
|
System.out.println("1) Poisson");
|
||||||
System.out.println("2) Gaussian");
|
System.out.println("2) Gaussian");
|
||||||
|
System.out.println("3) Exponential");
|
||||||
|
System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
|
||||||
|
System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
|
||||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||||
System.out.print("Enter selection value: ");
|
System.out.print("Enter selection value: ");
|
||||||
input = sc.nextInt();
|
input = sc.nextInt();
|
||||||
switch(input) {
|
switch (input) {
|
||||||
case 1:
|
case 1 -> poisson = true;
|
||||||
poisson = true;
|
case 2 -> {
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||||
stdDev = sc.nextDouble();
|
stdDev = sc.nextDouble();
|
||||||
if(stdDev <= 0.0){
|
if (stdDev <= 0.0) {
|
||||||
throw new InputMismatchException("Value must be positive.");
|
throw new InputMismatchException("Value must be positive.");
|
||||||
}
|
}
|
||||||
break;
|
}
|
||||||
default:
|
case 3 -> {
|
||||||
System.out.println("Invalid input. Defaulting to Poisson.");
|
exponential = true;
|
||||||
poisson = true;
|
System.out.println("Please enter lambda value for exponential distribution.");
|
||||||
|
lambda = sc.nextDouble();
|
||||||
|
if (lambda <= 0.0) {
|
||||||
|
throw new InputMismatchException("Value must be positive.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default -> {
|
||||||
|
System.out.println("Invalid input. Defaulting to exponential.");
|
||||||
|
exponential = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
System.out.print("Number of wells on plate: ");
|
System.out.print("\nNumber of wells on plate: ");
|
||||||
numWells = sc.nextInt();
|
numWells = sc.nextInt();
|
||||||
if(numWells < 1){
|
if(numWells < 1){
|
||||||
throw new InputMismatchException("No wells on plate");
|
throw new InputMismatchException("No wells on plate");
|
||||||
}
|
}
|
||||||
System.out.println("The plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
|
System.out.println("\nThe plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
|
||||||
System.out.println("How many sections would you like to make (minimum 1)?");
|
System.out.println("How many sections would you like to make (minimum 1)?");
|
||||||
numSections = sc.nextInt();
|
numSections = sc.nextInt();
|
||||||
if(numSections < 1) {
|
if(numSections < 1) {
|
||||||
@@ -127,7 +436,7 @@ public class UserInterface {
|
|||||||
i++;
|
i++;
|
||||||
numSections--;
|
numSections--;
|
||||||
}
|
}
|
||||||
System.out.println("Errors in amplification can induce a well dropout rate for peptides");
|
System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
|
||||||
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
|
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
|
||||||
dropOutRate = sc.nextDouble();
|
dropOutRate = sc.nextDouble();
|
||||||
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
|
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
|
||||||
@@ -137,147 +446,249 @@ public class UserInterface {
|
|||||||
System.out.println(ex);
|
System.out.println(ex);
|
||||||
sc.next();
|
sc.next();
|
||||||
}
|
}
|
||||||
|
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||||
|
assert cellFile != null;
|
||||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||||
if(poisson) {
|
if(exponential){
|
||||||
stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
|
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||||
|
samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
|
||||||
|
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||||
|
writer.writePlateFile();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (poisson) {
|
||||||
|
stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||||
|
}
|
||||||
|
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||||
|
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
|
||||||
|
assert filename != null;
|
||||||
|
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||||
|
System.out.println("Writing Sample Plate to file");
|
||||||
|
writer.writePlateFile();
|
||||||
|
System.out.println("Sample Plate written to file: " + filename);
|
||||||
|
System.gc();
|
||||||
}
|
}
|
||||||
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations, stdDev);
|
|
||||||
samplePlate.fillWells(cellReader.getCells());
|
|
||||||
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
|
||||||
writer.writePlateFile();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void matchCells() {
|
//Output serialized binary of GraphAndMapData object
|
||||||
|
private static void makeCDR3Graph() {
|
||||||
String filename = null;
|
String filename = null;
|
||||||
String cellFile = null;
|
String cellFile = null;
|
||||||
String plateFile = null;
|
String plateFile = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
|
||||||
|
str = str.concat("\nrequires a cell sample file and a sample plate file.");
|
||||||
|
System.out.println(str);
|
||||||
|
System.out.print("\nPlease enter name of an existing cell sample file: ");
|
||||||
|
cellFile = sc.next();
|
||||||
|
System.out.print("\nPlease enter name of an existing sample plate file: ");
|
||||||
|
plateFile = sc.next();
|
||||||
|
System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
|
||||||
|
System.out.print("Please enter a name for the output file: ");
|
||||||
|
filename = sc.next();
|
||||||
|
} catch (InputMismatchException ex) {
|
||||||
|
System.out.println(ex);
|
||||||
|
sc.next();
|
||||||
|
}
|
||||||
|
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||||
|
assert cellFile != null;
|
||||||
|
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||||
|
System.out.println("Reading Sample Plate file: " + plateFile);
|
||||||
|
assert plateFile != null;
|
||||||
|
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||||
|
Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||||
|
if (cellReader.getCells().size() == 0){
|
||||||
|
System.out.println("No cell sample found.");
|
||||||
|
System.out.println("Returning to main menu.");
|
||||||
|
}
|
||||||
|
else if(plate.getWells().size() == 0 || plate.getConcentrations().length == 0){
|
||||||
|
System.out.println("No sample plate found.");
|
||||||
|
System.out.println("Returning to main menu.");
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
List<Integer[]> cells = cellReader.getCells();
|
||||||
|
GraphWithMapData data = Simulator.makeGraph(cells, plate, true);
|
||||||
|
assert filename != null;
|
||||||
|
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
|
||||||
|
System.out.println("Writing graph and occupancy data to file. This may take some time.");
|
||||||
|
System.out.println("File I/O time is not included in results.");
|
||||||
|
dataWriter.writeDataToFile();
|
||||||
|
System.out.println("Graph and Data file written to: " + filename);
|
||||||
|
System.gc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Simulate matching and output CSV file of results
|
||||||
|
private static void matchCDR3s() throws IOException {
|
||||||
|
String filename = null;
|
||||||
|
String dataFilename = null;
|
||||||
Integer lowThreshold = 0;
|
Integer lowThreshold = 0;
|
||||||
Integer highThreshold = Integer.MAX_VALUE;
|
Integer highThreshold = Integer.MAX_VALUE;
|
||||||
|
Integer maxOccupancyDiff = Integer.MAX_VALUE;
|
||||||
|
Integer minOverlapPercent = 0;
|
||||||
try {
|
try {
|
||||||
System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
|
||||||
System.out.print("Please enter name of an existing cell sample file: ");
|
System.out.println("Please enter name of an existing graph and occupancy data file: ");
|
||||||
cellFile = sc.next();
|
dataFilename = sc.next();
|
||||||
System.out.print("Please enter name of an existing sample plate file: ");
|
|
||||||
plateFile = sc.next();
|
|
||||||
System.out.println("The matching results will be written to a file.");
|
System.out.println("The matching results will be written to a file.");
|
||||||
System.out.print("Please enter a name for the output file: ");
|
System.out.print("Please enter a name for the output file: ");
|
||||||
filename = sc.next();
|
filename = sc.next();
|
||||||
System.out.println("What is the minimum number of alpha/beta overlap wells to attempt matching?");
|
System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||||
lowThreshold = sc.nextInt();
|
lowThreshold = sc.nextInt();
|
||||||
if(lowThreshold < 1){
|
if(lowThreshold < 1){
|
||||||
throw new InputMismatchException("Minimum value for low threshold is 1");
|
throw new InputMismatchException("Minimum value for low threshold set to 1");
|
||||||
}
|
}
|
||||||
System.out.println("What is the maximum number of alpha/beta overlap wells to attempt matching?");
|
System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||||
highThreshold = sc.nextInt();
|
highThreshold = sc.nextInt();
|
||||||
|
System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
|
||||||
|
maxOccupancyDiff = sc.nextInt();
|
||||||
|
System.out.println("\nWell overlap percentage = pair overlap / sequence occupancy");
|
||||||
|
System.out.println("What is the minimum well overlap percentage to attempt matching? (0 to 100)");
|
||||||
|
minOverlapPercent = sc.nextInt();
|
||||||
|
if (minOverlapPercent < 0 || minOverlapPercent > 100) {
|
||||||
|
throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||||
|
}
|
||||||
} catch (InputMismatchException ex) {
|
} catch (InputMismatchException ex) {
|
||||||
System.out.println(ex);
|
System.out.println(ex);
|
||||||
sc.next();
|
sc.next();
|
||||||
}
|
}
|
||||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
//read object data from file
|
||||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
System.out.println("Reading graph data from file. This may take some time");
|
||||||
Plate plate = new Plate(plateReader.getWells());
|
System.out.println("File I/O time is not included in results");
|
||||||
if (cellReader.getCells().size() == 0){
|
assert dataFilename != null;
|
||||||
System.out.println("No cell sample found.");
|
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFilename);
|
||||||
System.out.println("Returning to main menu.");
|
GraphWithMapData data = dataReader.getData();
|
||||||
}
|
//set source file name
|
||||||
else if(plate.getWells().size() == 0){
|
data.setSourceFilename(dataFilename);
|
||||||
System.out.println("No sample plate found.");
|
//simulate matching
|
||||||
System.out.println("Returning to main menu.");
|
MatchingResult results = Simulator.matchCDR3s(data, dataFilename, lowThreshold, highThreshold, maxOccupancyDiff,
|
||||||
|
minOverlapPercent, true);
|
||||||
}
|
//write results to file
|
||||||
else{
|
assert filename != null;
|
||||||
if(highThreshold >= plate.getSize()){
|
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
|
||||||
highThreshold = plate.getSize() - 1;
|
System.out.println("Writing results to file");
|
||||||
}
|
writer.writeResultsToFile();
|
||||||
List<Integer[]> cells = cellReader.getCells();
|
System.out.println("Results written to file: " + filename);
|
||||||
MatchingResult results = Simulator.matchCDR3s(cells, plate, lowThreshold, highThreshold);
|
System.gc();
|
||||||
//result writer
|
|
||||||
MatchingFileWriter writer = new MatchingFileWriter(filename, results.getComments(),
|
|
||||||
results.getHeaders(), results.getAllResults());
|
|
||||||
writer.writeResultsToFile();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void matchCellsExpanded(){
|
///////
|
||||||
/*
|
//Rewrite this to fit new matchCDR3 method with file I/O
|
||||||
The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
|
///////
|
||||||
looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
|
// public static void matchCellsCDR1(){
|
||||||
match
|
// /*
|
||||||
*/
|
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
|
||||||
String filename = null;
|
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
|
||||||
String cellFile = null;
|
// match
|
||||||
String plateFile = null;
|
// */
|
||||||
Integer lowThresholdCDR3 = 0;
|
// String filename = null;
|
||||||
Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
// String preliminaryResultsFilename = null;
|
||||||
Integer lowThresholdCDR1 = 0;
|
// String cellFile = null;
|
||||||
Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
// String plateFile = null;
|
||||||
try {
|
// Integer lowThresholdCDR3 = 0;
|
||||||
System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
||||||
System.out.print("Please enter name of an existing cell sample file: ");
|
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
|
||||||
cellFile = sc.next();
|
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
|
||||||
System.out.print("Please enter name of an existing sample plate file: ");
|
// Integer lowThresholdCDR1 = 0;
|
||||||
plateFile = sc.next();
|
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
||||||
System.out.println("The matching results will be written to a file.");
|
// boolean outputCDR3Matches = false;
|
||||||
System.out.print("Please enter a name for the output file: ");
|
// try {
|
||||||
filename = sc.next();
|
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||||
System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
// System.out.print("Please enter name of an existing cell sample file: ");
|
||||||
lowThresholdCDR3 = sc.nextInt();
|
// cellFile = sc.next();
|
||||||
if(lowThresholdCDR3 < 1){
|
// System.out.print("Please enter name of an existing sample plate file: ");
|
||||||
throw new InputMismatchException("Minimum value for low threshold is 1");
|
// plateFile = sc.next();
|
||||||
}
|
// System.out.println("The matching results will be written to a file.");
|
||||||
System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
// System.out.print("Please enter a name for the output file: ");
|
||||||
highThresholdCDR3 = sc.nextInt();
|
// filename = sc.next();
|
||||||
System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||||
lowThresholdCDR1 = sc.nextInt();
|
// lowThresholdCDR3 = sc.nextInt();
|
||||||
if(lowThresholdCDR1 < 1){
|
// if(lowThresholdCDR3 < 1){
|
||||||
throw new InputMismatchException("Minimum value for low threshold is 1");
|
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||||
}
|
// }
|
||||||
System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
|
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||||
highThresholdCDR1 = sc.nextInt();
|
// highThresholdCDR3 = sc.nextInt();
|
||||||
} catch (InputMismatchException ex) {
|
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
|
||||||
System.out.println(ex);
|
// maxOccupancyDiffCDR3 = sc.nextInt();
|
||||||
sc.next();
|
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
|
||||||
}
|
// minOverlapPercentCDR3 = sc.nextInt();
|
||||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
|
||||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||||
Plate plate = new Plate(plateReader.getWells());
|
// }
|
||||||
if (cellReader.getCells().size() == 0){
|
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||||
System.out.println("No cell sample found.");
|
// lowThresholdCDR1 = sc.nextInt();
|
||||||
System.out.println("Returning to main menu.");
|
// if(lowThresholdCDR1 < 1){
|
||||||
}
|
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||||
else if(plate.getWells().size() == 0){
|
// }
|
||||||
System.out.println("No sample plate found.");
|
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||||
System.out.println("Returning to main menu.");
|
// highThresholdCDR1 = sc.nextInt();
|
||||||
|
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
|
||||||
}
|
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
|
||||||
else{
|
// System.out.print("Please enter y/n: ");
|
||||||
if(highThresholdCDR3 >= plate.getSize()){
|
// String ans = sc.next();
|
||||||
highThresholdCDR3 = plate.getSize() - 1;
|
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
|
||||||
}
|
// Matcher matcher = pattern.matcher(ans);
|
||||||
if(highThresholdCDR1 >= plate.getSize()){
|
// if(matcher.matches()){
|
||||||
highThresholdCDR1 = plate.getSize() - 1;
|
// outputCDR3Matches = true;
|
||||||
}
|
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
|
||||||
List<Integer[]> cells = cellReader.getCells();
|
// preliminaryResultsFilename = sc.next();
|
||||||
MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3);
|
// System.out.println("CDR3 alpha/beta matches will be output to file");
|
||||||
MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
// }
|
||||||
highThresholdCDR1, preliminaryResults);
|
// else{
|
||||||
|
// System.out.println("CDR3 alpha/beta matches will not be output to file");
|
||||||
//result writer
|
// }
|
||||||
MatchingFileWriter writer = new MatchingFileWriter(filename + "First", results[0].getComments(),
|
// } catch (InputMismatchException ex) {
|
||||||
results[0].getHeaders(), results[0].getAllResults());
|
// System.out.println(ex);
|
||||||
writer.writeResultsToFile();
|
// sc.next();
|
||||||
writer = new MatchingFileWriter(filename + "Dual", results[1].getComments(),
|
// }
|
||||||
results[1].getHeaders(), results[1].getAllResults());
|
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||||
writer.writeResultsToFile();
|
// PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||||
}
|
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||||
}
|
// if (cellReader.getCells().size() == 0){
|
||||||
|
// System.out.println("No cell sample found.");
|
||||||
|
// System.out.println("Returning to main menu.");
|
||||||
|
// }
|
||||||
|
// else if(plate.getWells().size() == 0){
|
||||||
|
// System.out.println("No sample plate found.");
|
||||||
|
// System.out.println("Returning to main menu.");
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// else{
|
||||||
|
// if(highThresholdCDR3 >= plate.getSize()){
|
||||||
|
// highThresholdCDR3 = plate.getSize() - 1;
|
||||||
|
// }
|
||||||
|
// if(highThresholdCDR1 >= plate.getSize()){
|
||||||
|
// highThresholdCDR1 = plate.getSize() - 1;
|
||||||
|
// }
|
||||||
|
// List<Integer[]> cells = cellReader.getCells();
|
||||||
|
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
|
||||||
|
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
|
||||||
|
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
||||||
|
// highThresholdCDR1, preliminaryResults);
|
||||||
|
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
|
||||||
|
// writer.writeResultsToFile();
|
||||||
|
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
|
||||||
|
// writer.writeResultsToFile();
|
||||||
|
// if(outputCDR3Matches){
|
||||||
|
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
|
||||||
|
// writer.writeResultsToFile();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
private static void acknowledge(){
|
private static void acknowledge(){
|
||||||
System.out.println("Simulation based on:");
|
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
||||||
|
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
||||||
|
System.out.println();
|
||||||
|
System.out.println("For full documentation, view readme.md file distributed with this code");
|
||||||
|
System.out.println("or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.");
|
||||||
|
System.out.println();
|
||||||
|
System.out.println("pairSEQ citation:");
|
||||||
System.out.println("Howie, B., Sherwood, A. M., et. al.");
|
System.out.println("Howie, B., Sherwood, A. M., et. al.");
|
||||||
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
|
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
|
||||||
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
|
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
|
||||||
System.out.println("");
|
System.out.println();
|
||||||
System.out.println("Simulation by Eugene Fischer, 2021");
|
System.out.println("BiGpairSEQ_Sim by Eugene Fischer, 2021-2022");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
17
src/main/java/Vertex.java
Normal file
17
src/main/java/Vertex.java
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
public class Vertex {
|
||||||
|
private final Integer peptide;
|
||||||
|
private final Integer occupancy;
|
||||||
|
|
||||||
|
public Vertex(Integer peptide, Integer occupancy) {
|
||||||
|
this.peptide = peptide;
|
||||||
|
this.occupancy = occupancy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getPeptide() {
|
||||||
|
return peptide;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getOccupancy() {
|
||||||
|
return occupancy;
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user