Compare commits
95 Commits
v1.0
...
96ba57d653
| Author | SHA1 | Date | |
|---|---|---|---|
| 96ba57d653 | |||
| b602fb02f1 | |||
| 325e1ebe2b | |||
| df047267ee | |||
| 03e8d31210 | |||
| 582dc3ef40 | |||
| 4c872ed48e | |||
| 3fc39302c7 | |||
| 578bdc0fbf | |||
| 8275cf7740 | |||
| 64209691f0 | |||
| 1886800873 | |||
| bedf0894bc | |||
| 2ac3451842 | |||
| 67ec3f3764 | |||
| b5a8b7e2d5 | |||
| 9fb3095f0f | |||
| 25acf920c2 | |||
| f301327693 | |||
| e04d2d6777 | |||
| 3e41afaa64 | |||
| bc5d67680d | |||
| f2347e8fc2 | |||
| c8364d8a6e | |||
| 6f5afbc6ec | |||
| fb4d22e7a4 | |||
| e10350c214 | |||
| b1155f8100 | |||
| 12b003a69f | |||
| 32c5bcaaff | |||
| 2485ac4cf6 | |||
| 05556bce0c | |||
| a822f69ea4 | |||
| 3d1f8668ee | |||
| 40c743308b | |||
| 5246cc4a0c | |||
| a5f7c0641d | |||
| 8ebfc1469f | |||
| b53f5f1cc0 | |||
| 974d2d650c | |||
| 6b5837e6ce | |||
| b4cc240048 | |||
| ff72c9b359 | |||
| 88eb8aca50 | |||
| 98bf452891 | |||
| c2db4f87c1 | |||
| 8935407ade | |||
| 9fcc20343d | |||
| e4d094d796 | |||
| f385ebc31f | |||
| 8745550e11 | |||
| 41805135b3 | |||
| 373a5e02f9 | |||
| 7f18311054 | |||
| bcb816c3e6 | |||
| dad0fd35fd | |||
| 35d580cfcf | |||
| ab8d98ed81 | |||
| 3d9890e16a | |||
| dd64ac2731 | |||
| a5238624f1 | |||
| d8ba42b801 | |||
| 8edd89d784 | |||
| 2829b88689 | |||
| 108b0ec13f | |||
| a8b58d3f79 | |||
| bf64d57731 | |||
| c068c3db3c | |||
| 4bcda9b66c | |||
| 17ae763c6c | |||
| decdb147a9 | |||
| 74ffbfd8ac | |||
| 08699ce8ce | |||
| 69b0cc535c | |||
| e58f7b0a55 | |||
| dd2164c250 | |||
| 7323093bdc | |||
| f904cf6672 | |||
| 3ccee9891b | |||
| 40c2be1cfb | |||
| 4b597c4e5e | |||
| b2398531a3 | |||
| 8e9a250890 | |||
| e2a996c997 | |||
| a5db89cb0b | |||
| 1630f9ccba | |||
| d785aa0da2 | |||
| a7afeb6119 | |||
| f8167b0774 | |||
| 68ee9e4bb6 | |||
| fd2ec76b71 | |||
| 875f457a2d | |||
| 906c06062f | |||
| 90ae2ff474 | |||
| 7d983076f3 |
206
readme.md
206
readme.md
@@ -12,7 +12,7 @@ Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and c
|
||||
against a null distribution, BiGpairSEQ does not do any statistical calculations
|
||||
directly.
|
||||
|
||||
BiGpairSEQ creates a [simple bipartite weighted graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
|
||||
BiGpairSEQ creates a [weighted bipartite graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
|
||||
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that share a well
|
||||
are connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
|
||||
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
|
||||
@@ -20,8 +20,8 @@ The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment probl
|
||||
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
||||
|
||||
This is a well-studied combinatorial optimization problem, with many known solutions.
|
||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
|
||||
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral
|
||||
weights is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
||||
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
||||
|
||||
@@ -29,15 +29,13 @@ Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph
|
||||
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
|
||||
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Melhorn and Näher (1999).
|
||||
|
||||
The current version of the program uses a pairing heap instead of a Fibonacci heap for its priority queue,
|
||||
which has lower theoretical efficiency but also lower complexity overhead, and is often equivalently performant
|
||||
in practice.
|
||||
|
||||
## USAGE
|
||||
|
||||
### RUNNING THE PROGRAM
|
||||
|
||||
BiGpairSEQ_Sim is an executable .jar file. Requires Java 11 or higher. [OpenJDK 17](https://jdk.java.net/17/)
|
||||
[Download the current version of BiGpairSEQ_Sim.](https://gitea.ejsf.synology.me/efischer/BiGpairSEQ/releases)
|
||||
|
||||
BiGpairSEQ_Sim is an executable .jar file. Requires Java 14 or higher. [OpenJDK 17](https://jdk.java.net/17/)
|
||||
recommended.
|
||||
|
||||
Run with the command:
|
||||
@@ -45,39 +43,75 @@ Run with the command:
|
||||
`java -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
Processing sample plates with tens of thousands of sequences may require large amounts
|
||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
|
||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the `-Xmx` flag.
|
||||
For example, to run the program with 32 gigabytes of memory, use the command:
|
||||
|
||||
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
Once running, BiGpairSEQ_Sim has an interactive, menu-driven CLI for generating files and simulating TCR pairing. The
|
||||
main menu looks like this:
|
||||
There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
|
||||
use the `-help` flag:
|
||||
|
||||
`java -jar BiGpairSEQ_Sim.jar -help`
|
||||
|
||||
If no command line arguments are given, BiGpairSEQ_Sim will launch with an interactive, menu-driven CLI for
|
||||
generating files and simulating TCR pairing. The main menu looks like this:
|
||||
|
||||
```
|
||||
--------BiGPairSEQ SIMULATOR--------
|
||||
ALPHA/BETA T CELL RECEPTOR MATCHING
|
||||
USING WEIGHTED BIPARTITE GRAPHS
|
||||
USING WEIGHTED BIPARTITE GRAPHS
|
||||
------------------------------------
|
||||
Please select an option:
|
||||
1) Generate a population of distinct cells
|
||||
2) Generate a sample plate of T cells
|
||||
3) Generate CDR3 alpha/beta occupancy data and overlap graph
|
||||
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
|
||||
8) Options
|
||||
9) About/Acknowledgments
|
||||
0) Exit
|
||||
```
|
||||
|
||||
### OUTPUT
|
||||
By default, the Options menu looks like this:
|
||||
```
|
||||
--------------OPTIONS---------------
|
||||
1) Turn on cell sample file caching
|
||||
2) Turn on plate file caching
|
||||
3) Turn on graph/data file caching
|
||||
4) Turn off serialized binary graph output
|
||||
5) Turn on GraphML graph output
|
||||
6) Maximum weight matching algorithm options
|
||||
0) Return to main menu
|
||||
```
|
||||
|
||||
|
||||
### INPUT/OUTPUT
|
||||
|
||||
To run the simulation, the program reads and writes 4 kinds of files:
|
||||
* Cell Sample files in CSV format
|
||||
* Sample Plate files in CSV format
|
||||
* Graph and Data files in binary object serialization format
|
||||
* Graph/Data files in binary object serialization format
|
||||
* Matching Results files in CSV format
|
||||
|
||||
When entering filenames, it is not necessary to include the file extension (.csv or .ser). When reading or
|
||||
writing files, the program will automatically add the correct extension to any filename without one.
|
||||
These files are often generated in sequence. When entering filenames, it is not necessary to include the file extension
|
||||
(.csv or .ser). When reading or writing files, the program will automatically add the correct extension to any filename
|
||||
without one.
|
||||
|
||||
To save file I/O time, the most recent instance of each of these four
|
||||
files either generated or read from disk can be cached in program memory. When caching is active, subsequent uses of the
|
||||
same data file won't need to be read in again until another file of that type is used or generated,
|
||||
or caching is turned off for that file type. The program checks whether it needs to update its cached data by comparing
|
||||
filenames as entered by the user. On encountering a new filename, the program flushes its cache and reads in the new file.
|
||||
|
||||
(Note that cached Graph/Data files must be transformed back into their original state after a matching experiment, which
|
||||
may take some time. Whether file I/O or graph transformation takes longer for graph/data files is likely to be
|
||||
device-specific.)
|
||||
|
||||
The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
|
||||
|
||||
The program can optionally output Graph/Data files in GraphML format (.graphml) for data portability. This can be
|
||||
turned on in the Options menu. By default, GraphML output is OFF.
|
||||
|
||||
---
|
||||
#### Cell Sample Files
|
||||
Cell Sample files consist of any number of distinct "T cells." Every cell contains
|
||||
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
|
||||
@@ -95,7 +129,6 @@ Comments are preceded by `#`
|
||||
|
||||
Structure:
|
||||
|
||||
---
|
||||
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
|
||||
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|
||||
|---|---|---|---|
|
||||
@@ -119,15 +152,18 @@ Options when making a Sample Plate file:
|
||||
* Standard deviation size
|
||||
* Exponential
|
||||
* Lambda value
|
||||
* (Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was exponential with a lambda of approximately 0.6. (Howie, et al. 2015))
|
||||
* *(Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was approximately exponential with a lambda ~0.6. (Howie, et al. 2015))*
|
||||
* Total number of wells on the plate
|
||||
* Number of sections on plate
|
||||
* Number of T cells per well
|
||||
* per section, if more than one section
|
||||
* Well populations random or fixed
|
||||
* If random, minimum and maximum population sizes
|
||||
* If fixed
|
||||
* Number of sections on plate
|
||||
* Number of T cells per well
|
||||
* per section, if more than one section
|
||||
* Dropout rate
|
||||
|
||||
Files are in CSV format. There are no header labels. Every row represents a well.
|
||||
Every column represents an individual cell, containing four sequences, depicted as an array string:
|
||||
Every value represents an individual cell, containing four sequences, depicted as an array string:
|
||||
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
|
||||
|
||||
`[525902, 791533, -1, 866282]`
|
||||
@@ -137,7 +173,6 @@ Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
|
||||
|
||||
Structure:
|
||||
|
||||
---
|
||||
```
|
||||
# Cell source file name:
|
||||
# Each row represents one well on the plate
|
||||
@@ -153,25 +188,37 @@ Structure:
|
||||
|
||||
---
|
||||
|
||||
#### Graph and Data Files
|
||||
Graph and Data files are serialized binaries of a Java object containing the weigthed bipartite graph representation of a
|
||||
#### Graph/Data Files
|
||||
Graph/Data files are serialized binaries of a Java object containing the weigthed bipartite graph representation of a
|
||||
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
|
||||
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
|
||||
Sample Plate file (to construct the associated occupancy graph). These files can be several gigabytes in size.
|
||||
Writing them to a file lets us generate a graph and its metadata once, then use it for multiple different BiGpairSEQ simulations.
|
||||
Sample Plate file (to construct the associated occupancy graph).
|
||||
|
||||
Options for creating a Graph and Data file:
|
||||
These files can be several gigabytes in size. Writing them to a file lets us generate a graph and its metadata once,
|
||||
then use it for multiple different BiGpairSEQ simulations.
|
||||
|
||||
Options for creating a Graph/Data file:
|
||||
* The Cell Sample file to use
|
||||
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
|
||||
|
||||
These files do not have a human-readable structure, and are not portable to other programs. (Export of graphs in a
|
||||
portable data format may be implemented in the future. The tricky part is encoding the necessary metadata.)
|
||||
These files do not have a human-readable structure, and are not portable to other programs.
|
||||
|
||||
*Optional GraphML output*
|
||||
|
||||
For portability of graph data to other software, turn on [GraphML](http://graphml.graphdrawing.org/index.html) output
|
||||
in the Options menu in interactive mode, or use the `-graphml`command line argument. This will produce a .graphml file
|
||||
for the weighted graph, with vertex attributes for sequence, type, and occupancy data. This graph contains all the data
|
||||
necessary for the BiGpairSEQ matching algorithm. It does not include the data to measure pairing accuracy; for that,
|
||||
compare the matching results to the original Cell Sample .csv file.
|
||||
|
||||
---
|
||||
|
||||
#### Matching Results Files
|
||||
Matching results files consist of the results of a BiGpairSEQ matching simulation.
|
||||
Files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
|
||||
Matching results files consist of the results of a BiGpairSEQ matching simulation. Making them requires a serialized
|
||||
binary Graph/Data file (.ser). (Because .graphML files are larger than .ser files, BiGpairSEQ_Sim supports .graphML
|
||||
output only. Graph/data input must use a serialized binary.)
|
||||
|
||||
Matching results files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
|
||||
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
|
||||
|
||||
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
|
||||
@@ -186,7 +233,6 @@ Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
|
||||
|
||||
Example output:
|
||||
|
||||
---
|
||||
```
|
||||
# Source Sample Plate file: 4MilCellsPlate.csv
|
||||
# Source Graph and Data file: 4MilCellsPlateGraph.ser
|
||||
@@ -218,45 +264,99 @@ Example output:
|
||||
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
|
||||
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
|
||||
|
||||
### PERFORMANCE
|
||||
Performance details of the example excerpted above:
|
||||
|
||||
## PERFORMANCE
|
||||
|
||||
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
|
||||
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
|
||||
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
|
||||
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution (lambda 0.6).
|
||||
|
||||
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
|
||||
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
|
||||
|
||||
The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
|
||||
The total simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
|
||||
|
||||
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
|
||||
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
|
||||
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
|
||||
|
||||
As mentioned in the theory section, performance could be improved by implementing a more efficient algorithm for finding
|
||||
the maximum weighted matching.
|
||||
|
||||
## BEHAVIOR WITH RANDOMIZED WELL POPULATIONS
|
||||
|
||||
A series of BiGpairSEQ simulations were conducted using a cell sample file of 3.5 million unique T cells. From these cells,
|
||||
10 sample plate files were created. All of these sample plates had 96 wells, used an exponential distribution with a lambda of 0.6, and
|
||||
had a sequence dropout rate of 10%.
|
||||
|
||||
The well populations of the plates were:
|
||||
* One sample plate with 1000 T cells/well
|
||||
* One sample plate with 2000 T cells/well
|
||||
* One sample plate with 3000 T cells/well
|
||||
* One sample plate with 4000 T cells/well
|
||||
* One sample plate with 5000 T cells/well
|
||||
* Five sample plates with each individual well's population randomized, from 1000 to 5000 T cells. (Average population ~3000 T cells/well.)
|
||||
|
||||
All BiGpairSEQ simulations were run with a low overlap threshold of 3 and a high overlap threshold of 94.
|
||||
No optional filters were used, so pairing was attempted for all sequences with overlaps within the threshold values.
|
||||
|
||||
Constant well population plate results:
|
||||
|
||||
| |1000 Cell/Well Plate|2000 Cell/Well Plate|3000 Cell/Well Plate|4000 Cell/Well Plate|5000 Cell/Well Plate
|
||||
|---|---|---|---|---|---|
|
||||
|Total Alphas Found|6407|7330|7936|8278|8553|
|
||||
|Total Betas Found|6405|7333|7968|8269|8582|
|
||||
|Pairing Attempt Rate|0.661|0.653|0.600|0.579|0.559|
|
||||
|Correct Pairing Count|4231|4749|4723|4761|4750|
|
||||
|Incorrect Pairing Count|3|34|40|26|29|
|
||||
|Pairing Error Rate|0.000709|0.00711|0.00840|0.00543|0.00607|
|
||||
|Simulation Time (Seconds)|500|643|700|589|598|
|
||||
|
||||
Randomized well population plate results:
|
||||
|
||||
| |Random Plate 1 | Random Plate 2|Random Plate 3|Random Plate 4|Random Plate 5|Average|
|
||||
|---|---|---|---|---|---|---|
|
||||
Total Alphas Found|7853|7904|7964|7898|7917|7907|
|
||||
Total Betas Found|7851|7891|7920|7910|7894|7893|
|
||||
Pairing Attempt Rate|0.607|0.610|0.601|0.605|0.603|0.605|
|
||||
Correct Pairing Count|4718|4782|4721|4755|4731|4741|
|
||||
Incorrect Pairing Count|51|35|42|27|29|37|
|
||||
Pairing Error Rate|0.0107|0.00727|0.00882|0.00565|0.00609|0.00771|
|
||||
Simulation Time (Seconds)|590|677|730|618|615|646|
|
||||
|
||||
The average results for the randomized plates are closest to the constant plate with 3000 T cells/well.
|
||||
This and several other tests indicate that BiGpairSEQ treats a sample plate with a highly variable number of T cells/well
|
||||
roughly as though it had a constant well population equal to the plate's average well population.
|
||||
|
||||
## TODO
|
||||
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* ~~Hold graph data in memory until another graph is read-in?~~ ABANDONED
|
||||
* *No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*
|
||||
* ~~Hold graph data in memory until another graph is read-in? ABANDONED UNABANDONED~~ DONE
|
||||
* ~~*No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*~~
|
||||
* Might have figured out a way to do it, by taking edges out and then putting them back into the graph. This may actually be possible.
|
||||
* It is possible, though the modifications to the graph incur their own performance penalties. Need testing to see which option is best. It may be computer-specific.
|
||||
* ~~Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm~~ DONE
|
||||
* ~~in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage~~
|
||||
* ~~Add controllable heap-type parameter?~~
|
||||
* Parameter implemented. Fibonacci heap the current default.
|
||||
* ~~Implement sample plates with random numbers of T cells per well.~~ DONE
|
||||
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
|
||||
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
|
||||
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
|
||||
* ~~Problem is variable number of cells in a well~~
|
||||
* ~~Apache Commons CSV library writes entries a row at a time~~
|
||||
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Thingiverse transposes CSV matrices in R, might not even be worth fixing._
|
||||
* Re-implement command line arguments, to enable scripting and statistical simulation studies
|
||||
* Implement sample plates with random numbers of T cells per well.
|
||||
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
|
||||
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
|
||||
* Enable GraphML output in addition to serialized object binaries, for data portability
|
||||
* Custom vertex type with attribute for sequence occupancy?
|
||||
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily Thingiverse transposes CSV matrices in R, might not even be worth fixing.
|
||||
* ~~Enable GraphML output in addition to serialized object binaries, for data portability~~ DONE
|
||||
* ~~Custom vertex type with attribute for sequence occupancy?~~ ABANDONED
|
||||
* Advantage: would eliminate the need to use maps to associate vertices with sequences, which would make the code easier to understand.
|
||||
* Have a branch where this is implemented, but there's a bug that broke matching. Don't currently have time to fix.
|
||||
* ~~Re-implement command line arguments, to enable scripting and statistical simulation studies~~ DONE
|
||||
* Re-implement CDR1 matching method
|
||||
* Implement Duan and Su's maximum weight matching algorithm
|
||||
* Add controllable algorithm-type parameter?
|
||||
* Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm
|
||||
* in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage
|
||||
* Add controllable heap-type parameter?
|
||||
|
||||
|
||||
* Add controllable algorithm-type parameter?
|
||||
* This would be fun and valuable, but probably take more time than I have for a hobby project.
|
||||
* Implement Vose's alias method for arbitrary statistical distributions of cells
|
||||
|
||||
|
||||
## CITATIONS
|
||||
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
|
||||
@@ -268,7 +368,7 @@ slightly less time than the simulation itself. Real elapsed time from start to f
|
||||
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
|
||||
* [JHeaps](https://www.jheaps.org) -- For pairing heap priority queue used in maximum weight matching algorithm
|
||||
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
|
||||
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting. (**Awaiting re-implementation**.)
|
||||
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting.
|
||||
|
||||
## ACKNOWLEDGEMENTS
|
||||
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
|
||||
|
||||
177
src/main/java/BiGpairSEQ.java
Normal file
177
src/main/java/BiGpairSEQ.java
Normal file
@@ -0,0 +1,177 @@
|
||||
import java.util.Random;
|
||||
|
||||
//main class. For choosing interface type and holding settings
|
||||
public class BiGpairSEQ {
|
||||
|
||||
private static final Random rand = new Random();
|
||||
private static CellSample cellSampleInMemory = null;
|
||||
private static String cellFilename = null;
|
||||
private static Plate plateInMemory = null;
|
||||
private static String plateFilename = null;
|
||||
private static GraphWithMapData graphInMemory = null;
|
||||
private static String graphFilename = null;
|
||||
private static boolean cacheCells = false;
|
||||
private static boolean cachePlate = false;
|
||||
private static boolean cacheGraph = false;
|
||||
private static String priorityQueueHeapType = "FIBONACCI";
|
||||
private static boolean outputBinary = true;
|
||||
private static boolean outputGraphML = false;
|
||||
private static final String version = "version 2.0";
|
||||
|
||||
public static void main(String[] args) {
|
||||
if (args.length == 0) {
|
||||
InteractiveInterface.startInteractive();
|
||||
}
|
||||
else {
|
||||
//This will be uncommented when command line arguments are re-implemented.
|
||||
CommandLineInterface.startCLI(args);
|
||||
//System.out.println("Command line arguments are still being re-implemented.");
|
||||
}
|
||||
}
|
||||
|
||||
public static Random getRand() {
|
||||
return rand;
|
||||
}
|
||||
|
||||
public static CellSample getCellSampleInMemory() {
|
||||
return cellSampleInMemory;
|
||||
}
|
||||
|
||||
public static void setCellSampleInMemory(CellSample cellSample, String filename) {
|
||||
if(cellSampleInMemory != null) {
|
||||
clearCellSampleInMemory();
|
||||
}
|
||||
cellSampleInMemory = cellSample;
|
||||
cellFilename = filename;
|
||||
System.out.println("Cell sample file " + filename + " cached.");
|
||||
}
|
||||
|
||||
public static void clearCellSampleInMemory() {
|
||||
cellSampleInMemory = null;
|
||||
cellFilename = null;
|
||||
System.gc();
|
||||
System.out.println("Cell sample file cache cleared.");
|
||||
|
||||
}
|
||||
|
||||
public static String getCellFilename() {
|
||||
return cellFilename;
|
||||
}
|
||||
|
||||
public static Plate getPlateInMemory() {
|
||||
return plateInMemory;
|
||||
}
|
||||
|
||||
public static void setPlateInMemory(Plate plate, String filename) {
|
||||
if(plateInMemory != null) {
|
||||
clearPlateInMemory();
|
||||
}
|
||||
plateInMemory = plate;
|
||||
plateFilename = filename;
|
||||
System.out.println("Sample plate file " + filename + " cached.");
|
||||
}
|
||||
|
||||
public static void clearPlateInMemory() {
|
||||
plateInMemory = null;
|
||||
plateFilename = null;
|
||||
System.gc();
|
||||
System.out.println("Sample plate file cache cleared.");
|
||||
|
||||
}
|
||||
|
||||
public static String getPlateFilename() {
|
||||
return plateFilename;
|
||||
}
|
||||
|
||||
|
||||
public static GraphWithMapData getGraphInMemory() {return graphInMemory;
|
||||
}
|
||||
|
||||
public static void setGraphInMemory(GraphWithMapData g, String filename) {
|
||||
if (graphInMemory != null) {
|
||||
clearGraphInMemory();
|
||||
}
|
||||
graphInMemory = g;
|
||||
graphFilename = filename;
|
||||
System.out.println("Graph and data file " + filename + " cached.");
|
||||
}
|
||||
|
||||
public static void clearGraphInMemory() {
|
||||
graphInMemory = null;
|
||||
graphFilename = null;
|
||||
System.gc();
|
||||
System.out.println("Graph and data file cache cleared.");
|
||||
}
|
||||
|
||||
public static String getGraphFilename() {
|
||||
return graphFilename;
|
||||
}
|
||||
|
||||
|
||||
public static boolean cacheCells() {
|
||||
return cacheCells;
|
||||
}
|
||||
|
||||
public static void setCacheCells(boolean cacheCells) {
|
||||
//if not caching, clear the memory
|
||||
if(!cacheCells){
|
||||
BiGpairSEQ.clearCellSampleInMemory();
|
||||
System.out.println("Cell sample file caching: OFF.");
|
||||
}
|
||||
else {
|
||||
System.out.println("Cell sample file caching: ON.");
|
||||
}
|
||||
BiGpairSEQ.cacheCells = cacheCells;
|
||||
}
|
||||
|
||||
public static boolean cachePlate() {
|
||||
return cachePlate;
|
||||
}
|
||||
|
||||
public static void setCachePlate(boolean cachePlate) {
|
||||
//if not caching, clear the memory
|
||||
if(!cachePlate) {
|
||||
BiGpairSEQ.clearPlateInMemory();
|
||||
System.out.println("Sample plate file caching: OFF.");
|
||||
}
|
||||
else {
|
||||
System.out.println("Sample plate file caching: ON.");
|
||||
}
|
||||
BiGpairSEQ.cachePlate = cachePlate;
|
||||
}
|
||||
|
||||
public static boolean cacheGraph() {
|
||||
return cacheGraph;
|
||||
}
|
||||
|
||||
public static void setCacheGraph(boolean cacheGraph) {
|
||||
//if not caching, clear the memory
|
||||
if(!cacheGraph) {
|
||||
BiGpairSEQ.clearGraphInMemory();
|
||||
System.out.println("Graph/data file caching: OFF.");
|
||||
}
|
||||
else {
|
||||
System.out.println("Graph/data file caching: ON.");
|
||||
}
|
||||
BiGpairSEQ.cacheGraph = cacheGraph;
|
||||
}
|
||||
|
||||
public static String getPriorityQueueHeapType() {
|
||||
return priorityQueueHeapType;
|
||||
}
|
||||
|
||||
public static void setPairingHeap() {
|
||||
priorityQueueHeapType = "PAIRING";
|
||||
}
|
||||
|
||||
public static void setFibonacciHeap() {
|
||||
priorityQueueHeapType = "FIBONACCI";
|
||||
}
|
||||
|
||||
public static boolean outputBinary() {return outputBinary;}
|
||||
public static void setOutputBinary(boolean b) {outputBinary = b;}
|
||||
|
||||
public static boolean outputGraphML() {return outputGraphML;}
|
||||
public static void setOutputGraphML(boolean b) {outputGraphML = b;}
|
||||
public static String getVersion() { return version; }
|
||||
}
|
||||
@@ -13,6 +13,7 @@ public class CellFileReader {
|
||||
|
||||
private String filename;
|
||||
private List<Integer[]> distinctCells = new ArrayList<>();
|
||||
private Integer cdr1Freq;
|
||||
|
||||
public CellFileReader(String filename) {
|
||||
if(!filename.matches(".*\\.csv")){
|
||||
@@ -38,19 +39,37 @@ public class CellFileReader {
|
||||
cell[3] = Integer.valueOf(record.get("Beta CDR1"));
|
||||
distinctCells.add(cell);
|
||||
}
|
||||
|
||||
|
||||
} catch(IOException ex){
|
||||
System.out.println("cell file " + filename + " not found.");
|
||||
System.err.println(ex);
|
||||
}
|
||||
|
||||
//get CDR1 frequency
|
||||
ArrayList<Integer> cdr1Alphas = new ArrayList<>();
|
||||
for (Integer[] cell : distinctCells) {
|
||||
cdr1Alphas.add(cell[3]);
|
||||
}
|
||||
double count = cdr1Alphas.stream().distinct().count();
|
||||
count = Math.ceil(distinctCells.size() / count);
|
||||
cdr1Freq = (int) count;
|
||||
|
||||
}
|
||||
|
||||
public CellSample getCellSample() {
|
||||
return new CellSample(distinctCells, cdr1Freq);
|
||||
}
|
||||
|
||||
public String getFilename() { return filename;}
|
||||
|
||||
public List<Integer[]> getCells(){
|
||||
//Refactor everything that uses this to have access to a Cell Sample and get the cells there instead.
|
||||
public List<Integer[]> getListOfDistinctCellsDEPRECATED(){
|
||||
return distinctCells;
|
||||
}
|
||||
|
||||
public Integer getCellCount() {
|
||||
public Integer getCellCountDEPRECATED() {
|
||||
//Refactor everything that uses this to have access to a Cell Sample and get the count there instead.
|
||||
return distinctCells.size();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,37 @@
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class CellSample {
|
||||
|
||||
private List<Integer[]> cells;
|
||||
private Integer cdr1Freq;
|
||||
|
||||
public CellSample(Integer numDistinctCells, Integer cdr1Freq){
|
||||
this.cdr1Freq = cdr1Freq;
|
||||
List<Integer> numbersCDR3 = new ArrayList<>();
|
||||
List<Integer> numbersCDR1 = new ArrayList<>();
|
||||
Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||
IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
|
||||
IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
|
||||
Collections.shuffle(numbersCDR3);
|
||||
Collections.shuffle(numbersCDR1);
|
||||
|
||||
//Each cell represented by 4 values
|
||||
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
|
||||
List<Integer[]> distinctCells = new ArrayList<>();
|
||||
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||
Integer tmpCDR3a = numbersCDR3.get(i);
|
||||
Integer tmpCDR3b = numbersCDR3.get(i+1);
|
||||
Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
|
||||
Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
|
||||
Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
|
||||
distinctCells.add(tmp);
|
||||
}
|
||||
this.cells = distinctCells;
|
||||
}
|
||||
|
||||
public CellSample(List<Integer[]> cells, Integer cdr1Freq){
|
||||
this.cells = cells;
|
||||
this.cdr1Freq = cdr1Freq;
|
||||
@@ -18,7 +45,7 @@ public class CellSample {
|
||||
return cdr1Freq;
|
||||
}
|
||||
|
||||
public Integer population(){
|
||||
public Integer getCellCount(){
|
||||
return cells.size();
|
||||
}
|
||||
|
||||
|
||||
499
src/main/java/CommandLineInterface.java
Normal file
499
src/main/java/CommandLineInterface.java
Normal file
@@ -0,0 +1,499 @@
|
||||
import org.apache.commons.cli.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/*
|
||||
* Class for parsing options passed to program from command line
|
||||
*
|
||||
* Top-level flags:
|
||||
* cells : to make a cell sample file
|
||||
* plate : to make a sample plate file
|
||||
* graph : to make a graph and data file
|
||||
* match : to do a cdr3 matching (WITH OR WITHOUT MAKING A RESULTS FILE. May just want to print summary for piping.)
|
||||
*
|
||||
* Cell flags:
|
||||
* count : number of cells to generate
|
||||
* diversity factor : factor by which CDR3s are more diverse than CDR1s
|
||||
* output : name of the output file
|
||||
*
|
||||
* Plate flags:
|
||||
* cellfile : name of the cell sample file to use as input
|
||||
* wells : the number of wells on the plate
|
||||
* dist : the statistical distribution to use
|
||||
* (if exponential) lambda : the lambda value of the exponential distribution
|
||||
* (if gaussian) stddev : the standard deviation of the gaussian distribution
|
||||
* rand : randomize well populations, take a minimum argument and a maximum argument
|
||||
* populations : number of t cells per well per section (number of arguments determines number of sections)
|
||||
* dropout : plate dropout rate, double from 0.0 to 1.0
|
||||
* output : name of the output file
|
||||
*
|
||||
* Graph flags:
|
||||
* cellfile : name of the cell sample file to use as input
|
||||
* platefile : name of the sample plate file to use as input
|
||||
* output : name of the output file
|
||||
* graphml : output a graphml file
|
||||
* binary : output a serialized binary object file
|
||||
*
|
||||
* Match flags:
|
||||
* graphFile : name of graph and data file to use as input
|
||||
* min : minimum number of overlap wells to attempt a matching
|
||||
* max : the maximum number of overlap wells to attempt a matching
|
||||
* maxdiff : (optional) the maximum difference in occupancy to attempt a matching
|
||||
* minpercent : (optional) the minimum percent overlap to attempt a matching.
|
||||
* writefile : (optional) the filename to write results to
|
||||
* output : the values to print to System.out for piping
|
||||
*
|
||||
*/
|
||||
public class CommandLineInterface {
|
||||
|
||||
public static void startCLI(String[] args) {
|
||||
//Options sets for the different modes
|
||||
Options mainOptions = buildMainOptions();
|
||||
Options cellOptions = buildCellOptions();
|
||||
Options plateOptions = buildPlateOptions();
|
||||
Options graphOptions = buildGraphOptions();
|
||||
Options matchOptions = buildMatchCDR3options();
|
||||
|
||||
CommandLineParser parser = new DefaultParser();
|
||||
try{
|
||||
CommandLine line = parser.parse(mainOptions, Arrays.copyOfRange(args, 0, 1));
|
||||
|
||||
if (line.hasOption("help")) {
|
||||
HelpFormatter formatter = new HelpFormatter();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar", mainOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -cells", cellOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -plate", plateOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -graph", graphOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -match", matchOptions);
|
||||
}
|
||||
else if (line.hasOption("version")) {
|
||||
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||
}
|
||||
else if (line.hasOption("cells")) {
|
||||
line = parser.parse(cellOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
Integer number = Integer.valueOf(line.getOptionValue("n"));
|
||||
Integer diversity = Integer.valueOf(line.getOptionValue("d"));
|
||||
String filename = line.getOptionValue("o");
|
||||
makeCells(filename, number, diversity);
|
||||
}
|
||||
|
||||
else if (line.hasOption("plate")) {
|
||||
line = parser.parse(plateOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
//get the cells
|
||||
String cellFilename = line.getOptionValue("c");
|
||||
CellSample cells = getCells(cellFilename);
|
||||
//get the rest of the parameters
|
||||
Integer[] populations;
|
||||
String outputFilename = line.getOptionValue("o");
|
||||
Integer numWells = Integer.parseInt(line.getOptionValue("w"));
|
||||
Double dropoutRate = Double.parseDouble(line.getOptionValue("err"));
|
||||
if (line.hasOption("random")) {
|
||||
//Array holding values of minimum and maximum populations
|
||||
Integer[] min_max = Stream.of(line.getOptionValues("random"))
|
||||
.mapToInt(Integer::parseInt)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
populations = BiGpairSEQ.getRand().ints(min_max[0], min_max[1] + 1)
|
||||
.limit(numWells)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
}
|
||||
else if (line.hasOption("pop")) {
|
||||
populations = Stream.of(line.getOptionValues("pop"))
|
||||
.mapToInt(Integer::parseInt)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
}
|
||||
else{
|
||||
populations = new Integer[1];
|
||||
populations[0] = 1;
|
||||
}
|
||||
//make the plate
|
||||
Plate plate;
|
||||
if (line.hasOption("poisson")) {
|
||||
Double stdDev = Math.sqrt(numWells);
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
}
|
||||
else if (line.hasOption("gaussian")) {
|
||||
Double stdDev = Double.parseDouble(line.getOptionValue("stddev"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, stdDev, false);
|
||||
}
|
||||
else {
|
||||
assert line.hasOption("exponential");
|
||||
Double lambda = Double.parseDouble(line.getOptionValue("lambda"));
|
||||
plate = new Plate(cells, cellFilename, numWells, populations, dropoutRate, lambda, true);
|
||||
}
|
||||
PlateFileWriter writer = new PlateFileWriter(outputFilename, plate);
|
||||
writer.writePlateFile();
|
||||
}
|
||||
|
||||
else if (line.hasOption("graph")) { //Making a graph
|
||||
line = parser.parse(graphOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
String cellFilename = line.getOptionValue("c");
|
||||
String plateFilename = line.getOptionValue("p");
|
||||
String outputFilename = line.getOptionValue("o");
|
||||
//get cells
|
||||
CellSample cells = getCells(cellFilename);
|
||||
//get plate
|
||||
Plate plate = getPlate(plateFilename);
|
||||
GraphWithMapData graph = Simulator.makeGraph(cells, plate, false);
|
||||
if (!line.hasOption("no-binary")) { //output binary file unless told not to
|
||||
GraphDataObjectWriter writer = new GraphDataObjectWriter(outputFilename, graph, false);
|
||||
writer.writeDataToFile();
|
||||
}
|
||||
if (line.hasOption("graphml")) { //if told to, output graphml file
|
||||
GraphMLFileWriter gmlwriter = new GraphMLFileWriter(outputFilename, graph);
|
||||
gmlwriter.writeGraphToFile();
|
||||
}
|
||||
}
|
||||
|
||||
else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two
|
||||
line = parser.parse(matchOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
String graphFilename = line.getOptionValue("g");
|
||||
|
||||
String outputFilename;
|
||||
if(line.hasOption("o")) {
|
||||
outputFilename = line.getOptionValue("o");
|
||||
}
|
||||
else {
|
||||
outputFilename = null;
|
||||
}
|
||||
Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
|
||||
Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
|
||||
int minOverlapPct;
|
||||
if (line.hasOption("minpct")) { //see if this filter is being used
|
||||
minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
|
||||
}
|
||||
else {
|
||||
minOverlapPct = 0;
|
||||
}
|
||||
int maxOccupancyDiff;
|
||||
if (line.hasOption("maxdiff")) { //see if this filter is being used
|
||||
maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
|
||||
}
|
||||
else {
|
||||
maxOccupancyDiff = Integer.MAX_VALUE;
|
||||
}
|
||||
GraphWithMapData graph = getGraph(graphFilename);
|
||||
MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
|
||||
maxOccupancyDiff, minOverlapPct, false);
|
||||
if(outputFilename != null){
|
||||
MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
|
||||
writer.writeResultsToFile();
|
||||
}
|
||||
//can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
|
||||
//after I put those flags in the matchOptions
|
||||
if(line.hasOption("print-metadata")) {
|
||||
for (String k : result.getMetadata().keySet()) {
|
||||
System.out.println(k + ": " + result.getMetadata().get(k));
|
||||
}
|
||||
}
|
||||
if(line.hasOption("print-error")) {
|
||||
System.out.println("pairing error rate: " + result.getPairingErrorRate());
|
||||
}
|
||||
if(line.hasOption("print-attempt")) {
|
||||
System.out.println("pairing attempt rate: " +result.getPairingAttemptRate());
|
||||
}
|
||||
if(line.hasOption("print-correct")) {
|
||||
System.out.println("correct pairings: " + result.getCorrectPairingCount());
|
||||
}
|
||||
if(line.hasOption("print-incorrect")) {
|
||||
System.out.println("incorrect pairings: " + result.getIncorrectPairingCount());
|
||||
}
|
||||
if(line.hasOption("print-alphas")) {
|
||||
System.out.println("total alphas found: " + result.getAlphaCount());
|
||||
}
|
||||
if(line.hasOption("print-betas")) {
|
||||
System.out.println("total betas found: " + result.getBetaCount());
|
||||
}
|
||||
if(line.hasOption("print-time")) {
|
||||
System.out.println("simulation time (seconds): " + result.getSimulationTime());
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (ParseException exp) {
|
||||
System.err.println("Parsing failed. Reason: " + exp.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private static Option outputFileOption() {
|
||||
Option outputFile = Option.builder("o")
|
||||
.longOpt("output-file")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.desc("Name of output file")
|
||||
.required()
|
||||
.build();
|
||||
return outputFile;
|
||||
}
|
||||
|
||||
private static Options buildMainOptions() {
|
||||
Options mainOptions = new Options();
|
||||
Option help = Option.builder("help")
|
||||
.desc("Displays this help menu")
|
||||
.build();
|
||||
Option makeCells = Option.builder("cells")
|
||||
.longOpt("make-cells")
|
||||
.desc("Makes a cell sample file of distinct T cells")
|
||||
.build();
|
||||
Option makePlate = Option.builder("plate")
|
||||
.longOpt("make-plate")
|
||||
.desc("Makes a sample plate file. Requires a cell sample file.")
|
||||
.build();
|
||||
Option makeGraph = Option.builder("graph")
|
||||
.longOpt("make-graph")
|
||||
.desc("Makes a graph/data file. Requires a cell sample file and a sample plate file")
|
||||
.build();
|
||||
Option matchCDR3 = Option.builder("match")
|
||||
.longOpt("match-cdr3")
|
||||
.desc("Matches CDR3s. Requires a graph/data file.")
|
||||
.build();
|
||||
Option printVersion = Option.builder("version")
|
||||
.desc("Prints the program version number to stdout").build();
|
||||
OptionGroup mainGroup = new OptionGroup();
|
||||
mainGroup.addOption(help);
|
||||
mainGroup.addOption(printVersion);
|
||||
mainGroup.addOption(makeCells);
|
||||
mainGroup.addOption(makePlate);
|
||||
mainGroup.addOption(makeGraph);
|
||||
mainGroup.addOption(matchCDR3);
|
||||
mainGroup.setRequired(true);
|
||||
mainOptions.addOptionGroup(mainGroup);
|
||||
return mainOptions;
|
||||
}
|
||||
|
||||
private static Options buildCellOptions() {
|
||||
Options cellOptions = new Options();
|
||||
Option numCells = Option.builder("n")
|
||||
.longOpt("num-cells")
|
||||
.desc("The number of distinct cells to generate")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
Option cdr3Diversity = Option.builder("d")
|
||||
.longOpt("diversity-factor")
|
||||
.desc("The factor by which unique CDR3s outnumber unique CDR1s")
|
||||
.hasArg()
|
||||
.argName("factor")
|
||||
.required().build();
|
||||
cellOptions.addOption(numCells);
|
||||
cellOptions.addOption(cdr3Diversity);
|
||||
cellOptions.addOption(outputFileOption());
|
||||
return cellOptions;
|
||||
}
|
||||
|
||||
private static Options buildPlateOptions() {
|
||||
Options plateOptions = new Options();
|
||||
Option cellFile = Option.builder("c") // add this to plate options
|
||||
.longOpt("cell-file")
|
||||
.desc("The cell sample file to use")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option numWells = Option.builder("w")// add this to plate options
|
||||
.longOpt("wells")
|
||||
.desc("The number of wells on the sample plate")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
//options group for choosing with distribution to use
|
||||
OptionGroup distributions = new OptionGroup();// add this to plate options
|
||||
distributions.setRequired(true);
|
||||
Option poisson = Option.builder("poisson")
|
||||
.desc("Use a Poisson distribution for cell sample")
|
||||
.build();
|
||||
Option gaussian = Option.builder("gaussian")
|
||||
.desc("Use a Gaussian distribution for cell sample")
|
||||
.build();
|
||||
Option exponential = Option.builder("exponential")
|
||||
.desc("Use an exponential distribution for cell sample")
|
||||
.build();
|
||||
distributions.addOption(poisson);
|
||||
distributions.addOption(gaussian);
|
||||
distributions.addOption(exponential);
|
||||
//options group for statistical distribution parameters
|
||||
OptionGroup statParams = new OptionGroup();// add this to plate options
|
||||
Option stdDev = Option.builder("stddev")
|
||||
.desc("If using -gaussian flag, standard deviation for distrbution")
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
Option lambda = Option.builder("lambda")
|
||||
.desc("If using -exponential flag, lambda value for distribution")
|
||||
.hasArg()
|
||||
.argName("value")
|
||||
.build();
|
||||
statParams.addOption(stdDev);
|
||||
statParams.addOption(lambda);
|
||||
//Option group for random plate or set populations
|
||||
OptionGroup wellPopOptions = new OptionGroup(); // add this to plate options
|
||||
wellPopOptions.setRequired(true);
|
||||
Option randomWellPopulations = Option.builder("random")
|
||||
.desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
|
||||
.hasArgs()
|
||||
.numberOfArgs(2)
|
||||
.argName("min> <max")
|
||||
.build();
|
||||
Option specificWellPopulations = Option.builder("pop")
|
||||
.desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
|
||||
.hasArgs()
|
||||
.argName("number [number]...")
|
||||
.build();
|
||||
Option dropoutRate = Option.builder("err") //add this to plate options
|
||||
.hasArg()
|
||||
.desc("The sequence dropout rate due to amplification error. (0.0 - 1.0)")
|
||||
.argName("rate")
|
||||
.required()
|
||||
.build();
|
||||
wellPopOptions.addOption(randomWellPopulations);
|
||||
wellPopOptions.addOption(specificWellPopulations);
|
||||
plateOptions.addOption(cellFile);
|
||||
plateOptions.addOption(numWells);
|
||||
plateOptions.addOptionGroup(distributions);
|
||||
plateOptions.addOptionGroup(statParams);
|
||||
plateOptions.addOptionGroup(wellPopOptions);
|
||||
plateOptions.addOption(dropoutRate);
|
||||
plateOptions.addOption(outputFileOption());
|
||||
return plateOptions;
|
||||
}
|
||||
|
||||
private static Options buildGraphOptions() {
|
||||
Options graphOptions = new Options();
|
||||
Option cellFilename = Option.builder("c")
|
||||
.longOpt("cell-file")
|
||||
.desc("Cell sample file to use for checking pairing accuracy")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option plateFilename = Option.builder("p")
|
||||
.longOpt("plate-filename")
|
||||
.desc("Sample plate file from which to construct graph")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option outputGraphML = Option.builder("graphml")
|
||||
.desc("(Optional) Output GraphML file")
|
||||
.build();
|
||||
Option outputSerializedBinary = Option.builder("nb")
|
||||
.longOpt("no-binary")
|
||||
.desc("(Optional) Don't output serialized binary file")
|
||||
.build();
|
||||
graphOptions.addOption(cellFilename);
|
||||
graphOptions.addOption(plateFilename);
|
||||
graphOptions.addOption(outputFileOption());
|
||||
graphOptions.addOption(outputGraphML);
|
||||
graphOptions.addOption(outputSerializedBinary);
|
||||
return graphOptions;
|
||||
}
|
||||
|
||||
private static Options buildMatchCDR3options() {
|
||||
Options matchCDR3options = new Options();
|
||||
Option graphFilename = Option.builder("g")
|
||||
.longOpt("graph-file")
|
||||
.desc("The graph/data file to use")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option minOccupancyOverlap = Option.builder("min")
|
||||
.desc("The minimum number of shared wells to attempt to match a sequence pair")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
Option maxOccupancyOverlap = Option.builder("max")
|
||||
.desc("The maximum number of shared wells to attempt to match a sequence pair")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.required().build();
|
||||
Option minOverlapPercent = Option.builder("minpct")
|
||||
.desc("(Optional) The minimum percentage of a sequence's total occupancy shared by another sequence to attempt matching. (0 - 100) ")
|
||||
.hasArg()
|
||||
.argName("percent")
|
||||
.build();
|
||||
Option maxOccupancyDifference = Option.builder("maxdiff")
|
||||
.desc("(Optional) The maximum difference in total occupancy between two sequences to attempt matching.")
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.build();
|
||||
Option outputFile = Option.builder("o") //can't call the method this time, because this one's optional
|
||||
.longOpt("output-file")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.desc("(Optional) Name of output the output file. If not present, no file will be written.")
|
||||
.build();
|
||||
matchCDR3options.addOption(graphFilename)
|
||||
.addOption(minOccupancyOverlap)
|
||||
.addOption(maxOccupancyOverlap)
|
||||
.addOption(minOverlapPercent)
|
||||
.addOption(maxOccupancyDifference)
|
||||
.addOption(outputFile);
|
||||
|
||||
//options for output to System.out
|
||||
Option printAlphaCount = Option.builder().longOpt("print-alphas")
|
||||
.desc("(Optional) Print the number of distinct alpha sequences to stdout.").build();
|
||||
Option printBetaCount = Option.builder().longOpt("print-betas")
|
||||
.desc("(Optional) Print the number of distinct beta sequences to stdout.").build();
|
||||
Option printTime = Option.builder().longOpt("print-time")
|
||||
.desc("(Optional) Print the total simulation time to stdout.").build();
|
||||
Option printErrorRate = Option.builder().longOpt("print-error")
|
||||
.desc("(Optional) Print the pairing error rate to stdout").build();
|
||||
Option printAttempt = Option.builder().longOpt("print-attempt")
|
||||
.desc("(Optional) Print the pairing attempt rate to stdout").build();
|
||||
Option printCorrect = Option.builder().longOpt("print-correct")
|
||||
.desc("(Optional) Print the number of correct pairs to stdout").build();
|
||||
Option printIncorrect = Option.builder().longOpt("print-incorrect")
|
||||
.desc("(Optional) Print the number of incorrect pairs to stdout").build();
|
||||
Option printMetadata = Option.builder().longOpt("print-metadata")
|
||||
.desc("(Optional) Print a full summary of the matching results to stdout.").build();
|
||||
|
||||
matchCDR3options
|
||||
.addOption(printErrorRate)
|
||||
.addOption(printAttempt)
|
||||
.addOption(printCorrect)
|
||||
.addOption(printIncorrect)
|
||||
.addOption(printMetadata)
|
||||
.addOption(printAlphaCount)
|
||||
.addOption(printBetaCount)
|
||||
.addOption(printTime);
|
||||
return matchCDR3options;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static CellSample getCells(String cellFilename) {
|
||||
assert cellFilename != null;
|
||||
CellFileReader reader = new CellFileReader(cellFilename);
|
||||
return reader.getCellSample();
|
||||
}
|
||||
|
||||
private static Plate getPlate(String plateFilename) {
|
||||
assert plateFilename != null;
|
||||
PlateFileReader reader = new PlateFileReader(plateFilename);
|
||||
return reader.getSamplePlate();
|
||||
}
|
||||
|
||||
private static GraphWithMapData getGraph(String graphFilename) {
|
||||
assert graphFilename != null;
|
||||
try{
|
||||
GraphDataObjectReader reader = new GraphDataObjectReader(graphFilename, false);
|
||||
return reader.getData();
|
||||
|
||||
}
|
||||
catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
//for calling from command line
|
||||
public static void makeCells(String filename, Integer numCells, Integer cdr1Freq) {
|
||||
CellSample sample = new CellSample(numCells, cdr1Freq);
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
}
|
||||
}
|
||||
@@ -4,10 +4,6 @@ import java.math.MathContext;
|
||||
|
||||
public abstract class Equations {
|
||||
|
||||
public static int getRandomNumber(int min, int max) {
|
||||
return (int) ((Math.random() * (max - min)) + min);
|
||||
}
|
||||
|
||||
//pValue calculation as described in original pairSEQ paper.
|
||||
//Included for comparison with original results.
|
||||
//Not used by BiGpairSEQ for matching.
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import java.io.*;
|
||||
|
||||
public class GraphDataObjectReader {
|
||||
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
|
||||
public GraphDataObjectReader(String filename) throws IOException {
|
||||
|
||||
public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
@@ -13,8 +15,13 @@ public class GraphDataObjectReader {
|
||||
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
||||
ObjectInputStream in = new ObjectInputStream(fileIn))
|
||||
{
|
||||
if (verbose) {
|
||||
System.out.println("Reading graph data from file. This may take some time");
|
||||
System.out.println("File I/O time is not included in results");
|
||||
}
|
||||
data = (GraphWithMapData) in.readObject();
|
||||
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
||||
System.out.println("Graph/data file " + filename + " not found.");
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import org.jgrapht.Graph;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
@@ -7,6 +9,7 @@ public class GraphDataObjectWriter {
|
||||
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
private boolean verbose = true;
|
||||
|
||||
public GraphDataObjectWriter(String filename, GraphWithMapData data) {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
@@ -16,10 +19,24 @@ public class GraphDataObjectWriter {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public GraphDataObjectWriter(String filename, GraphWithMapData data, boolean verbose) {
|
||||
this.verbose = verbose;
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public void writeDataToFile() {
|
||||
try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename));
|
||||
|
||||
ObjectOutputStream out = new ObjectOutputStream(bufferedOut);
|
||||
){
|
||||
if(verbose) {
|
||||
System.out.println("Writing graph and occupancy data to file. This may take some time.");
|
||||
System.out.println("File I/O time is not included in results.");
|
||||
}
|
||||
out.writeObject(data);
|
||||
} catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jgrapht.nio.graphml.GraphMLImporter;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class GraphMLFileReader {
|
||||
|
||||
private String filename;
|
||||
private SimpleWeightedGraph graph;
|
||||
|
||||
public GraphMLFileReader(String filename, SimpleWeightedGraph graph) {
|
||||
if(!filename.matches(".*\\.graphml")){
|
||||
filename = filename + ".graphml";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.graph = graph;
|
||||
|
||||
try(//don't need to close reader bc of try-with-resources auto-closing
|
||||
BufferedReader reader = Files.newBufferedReader(Path.of(filename));
|
||||
){
|
||||
GraphMLImporter<SimpleWeightedGraph, BufferedReader> importer = new GraphMLImporter<>();
|
||||
importer.importGraph(graph, reader);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
System.out.println("Graph file " + filename + " not found.");
|
||||
System.err.println(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public SimpleWeightedGraph getGraph() { return graph; }
|
||||
|
||||
}
|
||||
@@ -1,4 +1,8 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jgrapht.nio.Attribute;
|
||||
import org.jgrapht.nio.AttributeType;
|
||||
import org.jgrapht.nio.DefaultAttribute;
|
||||
import org.jgrapht.nio.dot.DOTExporter;
|
||||
import org.jgrapht.nio.graphml.GraphMLExporter;
|
||||
|
||||
@@ -7,25 +11,69 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class GraphMLFileWriter {
|
||||
|
||||
String filename;
|
||||
SimpleWeightedGraph graph;
|
||||
GraphWithMapData data;
|
||||
|
||||
|
||||
public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) {
|
||||
public GraphMLFileWriter(String filename, GraphWithMapData data) {
|
||||
if(!filename.matches(".*\\.graphml")){
|
||||
filename = filename + ".graphml";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.graph = graph;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
// public void writeGraphToFile() {
|
||||
// try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
// ){
|
||||
// GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
|
||||
// exporter.exportGraph(graph, writer);
|
||||
// } catch(IOException ex){
|
||||
// System.out.println("Could not make new file named "+filename);
|
||||
// System.err.println(ex);
|
||||
// }
|
||||
// }
|
||||
|
||||
public void writeGraphToFile() {
|
||||
SimpleWeightedGraph graph = data.getGraph();
|
||||
Map<Integer, Integer> vertexToAlphaMap = data.getPlateVtoAMap();
|
||||
Map<Integer, Integer> vertexToBetaMap = data.getPlateVtoBMap();
|
||||
Map<Integer, Integer> alphaOccs = data.getAlphaWellCounts();
|
||||
Map<Integer, Integer> betaOccs = data.getBetaWellCounts();
|
||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
){
|
||||
GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
|
||||
//create exporter. Let the vertex labels be the unique ids for the vertices
|
||||
GraphMLExporter<Integer, SimpleWeightedGraph<Vertex, DefaultWeightedEdge>> exporter = new GraphMLExporter<>(v -> v.toString());
|
||||
//set to export weights
|
||||
exporter.setExportEdgeWeights(true);
|
||||
//set type, sequence, and occupancy attributes for each vertex
|
||||
exporter.setVertexAttributeProvider( v -> {
|
||||
Map<String, Attribute> attributes = new HashMap<>();
|
||||
if(vertexToAlphaMap.containsKey(v)) {
|
||||
attributes.put("type", DefaultAttribute.createAttribute("CDR3 Alpha"));
|
||||
attributes.put("sequence", DefaultAttribute.createAttribute(vertexToAlphaMap.get(v)));
|
||||
attributes.put("occupancy", DefaultAttribute.createAttribute(
|
||||
alphaOccs.get(vertexToAlphaMap.get(v))));
|
||||
}
|
||||
else if(vertexToBetaMap.containsKey(v)) {
|
||||
attributes.put("type", DefaultAttribute.createAttribute("CDR3 Beta"));
|
||||
attributes.put("sequence", DefaultAttribute.createAttribute(vertexToBetaMap.get(v)));
|
||||
attributes.put("occupancy", DefaultAttribute.createAttribute(
|
||||
betaOccs.get(vertexToBetaMap.get(v))));
|
||||
}
|
||||
return attributes;
|
||||
});
|
||||
//register the attributes
|
||||
exporter.registerAttribute("type", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
|
||||
exporter.registerAttribute("sequence", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
|
||||
exporter.registerAttribute("occupancy", GraphMLExporter.AttributeCategory.NODE, AttributeType.STRING);
|
||||
//export the graph
|
||||
exporter.exportGraph(graph, writer);
|
||||
} catch(IOException ex){
|
||||
System.out.println("Could not make new file named "+filename);
|
||||
@@ -33,3 +81,4 @@ public class GraphMLFileWriter {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
111
src/main/java/GraphModificationFunctions.java
Normal file
111
src/main/java/GraphModificationFunctions.java
Normal file
@@ -0,0 +1,111 @@
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public interface GraphModificationFunctions {
|
||||
|
||||
//remove over- and under-weight edges
|
||||
static List<Integer[]> filterByOverlapThresholds(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
int low, int high, boolean saveEdges) {
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)) {
|
||||
if(saveEdges) {
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Integer[] edge = {source, target, weight};
|
||||
removedEdges.add(edge);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Integer[] edge : removedEdges) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy
|
||||
static List<Integer[]> filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer maxOccupancyDifference, boolean saveEdges) {
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
if (saveEdges) {
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
Integer weight = (int) graph.getEdgeWeight(e);
|
||||
Integer[] edge = {source, target, weight};
|
||||
removedEdges.add(edge);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Integer[] edge : removedEdges) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||
static List<Integer[]> filterByOverlapPercent(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer minOverlapPercent,
|
||||
boolean saveEdges) {
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
if(saveEdges) {
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
Integer intWeight = (int) graph.getEdgeWeight(e);
|
||||
Integer[] edge = {source, target, intWeight};
|
||||
removedEdges.add(edge);
|
||||
}
|
||||
else {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(saveEdges) {
|
||||
for (Integer[] edge : removedEdges) {
|
||||
graph.removeEdge(edge[0], edge[1]);
|
||||
}
|
||||
}
|
||||
return removedEdges;
|
||||
}
|
||||
|
||||
static void addRemovedEdges(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
List<Integer[]> removedEdges) {
|
||||
for (Integer[] edge : removedEdges) {
|
||||
DefaultWeightedEdge e = graph.addEdge(edge[0], edge[1]);
|
||||
graph.setEdgeWeight(e, (double) edge[2]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -11,7 +11,7 @@ public class GraphWithMapData implements java.io.Serializable {
|
||||
private String sourceFilename;
|
||||
private final SimpleWeightedGraph graph;
|
||||
private Integer numWells;
|
||||
private Integer[] wellConcentrations;
|
||||
private Integer[] wellPopulations;
|
||||
private Integer alphaCount;
|
||||
private Integer betaCount;
|
||||
private final Map<Integer, Integer> distCellsMapAlphaKey;
|
||||
@@ -31,7 +31,7 @@ public class GraphWithMapData implements java.io.Serializable {
|
||||
Map<Integer, Integer> betaWellCounts, Duration time) {
|
||||
this.graph = graph;
|
||||
this.numWells = numWells;
|
||||
this.wellConcentrations = wellConcentrations;
|
||||
this.wellPopulations = wellConcentrations;
|
||||
this.alphaCount = alphaCount;
|
||||
this.betaCount = betaCount;
|
||||
this.distCellsMapAlphaKey = distCellsMapAlphaKey;
|
||||
@@ -52,8 +52,8 @@ public class GraphWithMapData implements java.io.Serializable {
|
||||
return numWells;
|
||||
}
|
||||
|
||||
public Integer[] getWellConcentrations() {
|
||||
return wellConcentrations;
|
||||
public Integer[] getWellPopulations() {
|
||||
return wellPopulations;
|
||||
}
|
||||
|
||||
public Integer getAlphaCount() {
|
||||
|
||||
588
src/main/java/InteractiveInterface.java
Normal file
588
src/main/java/InteractiveInterface.java
Normal file
@@ -0,0 +1,588 @@
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
//
|
||||
public class InteractiveInterface {
|
||||
|
||||
private static final Random rand = BiGpairSEQ.getRand();
|
||||
private static final Scanner sc = new Scanner(System.in);
|
||||
private static int input;
|
||||
private static boolean quit = false;
|
||||
|
||||
public static void startInteractive() {
|
||||
|
||||
while (!quit) {
|
||||
System.out.println();
|
||||
System.out.println("--------BiGPairSEQ SIMULATOR--------");
|
||||
System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
|
||||
System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
|
||||
System.out.println("------------------------------------");
|
||||
System.out.println("Please select an option:");
|
||||
System.out.println("1) Generate a population of distinct cells");
|
||||
System.out.println("2) Generate a sample plate of T cells");
|
||||
System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
|
||||
System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
|
||||
//Need to re-do the CDR3/CDR1 matching to correspond to new pattern
|
||||
//System.out.println("5) Generate CDR3/CDR1 occupancy graph");
|
||||
//System.out.println("6) Simulate CDR3/CDR1 T cell matching");
|
||||
System.out.println("8) Options");
|
||||
System.out.println("9) About/Acknowledgments");
|
||||
System.out.println("0) Exit");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> makeCells();
|
||||
case 2 -> makePlate();
|
||||
case 3 -> makeCDR3Graph();
|
||||
case 4 -> matchCDR3s();
|
||||
//case 6 -> matchCellsCDR1();
|
||||
case 8 -> mainOptions();
|
||||
case 9 -> acknowledge();
|
||||
case 0 -> quit = true;
|
||||
default -> System.out.println("Invalid input.");
|
||||
}
|
||||
} catch (InputMismatchException | IOException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
sc.close();
|
||||
}
|
||||
|
||||
private static void makeCells() {
|
||||
String filename = null;
|
||||
Integer numCells = 0;
|
||||
Integer cdr1Freq = 1;
|
||||
try {
|
||||
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
|
||||
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
|
||||
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
|
||||
System.out.println("\nThe cells will be written to a CSV file.");
|
||||
System.out.print("Please enter a file name: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
|
||||
System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
|
||||
cdr1Freq = sc.nextInt();
|
||||
System.out.print("\nPlease enter the number of T-cells to generate: ");
|
||||
numCells = sc.nextInt();
|
||||
if(numCells <= 0){
|
||||
throw new InputMismatchException("Number of cells must be a positive integer.");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellSample sample = new CellSample(numCells, cdr1Freq);
|
||||
assert filename != null;
|
||||
System.out.println("Writing cells to file");
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
System.out.println("Cell sample written to: " + filename);
|
||||
if(BiGpairSEQ.cacheCells()) {
|
||||
BiGpairSEQ.setCellSampleInMemory(sample, filename);
|
||||
}
|
||||
}
|
||||
|
||||
//Output a CSV of sample plate
|
||||
private static void makePlate() {
|
||||
String cellFile = null;
|
||||
String filename = null;
|
||||
Double stdDev = 0.0;
|
||||
Integer numWells = 0;
|
||||
Integer numSections;
|
||||
Integer[] populations = {1};
|
||||
Double dropOutRate = 0.0;
|
||||
boolean poisson = false;
|
||||
boolean exponential = false;
|
||||
double lambda = 1.5;
|
||||
try {
|
||||
System.out.println("\nSimulated sample plates consist of:");
|
||||
System.out.println("* a number of wells");
|
||||
System.out.println(" * separated into one or more sections");
|
||||
System.out.println(" * each of which has a set quantity of cells per well");
|
||||
System.out.println(" * selected from a statistical distribution of distinct cells");
|
||||
System.out.println(" * with a set dropout rate for individual sequences within a cell");
|
||||
System.out.println("\nMaking a sample plate requires a population of distinct cells");
|
||||
System.out.print("Please enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.println("\nThe sample plate will be written to a CSV file");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nSelect T-cell frequency distribution function");
|
||||
System.out.println("1) Poisson");
|
||||
System.out.println("2) Gaussian");
|
||||
System.out.println("3) Exponential");
|
||||
System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
|
||||
System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
System.out.print("Enter selection value: ");
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> poisson = true;
|
||||
case 2 -> {
|
||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
stdDev = sc.nextDouble();
|
||||
if (stdDev <= 0.0) {
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
}
|
||||
case 3 -> {
|
||||
exponential = true;
|
||||
System.out.print("Please enter lambda value for exponential distribution: ");
|
||||
lambda = sc.nextDouble();
|
||||
if (lambda <= 0.0) {
|
||||
lambda = 0.6;
|
||||
System.out.println("Value must be positive. Defaulting to 0.6.");
|
||||
}
|
||||
}
|
||||
default -> {
|
||||
System.out.println("Invalid input. Defaulting to exponential.");
|
||||
exponential = true;
|
||||
}
|
||||
}
|
||||
System.out.print("\nNumber of wells on plate: ");
|
||||
numWells = sc.nextInt();
|
||||
if(numWells < 1){
|
||||
throw new InputMismatchException("No wells on plate");
|
||||
}
|
||||
//choose whether to make T cell population/well random
|
||||
boolean randomWellPopulations;
|
||||
System.out.println("Randomize number of T cells in each well? (y/n)");
|
||||
String ans = sc.next();
|
||||
Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = pattern.matcher(ans);
|
||||
if(matcher.matches()){
|
||||
randomWellPopulations = true;
|
||||
}
|
||||
else{
|
||||
randomWellPopulations = false;
|
||||
}
|
||||
if(randomWellPopulations) { //if T cell population/well is random
|
||||
numSections = numWells;
|
||||
Integer minPop;
|
||||
Integer maxPop;
|
||||
System.out.print("Please enter minimum number of T cells in a well: ");
|
||||
minPop = sc.nextInt();
|
||||
if(minPop < 1) {
|
||||
throw new InputMismatchException("Minimum well population must be positive");
|
||||
}
|
||||
System.out.println("Please enter maximum number of T cells in a well: ");
|
||||
maxPop = sc.nextInt();
|
||||
if(maxPop < minPop) {
|
||||
throw new InputMismatchException("Max well population must be greater than min well population");
|
||||
}
|
||||
//maximum should be inclusive, so need to add one to max of randomly generated values
|
||||
populations = rand.ints(minPop, maxPop + 1)
|
||||
.limit(numSections)
|
||||
.boxed()
|
||||
.toArray(Integer[]::new);
|
||||
System.out.print("Populations: ");
|
||||
System.out.println(Arrays.toString(populations));
|
||||
}
|
||||
else{ //if T cell population/well is not random
|
||||
System.out.println("\nThe plate can be evenly sectioned to allow different numbers of T cells per well.");
|
||||
System.out.println("How many sections would you like to make (minimum 1)?");
|
||||
numSections = sc.nextInt();
|
||||
if (numSections < 1) {
|
||||
throw new InputMismatchException("Too few sections.");
|
||||
} else if (numSections > numWells) {
|
||||
throw new InputMismatchException("Cannot have more sections than wells.");
|
||||
}
|
||||
int i = 1;
|
||||
populations = new Integer[numSections];
|
||||
while (numSections > 0) {
|
||||
System.out.print("Enter number of T cells per well in section " + i + ": ");
|
||||
populations[i - 1] = sc.nextInt();
|
||||
i++;
|
||||
numSections--;
|
||||
}
|
||||
}
|
||||
System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
|
||||
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
|
||||
dropOutRate = sc.nextDouble();
|
||||
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
|
||||
throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]");
|
||||
}
|
||||
}catch(InputMismatchException ex){
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
assert cellFile != null;
|
||||
CellSample cells;
|
||||
if (cellFile.equals(BiGpairSEQ.getCellFilename())){
|
||||
cells = BiGpairSEQ.getCellSampleInMemory();
|
||||
}
|
||||
else {
|
||||
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
cells = cellReader.getCellSample();
|
||||
if(BiGpairSEQ.cacheCells()) {
|
||||
BiGpairSEQ.setCellSampleInMemory(cells, cellFile);
|
||||
}
|
||||
}
|
||||
assert filename != null;
|
||||
Plate samplePlate;
|
||||
PlateFileWriter writer;
|
||||
if(exponential){
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, lambda, true);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
else {
|
||||
if (poisson) {
|
||||
stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
}
|
||||
samplePlate = new Plate(cells, cellFile, numWells, populations, dropOutRate, stdDev, false);
|
||||
writer = new PlateFileWriter(filename, samplePlate);
|
||||
}
|
||||
System.out.println("Writing Sample Plate to file");
|
||||
writer.writePlateFile();
|
||||
System.out.println("Sample Plate written to file: " + filename);
|
||||
if(BiGpairSEQ.cachePlate()) {
|
||||
BiGpairSEQ.setPlateInMemory(samplePlate, filename);
|
||||
}
|
||||
}
|
||||
|
||||
//Output serialized binary of GraphAndMapData object
|
||||
private static void makeCDR3Graph() {
|
||||
String filename = null;
|
||||
String cellFile = null;
|
||||
String plateFile = null;
|
||||
try {
|
||||
String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
|
||||
str = str.concat("\nrequires a cell sample file and a sample plate file.");
|
||||
System.out.println(str);
|
||||
System.out.print("\nPlease enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.print("\nPlease enter name of an existing sample plate file: ");
|
||||
plateFile = sc.next();
|
||||
System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
|
||||
assert cellFile != null;
|
||||
CellSample cellSample;
|
||||
//check if cells are already in memory
|
||||
if(cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
|
||||
cellSample = BiGpairSEQ.getCellSampleInMemory();
|
||||
}
|
||||
else {
|
||||
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
cellSample = cellReader.getCellSample();
|
||||
if(BiGpairSEQ.cacheCells()) {
|
||||
BiGpairSEQ.setCellSampleInMemory(cellSample, cellFile);
|
||||
}
|
||||
}
|
||||
|
||||
assert plateFile != null;
|
||||
Plate plate;
|
||||
//check if plate is already in memory
|
||||
if(plateFile.equals(BiGpairSEQ.getPlateFilename())){
|
||||
plate = BiGpairSEQ.getPlateInMemory();
|
||||
}
|
||||
else {
|
||||
System.out.println("Reading Sample Plate file: " + plateFile);
|
||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
plate = plateReader.getSamplePlate();
|
||||
if(BiGpairSEQ.cachePlate()) {
|
||||
BiGpairSEQ.setPlateInMemory(plate, plateFile);
|
||||
}
|
||||
}
|
||||
if (cellSample.getCells().size() == 0){
|
||||
System.out.println("No cell sample found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else if(plate.getWells().size() == 0 || plate.getPopulations().length == 0){
|
||||
System.out.println("No sample plate found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else{
|
||||
GraphWithMapData data = Simulator.makeGraph(cellSample, plate, true);
|
||||
assert filename != null;
|
||||
if(BiGpairSEQ.outputBinary()) {
|
||||
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
|
||||
dataWriter.writeDataToFile();
|
||||
System.out.println("Serialized binary graph/data file written to: " + filename);
|
||||
}
|
||||
if(BiGpairSEQ.outputGraphML()) {
|
||||
GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data);
|
||||
graphMLWriter.writeGraphToFile();
|
||||
System.out.println("GraphML file written to: " + filename);
|
||||
}
|
||||
if(BiGpairSEQ.cacheGraph()) {
|
||||
BiGpairSEQ.setGraphInMemory(data, filename);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Simulate matching and output CSV file of results
|
||||
private static void matchCDR3s() throws IOException {
|
||||
String filename = null;
|
||||
String graphFilename = null;
|
||||
Integer lowThreshold = 0;
|
||||
Integer highThreshold = Integer.MAX_VALUE;
|
||||
Integer maxOccupancyDiff = Integer.MAX_VALUE;
|
||||
Integer minOverlapPercent = 0;
|
||||
try {
|
||||
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
|
||||
System.out.println("Please enter name of an existing graph and occupancy data file: ");
|
||||
graphFilename = sc.next();
|
||||
System.out.println("The matching results will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
lowThreshold = sc.nextInt();
|
||||
if(lowThreshold < 1){
|
||||
lowThreshold = 1;
|
||||
System.out.println("Value for low occupancy overlap threshold must be positive");
|
||||
System.out.println("Value for low occupancy overlap threshold set to 1");
|
||||
}
|
||||
System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
highThreshold = sc.nextInt();
|
||||
if(highThreshold < lowThreshold) {
|
||||
highThreshold = lowThreshold;
|
||||
System.out.println("Value for high occupancy overlap threshold must be >= low overlap threshold");
|
||||
System.out.println("Value for high occupancy overlap threshold set to " + lowThreshold);
|
||||
}
|
||||
System.out.println("What is the minimum percentage of a sequence's wells in alpha/beta overlap to attempt matching? (0 - 100)");
|
||||
minOverlapPercent = sc.nextInt();
|
||||
if (minOverlapPercent < 0 || minOverlapPercent > 100) {
|
||||
System.out.println("Value outside range. Minimum occupancy overlap percentage set to 0");
|
||||
}
|
||||
System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
|
||||
maxOccupancyDiff = sc.nextInt();
|
||||
if (maxOccupancyDiff < 0) {
|
||||
maxOccupancyDiff = 0;
|
||||
System.out.println("Maximum allowable difference in alpha/beta occupancy must be nonnegative");
|
||||
System.out.println("Maximum allowable difference in alpha/beta occupancy set to 0");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
assert graphFilename != null;
|
||||
//check if this is the same graph we already have in memory.
|
||||
GraphWithMapData data;
|
||||
if(graphFilename.equals(BiGpairSEQ.getGraphFilename())) {
|
||||
data = BiGpairSEQ.getGraphInMemory();
|
||||
}
|
||||
else {
|
||||
GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename, true);
|
||||
data = dataReader.getData();
|
||||
if(BiGpairSEQ.cacheGraph()) {
|
||||
BiGpairSEQ.setGraphInMemory(data, graphFilename);
|
||||
}
|
||||
}
|
||||
//simulate matching
|
||||
MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold, maxOccupancyDiff,
|
||||
minOverlapPercent, true);
|
||||
//write results to file
|
||||
assert filename != null;
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
|
||||
System.out.println("Writing results to file");
|
||||
writer.writeResultsToFile();
|
||||
System.out.println("Results written to file: " + filename);
|
||||
}
|
||||
|
||||
///////
|
||||
//Rewrite this to fit new matchCDR3 method with file I/O
|
||||
///////
|
||||
// public static void matchCellsCDR1(){
|
||||
// /*
|
||||
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
|
||||
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
|
||||
// match
|
||||
// */
|
||||
// String filename = null;
|
||||
// String preliminaryResultsFilename = null;
|
||||
// String cellFile = null;
|
||||
// String plateFile = null;
|
||||
// Integer lowThresholdCDR3 = 0;
|
||||
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
||||
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
|
||||
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
|
||||
// Integer lowThresholdCDR1 = 0;
|
||||
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
||||
// boolean outputCDR3Matches = false;
|
||||
// try {
|
||||
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||
// System.out.print("Please enter name of an existing cell sample file: ");
|
||||
// cellFile = sc.next();
|
||||
// System.out.print("Please enter name of an existing sample plate file: ");
|
||||
// plateFile = sc.next();
|
||||
// System.out.println("The matching results will be written to a file.");
|
||||
// System.out.print("Please enter a name for the output file: ");
|
||||
// filename = sc.next();
|
||||
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
// lowThresholdCDR3 = sc.nextInt();
|
||||
// if(lowThresholdCDR3 < 1){
|
||||
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
// }
|
||||
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
// highThresholdCDR3 = sc.nextInt();
|
||||
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
|
||||
// maxOccupancyDiffCDR3 = sc.nextInt();
|
||||
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
|
||||
// minOverlapPercentCDR3 = sc.nextInt();
|
||||
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
|
||||
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||
// }
|
||||
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
// lowThresholdCDR1 = sc.nextInt();
|
||||
// if(lowThresholdCDR1 < 1){
|
||||
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
// }
|
||||
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
// highThresholdCDR1 = sc.nextInt();
|
||||
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
|
||||
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
|
||||
// System.out.print("Please enter y/n: ");
|
||||
// String ans = sc.next();
|
||||
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
|
||||
// Matcher matcher = pattern.matcher(ans);
|
||||
// if(matcher.matches()){
|
||||
// outputCDR3Matches = true;
|
||||
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
|
||||
// preliminaryResultsFilename = sc.next();
|
||||
// System.out.println("CDR3 alpha/beta matches will be output to file");
|
||||
// }
|
||||
// else{
|
||||
// System.out.println("CDR3 alpha/beta matches will not be output to file");
|
||||
// }
|
||||
// } catch (InputMismatchException ex) {
|
||||
// System.out.println(ex);
|
||||
// sc.next();
|
||||
// }
|
||||
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
// PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||
// if (cellReader.getCells().size() == 0){
|
||||
// System.out.println("No cell sample found.");
|
||||
// System.out.println("Returning to main menu.");
|
||||
// }
|
||||
// else if(plate.getWells().size() == 0){
|
||||
// System.out.println("No sample plate found.");
|
||||
// System.out.println("Returning to main menu.");
|
||||
//
|
||||
// }
|
||||
// else{
|
||||
// if(highThresholdCDR3 >= plate.getSize()){
|
||||
// highThresholdCDR3 = plate.getSize() - 1;
|
||||
// }
|
||||
// if(highThresholdCDR1 >= plate.getSize()){
|
||||
// highThresholdCDR1 = plate.getSize() - 1;
|
||||
// }
|
||||
// List<Integer[]> cells = cellReader.getCells();
|
||||
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
|
||||
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
|
||||
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
||||
// highThresholdCDR1, preliminaryResults);
|
||||
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
|
||||
// writer.writeResultsToFile();
|
||||
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
|
||||
// writer.writeResultsToFile();
|
||||
// if(outputCDR3Matches){
|
||||
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
|
||||
// writer.writeResultsToFile();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
private static void mainOptions(){
|
||||
boolean backToMain = false;
|
||||
while(!backToMain) {
|
||||
System.out.println("\n--------------OPTIONS---------------");
|
||||
System.out.println("1) Turn " + getOnOff(!BiGpairSEQ.cacheCells()) + " cell sample file caching");
|
||||
System.out.println("2) Turn " + getOnOff(!BiGpairSEQ.cachePlate()) + " plate file caching");
|
||||
System.out.println("3) Turn " + getOnOff(!BiGpairSEQ.cacheGraph()) + " graph/data file caching");
|
||||
System.out.println("4) Turn " + getOnOff(!BiGpairSEQ.outputBinary()) + " serialized binary graph output");
|
||||
System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output");
|
||||
System.out.println("6) Maximum weight matching algorithm options");
|
||||
System.out.println("0) Return to main menu");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> BiGpairSEQ.setCacheCells(!BiGpairSEQ.cacheCells());
|
||||
case 2 -> BiGpairSEQ.setCachePlate(!BiGpairSEQ.cachePlate());
|
||||
case 3 -> BiGpairSEQ.setCacheGraph(!BiGpairSEQ.cacheGraph());
|
||||
case 4 -> BiGpairSEQ.setOutputBinary(!BiGpairSEQ.outputBinary());
|
||||
case 5 -> BiGpairSEQ.setOutputGraphML(!BiGpairSEQ.outputGraphML());
|
||||
case 6 -> algorithmOptions();
|
||||
case 0 -> backToMain = true;
|
||||
default -> System.out.println("Invalid input");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for printing menu items in mainOptions(). Returns a string based on the value of parameter.
|
||||
*
|
||||
* @param b - a boolean value
|
||||
* @return String "on" if b is true, "off" if b is false
|
||||
*/
|
||||
private static String getOnOff(boolean b) {
|
||||
if (b) { return "on";}
|
||||
else { return "off"; }
|
||||
}
|
||||
|
||||
private static void algorithmOptions(){
|
||||
boolean backToOptions = false;
|
||||
while(!backToOptions) {
|
||||
System.out.println("\n---------ALGORITHM OPTIONS----------");
|
||||
System.out.println("1) Use scaling algorithm by Duan and Su.");
|
||||
System.out.println("2) Use LEDA book algorithm with Fibonacci heap priority queue");
|
||||
System.out.println("3) Use LEDA book algorithm with pairing heap priority queue");
|
||||
System.out.println("0) Return to Options menu");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> System.out.println("This option is not yet implemented. Choose another.");
|
||||
case 2 -> {
|
||||
BiGpairSEQ.setFibonacciHeap();
|
||||
System.out.println("MWM algorithm set to LEDA with Fibonacci heap");
|
||||
backToOptions = true;
|
||||
}
|
||||
case 3 -> {
|
||||
BiGpairSEQ.setPairingHeap();
|
||||
System.out.println("MWM algorithm set to LEDA with pairing heap");
|
||||
backToOptions = true;
|
||||
}
|
||||
case 0 -> backToOptions = true;
|
||||
default -> System.out.println("Invalid input");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void acknowledge(){
|
||||
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||
System.out.println();
|
||||
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
||||
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
||||
System.out.println();
|
||||
System.out.println("For full documentation, view readme.md file distributed with this code");
|
||||
System.out.println("or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.");
|
||||
System.out.println();
|
||||
System.out.println("pairSEQ citation:");
|
||||
System.out.println("Howie, B., Sherwood, A. M., et. al.");
|
||||
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
|
||||
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
|
||||
System.out.println();
|
||||
System.out.println("BiGpairSEQ_Sim by Eugene Fischer, 2021-2022");
|
||||
}
|
||||
}
|
||||
3
src/main/java/META-INF/MANIFEST.MF
Normal file
3
src/main/java/META-INF/MANIFEST.MF
Normal file
@@ -0,0 +1,3 @@
|
||||
Manifest-Version: 1.0
|
||||
Main-Class: BiGpairSEQ
|
||||
|
||||
@@ -7,13 +7,10 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
public class MatchingFileWriter {
|
||||
|
||||
private String filename;
|
||||
private String sourceFileName;
|
||||
private List<String> comments;
|
||||
private List<String> headers;
|
||||
private List<List<String>> allResults;
|
||||
@@ -23,7 +20,6 @@ public class MatchingFileWriter {
|
||||
filename = filename + ".csv";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.sourceFileName = result.getSourceFileName();
|
||||
this.comments = result.getComments();
|
||||
this.headers = result.getHeaders();
|
||||
this.allResults = result.getAllResults();
|
||||
|
||||
@@ -1,18 +1,41 @@
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class MatchingResult {
|
||||
private String sourceFile;
|
||||
private List<String> comments;
|
||||
private List<String> headers;
|
||||
private List<List<String>> allResults;
|
||||
private Map<Integer, Integer> matchMap;
|
||||
private Duration time;
|
||||
|
||||
public MatchingResult(String sourceFileName, List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
||||
this.sourceFile = sourceFileName;
|
||||
this.comments = comments;
|
||||
private final Map<String, String> metadata;
|
||||
private final List<String> comments;
|
||||
private final List<String> headers;
|
||||
private final List<List<String>> allResults;
|
||||
private final Map<Integer, Integer> matchMap;
|
||||
private final Duration time;
|
||||
|
||||
public MatchingResult(Map<String, String> metadata, List<String> headers,
|
||||
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
|
||||
/*
|
||||
* POSSIBLE KEYS FOR METADATA MAP ARE:
|
||||
* sample plate filename *
|
||||
* graph filename *
|
||||
* well populations *
|
||||
* total alphas found *
|
||||
* total betas found *
|
||||
* high overlap threshold *
|
||||
* low overlap threshold *
|
||||
* maximum occupancy difference *
|
||||
* minimum overlap percent *
|
||||
* pairing attempt rate *
|
||||
* correct pairing count *
|
||||
* incorrect pairing count *
|
||||
* pairing error rate *
|
||||
* simulation time (seconds)
|
||||
*/
|
||||
this.metadata = metadata;
|
||||
this.comments = new ArrayList<>();
|
||||
for (String key : metadata.keySet()) {
|
||||
comments.add(key +": " + metadata.get(key));
|
||||
}
|
||||
this.headers = headers;
|
||||
this.allResults = allResults;
|
||||
this.matchMap = matchMap;
|
||||
@@ -20,6 +43,8 @@ public class MatchingResult {
|
||||
|
||||
}
|
||||
|
||||
public Map<String, String> getMetadata() {return metadata;}
|
||||
|
||||
public List<String> getComments() {
|
||||
return comments;
|
||||
}
|
||||
@@ -40,7 +65,48 @@ public class MatchingResult {
|
||||
return time;
|
||||
}
|
||||
|
||||
public String getSourceFileName() {
|
||||
return sourceFile;
|
||||
public String getPlateFilename() {
|
||||
return metadata.get("sample plate filename");
|
||||
}
|
||||
|
||||
public String getGraphFilename() {
|
||||
return metadata.get("graph filename");
|
||||
}
|
||||
|
||||
public Integer[] getWellPopulations() {
|
||||
List<Integer> wellPopulations = new ArrayList<>();
|
||||
String popString = metadata.get("well populations");
|
||||
for (String p : popString.split(", ")) {
|
||||
wellPopulations.add(Integer.parseInt(p));
|
||||
}
|
||||
Integer[] popArray = new Integer[wellPopulations.size()];
|
||||
return wellPopulations.toArray(popArray);
|
||||
}
|
||||
|
||||
public Integer getAlphaCount() {
|
||||
return Integer.parseInt(metadata.get("total alpha count"));
|
||||
}
|
||||
|
||||
public Integer getBetaCount() {
|
||||
return Integer.parseInt(metadata.get("total beta count"));
|
||||
}
|
||||
|
||||
public Integer getHighOverlapThreshold() { return Integer.parseInt(metadata.get("high overlap threshold"));}
|
||||
|
||||
public Integer getLowOverlapThreshold() { return Integer.parseInt(metadata.get("low overlap threshold"));}
|
||||
|
||||
public Integer getMaxOccupancyDifference() { return Integer.parseInt(metadata.get("maximum occupancy difference"));}
|
||||
|
||||
public Integer getMinOverlapPercent() { return Integer.parseInt(metadata.get("minimum overlap percent"));}
|
||||
|
||||
public Double getPairingAttemptRate() { return Double.parseDouble(metadata.get("pairing attempt rate"));}
|
||||
|
||||
public Integer getCorrectPairingCount() { return Integer.parseInt(metadata.get("correct pairing count"));}
|
||||
|
||||
public Integer getIncorrectPairingCount() { return Integer.parseInt(metadata.get("incorrect pairing count"));}
|
||||
|
||||
public Double getPairingErrorRate() { return Double.parseDouble(metadata.get("pairing error rate"));}
|
||||
|
||||
public String getSimulationTime() { return metadata.get("simulation time (seconds)"); }
|
||||
|
||||
}
|
||||
|
||||
@@ -1,31 +1,55 @@
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/*
|
||||
TODO: Implement exponential distribution using inversion method - DONE
|
||||
TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class Plate {
|
||||
private CellSample cells;
|
||||
private String sourceFile;
|
||||
private String filename;
|
||||
private List<List<Integer[]>> wells;
|
||||
private Random rand = new Random();
|
||||
private final Random rand = BiGpairSEQ.getRand();
|
||||
private int size;
|
||||
private double error;
|
||||
private Integer[] concentrations;
|
||||
private Integer[] populations;
|
||||
private double stdDev;
|
||||
private double lambda;
|
||||
boolean exponential = false;
|
||||
|
||||
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
|
||||
double dropoutRate, double stdDev_or_lambda, boolean exponential){
|
||||
this.cells = cells;
|
||||
this.sourceFile = cellFilename;
|
||||
this.size = numWells;
|
||||
this.wells = new ArrayList<>();
|
||||
this.error = dropoutRate;
|
||||
this.populations = populations;
|
||||
this.exponential = exponential;
|
||||
if (this.exponential) {
|
||||
this.lambda = stdDev_or_lambda;
|
||||
fillWellsExponential(cells.getCells(), this.lambda);
|
||||
}
|
||||
else {
|
||||
this.stdDev = stdDev_or_lambda;
|
||||
fillWells(cells.getCells(), this.stdDev);
|
||||
}
|
||||
}
|
||||
|
||||
public Plate(int size, double error, Integer[] concentrations) {
|
||||
|
||||
public Plate(int size, double error, Integer[] populations) {
|
||||
this.size = size;
|
||||
this.error = error;
|
||||
this.concentrations = concentrations;
|
||||
this.populations = populations;
|
||||
wells = new ArrayList<>();
|
||||
}
|
||||
|
||||
public Plate(String sourceFileName, List<List<Integer[]>> wells) {
|
||||
this.sourceFile = sourceFileName;
|
||||
//constructor for returning a Plate from a PlateFileReader
|
||||
public Plate(String filename, List<List<Integer[]>> wells) {
|
||||
this.filename = filename;
|
||||
this.wells = wells;
|
||||
this.size = wells.size();
|
||||
|
||||
@@ -35,37 +59,28 @@ public class Plate {
|
||||
concentrations.add(w.size());
|
||||
}
|
||||
}
|
||||
this.concentrations = new Integer[concentrations.size()];
|
||||
for (int i = 0; i < this.concentrations.length; i++) {
|
||||
this.concentrations[i] = concentrations.get(i);
|
||||
this.populations = new Integer[concentrations.size()];
|
||||
for (int i = 0; i < this.populations.length; i++) {
|
||||
this.populations[i] = concentrations.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){
|
||||
private void fillWellsExponential(List<Integer[]> cells, double lambda){
|
||||
this.lambda = lambda;
|
||||
exponential = true;
|
||||
sourceFile = sourceFileName;
|
||||
int numSections = concentrations.length;
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
double m;
|
||||
int n;
|
||||
int test=0;
|
||||
while (section < numSections){
|
||||
for (int i = 0; i < (size / numSections); i++) {
|
||||
List<Integer[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < concentrations[section]; j++) {
|
||||
for (int j = 0; j < populations[section]; j++) {
|
||||
do {
|
||||
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
|
||||
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
|
||||
} while (m >= cells.size() || m < 0);
|
||||
n = (int) Math.floor(m);
|
||||
//n = Equations.getRandomNumber(0, cells.size());
|
||||
// was testing generating the cell sample file with exponential dist, then sampling flat here
|
||||
//that would be more realistic
|
||||
//But would mess up other things in the simulation with how I've coded it.
|
||||
if(n > test){
|
||||
test = n;
|
||||
}
|
||||
Integer[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce
|
||||
@@ -78,20 +93,18 @@ public class Plate {
|
||||
}
|
||||
section++;
|
||||
}
|
||||
System.out.println("Highest index: " +test);
|
||||
}
|
||||
|
||||
public void fillWells(String sourceFileName, List<Integer[]> cells, double stdDev) {
|
||||
private void fillWells( List<Integer[]> cells, double stdDev) {
|
||||
this.stdDev = stdDev;
|
||||
sourceFile = sourceFileName;
|
||||
int numSections = concentrations.length;
|
||||
int numSections = populations.length;
|
||||
int section = 0;
|
||||
double m;
|
||||
int n;
|
||||
while (section < numSections){
|
||||
for (int i = 0; i < (size / numSections); i++) {
|
||||
List<Integer[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < concentrations[section]; j++) {
|
||||
for (int j = 0; j < populations[section]; j++) {
|
||||
do {
|
||||
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
|
||||
} while (m >= cells.size() || m < 0);
|
||||
@@ -110,8 +123,8 @@ public class Plate {
|
||||
}
|
||||
}
|
||||
|
||||
public Integer[] getConcentrations(){
|
||||
return concentrations;
|
||||
public Integer[] getPopulations(){
|
||||
return populations;
|
||||
}
|
||||
|
||||
public int getSize(){
|
||||
@@ -166,4 +179,6 @@ public class Plate {
|
||||
public String getSourceFileName() {
|
||||
return sourceFile;
|
||||
}
|
||||
|
||||
public String getFilename() { return filename; }
|
||||
}
|
||||
|
||||
@@ -56,11 +56,8 @@ public class PlateFileReader {
|
||||
|
||||
}
|
||||
|
||||
public List<List<Integer[]>> getWells() {
|
||||
return wells;
|
||||
public Plate getSamplePlate() {
|
||||
return new Plate(filename, wells);
|
||||
}
|
||||
|
||||
public String getFilename() {
|
||||
return filename;
|
||||
}
|
||||
}
|
||||
@@ -7,7 +7,6 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class PlateFileWriter {
|
||||
private int size;
|
||||
@@ -17,8 +16,7 @@ public class PlateFileWriter {
|
||||
private Double error;
|
||||
private String filename;
|
||||
private String sourceFileName;
|
||||
private String[] headers;
|
||||
private List<Integer> concentrations;
|
||||
private Integer[] populations;
|
||||
private boolean isExponential = false;
|
||||
|
||||
public PlateFileWriter(String filename, Plate plate) {
|
||||
@@ -37,8 +35,8 @@ public class PlateFileWriter {
|
||||
}
|
||||
this.error = plate.getError();
|
||||
this.wells = plate.getWells();
|
||||
this.concentrations = Arrays.asList(plate.getConcentrations());
|
||||
concentrations.sort(Comparator.reverseOrder());
|
||||
this.populations = plate.getPopulations();
|
||||
Arrays.sort(populations);
|
||||
}
|
||||
|
||||
public void writePlateFile(){
|
||||
@@ -59,28 +57,32 @@ public class PlateFileWriter {
|
||||
}
|
||||
}
|
||||
|
||||
//this took forever
|
||||
List<List<String>> rows = new ArrayList<>();
|
||||
List<String> tmp = new ArrayList<>();
|
||||
for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
|
||||
tmp.add("well " + (i+1));
|
||||
}
|
||||
rows.add(tmp);
|
||||
for(int row = 0; row < maxLength; row++){
|
||||
tmp = new ArrayList<>();
|
||||
for(List<String> c: wellsAsStrings){
|
||||
tmp.add(c.get(row));
|
||||
}
|
||||
rows.add(tmp);
|
||||
}
|
||||
//build string of well concentrations
|
||||
StringBuilder concen = new StringBuilder();
|
||||
for(Integer i: concentrations){
|
||||
concen.append(i.toString());
|
||||
concen.append(" ");
|
||||
}
|
||||
String concenString = concen.toString();
|
||||
// //this took forever and I don't use it
|
||||
// //if I wanted to use it, I'd replace printer.printRecords(wellsAsStrings) with printer.printRecords(rows)
|
||||
// List<List<String>> rows = new ArrayList<>();
|
||||
// List<String> tmp = new ArrayList<>();
|
||||
// for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
|
||||
// tmp.add("well " + (i+1));
|
||||
// }
|
||||
// rows.add(tmp);
|
||||
// for(int row = 0; row < maxLength; row++){
|
||||
// tmp = new ArrayList<>();
|
||||
// for(List<String> c: wellsAsStrings){
|
||||
// tmp.add(c.get(row));
|
||||
// }
|
||||
// rows.add(tmp);
|
||||
// }
|
||||
|
||||
//make string out of populations array
|
||||
StringBuilder populationsStringBuilder = new StringBuilder();
|
||||
populationsStringBuilder.append(populations[0].toString());
|
||||
for(int i = 1; i < populations.length; i++){
|
||||
populationsStringBuilder.append(", ");
|
||||
populationsStringBuilder.append(populations[i].toString());
|
||||
}
|
||||
String wellPopulationsString = populationsStringBuilder.toString();
|
||||
|
||||
//set CSV format
|
||||
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
||||
.setCommentMarker('#')
|
||||
.build();
|
||||
@@ -92,7 +94,7 @@ public class PlateFileWriter {
|
||||
printer.printComment("Each row represents one well on the plate.");
|
||||
printer.printComment("Plate size: " + size);
|
||||
printer.printComment("Error rate: " + error);
|
||||
printer.printComment("Concentrations: " + concenString);
|
||||
printer.printComment("Well populations: " + wellPopulationsString);
|
||||
if(isExponential){
|
||||
printer.printComment("Lambda: " + lambda);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jheaps.tree.FibonacciHeap;
|
||||
import org.jheaps.tree.PairingHeap;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
@@ -13,42 +14,22 @@ import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import static java.lang.Float.*;
|
||||
|
||||
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
|
||||
public class Simulator {
|
||||
public class Simulator implements GraphModificationFunctions {
|
||||
private static final int cdr3AlphaIndex = 0;
|
||||
private static final int cdr3BetaIndex = 1;
|
||||
private static final int cdr1AlphaIndex = 2;
|
||||
private static final int cdr1BetaIndex = 3;
|
||||
|
||||
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
|
||||
//In real T cells, CDR1s have about one third the diversity of CDR3s
|
||||
List<Integer> numbersCDR3 = new ArrayList<>();
|
||||
List<Integer> numbersCDR1 = new ArrayList<>();
|
||||
Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||
IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
|
||||
IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
|
||||
Collections.shuffle(numbersCDR3);
|
||||
Collections.shuffle(numbersCDR1);
|
||||
|
||||
//Each cell represented by 4 values
|
||||
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
|
||||
List<Integer[]> distinctCells = new ArrayList<>();
|
||||
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||
Integer tmpCDR3a = numbersCDR3.get(i);
|
||||
Integer tmpCDR3b = numbersCDR3.get(i+1);
|
||||
Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
|
||||
Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
|
||||
Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
|
||||
distinctCells.add(tmp);
|
||||
}
|
||||
return new CellSample(distinctCells, cdr1Freq);
|
||||
}
|
||||
|
||||
//Make the graph needed for matching CDR3s
|
||||
public static GraphWithMapData makeGraph(List<Integer[]> distinctCells, Plate samplePlate, boolean verbose) {
|
||||
public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
List<Integer[]> distinctCells = cellSample.getCells();
|
||||
int[] alphaIndex = {cdr3AlphaIndex};
|
||||
int[] betaIndex = {cdr3BetaIndex};
|
||||
|
||||
int numWells = samplePlate.getSize();
|
||||
|
||||
if(verbose){System.out.println("Making cell maps");}
|
||||
@@ -63,15 +44,11 @@ public class Simulator {
|
||||
if(verbose){System.out.println("All alphas count: " + alphaCount);}
|
||||
int betaCount = allBetas.size();
|
||||
if(verbose){System.out.println("All betas count: " + betaCount);}
|
||||
|
||||
if(verbose){System.out.println("Well maps made");}
|
||||
|
||||
//Remove saturating-occupancy sequences because they have no signal value.
|
||||
//Remove sequences with total occupancy below minimum pair overlap threshold
|
||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||
//if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
|
||||
filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThreshold(allBetas, 1, numWells - 1);
|
||||
if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");}
|
||||
filterByOccupancyThresholds(allAlphas, 2, numWells - 1);
|
||||
filterByOccupancyThresholds(allBetas, 2, numWells - 1);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
int pairableAlphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
||||
@@ -131,10 +108,15 @@ public class Simulator {
|
||||
|
||||
Instant stop = Instant.now();
|
||||
Duration time = Duration.between(start, stop);
|
||||
//return GraphWithMapData object
|
||||
return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount,
|
||||
|
||||
//create GraphWithMapData object
|
||||
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
|
||||
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
|
||||
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
|
||||
//Set source file name in graph to name of sample plate
|
||||
output.setSourceFilename(samplePlate.getFilename());
|
||||
//return GraphWithMapData object
|
||||
return output;
|
||||
}
|
||||
|
||||
//match CDR3s.
|
||||
@@ -142,6 +124,8 @@ public class Simulator {
|
||||
Integer highThreshold, Integer maxOccupancyDifference,
|
||||
Integer minOverlapPercent, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
List<Integer[]> removedEdges = new ArrayList<>();
|
||||
boolean saveEdges = BiGpairSEQ.cacheGraph();
|
||||
int numWells = data.getNumWells();
|
||||
Integer alphaCount = data.getAlphaCount();
|
||||
Integer betaCount = data.getBetaCount();
|
||||
@@ -152,33 +136,51 @@ public class Simulator {
|
||||
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
|
||||
|
||||
//remove weights outside given overlap thresholds
|
||||
//remove edges with weights outside given overlap thresholds, add those to removed edge list
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
|
||||
removedEdges.addAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges));
|
||||
if(verbose){System.out.println("Over- and under-weight edges removed");}
|
||||
|
||||
//Filter by overlap size
|
||||
//remove edges between vertices with too small an overlap size, add those to removed edge list
|
||||
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
|
||||
" percent of vertex occupancy value.");}
|
||||
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
|
||||
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
|
||||
removedEdges.addAll(GraphModificationFunctions.filterByOverlapPercent(graph, alphaWellCounts, betaWellCounts,
|
||||
plateVtoAMap, plateVtoBMap, minOverlapPercent, saveEdges));
|
||||
if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}
|
||||
|
||||
//Filter by relative occupancy
|
||||
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
|
||||
+ maxOccupancyDifference);}
|
||||
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
|
||||
maxOccupancyDifference);
|
||||
removedEdges.addAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts,
|
||||
plateVtoAMap, plateVtoBMap, maxOccupancyDifference, saveEdges));
|
||||
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
||||
"set to 0.0");}
|
||||
"removed");}
|
||||
|
||||
//Find Maximum Weighted Matching
|
||||
//using jheaps library class PairingHeap for improved efficiency
|
||||
if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
//Attempting to use addressable heap to improve performance
|
||||
MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph,
|
||||
MaximumWeightBipartiteMatching maxWeightMatching;
|
||||
//Use correct heap type for priority queue
|
||||
String heapType = BiGpairSEQ.getPriorityQueueHeapType();
|
||||
switch (heapType) {
|
||||
case "PAIRING" -> {
|
||||
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
|
||||
plateVtoAMap.keySet(),
|
||||
plateVtoBMap.keySet(),
|
||||
i -> new PairingHeap(Comparator.naturalOrder()));
|
||||
}
|
||||
case "FIBONACCI" -> {
|
||||
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
|
||||
plateVtoAMap.keySet(),
|
||||
plateVtoBMap.keySet(),
|
||||
i -> new FibonacciHeap(Comparator.naturalOrder()));
|
||||
}
|
||||
default -> {
|
||||
maxWeightMatching = new MaximumWeightBipartiteMatching(graph,
|
||||
plateVtoAMap.keySet(),
|
||||
plateVtoBMap.keySet());
|
||||
}
|
||||
}
|
||||
//get the matching
|
||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
if(verbose){System.out.println("Matching completed");}
|
||||
Instant stop = Instant.now();
|
||||
@@ -233,351 +235,385 @@ public class Simulator {
|
||||
allResults.add(result);
|
||||
}
|
||||
|
||||
//Metadate comments for CSV file
|
||||
//Metadata comments for CSV file
|
||||
String algoType = "LEDA book with heap: " + heapType;
|
||||
int min = Math.min(alphaCount, betaCount);
|
||||
//rate of attempted matching
|
||||
double attemptRate = (double) (trueCount + falseCount) / min;
|
||||
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
||||
//rate of pairing error
|
||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
//get list of well concentrations
|
||||
List<Integer> wellConcentrations = Arrays.asList(data.getWellConcentrations());
|
||||
//make string out of concentrations list
|
||||
StringBuilder concentrationStringBuilder = new StringBuilder();
|
||||
for(Integer i: wellConcentrations){
|
||||
concentrationStringBuilder.append(i.toString());
|
||||
concentrationStringBuilder.append(" ");
|
||||
BigDecimal pairingErrorRateTrunc;
|
||||
if(Double.isFinite(pairingErrorRate)) {
|
||||
pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
}
|
||||
String concentrationString = concentrationStringBuilder.toString();
|
||||
|
||||
List<String> comments = new ArrayList<>();
|
||||
comments.add("Source Sample Plate filename: " + data.getSourceFilename());
|
||||
comments.add("Source Graph and Data filename: " + dataFilename);
|
||||
comments.add("T cell counts in sample plate wells: " + concentrationString);
|
||||
comments.add("Total alphas found: " + alphaCount);
|
||||
comments.add("Total betas found: " + betaCount);
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("Minimum overlap percent: " + minOverlapPercent);
|
||||
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
|
||||
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
||||
comments.add("Correct pairings: " + trueCount);
|
||||
comments.add("Incorrect pairings: " + falseCount);
|
||||
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
|
||||
else{
|
||||
pairingErrorRateTrunc = new BigDecimal(-1, mc);
|
||||
}
|
||||
//get list of well populations
|
||||
Integer[] wellPopulations = data.getWellPopulations();
|
||||
//make string out of populations list
|
||||
StringBuilder populationsStringBuilder = new StringBuilder();
|
||||
populationsStringBuilder.append(wellPopulations[0].toString());
|
||||
for(int i = 1; i < wellPopulations.length; i++){
|
||||
populationsStringBuilder.append(", ");
|
||||
populationsStringBuilder.append(wellPopulations[i].toString());
|
||||
}
|
||||
String wellPopulationsString = populationsStringBuilder.toString();
|
||||
//total simulation time
|
||||
Duration time = Duration.between(start, stop);
|
||||
time = time.plus(data.getTime());
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
|
||||
Map<String, String> metadata = new LinkedHashMap<>();
|
||||
metadata.put("sample plate filename", data.getSourceFilename());
|
||||
metadata.put("graph filename", dataFilename);
|
||||
metadata.put("algorithm type", algoType);
|
||||
metadata.put("well populations", wellPopulationsString);
|
||||
metadata.put("total alphas found", alphaCount.toString());
|
||||
metadata.put("total betas found", betaCount.toString());
|
||||
metadata.put("high overlap threshold", highThreshold.toString());
|
||||
metadata.put("low overlap threshold", lowThreshold.toString());
|
||||
metadata.put("minimum overlap percent", minOverlapPercent.toString());
|
||||
metadata.put("maximum occupancy difference", maxOccupancyDifference.toString());
|
||||
metadata.put("pairing attempt rate", attemptRateTrunc.toString());
|
||||
metadata.put("correct pairing count", Integer.toString(trueCount));
|
||||
metadata.put("incorrect pairing count", Integer.toString(falseCount));
|
||||
metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
|
||||
metadata.put("simulation time (seconds)", nf.format(time.toSeconds()));
|
||||
//create MatchingResult object
|
||||
MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
|
||||
if(verbose){
|
||||
for(String s: comments){
|
||||
for(String s: output.getComments()){
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time);
|
||||
}
|
||||
|
||||
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
|
||||
public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold,
|
||||
Integer highThreshold, MatchingResult priorResult){
|
||||
Instant start = Instant.now();
|
||||
Duration previousTime = priorResult.getTime();
|
||||
Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
|
||||
int numWells = samplePlate.getSize();
|
||||
int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
|
||||
int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
|
||||
|
||||
System.out.println("Making previous match maps");
|
||||
Map<Integer, Integer> cdr3AtoBMap = previousMatches;
|
||||
Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
|
||||
System.out.println("Previous match maps made");
|
||||
|
||||
System.out.println("Making cell maps");
|
||||
Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
||||
Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
||||
System.out.println("Cell maps made");
|
||||
|
||||
System.out.println("Making well maps");
|
||||
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
|
||||
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
|
||||
int CDR3Count = allCDR3s.size();
|
||||
System.out.println("all CDR3s count: " + CDR3Count);
|
||||
int CDR1Count = allCDR1s.size();
|
||||
System.out.println("all CDR1s count: " + CDR1Count);
|
||||
System.out.println("Well maps made");
|
||||
|
||||
System.out.println("Removing unpaired CDR3s from well maps");
|
||||
List<Integer> unpairedCDR3s = new ArrayList<>();
|
||||
for(Integer i: allCDR3s.keySet()){
|
||||
if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
|
||||
unpairedCDR3s.add(i);
|
||||
}
|
||||
if(saveEdges) {
|
||||
//put the removed edges back on the graph
|
||||
System.out.println("Restoring removed edges to graph.");
|
||||
GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
|
||||
}
|
||||
for(Integer i: unpairedCDR3s){
|
||||
allCDR3s.remove(i);
|
||||
}
|
||||
System.out.println("Unpaired CDR3s removed.");
|
||||
System.out.println("Remaining CDR3 count: " + allCDR3s.size());
|
||||
|
||||
System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
|
||||
filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
|
||||
System.out.println("CDR1s removed.");
|
||||
System.out.println("Remaining CDR1 count: " + allCDR1s.size());
|
||||
|
||||
System.out.println("Making vertex maps");
|
||||
|
||||
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
// distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
|
||||
Integer vertexStartValue = 0;
|
||||
//keys are sequential integer vertices, values are CDR3s
|
||||
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
|
||||
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
|
||||
vertexStartValue += plateVtoCDR3Map.size();
|
||||
//keys are sequential integers vertices, values are CDR1s
|
||||
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
|
||||
//keys are CDR3s, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
|
||||
//keys are CDR1s, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
|
||||
System.out.println("Vertex maps made");
|
||||
|
||||
System.out.println("Creating adjacency matrix");
|
||||
//Count how many wells each CDR3 appears in
|
||||
Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
|
||||
//count how many wells each CDR1 appears in
|
||||
Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
|
||||
//add edges, where weights are number of wells the peptides share in common.
|
||||
//If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
|
||||
Map<Integer, Integer> wellNCDR3s = null;
|
||||
Map<Integer, Integer> wellNCDR1s = null;
|
||||
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
|
||||
countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
||||
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
|
||||
System.out.println("Matrix created");
|
||||
|
||||
System.out.println("Creating graph");
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(cdr3Vertices);
|
||||
List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
|
||||
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
System.out.println("Graph created");
|
||||
|
||||
System.out.println("Removing edges outside of weight thresholds");
|
||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
System.out.println("Over- and under-weight edges set to 0.0");
|
||||
|
||||
System.out.println("Finding first maximum weighted matching");
|
||||
MaximumWeightBipartiteMatching firstMaxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
|
||||
System.out.println("First maximum weighted matching found");
|
||||
|
||||
|
||||
//first processing run
|
||||
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
DefaultWeightedEdge e;
|
||||
while(weightIter.hasNext()){
|
||||
e = weightIter.next();
|
||||
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
// continue;
|
||||
// }
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
|
||||
}
|
||||
System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
|
||||
|
||||
System.out.println("Removing edges from first maximum weighted matching");
|
||||
//zero out the edge weights in the matching
|
||||
weightIter = graphMatching.iterator();
|
||||
while(weightIter.hasNext()){
|
||||
graph.removeEdge(weightIter.next());
|
||||
}
|
||||
System.out.println("Edges removed");
|
||||
|
||||
//Generate a new matching
|
||||
System.out.println("Finding second maximum weighted matching");
|
||||
MaximumWeightBipartiteMatching secondMaxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
||||
graphMatching = secondMaxWeightMatching.getMatching();
|
||||
System.out.println("Second maximum weighted matching found");
|
||||
|
||||
|
||||
//second processing run
|
||||
Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
|
||||
weightIter = graphMatching.iterator();
|
||||
while(weightIter.hasNext()){
|
||||
e = weightIter.next();
|
||||
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
// continue;
|
||||
// }
|
||||
Integer source = graph.getEdgeSource(e);
|
||||
// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
|
||||
// continue;
|
||||
// }
|
||||
Integer target = graph.getEdgeTarget(e);
|
||||
secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
|
||||
}
|
||||
System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
|
||||
|
||||
System.out.println("Mapping first pass CDR3 alpha/beta pairs");
|
||||
//get linked map for first matching attempt
|
||||
Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
|
||||
for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
|
||||
if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
|
||||
continue;
|
||||
}
|
||||
Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
|
||||
if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
|
||||
continue;
|
||||
}
|
||||
firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
|
||||
firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
|
||||
}
|
||||
System.out.println("First pass CDR3 alpha/beta pairs mapped");
|
||||
|
||||
System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
|
||||
System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
|
||||
//Look for matches that simply swapped already-matched alpha and beta CDR3s
|
||||
Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
|
||||
for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
|
||||
if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
|
||||
continue;
|
||||
}
|
||||
Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
|
||||
if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
|
||||
continue;
|
||||
}
|
||||
if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
|
||||
if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
|
||||
dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
|
||||
dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
|
||||
|
||||
Instant stop = Instant.now();
|
||||
//results for first map
|
||||
System.out.println("RESULTS FOR FIRST PASS MATCHING");
|
||||
List<List<String>> allResults = new ArrayList<>();
|
||||
Integer trueCount = 0;
|
||||
Iterator iter = firstMatchesMap.keySet().iterator();
|
||||
|
||||
while(iter.hasNext()){
|
||||
Boolean proven = false;
|
||||
List<String> tmp = new ArrayList<>();
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
proven = true;
|
||||
}
|
||||
}
|
||||
else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
proven = true;
|
||||
}
|
||||
}
|
||||
tmp.add(proven.toString());
|
||||
allResults.add(tmp);
|
||||
if(proven){
|
||||
trueCount++;
|
||||
}
|
||||
}
|
||||
|
||||
List<String> comments = new ArrayList<>();
|
||||
comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
||||
comments.add("Previous pairs found: " + previousMatches.size());
|
||||
comments.add("CDR1 matches attempted: " + allResults.size());
|
||||
double attemptRate = (double) allResults.size() / previousMatches.size();
|
||||
comments.add("Matching attempt rate: " + attemptRate);
|
||||
comments.add("Number of correct matches: " + trueCount);
|
||||
double correctRate = (double) trueCount / allResults.size();
|
||||
comments.add("Correct matching rate: " + correctRate);
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
Duration time = Duration.between(start, stop);
|
||||
time = time.plus(previousTime);
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
|
||||
|
||||
|
||||
List<String> headers = new ArrayList<>();
|
||||
headers.add("CDR3 alpha");
|
||||
headers.add("CDR3 beta");
|
||||
headers.add("first matched CDR1");
|
||||
headers.add("second matched CDR1");
|
||||
headers.add("Correct match?");
|
||||
|
||||
MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
|
||||
comments, headers, allResults, dualMatchesMap, time);
|
||||
|
||||
//results for dual map
|
||||
System.out.println("RESULTS FOR SECOND PASS MATCHING");
|
||||
allResults = new ArrayList<>();
|
||||
trueCount = 0;
|
||||
iter = dualMatchesMap.keySet().iterator();
|
||||
while(iter.hasNext()){
|
||||
Boolean proven = false;
|
||||
List<String> tmp = new ArrayList<>();
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(iter.next().toString());
|
||||
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
proven = true;
|
||||
}
|
||||
}
|
||||
else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
proven = true;
|
||||
}
|
||||
}
|
||||
tmp.add(proven.toString());
|
||||
allResults.add(tmp);
|
||||
if(proven){
|
||||
trueCount++;
|
||||
}
|
||||
}
|
||||
|
||||
comments = new ArrayList<>();
|
||||
comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
||||
comments.add("Previous pairs found: " + previousMatches.size());
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("CDR1 matches attempted: " + allResults.size());
|
||||
attemptRate = (double) allResults.size() / previousMatches.size();
|
||||
comments.add("Matching attempt rate: " + attemptRate);
|
||||
comments.add("Number of correct matches: " + trueCount);
|
||||
correctRate = (double) trueCount / allResults.size();
|
||||
comments.add("Correct matching rate: " + correctRate);
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
|
||||
System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
|
||||
allResults, dualMatchesMap, time);
|
||||
MatchingResult[] output = {firstTest, dualTest};
|
||||
//return MatchingResult object
|
||||
return output;
|
||||
}
|
||||
|
||||
//Commented out CDR1 matching until it's time to re-implement it
|
||||
// //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
|
||||
// public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
|
||||
// Plate samplePlate, Integer lowThreshold,
|
||||
// Integer highThreshold, MatchingResult priorResult){
|
||||
// Instant start = Instant.now();
|
||||
// Duration previousTime = priorResult.getTime();
|
||||
// Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
|
||||
// int numWells = samplePlate.getSize();
|
||||
// int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
|
||||
// int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
|
||||
//
|
||||
// System.out.println("Making previous match maps");
|
||||
// Map<Integer, Integer> cdr3AtoBMap = previousMatches;
|
||||
// Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
|
||||
// System.out.println("Previous match maps made");
|
||||
//
|
||||
// System.out.println("Making cell maps");
|
||||
// Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
||||
// Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
||||
// System.out.println("Cell maps made");
|
||||
//
|
||||
// System.out.println("Making well maps");
|
||||
// Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
|
||||
// Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
|
||||
// int CDR3Count = allCDR3s.size();
|
||||
// System.out.println("all CDR3s count: " + CDR3Count);
|
||||
// int CDR1Count = allCDR1s.size();
|
||||
// System.out.println("all CDR1s count: " + CDR1Count);
|
||||
// System.out.println("Well maps made");
|
||||
//
|
||||
// System.out.println("Removing unpaired CDR3s from well maps");
|
||||
// List<Integer> unpairedCDR3s = new ArrayList<>();
|
||||
// for(Integer i: allCDR3s.keySet()){
|
||||
// if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
|
||||
// unpairedCDR3s.add(i);
|
||||
// }
|
||||
// }
|
||||
// for(Integer i: unpairedCDR3s){
|
||||
// allCDR3s.remove(i);
|
||||
// }
|
||||
// System.out.println("Unpaired CDR3s removed.");
|
||||
// System.out.println("Remaining CDR3 count: " + allCDR3s.size());
|
||||
//
|
||||
// System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
|
||||
// filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
|
||||
// System.out.println("CDR1s removed.");
|
||||
// System.out.println("Remaining CDR1 count: " + allCDR1s.size());
|
||||
//
|
||||
// System.out.println("Making vertex maps");
|
||||
//
|
||||
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
// // distinct numbers associated with them. Since I'm using a 2D array, that means
|
||||
// // distinct indices between the rows and columns. vertexStartValue lets me track where I switch
|
||||
// // from numbering rows to columns, so I can assign unique numbers to every vertex, and then
|
||||
// // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
|
||||
// Integer vertexStartValue = 0;
|
||||
// //keys are sequential integer vertices, values are CDR3s
|
||||
// Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
|
||||
// //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
|
||||
// vertexStartValue += plateVtoCDR3Map.size();
|
||||
// //keys are sequential integers vertices, values are CDR1s
|
||||
// Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
|
||||
// //keys are CDR3s, values are sequential integer vertices from previous map
|
||||
// Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
|
||||
// //keys are CDR1s, values are sequential integer vertices from previous map
|
||||
// Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
|
||||
// System.out.println("Vertex maps made");
|
||||
//
|
||||
// System.out.println("Creating adjacency matrix");
|
||||
// //Count how many wells each CDR3 appears in
|
||||
// Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
|
||||
// //count how many wells each CDR1 appears in
|
||||
// Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
|
||||
// //add edges, where weights are number of wells the peptides share in common.
|
||||
// //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
|
||||
// Map<Integer, Integer> wellNCDR3s = null;
|
||||
// Map<Integer, Integer> wellNCDR1s = null;
|
||||
// double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
|
||||
// countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
||||
// cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
|
||||
// System.out.println("Matrix created");
|
||||
//
|
||||
// System.out.println("Creating graph");
|
||||
// SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
||||
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
//
|
||||
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
// List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
// graphGenerator.first(cdr3Vertices);
|
||||
// List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
|
||||
// graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
||||
// graphGenerator.weights(weights);
|
||||
// graphGenerator.generateGraph(graph);
|
||||
// System.out.println("Graph created");
|
||||
//
|
||||
// System.out.println("Removing edges outside of weight thresholds");
|
||||
// filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
// System.out.println("Over- and under-weight edges set to 0.0");
|
||||
//
|
||||
// System.out.println("Finding first maximum weighted matching");
|
||||
// MaximumWeightBipartiteMatching firstMaxWeightMatching =
|
||||
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
||||
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
|
||||
// System.out.println("First maximum weighted matching found");
|
||||
//
|
||||
//
|
||||
// //first processing run
|
||||
// Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
||||
// Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
// DefaultWeightedEdge e;
|
||||
// while(weightIter.hasNext()){
|
||||
// e = weightIter.next();
|
||||
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
//// continue;
|
||||
//// }
|
||||
// Integer source = graph.getEdgeSource(e);
|
||||
// Integer target = graph.getEdgeTarget(e);
|
||||
// firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
|
||||
// }
|
||||
// System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
|
||||
//
|
||||
// System.out.println("Removing edges from first maximum weighted matching");
|
||||
// //zero out the edge weights in the matching
|
||||
// weightIter = graphMatching.iterator();
|
||||
// while(weightIter.hasNext()){
|
||||
// graph.removeEdge(weightIter.next());
|
||||
// }
|
||||
// System.out.println("Edges removed");
|
||||
//
|
||||
// //Generate a new matching
|
||||
// System.out.println("Finding second maximum weighted matching");
|
||||
// MaximumWeightBipartiteMatching secondMaxWeightMatching =
|
||||
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
|
||||
// graphMatching = secondMaxWeightMatching.getMatching();
|
||||
// System.out.println("Second maximum weighted matching found");
|
||||
//
|
||||
//
|
||||
// //second processing run
|
||||
// Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
|
||||
// weightIter = graphMatching.iterator();
|
||||
// while(weightIter.hasNext()){
|
||||
// e = weightIter.next();
|
||||
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
//// continue;
|
||||
//// }
|
||||
// Integer source = graph.getEdgeSource(e);
|
||||
//// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
|
||||
//// continue;
|
||||
//// }
|
||||
// Integer target = graph.getEdgeTarget(e);
|
||||
// secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
|
||||
// }
|
||||
// System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
|
||||
//
|
||||
// System.out.println("Mapping first pass CDR3 alpha/beta pairs");
|
||||
// //get linked map for first matching attempt
|
||||
// Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
|
||||
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
|
||||
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
|
||||
// continue;
|
||||
// }
|
||||
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
|
||||
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
|
||||
// continue;
|
||||
// }
|
||||
// firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
|
||||
// firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
|
||||
// }
|
||||
// System.out.println("First pass CDR3 alpha/beta pairs mapped");
|
||||
//
|
||||
// System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
|
||||
// System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
|
||||
// //Look for matches that simply swapped already-matched alpha and beta CDR3s
|
||||
// Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
|
||||
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
|
||||
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
|
||||
// continue;
|
||||
// }
|
||||
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
|
||||
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
|
||||
// continue;
|
||||
// }
|
||||
// if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
|
||||
// if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
|
||||
// dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
|
||||
// dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
|
||||
//
|
||||
// Instant stop = Instant.now();
|
||||
// //results for first map
|
||||
// System.out.println("RESULTS FOR FIRST PASS MATCHING");
|
||||
// List<List<String>> allResults = new ArrayList<>();
|
||||
// Integer trueCount = 0;
|
||||
// Iterator iter = firstMatchesMap.keySet().iterator();
|
||||
//
|
||||
// while(iter.hasNext()){
|
||||
// Boolean proven = false;
|
||||
// List<String> tmp = new ArrayList<>();
|
||||
// tmp.add(iter.next().toString());
|
||||
// tmp.add(iter.next().toString());
|
||||
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
// proven = true;
|
||||
// }
|
||||
// }
|
||||
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
// proven = true;
|
||||
// }
|
||||
// }
|
||||
// tmp.add(proven.toString());
|
||||
// allResults.add(tmp);
|
||||
// if(proven){
|
||||
// trueCount++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// List<String> comments = new ArrayList<>();
|
||||
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
||||
// comments.add("Previous pairs found: " + previousMatches.size());
|
||||
// comments.add("CDR1 matches attempted: " + allResults.size());
|
||||
// double attemptRate = (double) allResults.size() / previousMatches.size();
|
||||
// comments.add("Matching attempt rate: " + attemptRate);
|
||||
// comments.add("Number of correct matches: " + trueCount);
|
||||
// double correctRate = (double) trueCount / allResults.size();
|
||||
// comments.add("Correct matching rate: " + correctRate);
|
||||
// NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
// Duration time = Duration.between(start, stop);
|
||||
// time = time.plus(previousTime);
|
||||
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
// for(String s: comments){
|
||||
// System.out.println(s);
|
||||
// }
|
||||
//
|
||||
//
|
||||
//
|
||||
// List<String> headers = new ArrayList<>();
|
||||
// headers.add("CDR3 alpha");
|
||||
// headers.add("CDR3 beta");
|
||||
// headers.add("first matched CDR1");
|
||||
// headers.add("second matched CDR1");
|
||||
// headers.add("Correct match?");
|
||||
//
|
||||
// MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
|
||||
// comments, headers, allResults, dualMatchesMap, time);
|
||||
//
|
||||
// //results for dual map
|
||||
// System.out.println("RESULTS FOR SECOND PASS MATCHING");
|
||||
// allResults = new ArrayList<>();
|
||||
// trueCount = 0;
|
||||
// iter = dualMatchesMap.keySet().iterator();
|
||||
// while(iter.hasNext()){
|
||||
// Boolean proven = false;
|
||||
// List<String> tmp = new ArrayList<>();
|
||||
// tmp.add(iter.next().toString());
|
||||
// tmp.add(iter.next().toString());
|
||||
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
|
||||
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
|
||||
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
// proven = true;
|
||||
// }
|
||||
// }
|
||||
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
|
||||
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
|
||||
// proven = true;
|
||||
// }
|
||||
// }
|
||||
// tmp.add(proven.toString());
|
||||
// allResults.add(tmp);
|
||||
// if(proven){
|
||||
// trueCount++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// comments = new ArrayList<>();
|
||||
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
|
||||
// comments.add("Previous pairs found: " + previousMatches.size());
|
||||
// comments.add("High overlap threshold: " + highThreshold);
|
||||
// comments.add("Low overlap threshold: " + lowThreshold);
|
||||
// comments.add("CDR1 matches attempted: " + allResults.size());
|
||||
// attemptRate = (double) allResults.size() / previousMatches.size();
|
||||
// comments.add("Matching attempt rate: " + attemptRate);
|
||||
// comments.add("Number of correct matches: " + trueCount);
|
||||
// correctRate = (double) trueCount / allResults.size();
|
||||
// comments.add("Correct matching rate: " + correctRate);
|
||||
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
//
|
||||
// for(String s: comments){
|
||||
// System.out.println(s);
|
||||
// }
|
||||
//
|
||||
// System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
// MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
|
||||
// allResults, dualMatchesMap, time);
|
||||
// MatchingResult[] output = {firstTest, dualTest};
|
||||
// return output;
|
||||
// }
|
||||
|
||||
//Remove sequences based on occupancy
|
||||
public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
|
||||
List<Integer> noise = new ArrayList<>();
|
||||
for(Integer k: wellMap.keySet()){
|
||||
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
|
||||
noise.add(k);
|
||||
}
|
||||
}
|
||||
for(Integer k: noise) {
|
||||
wellMap.remove(k);
|
||||
}
|
||||
}
|
||||
|
||||
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
||||
//fills weights in the given 2D array
|
||||
@@ -621,62 +657,6 @@ public class Simulator {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void filterByOccupancyThreshold(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
int low, int high) {
|
||||
for(DefaultWeightedEdge e: graph.edgeSet()){
|
||||
if ((graph.getEdgeWeight(e) > high) || (graph.getEdgeWeight(e) < low)){
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
private static void filterByOccupancyThreshold(Map<Integer, Integer> wellMap, int low, int high){
|
||||
List<Integer> noise = new ArrayList<>();
|
||||
for(Integer k: wellMap.keySet()){
|
||||
if((wellMap.get(k) > high) || (wellMap.get(k) < low)){
|
||||
noise.add(k);
|
||||
}
|
||||
}
|
||||
for(Integer k: noise) {
|
||||
wellMap.remove(k);
|
||||
}
|
||||
}
|
||||
|
||||
//Remove edges for pairs with large occupancy discrepancy
|
||||
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer maxOccupancyDifference) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
//Adjust this to something cleverer later
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer minOverlapPercent) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
|
||||
int valueSequenceIndex){
|
||||
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
|
||||
@@ -688,7 +668,7 @@ public class Simulator {
|
||||
|
||||
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
|
||||
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
|
||||
Integer index = startValue;
|
||||
Integer index = startValue; //is this necessary? I don't think I use this.
|
||||
for (Integer k: sequences.keySet()) {
|
||||
map.put(index, k);
|
||||
index++;
|
||||
|
||||
@@ -1,694 +0,0 @@
|
||||
import org.apache.commons.cli.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
import java.util.InputMismatchException;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
//
|
||||
public class UserInterface {
|
||||
|
||||
final static Scanner sc = new Scanner(System.in);
|
||||
static int input;
|
||||
static boolean quit = false;
|
||||
|
||||
public static void main(String[] args) {
|
||||
//for now, commenting out all the command line argument stuff.
|
||||
// Refactoring to output files of graphs, so it would all need to change anyway.
|
||||
|
||||
// if(args.length != 0){
|
||||
// //These command line options are a big mess
|
||||
// //Really, I don't think command line tools are expected to work in this many different modes
|
||||
// //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
|
||||
// //should be three separate programs.
|
||||
// //There might be a way to do it with option parameters?
|
||||
//
|
||||
// Options mainOptions = new Options();
|
||||
// Option makeCells = Option.builder("cells")
|
||||
// .longOpt("make-cells")
|
||||
// .desc("Makes a file of distinct cells")
|
||||
// .build();
|
||||
// Option makePlate = Option.builder("plates")
|
||||
// .longOpt("make-plates")
|
||||
// .desc("Makes a sample plate file")
|
||||
// .build();
|
||||
// Option matchCDR3 = Option.builder("match")
|
||||
// .longOpt("match-cdr3")
|
||||
// .desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
|
||||
// .build();
|
||||
// OptionGroup mainGroup = new OptionGroup();
|
||||
// mainGroup.addOption(makeCells);
|
||||
// mainGroup.addOption(makePlate);
|
||||
// mainGroup.addOption(matchCDR3);
|
||||
// mainGroup.setRequired(true);
|
||||
// mainOptions.addOptionGroup(mainGroup);
|
||||
//
|
||||
// //Reuse clones of this for other options groups, rather than making it lots of times
|
||||
// Option outputFile = Option.builder("o")
|
||||
// .longOpt("output-file")
|
||||
// .hasArg()
|
||||
// .argName("filename")
|
||||
// .desc("Name of output file")
|
||||
// .build();
|
||||
// mainOptions.addOption(outputFile);
|
||||
//
|
||||
// //Options cellOptions = new Options();
|
||||
// Option numCells = Option.builder("nc")
|
||||
// .longOpt("num-cells")
|
||||
// .desc("The number of distinct cells to generate")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .build();
|
||||
// mainOptions.addOption(numCells);
|
||||
// Option cdr1Freq = Option.builder("d")
|
||||
// .longOpt("peptide-diversity-factor")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .desc("Number of distinct CDR3s for every CDR1")
|
||||
// .build();
|
||||
// mainOptions.addOption(cdr1Freq);
|
||||
// //Option cellOutput = (Option) outputFile.clone();
|
||||
// //cellOutput.setRequired(true);
|
||||
// //mainOptions.addOption(cellOutput);
|
||||
//
|
||||
// //Options plateOptions = new Options();
|
||||
// Option inputCells = Option.builder("c")
|
||||
// .longOpt("cell-file")
|
||||
// .hasArg()
|
||||
// .argName("file")
|
||||
// .desc("The cell sample file used for filling wells")
|
||||
// .build();
|
||||
// mainOptions.addOption(inputCells);
|
||||
// Option numWells = Option.builder("w")
|
||||
// .longOpt("num-wells")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .desc("The number of wells on each plate")
|
||||
// .build();
|
||||
// mainOptions.addOption(numWells);
|
||||
// Option numPlates = Option.builder("np")
|
||||
// .longOpt("num-plates")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .desc("The number of plate files to output")
|
||||
// .build();
|
||||
// mainOptions.addOption(numPlates);
|
||||
// //Option plateOutput = (Option) outputFile.clone();
|
||||
// //plateOutput.setRequired(true);
|
||||
// //plateOutput.setDescription("Prefix for plate output filenames");
|
||||
// //mainOptions.addOption(plateOutput);
|
||||
// Option plateErr = Option.builder("err")
|
||||
// .longOpt("drop-out-rate")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .desc("Well drop-out rate. (Probability between 0 and 1)")
|
||||
// .build();
|
||||
// mainOptions.addOption(plateErr);
|
||||
// Option plateConcentrations = Option.builder("t")
|
||||
// .longOpt("t-cells-per-well")
|
||||
// .hasArgs()
|
||||
// .argName("number 1, number 2, ...")
|
||||
// .desc("Number of T cells per well for each plate section")
|
||||
// .build();
|
||||
// mainOptions.addOption(plateConcentrations);
|
||||
//
|
||||
////different distributions, mutually exclusive
|
||||
// OptionGroup plateDistributions = new OptionGroup();
|
||||
// Option plateExp = Option.builder("exponential")
|
||||
// .desc("Sample from distinct cells with exponential frequency distribution")
|
||||
// .build();
|
||||
// plateDistributions.addOption(plateExp);
|
||||
// Option plateGaussian = Option.builder("gaussian")
|
||||
// .desc("Sample from distinct cells with gaussain frequency distribution")
|
||||
// .build();
|
||||
// plateDistributions.addOption(plateGaussian);
|
||||
// Option platePoisson = Option.builder("poisson")
|
||||
// .desc("Sample from distinct cells with poisson frequency distribution")
|
||||
// .build();
|
||||
// plateDistributions.addOption(platePoisson);
|
||||
// mainOptions.addOptionGroup(plateDistributions);
|
||||
//
|
||||
// Option plateStdDev = Option.builder("stddev")
|
||||
// .desc("Standard deviation for gaussian distribution")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .build();
|
||||
// mainOptions.addOption(plateStdDev);
|
||||
//
|
||||
// Option plateLambda = Option.builder("lambda")
|
||||
// .desc("Lambda for exponential distribution")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .build();
|
||||
// mainOptions.addOption(plateLambda);
|
||||
//
|
||||
//
|
||||
//
|
||||
////
|
||||
//// String cellFile, String filename, Double stdDev,
|
||||
//// Integer numWells, Integer numSections,
|
||||
//// Integer[] concentrations, Double dropOutRate
|
||||
////
|
||||
//
|
||||
// //Options matchOptions = new Options();
|
||||
// inputCells.setDescription("The cell sample file to be used for matching.");
|
||||
// mainOptions.addOption(inputCells);
|
||||
// Option lowThresh = Option.builder("low")
|
||||
// .longOpt("low-threshold")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .desc("Sets the minimum occupancy overlap to attempt matching")
|
||||
// .build();
|
||||
// mainOptions.addOption(lowThresh);
|
||||
// Option highThresh = Option.builder("high")
|
||||
// .longOpt("high-threshold")
|
||||
// .hasArg()
|
||||
// .argName("number")
|
||||
// .desc("Sets the maximum occupancy overlap to attempt matching")
|
||||
// .build();
|
||||
// mainOptions.addOption(highThresh);
|
||||
// Option occDiff = Option.builder("occdiff")
|
||||
// .longOpt("occupancy-difference")
|
||||
// .hasArg()
|
||||
// .argName("Number")
|
||||
// .desc("Maximum difference in alpha/beta occupancy to attempt matching")
|
||||
// .build();
|
||||
// mainOptions.addOption(occDiff);
|
||||
// Option overlapPer = Option.builder("ovper")
|
||||
// .longOpt("overlap-percent")
|
||||
// .hasArg()
|
||||
// .argName("Percent")
|
||||
// .desc("Minimum overlap percent to attempt matching (0 -100)")
|
||||
// .build();
|
||||
// mainOptions.addOption(overlapPer);
|
||||
// Option inputPlates = Option.builder("p")
|
||||
// .longOpt("plate-files")
|
||||
// .hasArgs()
|
||||
// .desc("Plate files to match")
|
||||
// .build();
|
||||
// mainOptions.addOption(inputPlates);
|
||||
//
|
||||
//
|
||||
//
|
||||
// CommandLineParser parser = new DefaultParser();
|
||||
// try {
|
||||
// CommandLine line = parser.parse(mainOptions, args);
|
||||
// if(line.hasOption("match")){
|
||||
// //line = parser.parse(mainOptions, args);
|
||||
// String cellFile = line.getOptionValue("c");
|
||||
// Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
|
||||
// Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
|
||||
// Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff));
|
||||
// Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer));
|
||||
// for(String plate: line.getOptionValues("p")) {
|
||||
// matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
|
||||
// }
|
||||
// }
|
||||
// else if(line.hasOption("cells")){
|
||||
// //line = parser.parse(mainOptions, args);
|
||||
// String filename = line.getOptionValue("o");
|
||||
// Integer numDistCells = Integer.valueOf(line.getOptionValue("nc"));
|
||||
// Integer freq = Integer.valueOf(line.getOptionValue("d"));
|
||||
// makeCells(filename, numDistCells, freq);
|
||||
// }
|
||||
// else if(line.hasOption("plates")){
|
||||
// //line = parser.parse(mainOptions, args);
|
||||
// String cellFile = line.getOptionValue("c");
|
||||
// String filenamePrefix = line.getOptionValue("o");
|
||||
// Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
|
||||
// Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
|
||||
// String[] concentrationsToUseString = line.getOptionValues("t");
|
||||
// Integer numSections = concentrationsToUseString.length;
|
||||
//
|
||||
// Integer[] concentrationsToUse = new Integer[numSections];
|
||||
// for(int i = 0; i <numSections; i++){
|
||||
// concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]);
|
||||
// }
|
||||
// Double dropOutRate = Double.valueOf(line.getOptionValue("err"));
|
||||
// if(line.hasOption("exponential")){
|
||||
// Double lambda = Double.valueOf(line.getOptionValue("lambda"));
|
||||
// for(int i = 1; i <= numPlatesToMake; i++){
|
||||
// makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
|
||||
// concentrationsToUse,dropOutRate);
|
||||
// }
|
||||
// }
|
||||
// else if(line.hasOption("gaussian")){
|
||||
// Double stdDev = Double.valueOf(line.getOptionValue("std-dev"));
|
||||
// for(int i = 1; i <= numPlatesToMake; i++){
|
||||
// makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
|
||||
// concentrationsToUse,dropOutRate);
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// else if(line.hasOption("poisson")){
|
||||
// for(int i = 1; i <= numPlatesToMake; i++){
|
||||
// makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
|
||||
// concentrationsToUse,dropOutRate);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// catch (ParseException exp) {
|
||||
// System.err.println("Parsing failed. Reason: " + exp.getMessage());
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
while (!quit) {
|
||||
System.out.println();
|
||||
System.out.println("--------BiGPairSEQ SIMULATOR--------");
|
||||
System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
|
||||
System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
|
||||
System.out.println("------------------------------------");
|
||||
System.out.println("Please select an option:");
|
||||
System.out.println("1) Generate a population of distinct cells");
|
||||
System.out.println("2) Generate a sample plate of T cells");
|
||||
System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
|
||||
System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
|
||||
//Need to re-do the CDR3/CDR1 matching to correspond to new pattern
|
||||
//System.out.println("5) Generate CDR3/CDR1 occupancy graph");
|
||||
//System.out.println("6) Simulate CDR3/CDR1 T cell matching");
|
||||
System.out.println("9) About/Acknowledgments");
|
||||
System.out.println("0) Exit");
|
||||
try {
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> makeCells();
|
||||
case 2 -> makePlate();
|
||||
case 3 -> makeCDR3Graph();
|
||||
case 4 -> matchCDR3s();
|
||||
//case 6 -> matchCellsCDR1();
|
||||
case 9 -> acknowledge();
|
||||
case 0 -> quit = true;
|
||||
default -> throw new InputMismatchException("Invalid input.");
|
||||
}
|
||||
} catch (InputMismatchException | IOException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
}
|
||||
sc.close();
|
||||
// }
|
||||
}
|
||||
|
||||
private static void makeCells() {
|
||||
String filename = null;
|
||||
Integer numCells = 0;
|
||||
Integer cdr1Freq = 1;
|
||||
try {
|
||||
System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
|
||||
"* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
|
||||
"* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
|
||||
System.out.println("\nThe cells will be written to a CSV file.");
|
||||
System.out.print("Please enter a file name: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
|
||||
System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
|
||||
cdr1Freq = sc.nextInt();
|
||||
System.out.print("\nPlease enter the number of T-cells to generate: ");
|
||||
numCells = sc.nextInt();
|
||||
if(numCells <= 0){
|
||||
throw new InputMismatchException("Number of cells must be a positive integer.");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
|
||||
assert filename != null;
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
System.gc();
|
||||
}
|
||||
|
||||
// //for calling from command line
|
||||
// private static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
|
||||
// CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
|
||||
// CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
// writer.writeCellsToFile();
|
||||
// }
|
||||
//
|
||||
// private static void makePlateExp(String cellFile, String filename, Double lambda,
|
||||
// Integer numWells, Integer[] concentrations, Double dropOutRate){
|
||||
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||
// samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
|
||||
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||
// writer.writePlateFile();
|
||||
// }
|
||||
//
|
||||
// private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
|
||||
// Integer[] concentrations, Double dropOutRate){
|
||||
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
// Double stdDev = Math.sqrt(cellReader.getCellCount());
|
||||
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||
// samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
|
||||
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||
// writer.writePlateFile();
|
||||
// }
|
||||
//
|
||||
// private static void makePlate(String cellFile, String filename, Double stdDev,
|
||||
// Integer numWells, Integer[] concentrations, Double dropOutRate){
|
||||
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||
// samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
|
||||
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||
// writer.writePlateFile();
|
||||
// }
|
||||
|
||||
//Output a CSV of sample plate
|
||||
private static void makePlate() {
|
||||
String cellFile = null;
|
||||
String filename = null;
|
||||
Double stdDev = 0.0;
|
||||
Integer numWells = 0;
|
||||
Integer numSections;
|
||||
Integer[] concentrations = {1};
|
||||
Double dropOutRate = 0.0;
|
||||
boolean poisson = false;
|
||||
boolean exponential = false;
|
||||
double lambda = 1.5;
|
||||
try {
|
||||
System.out.println("\nSimulated sample plates consist of:");
|
||||
System.out.println("* a number of wells");
|
||||
System.out.println(" * separated into one or more sections");
|
||||
System.out.println(" * each of which has a set quantity of cells per well");
|
||||
System.out.println(" * selected from a statistical distribution of distinct cells");
|
||||
System.out.println(" * with a set dropout rate for individual sequences within a cell");
|
||||
System.out.println("\nMaking a sample plate requires a population of distinct cells");
|
||||
System.out.print("Please enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.println("\nThe sample plate will be written to a CSV file");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nSelect T-cell frequency distribution function");
|
||||
System.out.println("1) Poisson");
|
||||
System.out.println("2) Gaussian");
|
||||
System.out.println("3) Exponential");
|
||||
System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
|
||||
System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
System.out.print("Enter selection value: ");
|
||||
input = sc.nextInt();
|
||||
switch (input) {
|
||||
case 1 -> poisson = true;
|
||||
case 2 -> {
|
||||
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
|
||||
System.out.println("(Note: wider distributions are more memory intensive to match)");
|
||||
stdDev = sc.nextDouble();
|
||||
if (stdDev <= 0.0) {
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
}
|
||||
case 3 -> {
|
||||
exponential = true;
|
||||
System.out.println("Please enter lambda value for exponential distribution.");
|
||||
lambda = sc.nextDouble();
|
||||
if (lambda <= 0.0) {
|
||||
throw new InputMismatchException("Value must be positive.");
|
||||
}
|
||||
}
|
||||
default -> {
|
||||
System.out.println("Invalid input. Defaulting to exponential.");
|
||||
exponential = true;
|
||||
}
|
||||
}
|
||||
System.out.print("\nNumber of wells on plate: ");
|
||||
numWells = sc.nextInt();
|
||||
if(numWells < 1){
|
||||
throw new InputMismatchException("No wells on plate");
|
||||
}
|
||||
System.out.println("\nThe plate can be evenly sectioned to allow multiple concentrations of T-cells/well");
|
||||
System.out.println("How many sections would you like to make (minimum 1)?");
|
||||
numSections = sc.nextInt();
|
||||
if(numSections < 1) {
|
||||
throw new InputMismatchException("Too few sections.");
|
||||
}
|
||||
else if (numSections > numWells) {
|
||||
throw new InputMismatchException("Cannot have more sections than wells.");
|
||||
}
|
||||
int i = 1;
|
||||
concentrations = new Integer[numSections];
|
||||
while(numSections > 0) {
|
||||
System.out.print("Enter number of T-cells per well in section " + i +": ");
|
||||
concentrations[i - 1] = sc.nextInt();
|
||||
i++;
|
||||
numSections--;
|
||||
}
|
||||
System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
|
||||
System.out.print("Enter well dropout rate (0.0 to 1.0): ");
|
||||
dropOutRate = sc.nextDouble();
|
||||
if(dropOutRate < 0.0 || dropOutRate > 1.0) {
|
||||
throw new InputMismatchException("The well dropout rate must be in the range [0.0, 1.0]");
|
||||
}
|
||||
}catch(InputMismatchException ex){
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||
assert cellFile != null;
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
if(exponential){
|
||||
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||
samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
|
||||
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||
writer.writePlateFile();
|
||||
}
|
||||
else {
|
||||
if (poisson) {
|
||||
stdDev = Math.sqrt(cellReader.getCellCount()); //gaussian with square root of elements approximates poisson
|
||||
}
|
||||
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
|
||||
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
|
||||
assert filename != null;
|
||||
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
|
||||
System.out.println("Writing Sample Plate to file");
|
||||
writer.writePlateFile();
|
||||
System.out.println("Sample Plate written to file: " + filename);
|
||||
System.gc();
|
||||
}
|
||||
}
|
||||
|
||||
//Output serialized binary of GraphAndMapData object
|
||||
private static void makeCDR3Graph() {
|
||||
String filename = null;
|
||||
String cellFile = null;
|
||||
String plateFile = null;
|
||||
|
||||
try {
|
||||
String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data ";
|
||||
str = str.concat("\nrequires a cell sample file and a sample plate file.");
|
||||
System.out.println(str);
|
||||
System.out.print("\nPlease enter name of an existing cell sample file: ");
|
||||
cellFile = sc.next();
|
||||
System.out.print("\nPlease enter name of an existing sample plate file: ");
|
||||
plateFile = sc.next();
|
||||
System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
System.out.println("Reading Cell Sample file: " + cellFile);
|
||||
assert cellFile != null;
|
||||
CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
System.out.println("Reading Sample Plate file: " + plateFile);
|
||||
assert plateFile != null;
|
||||
PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||
if (cellReader.getCells().size() == 0){
|
||||
System.out.println("No cell sample found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else if(plate.getWells().size() == 0 || plate.getConcentrations().length == 0){
|
||||
System.out.println("No sample plate found.");
|
||||
System.out.println("Returning to main menu.");
|
||||
}
|
||||
else{
|
||||
List<Integer[]> cells = cellReader.getCells();
|
||||
GraphWithMapData data = Simulator.makeGraph(cells, plate, true);
|
||||
assert filename != null;
|
||||
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
|
||||
System.out.println("Writing graph and occupancy data to file. This may take some time.");
|
||||
System.out.println("File I/O time is not included in results.");
|
||||
dataWriter.writeDataToFile();
|
||||
System.out.println("Graph and Data file written to: " + filename);
|
||||
System.gc();
|
||||
}
|
||||
}
|
||||
|
||||
//Simulate matching and output CSV file of results
|
||||
private static void matchCDR3s() throws IOException {
|
||||
String filename = null;
|
||||
String dataFilename = null;
|
||||
Integer lowThreshold = 0;
|
||||
Integer highThreshold = Integer.MAX_VALUE;
|
||||
Integer maxOccupancyDiff = Integer.MAX_VALUE;
|
||||
Integer minOverlapPercent = 0;
|
||||
try {
|
||||
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
|
||||
System.out.println("Please enter name of an existing graph and occupancy data file: ");
|
||||
dataFilename = sc.next();
|
||||
System.out.println("The matching results will be written to a file.");
|
||||
System.out.print("Please enter a name for the output file: ");
|
||||
filename = sc.next();
|
||||
System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
lowThreshold = sc.nextInt();
|
||||
if(lowThreshold < 1){
|
||||
throw new InputMismatchException("Minimum value for low threshold set to 1");
|
||||
}
|
||||
System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
highThreshold = sc.nextInt();
|
||||
System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
|
||||
maxOccupancyDiff = sc.nextInt();
|
||||
System.out.println("\nWell overlap percentage = pair overlap / sequence occupancy");
|
||||
System.out.println("What is the minimum well overlap percentage to attempt matching? (0 to 100)");
|
||||
minOverlapPercent = sc.nextInt();
|
||||
if (minOverlapPercent < 0 || minOverlapPercent > 100) {
|
||||
throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||
}
|
||||
} catch (InputMismatchException ex) {
|
||||
System.out.println(ex);
|
||||
sc.next();
|
||||
}
|
||||
//read object data from file
|
||||
System.out.println("Reading graph data from file. This may take some time");
|
||||
System.out.println("File I/O time is not included in results");
|
||||
assert dataFilename != null;
|
||||
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFilename);
|
||||
GraphWithMapData data = dataReader.getData();
|
||||
//set source file name
|
||||
data.setSourceFilename(dataFilename);
|
||||
//simulate matching
|
||||
MatchingResult results = Simulator.matchCDR3s(data, dataFilename, lowThreshold, highThreshold, maxOccupancyDiff,
|
||||
minOverlapPercent, true);
|
||||
//write results to file
|
||||
assert filename != null;
|
||||
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
|
||||
System.out.println("Writing results to file");
|
||||
writer.writeResultsToFile();
|
||||
System.out.println("Results written to file: " + filename);
|
||||
System.gc();
|
||||
}
|
||||
|
||||
///////
|
||||
//Rewrite this to fit new matchCDR3 method with file I/O
|
||||
///////
|
||||
// public static void matchCellsCDR1(){
|
||||
// /*
|
||||
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
|
||||
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
|
||||
// match
|
||||
// */
|
||||
// String filename = null;
|
||||
// String preliminaryResultsFilename = null;
|
||||
// String cellFile = null;
|
||||
// String plateFile = null;
|
||||
// Integer lowThresholdCDR3 = 0;
|
||||
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
|
||||
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
|
||||
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
|
||||
// Integer lowThresholdCDR1 = 0;
|
||||
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
|
||||
// boolean outputCDR3Matches = false;
|
||||
// try {
|
||||
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
|
||||
// System.out.print("Please enter name of an existing cell sample file: ");
|
||||
// cellFile = sc.next();
|
||||
// System.out.print("Please enter name of an existing sample plate file: ");
|
||||
// plateFile = sc.next();
|
||||
// System.out.println("The matching results will be written to a file.");
|
||||
// System.out.print("Please enter a name for the output file: ");
|
||||
// filename = sc.next();
|
||||
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
// lowThresholdCDR3 = sc.nextInt();
|
||||
// if(lowThresholdCDR3 < 1){
|
||||
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
// }
|
||||
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
|
||||
// highThresholdCDR3 = sc.nextInt();
|
||||
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
|
||||
// maxOccupancyDiffCDR3 = sc.nextInt();
|
||||
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
|
||||
// minOverlapPercentCDR3 = sc.nextInt();
|
||||
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
|
||||
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
|
||||
// }
|
||||
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
// lowThresholdCDR1 = sc.nextInt();
|
||||
// if(lowThresholdCDR1 < 1){
|
||||
// throw new InputMismatchException("Minimum value for low threshold is 1");
|
||||
// }
|
||||
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
|
||||
// highThresholdCDR1 = sc.nextInt();
|
||||
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
|
||||
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
|
||||
// System.out.print("Please enter y/n: ");
|
||||
// String ans = sc.next();
|
||||
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
|
||||
// Matcher matcher = pattern.matcher(ans);
|
||||
// if(matcher.matches()){
|
||||
// outputCDR3Matches = true;
|
||||
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
|
||||
// preliminaryResultsFilename = sc.next();
|
||||
// System.out.println("CDR3 alpha/beta matches will be output to file");
|
||||
// }
|
||||
// else{
|
||||
// System.out.println("CDR3 alpha/beta matches will not be output to file");
|
||||
// }
|
||||
// } catch (InputMismatchException ex) {
|
||||
// System.out.println(ex);
|
||||
// sc.next();
|
||||
// }
|
||||
// CellFileReader cellReader = new CellFileReader(cellFile);
|
||||
// PlateFileReader plateReader = new PlateFileReader(plateFile);
|
||||
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
|
||||
// if (cellReader.getCells().size() == 0){
|
||||
// System.out.println("No cell sample found.");
|
||||
// System.out.println("Returning to main menu.");
|
||||
// }
|
||||
// else if(plate.getWells().size() == 0){
|
||||
// System.out.println("No sample plate found.");
|
||||
// System.out.println("Returning to main menu.");
|
||||
//
|
||||
// }
|
||||
// else{
|
||||
// if(highThresholdCDR3 >= plate.getSize()){
|
||||
// highThresholdCDR3 = plate.getSize() - 1;
|
||||
// }
|
||||
// if(highThresholdCDR1 >= plate.getSize()){
|
||||
// highThresholdCDR1 = plate.getSize() - 1;
|
||||
// }
|
||||
// List<Integer[]> cells = cellReader.getCells();
|
||||
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
|
||||
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
|
||||
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
|
||||
// highThresholdCDR1, preliminaryResults);
|
||||
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
|
||||
// writer.writeResultsToFile();
|
||||
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
|
||||
// writer.writeResultsToFile();
|
||||
// if(outputCDR3Matches){
|
||||
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
|
||||
// writer.writeResultsToFile();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
private static void acknowledge(){
|
||||
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
||||
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
||||
System.out.println();
|
||||
System.out.println("For full documentation, view readme.md file distributed with this code");
|
||||
System.out.println("or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.");
|
||||
System.out.println();
|
||||
System.out.println("pairSEQ citation:");
|
||||
System.out.println("Howie, B., Sherwood, A. M., et. al.");
|
||||
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
|
||||
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
|
||||
System.out.println();
|
||||
System.out.println("BiGpairSEQ_Sim by Eugene Fischer, 2021-2022");
|
||||
}
|
||||
}
|
||||
@@ -1,14 +1,20 @@
|
||||
|
||||
|
||||
public class Vertex {
|
||||
private final Integer peptide;
|
||||
private final Integer vertexLabel;
|
||||
private final Integer sequence;
|
||||
private final Integer occupancy;
|
||||
|
||||
public Vertex(Integer peptide, Integer occupancy) {
|
||||
this.peptide = peptide;
|
||||
public Vertex(Integer vertexLabel, Integer sequence, Integer occupancy) {
|
||||
this.vertexLabel = vertexLabel;
|
||||
this.sequence = sequence;
|
||||
this.occupancy = occupancy;
|
||||
}
|
||||
|
||||
public Integer getPeptide() {
|
||||
return peptide;
|
||||
public Integer getVertexLabel() { return vertexLabel; }
|
||||
|
||||
public Integer getSequence() {
|
||||
return sequence;
|
||||
}
|
||||
|
||||
public Integer getOccupancy() {
|
||||
|
||||
Reference in New Issue
Block a user