Compare commits
57 Commits
bf32a55e4b
...
v1.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b053e6ec4 | |||
| 44784b7976 | |||
| 7c19896dc9 | |||
| aec7e3016f | |||
| 5c75c1ac09 | |||
| cb1f7adece | |||
| 370de79546 | |||
| a803336f56 | |||
| 94b54b3416 | |||
| 601e141fd0 | |||
| 8f9c6b7d33 | |||
| e5ddc73723 | |||
| 9b18fac74f | |||
| 63ef6aa7a0 | |||
| 7558455f39 | |||
| 410f0ae547 | |||
| 1bc6a11545 | |||
| 2b13e10e95 | |||
| 4fd5baeb98 | |||
| b2a4e9a42b | |||
| d1bb49b482 | |||
| 9adb7dffb8 | |||
| 2023bb9d7e | |||
| 405fbf17ff | |||
| 24519f4a52 | |||
| 2afd01eeef | |||
| 10d0b711bf | |||
| 8f98baf44e | |||
| d6c7c40c96 | |||
| 61c14b2ecf | |||
| 22fc4aedfe | |||
| 5d24dc6f70 | |||
| 2c01a0211c | |||
| f2b5d9e1b7 | |||
| 74c8cafd81 | |||
| d1c37b5ccd | |||
| cb2c5a6024 | |||
| 284a5b3a40 | |||
| 52afb1edc2 | |||
| 9c52bc878a | |||
| 248fe4d662 | |||
| 5d0e60708c | |||
| c96b7237e9 | |||
| 0b28259800 | |||
| 837ef7bfe4 | |||
| 0bebbc7602 | |||
| 84f7ddb696 | |||
| c4633da9eb | |||
| 5b2ed165d0 | |||
| 0026d8cdfe | |||
| 13fb7168bf | |||
| 568a6be3c7 | |||
| cfa473c7ce | |||
| 6faacd9a82 | |||
| ce88e170c1 | |||
| 47e23addfa | |||
| b9ee31b64c |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/out/
|
||||
@@ -1,11 +1,11 @@
|
||||
<component name="ArtifactManager">
|
||||
<artifact type="jar" name="TCellSim:jar">
|
||||
<output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path>
|
||||
<root id="archive" name="TCellSim.jar">
|
||||
<artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
|
||||
<output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
|
||||
<root id="archive" name="BiGpairSEQ_Sim.jar">
|
||||
<element id="directory" name="META-INF">
|
||||
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
|
||||
</element>
|
||||
<element id="module-output" name="TCellSim" />
|
||||
<element id="module-output" name="BigPairSEQ" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
|
||||
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />
|
||||
2
.idea/compiler.xml
generated
2
.idea/compiler.xml
generated
@@ -6,7 +6,7 @@
|
||||
<sourceOutputDir name="target/generated-sources/annotations" />
|
||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
||||
<outputRelativeToContentRoot value="true" />
|
||||
<module name="TCellSim" />
|
||||
<module name="BigPairSEQ" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
</component>
|
||||
|
||||
15
.idea/libraries/jgrapht_io.xml
generated
Normal file
15
.idea/libraries/jgrapht_io.xml
generated
Normal file
@@ -0,0 +1,15 @@
|
||||
<component name="libraryTable">
|
||||
<library name="jgrapht.io" type="repository">
|
||||
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
|
||||
<CLASSES>
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
|
||||
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
||||
Binary file not shown.
@@ -1,4 +0,0 @@
|
||||
Executable .jar file with interactive, command line UI
|
||||
Usage: java -jar TCellSim.jar
|
||||
|
||||
Requires Java11 or higher (Openjdk-17 recommended)
|
||||
278
readme.md
Normal file
278
readme.md
Normal file
@@ -0,0 +1,278 @@
|
||||
# BiGpairSEQ SIMULATOR
|
||||
|
||||
|
||||
## ABOUT
|
||||
|
||||
This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
|
||||
of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
|
||||
|
||||
## THEORY
|
||||
|
||||
Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
|
||||
against a null distribution, BiGpairSEQ does not do any statistical calculations
|
||||
directly.
|
||||
|
||||
BiGpairSEQ creates a [simple bipartite weighted graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
|
||||
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that share a well
|
||||
are connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
|
||||
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
|
||||
The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
|
||||
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
||||
|
||||
This is a well-studied combinatorial optimization problem, with many known solutions.
|
||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
|
||||
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
||||
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
||||
|
||||
Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
|
||||
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
|
||||
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Mehlhorn and Näher (1999).
|
||||
|
||||
The current version of the program uses a pairing heap instead of a Fibonacci heap for its priority queue,
|
||||
which has lower theoretical efficiency but also lower complexity overhead, and is often equivalently performant
|
||||
in practice.
|
||||
|
||||
## USAGE
|
||||
|
||||
### RUNNING THE PROGRAM
|
||||
|
||||
BiGpairSEQ_Sim is an executable .jar file. Requires Java 11 or higher. [OpenJDK 17](https://jdk.java.net/17/)
|
||||
recommended.
|
||||
|
||||
Run with the command:
|
||||
|
||||
`java -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
Processing sample plates with tens of thousands of sequences may require large amounts
|
||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
|
||||
For example, to run the program with 32 gigabytes of memory, use the command:
|
||||
|
||||
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
Once running, BiGpairSEQ_Sim has an interactive, menu-driven CLI for generating files and simulating TCR pairing. The
|
||||
main menu looks like this:
|
||||
|
||||
```
|
||||
--------BiGPairSEQ SIMULATOR--------
|
||||
ALPHA/BETA T CELL RECEPTOR MATCHING
|
||||
USING WEIGHTED BIPARTITE GRAPHS
|
||||
------------------------------------
|
||||
Please select an option:
|
||||
1) Generate a population of distinct cells
|
||||
2) Generate a sample plate of T cells
|
||||
3) Generate CDR3 alpha/beta occupancy data and overlap graph
|
||||
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
|
||||
9) About/Acknowledgments
|
||||
0) Exit
|
||||
```
|
||||
|
||||
### OUTPUT
|
||||
|
||||
To run the simulation, the program reads and writes 4 kinds of files:
|
||||
* Cell Sample files in CSV format
|
||||
* Sample Plate files in CSV format
|
||||
* Graph and Data files in binary object serialization format
|
||||
* Matching Results files in CSV format
|
||||
|
||||
When entering filenames, it is not necessary to include the file extension (.csv or .ser). When reading or
|
||||
writing files, the program will automatically add the correct extension to any filename without one.
|
||||
|
||||
#### Cell Sample Files
|
||||
Cell Sample files consist of any number of distinct "T cells." Every cell contains
|
||||
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
|
||||
random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
|
||||
are not necessarily unique; the relative diversity can be set when making the file.
|
||||
|
||||
(Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
|
||||
|
||||
Options when making a Cell Sample file:
|
||||
* Number of T cells to generate
|
||||
* Factor by which CDR3s are more diverse than CDR1s
|
||||
|
||||
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
|
||||
Comments are preceded by `#`
|
||||
|
||||
Structure:
|
||||
|
||||
---
|
||||
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
|
||||
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|
||||
|---|---|---|---|
|
||||
|unique number|unique number|number|number|
|
||||
|
||||
---
|
||||
|
||||
#### Sample Plate Files
|
||||
Sample Plate files consist of any number of "wells" containing any number of T cells (as
|
||||
described above). The wells are filled randomly from a Cell Sample file, according to a selected
|
||||
frequency distribution. Additionally, every individual sequence within each cell may, with some
|
||||
given dropout probability, be omitted from the file; this simulates the effect of amplification errors
|
||||
prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a
|
||||
different concentration of T cells per well.
|
||||
|
||||
Options when making a Sample Plate file:
|
||||
* Cell Sample file to use
|
||||
* Statistical distribution to apply to Cell Sample file
|
||||
* Poisson
|
||||
* Gaussian
|
||||
* Standard deviation size
|
||||
* Exponential
|
||||
* Lambda value
|
||||
* (Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was exponential with a lambda of approximately 0.6. (Howie, et al. 2015))
|
||||
* Total number of wells on the plate
|
||||
* Number of sections on plate
|
||||
* Number of T cells per well
|
||||
* per section, if more than one section
|
||||
* Dropout rate
|
||||
|
||||
Files are in CSV format. There are no header labels. Every row represents a well.
|
||||
Every column represents an individual cell, containing four sequences, depicted as an array string:
|
||||
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
|
||||
|
||||
`[525902, 791533, -1, 866282]`
|
||||
|
||||
Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
|
||||
Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
|
||||
|
||||
Structure:
|
||||
|
||||
---
|
||||
```
|
||||
# Cell source file name:
|
||||
# Each row represents one well on the plate
|
||||
# Plate size:
|
||||
# Concentrations:
|
||||
# Lambda -or- StdDev:
|
||||
```
|
||||
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|
||||
|---|---|---|---|
|
||||
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
|
||||
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
|
||||
| **...** | **...** | **...** | **...** |
|
||||
|
||||
---
|
||||
|
||||
#### Graph and Data Files
|
||||
Graph and Data files are serialized binaries of a Java object containing the weighted bipartite graph representation of a
|
||||
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
|
||||
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
|
||||
Sample Plate file (to construct the associated occupancy graph). These files can be several gigabytes in size.
|
||||
Writing them to a file lets us generate a graph and its metadata once, then use it for multiple different BiGpairSEQ simulations.
|
||||
|
||||
Options for creating a Graph and Data file:
|
||||
* The Cell Sample file to use
|
||||
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
|
||||
|
||||
These files do not have a human-readable structure, and are not portable to other programs. (Export of graphs in a
|
||||
portable data format may be implemented in the future. The tricky part is encoding the necessary metadata.)
|
||||
|
||||
---
|
||||
|
||||
#### Matching Results Files
|
||||
Matching results files consist of the results of a BiGpairSEQ matching simulation.
|
||||
Files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
|
||||
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
|
||||
|
||||
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
|
||||
* The minimum number of alpha/beta overlap wells to attempt to match
|
||||
* (must be >= 1)
|
||||
* The maximum number of alpha/beta overlap wells to attempt to match
|
||||
* (must be <= the number of wells on the plate - 1)
|
||||
* The maximum difference in alpha/beta occupancy to attempt to match
|
||||
* (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
|
||||
* The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as value in range 0 - 100.
|
||||
* (Optional. To skip using this filter, enter 0)
|
||||
|
||||
Example output:
|
||||
|
||||
---
|
||||
```
|
||||
# Source Sample Plate file: 4MilCellsPlate.csv
|
||||
# Source Graph and Data file: 4MilCellsPlateGraph.ser
|
||||
# T cell counts in sample plate wells: 30000
|
||||
# Total alphas found: 11813
|
||||
# Total betas found: 11808
|
||||
# High overlap threshold: 94
|
||||
# Low overlap threshold: 3
|
||||
# Minimum overlap percent: 0
|
||||
# Maximum occupancy difference: 96
|
||||
# Pairing attempt rate: 0.438
|
||||
# Correct pairings: 5151
|
||||
# Incorrect pairings: 18
|
||||
# Pairing error rate: 0.00348
|
||||
# Simulation time: 862 seconds
|
||||
```
|
||||
|
||||
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|
||||
|---|---|---|---|---|---|---|
|
||||
|5242972|17|1571520|18|17|true|1.41E-18|
|
||||
|5161027|18|2072219|18|18|true|7.31E-20|
|
||||
|4145198|33|1064455|30|29|true|2.65E-21|
|
||||
|7700582|18|112748|18|18|true|7.31E-20|
|
||||
|...|...|...|...|...|...|...|
|
||||
|
||||
---
|
||||
|
||||
**NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all.
|
||||
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
|
||||
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
|
||||
|
||||
### PERFORMANCE
|
||||
Performance details of the example excerpted above:
|
||||
|
||||
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
|
||||
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
|
||||
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
|
||||
|
||||
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
|
||||
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
|
||||
|
||||
The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
|
||||
|
||||
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
|
||||
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
|
||||
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
|
||||
|
||||
## TODO
|
||||
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* ~~Hold graph data in memory until another graph is read-in?~~ ABANDONED
|
||||
* *No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*
|
||||
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
|
||||
* ~~Problem is variable number of cells in a well~~
|
||||
* ~~Apache Commons CSV library writes entries a row at a time~~
|
||||
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily the tidyverse transposes CSV matrices in R, might not even be worth fixing._
|
||||
* Re-implement command line arguments, to enable scripting and statistical simulation studies
|
||||
* Implement sample plates with random numbers of T cells per well.
|
||||
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
|
||||
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
|
||||
* Enable GraphML output in addition to serialized object binaries, for data portability
|
||||
* Custom vertex type with attribute for sequence occupancy?
|
||||
* Re-implement CDR1 matching method
|
||||
* Implement Duan and Su's maximum weight matching algorithm
|
||||
* Add controllable algorithm-type parameter?
|
||||
* Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm
|
||||
* in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage
|
||||
* Add controllable heap-type parameter?
|
||||
|
||||
|
||||
|
||||
## CITATIONS
|
||||
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
|
||||
* Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
|
||||
* Mehlhorn, K., Näher, St. [The LEDA Platform of Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
|
||||
* Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596–615 (1987)
|
||||
|
||||
## EXTERNAL LIBRARIES USED
|
||||
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
|
||||
* [JHeaps](https://www.jheaps.org) -- For pairing heap priority queue used in maximum weight matching algorithm
|
||||
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
|
||||
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting. (**Awaiting re-implementation**.)
|
||||
|
||||
## ACKNOWLEDGEMENTS
|
||||
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
|
||||
pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
|
||||
|
||||
## AUTHOR
|
||||
BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.
|
||||
@@ -8,6 +8,9 @@ public abstract class Equations {
|
||||
return (int) ((Math.random() * (max - min)) + min);
|
||||
}
|
||||
|
||||
//pValue calculation as described in original pairSEQ paper.
|
||||
//Included for comparison with original results.
|
||||
//Not used by BiGpairSEQ for matching.
|
||||
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
|
||||
int w_ab = (int) w_ab_d;
|
||||
double pv = 0.0;
|
||||
@@ -18,6 +21,9 @@ public abstract class Equations {
|
||||
return pv;
|
||||
}
|
||||
|
||||
//Implementation of the (corrected) probability equation from pairSEQ paper.
|
||||
//Included for comparison with original results.
|
||||
//Not used by BiGpairSEQ for matching.
|
||||
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
|
||||
BigInteger numer1 = choose(w, w_ab);
|
||||
BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
|
||||
@@ -30,10 +36,9 @@ public abstract class Equations {
|
||||
return prob.doubleValue();
|
||||
}
|
||||
|
||||
/*
|
||||
* This works because nC(k+1) = nCk * (n-k)/(k+1)
|
||||
* Since nC0 = 1, can start there and generate all the rest.
|
||||
*/
|
||||
|
||||
//This works because nC(k+1) = nCk * (n-k)/(k+1)
|
||||
//Since nC0 = 1, can start there and generate all the rest.
|
||||
public static BigInteger choose(final int N, final int K) {
|
||||
BigInteger nCk = BigInteger.ONE;
|
||||
for (int k = 0; k < K; k++) {
|
||||
|
||||
29
src/main/java/GraphDataObjectReader.java
Normal file
29
src/main/java/GraphDataObjectReader.java
Normal file
@@ -0,0 +1,29 @@
|
||||
import java.io.*;
|
||||
|
||||
public class GraphDataObjectReader {
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
|
||||
public GraphDataObjectReader(String filename) throws IOException {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
this.filename = filename;
|
||||
try(//don't need to close these because of try-with-resources
|
||||
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
||||
ObjectInputStream in = new ObjectInputStream(fileIn))
|
||||
{
|
||||
data = (GraphWithMapData) in.readObject();
|
||||
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public GraphWithMapData getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public String getFilename() {
|
||||
return filename;
|
||||
}
|
||||
}
|
||||
28
src/main/java/GraphDataObjectWriter.java
Normal file
28
src/main/java/GraphDataObjectWriter.java
Normal file
@@ -0,0 +1,28 @@
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectOutputStream;
|
||||
|
||||
public class GraphDataObjectWriter {
|
||||
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
|
||||
public GraphDataObjectWriter(String filename, GraphWithMapData data) {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
filename = filename + ".ser";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public void writeDataToFile() {
|
||||
try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename));
|
||||
ObjectOutputStream out = new ObjectOutputStream(bufferedOut);
|
||||
){
|
||||
out.writeObject(data);
|
||||
} catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
35
src/main/java/GraphMLFileReader.java
Normal file
35
src/main/java/GraphMLFileReader.java
Normal file
@@ -0,0 +1,35 @@
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jgrapht.nio.graphml.GraphMLImporter;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class GraphMLFileReader {
|
||||
|
||||
private String filename;
|
||||
private SimpleWeightedGraph graph;
|
||||
|
||||
public GraphMLFileReader(String filename, SimpleWeightedGraph graph) {
|
||||
if(!filename.matches(".*\\.graphml")){
|
||||
filename = filename + ".graphml";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.graph = graph;
|
||||
|
||||
try(//don't need to close reader bc of try-with-resources auto-closing
|
||||
BufferedReader reader = Files.newBufferedReader(Path.of(filename));
|
||||
){
|
||||
GraphMLImporter<SimpleWeightedGraph, BufferedReader> importer = new GraphMLImporter<>();
|
||||
importer.importGraph(graph, reader);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
System.out.println("Graph file " + filename + " not found.");
|
||||
System.err.println(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public SimpleWeightedGraph getGraph() { return graph; }
|
||||
|
||||
}
|
||||
35
src/main/java/GraphMLFileWriter.java
Normal file
35
src/main/java/GraphMLFileWriter.java
Normal file
@@ -0,0 +1,35 @@
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jgrapht.nio.dot.DOTExporter;
|
||||
import org.jgrapht.nio.graphml.GraphMLExporter;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
|
||||
public class GraphMLFileWriter {
|
||||
|
||||
String filename;
|
||||
SimpleWeightedGraph graph;
|
||||
|
||||
|
||||
public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) {
|
||||
if(!filename.matches(".*\\.graphml")){
|
||||
filename = filename + ".graphml";
|
||||
}
|
||||
this.filename = filename;
|
||||
this.graph = graph;
|
||||
}
|
||||
|
||||
public void writeGraphToFile() {
|
||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
){
|
||||
GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
|
||||
exporter.exportGraph(graph, writer);
|
||||
} catch(IOException ex){
|
||||
System.out.println("Could not make new file named "+filename);
|
||||
System.err.println(ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
106
src/main/java/GraphWithMapData.java
Normal file
106
src/main/java/GraphWithMapData.java
Normal file
@@ -0,0 +1,106 @@
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;

import java.time.Duration;
import java.util.Map;

//Can't just write the graph, because I need the occupancy data too.
//Makes most sense to serialize object and write that to a file.
//Which means there's no reason to split map data and graph data up.

/**
 * Serializable bundle of a sample plate's bipartite overlap graph together
 * with the metadata needed for matching and results output (well counts,
 * vertex-to-sequence index maps, per-sequence well occupancy counts, and the
 * time taken to build the graph). Instances are written/read as Java object
 * serialization binaries by GraphDataObjectWriter/GraphDataObjectReader.
 */
public class GraphWithMapData implements java.io.Serializable {

    //Sample Plate file this graph was built from; set after construction
    //via setSourceFilename, so it may be null.
    private String sourceFilename;
    //bipartite weighted graph: alpha and beta sequences as vertices,
    //edge weights = number of shared wells
    private final SimpleWeightedGraph graph;
    //total number of wells on the plate
    private Integer numWells;
    //distinct per-well cell concentrations present on the plate
    private Integer[] wellConcentrations;
    //count of distinct alpha sequences found
    private Integer alphaCount;
    //count of distinct beta sequences found
    private Integer betaCount;
    //alpha sequence -> its correct beta partner (ground truth for accuracy checks)
    private final Map<Integer, Integer> distCellsMapAlphaKey;
    //graph vertex id -> alpha sequence, and its inverse below
    private final Map<Integer, Integer> plateVtoAMap;
    //graph vertex id -> beta sequence, and its inverse below
    private final Map<Integer, Integer> plateVtoBMap;
    private final Map<Integer, Integer> plateAtoVMap;
    private final Map<Integer, Integer> plateBtoVMap;
    //alpha sequence -> number of wells it occupies
    private final Map<Integer, Integer> alphaWellCounts;
    //beta sequence -> number of wells it occupies
    private final Map<Integer, Integer> betaWellCounts;
    //time taken to construct the graph/data
    private final Duration time;

    /**
     * Bundles a constructed overlap graph with all of its matching metadata.
     * All arguments are stored as-is (no defensive copies), so callers should
     * not mutate them afterwards.
     */
    public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
                            Integer alphaCount, Integer betaCount,
                            Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
                            Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
                            Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
                            Map<Integer, Integer> betaWellCounts, Duration time) {
        this.graph = graph;
        this.numWells = numWells;
        this.wellConcentrations = wellConcentrations;
        this.alphaCount = alphaCount;
        this.betaCount = betaCount;
        this.distCellsMapAlphaKey = distCellsMapAlphaKey;
        this.plateVtoAMap = plateVtoAMap;
        this.plateVtoBMap = plateVtoBMap;
        this.plateAtoVMap = plateAtoVMap;
        this.plateBtoVMap = plateBtoVMap;
        this.alphaWellCounts = alphaWellCounts;
        this.betaWellCounts = betaWellCounts;
        this.time = time;
    }

    public SimpleWeightedGraph getGraph() {
        return graph;
    }

    public Integer getNumWells() {
        return numWells;
    }

    public Integer[] getWellConcentrations() {
        return wellConcentrations;
    }

    public Integer getAlphaCount() {
        return alphaCount;
    }

    public Integer getBetaCount() {
        return betaCount;
    }

    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return distCellsMapAlphaKey;
    }

    public Map<Integer, Integer> getPlateVtoAMap() {
        return plateVtoAMap;
    }

    public Map<Integer, Integer> getPlateVtoBMap() {
        return plateVtoBMap;
    }

    public Map<Integer, Integer> getPlateAtoVMap() {
        return plateAtoVMap;
    }

    public Map<Integer, Integer> getPlateBtoVMap() {
        return plateBtoVMap;
    }

    public Map<Integer, Integer> getAlphaWellCounts() {
        return alphaWellCounts;
    }

    public Map<Integer, Integer> getBetaWellCounts() {
        return betaWellCounts;
    }

    public Duration getTime() {
        return time;
    }

    //Set after construction because the filename is only known at write time.
    public void setSourceFilename(String filename) {
        this.sourceFilename = filename;
    }

    public String getSourceFilename() {
        return sourceFilename;
    }
}
|
||||
@@ -21,7 +21,6 @@ public class Plate {
|
||||
this.size = size;
|
||||
this.error = error;
|
||||
this.concentrations = concentrations;
|
||||
//this.stdDev = stdDev;
|
||||
wells = new ArrayList<>();
|
||||
}
|
||||
|
||||
@@ -29,6 +28,17 @@ public class Plate {
|
||||
this.sourceFile = sourceFileName;
|
||||
this.wells = wells;
|
||||
this.size = wells.size();
|
||||
|
||||
List<Integer> concentrations = new ArrayList<>();
|
||||
for (List<Integer[]> w: wells) {
|
||||
if(!concentrations.contains(w.size())){
|
||||
concentrations.add(w.size());
|
||||
}
|
||||
}
|
||||
this.concentrations = new Integer[concentrations.size()];
|
||||
for (int i = 0; i < this.concentrations.length; i++) {
|
||||
this.concentrations[i] = concentrations.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){
|
||||
@@ -45,6 +55,7 @@ public class Plate {
|
||||
List<Integer[]> well = new ArrayList<>();
|
||||
for (int j = 0; j < concentrations[section]; j++) {
|
||||
do {
|
||||
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
|
||||
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
|
||||
} while (m >= cells.size() || m < 0);
|
||||
n = (int) Math.floor(m);
|
||||
@@ -57,7 +68,7 @@ public class Plate {
|
||||
}
|
||||
Integer[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce
|
||||
cellToAdd[k] = -1;
|
||||
}
|
||||
}
|
||||
@@ -87,7 +98,7 @@ public class Plate {
|
||||
n = (int) Math.floor(m);
|
||||
Integer[] cellToAdd = cells.get(n).clone();
|
||||
for(int k = 0; k < cellToAdd.length; k++){
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide
|
||||
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
|
||||
cellToAdd[k] = -1;
|
||||
}
|
||||
}
|
||||
@@ -123,30 +134,30 @@ public class Plate {
|
||||
return wells;
|
||||
}
|
||||
|
||||
//returns a map of the counts of the peptide at cell index pIndex, in all wells
|
||||
public Map<Integer, Integer> assayWellsPeptideP(int... pIndices){
|
||||
return this.assayWellsPeptideP(0, size, pIndices);
|
||||
//returns a map of the counts of the sequence at cell index sIndex, in all wells
|
||||
public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
|
||||
return this.assayWellsSequenceS(0, size, sIndices);
|
||||
}
|
||||
|
||||
//returns a map of the counts of the peptide at cell index pIndex, in a specific well
|
||||
public Map<Integer, Integer> assayWellsPeptideP(int n, int... pIndices) { return this.assayWellsPeptideP(n, n+1, pIndices);}
|
||||
//returns a map of the counts of the sequence at cell index sIndex, in a specific well
|
||||
public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
|
||||
|
||||
//returns a map of the counts of the peptide at cell index pIndex, in a range of wells
|
||||
public Map<Integer, Integer> assayWellsPeptideP(int start, int end, int... pIndices) {
|
||||
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
|
||||
public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
|
||||
Map<Integer,Integer> assay = new HashMap<>();
|
||||
for(int pIndex: pIndices){
|
||||
for(int pIndex: sIndices){
|
||||
for(int i = start; i < end; i++){
|
||||
countPeptides(assay, wells.get(i), pIndex);
|
||||
countSequences(assay, wells.get(i), pIndex);
|
||||
}
|
||||
}
|
||||
return assay;
|
||||
}
|
||||
//For the peptides at cell indices pIndices, counts number of unique peptides in the given well into the given map
|
||||
private void countPeptides(Map<Integer, Integer> wellMap, List<Integer[]> well, int... pIndices) {
|
||||
//For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
|
||||
private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
|
||||
for(Integer[] cell : well) {
|
||||
for(int pIndex: pIndices){
|
||||
if(cell[pIndex] != -1){
|
||||
wellMap.merge(cell[pIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
for(int sIndex: sIndices){
|
||||
if(cell[sIndex] != -1){
|
||||
wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,6 +73,7 @@ public class PlateFileWriter {
|
||||
}
|
||||
rows.add(tmp);
|
||||
}
|
||||
//build string of well concentrations
|
||||
StringBuilder concen = new StringBuilder();
|
||||
for(Integer i: concentrations){
|
||||
concen.append(i.toString());
|
||||
@@ -80,7 +81,9 @@ public class PlateFileWriter {
|
||||
}
|
||||
String concenString = concen.toString();
|
||||
|
||||
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build();
|
||||
CSVFormat plateFileFormat = CSVFormat.Builder.create()
|
||||
.setCommentMarker('#')
|
||||
.build();
|
||||
|
||||
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
|
||||
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
|
||||
|
||||
@@ -1,84 +1,27 @@
|
||||
//import org.jgrapht.Graph;
|
||||
import org.jgrapht.alg.interfaces.MatchingAlgorithm;
|
||||
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
|
||||
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.jgrapht.graph.SimpleWeightedGraph;
|
||||
import org.jheaps.AddressableHeap;
|
||||
import org.jheaps.tree.PairingHeap;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.math.MathContext;
|
||||
import java.text.NumberFormat;
|
||||
import java.time.Instant;
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
|
||||
public class Simulator {
|
||||
private static int cdr3AlphaIndex = 0;
|
||||
private static int cdr3BetaIndex = 1;
|
||||
private static int cdr1AlphaIndex = 2;
|
||||
private static int cdr1BetaIndex = 3;
|
||||
private static final int cdr3AlphaIndex = 0;
|
||||
private static final int cdr3BetaIndex = 1;
|
||||
private static final int cdr1AlphaIndex = 2;
|
||||
private static final int cdr1BetaIndex = 3;
|
||||
|
||||
|
||||
//Tested generating the cell sample file itself with a set distribution, but it wasn't working well
|
||||
//realized I'd have to change matching algos as well, so abandoned it.
|
||||
// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){
|
||||
// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s
|
||||
// //this could be wrong, though. could be less diversity than that.
|
||||
// //previous sim was only CDR3s
|
||||
// //Integer numDistinctCells = 1000000;
|
||||
// //int cdr1Freq = 3;
|
||||
//
|
||||
// List<Integer> numbersCDR3 = new ArrayList<>();
|
||||
// List<Integer> numbersCDR1 = new ArrayList<>();
|
||||
// Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||
// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
|
||||
// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
|
||||
// Collections.shuffle(numbersCDR3);
|
||||
// Collections.shuffle(numbersCDR1);
|
||||
//
|
||||
// //Each cell represented by 4 values
|
||||
// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
|
||||
// List<Integer[]> distinctCells = new ArrayList<>();
|
||||
// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||
// Integer tmpCDR3a = numbersCDR3.get(i);
|
||||
// Integer tmpCDR3b = numbersCDR3.get(i+1);
|
||||
// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
|
||||
// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
|
||||
// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
|
||||
// distinctCells.add(tmp);
|
||||
// }
|
||||
// List<Integer[]>sampleCells = new ArrayList<>();
|
||||
// int count;
|
||||
// int lastFactor = 0;
|
||||
// int factor = 10;
|
||||
// int index = 0;
|
||||
// while(numDistinctCells / factor >= 1){
|
||||
// int i = index;
|
||||
// while(i < index + factor - lastFactor){
|
||||
// count = 0;
|
||||
// while(count < (numDistinctCells/(factor * 10))){
|
||||
// sampleCells.add(distinctCells.get(i));
|
||||
// count++;
|
||||
// }
|
||||
// i++;
|
||||
// }
|
||||
// index = i;
|
||||
// lastFactor = factor;
|
||||
// factor *=10;
|
||||
// }
|
||||
// System.out.println("Total cells in sample: " + sampleCells.size());
|
||||
// System.out.println("Distinct cells in sample: " + index);
|
||||
// return new CellSample(sampleCells, cdr1Freq);
|
||||
// }
|
||||
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
|
||||
//In real T cells, CDR1s have about one third the diversity of CDR3s
|
||||
//previous sim was only CDR3s
|
||||
List<Integer> numbersCDR3 = new ArrayList<>();
|
||||
List<Integer> numbersCDR1 = new ArrayList<>();
|
||||
Integer numDistCDR3s = 2 * numDistinctCells + 1;
|
||||
@@ -88,7 +31,7 @@ public class Simulator {
|
||||
Collections.shuffle(numbersCDR1);
|
||||
|
||||
//Each cell represented by 4 values
|
||||
//two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
|
||||
//two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
|
||||
List<Integer[]> distinctCells = new ArrayList<>();
|
||||
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
|
||||
Integer tmpCDR3a = numbersCDR3.get(i);
|
||||
@@ -101,43 +44,39 @@ public class Simulator {
|
||||
return new CellSample(distinctCells, cdr1Freq);
|
||||
}
|
||||
|
||||
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells,
|
||||
Plate samplePlate, Integer lowThreshold,
|
||||
Integer highThreshold, boolean verbose){
|
||||
if(verbose){System.out.println("Cells: " + distinctCells.size());}
|
||||
|
||||
//Make the graph needed for matching CDR3s
|
||||
public static GraphWithMapData makeGraph(List<Integer[]> distinctCells, Plate samplePlate, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
int numWells = samplePlate.getSize();
|
||||
int[] alphaIndex = {cdr3AlphaIndex};
|
||||
int[] betaIndex = {cdr3BetaIndex};
|
||||
int numWells = samplePlate.getSize();
|
||||
|
||||
if(verbose){System.out.println("Making cell maps");}
|
||||
//HashMap keyed to Alphas, values Betas
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1);
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
|
||||
if(verbose){System.out.println("Cell maps made");}
|
||||
|
||||
if(verbose){System.out.println("Making well maps");}
|
||||
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex);
|
||||
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex);
|
||||
Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndex);
|
||||
Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndex);
|
||||
int alphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("all alphas count: " + alphaCount);}
|
||||
if(verbose){System.out.println("All alphas count: " + alphaCount);}
|
||||
int betaCount = allBetas.size();
|
||||
if(verbose){System.out.println("all betas count: " + betaCount);}
|
||||
if(verbose){System.out.println("All betas count: " + betaCount);}
|
||||
|
||||
if(verbose){System.out.println("Well maps made");}
|
||||
|
||||
//Remove saturating-occupancy peptides because they have no signal value.
|
||||
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another
|
||||
//peptide that's above the threshold.
|
||||
if(verbose){System.out.println("Removing peptides present in all wells.");}
|
||||
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");}
|
||||
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1);
|
||||
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1);
|
||||
if(verbose){System.out.println("Peptides removed");}
|
||||
//Remove saturating-occupancy sequences because they have no signal value.
|
||||
//Remove sequences with total occupancy below minimum pair overlap threshold
|
||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||
//if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
|
||||
filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThreshold(allBetas, 1, numWells - 1);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
int pairableAlphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("Remaining alpha count: " + pairableAlphaCount);}
|
||||
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
||||
int pairableBetaCount = allBetas.size();
|
||||
if(verbose){System.out.println("Remaining beta count: " + pairableBetaCount);}
|
||||
if(verbose){System.out.println("Remaining betas count: " + pairableBetaCount);}
|
||||
|
||||
if(verbose){System.out.println("Making vertex maps");}
|
||||
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
|
||||
@@ -147,17 +86,20 @@ public class Simulator {
|
||||
//subtract the vertexStartValue from betas to use their vertex labels as array indices
|
||||
Integer vertexStartValue = 0;
|
||||
//keys are sequential integer vertices, values are alphas
|
||||
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue);
|
||||
//New start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue);
|
||||
//new start value for vertex to beta map should be one more than final vertex value in alpha map
|
||||
vertexStartValue += plateVtoAMap.size();
|
||||
//keys are sequential integers vertices, values are betas
|
||||
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue);
|
||||
Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue);
|
||||
//keys are alphas, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
|
||||
//keys are betas, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
|
||||
if(verbose){System.out.println("Vertex maps made");}
|
||||
|
||||
//make adjacency matrix for bipartite graph generator
|
||||
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
|
||||
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
|
||||
if(verbose){System.out.println("Creating adjacency matrix");}
|
||||
//Count how many wells each alpha appears in
|
||||
Map<Integer, Integer> alphaWellCounts = new HashMap<>();
|
||||
@@ -165,54 +107,78 @@ public class Simulator {
|
||||
Map<Integer, Integer> betaWellCounts = new HashMap<>();
|
||||
//the adjacency matrix to be used by the graph generator
|
||||
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
|
||||
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
|
||||
countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
|
||||
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
|
||||
if(verbose){System.out.println("matrix created");}
|
||||
if(verbose){System.out.println("Matrix created");}
|
||||
|
||||
/**
|
||||
* This is where the bipartite graph is created
|
||||
*/
|
||||
if(verbose){System.out.println("creating graph");}
|
||||
//create bipartite graph
|
||||
if(verbose){System.out.println("Creating graph");}
|
||||
//the graph object
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
//the graph generator
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
//the list of alpha vertices
|
||||
List<Integer> alphaVertices = new ArrayList<>();
|
||||
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(alphaVertices);
|
||||
//the list of beta vertices
|
||||
List<Integer> betaVertices = new ArrayList<>();
|
||||
betaVertices.addAll(plateVtoBMap.keySet());
|
||||
List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
|
||||
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
|
||||
//use adjacency matrix of weight created previously
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
if(verbose){System.out.println("Graph created");}
|
||||
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");}
|
||||
Instant stop = Instant.now();
|
||||
Duration time = Duration.between(start, stop);
|
||||
//return GraphWithMapData object
|
||||
return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount,
|
||||
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
|
||||
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
|
||||
}
|
||||
|
||||
//match CDR3s.
|
||||
public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
|
||||
Integer highThreshold, Integer maxOccupancyDifference,
|
||||
Integer minOverlapPercent, boolean verbose) {
|
||||
Instant start = Instant.now();
|
||||
int numWells = data.getNumWells();
|
||||
Integer alphaCount = data.getAlphaCount();
|
||||
Integer betaCount = data.getBetaCount();
|
||||
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
|
||||
Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
|
||||
Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
|
||||
Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
|
||||
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
|
||||
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
|
||||
|
||||
//remove weights outside given overlap thresholds
|
||||
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
|
||||
filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
|
||||
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
|
||||
|
||||
//Filter by overlap size
|
||||
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
|
||||
" percent of vertex occupancy value.");}
|
||||
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
|
||||
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
|
||||
|
||||
/**
|
||||
* This is where the maximum weighted matching is found
|
||||
*/
|
||||
// if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
// MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet());
|
||||
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
// if(verbose){System.out.println("Matching completed");}
|
||||
// Instant stop = Instant.now();
|
||||
|
||||
//trying with jheaps now
|
||||
//Filter by relative occupancy
|
||||
if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
|
||||
+ maxOccupancyDifference);}
|
||||
filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
|
||||
maxOccupancyDifference);
|
||||
if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
|
||||
"set to 0.0");}
|
||||
//Find Maximum Weighted Matching
|
||||
//using jheaps library class PairingHeap for improved efficiency
|
||||
if(verbose){System.out.println("Finding maximum weighted matching");}
|
||||
//Attempting to use addressable heap to improve performance
|
||||
MaximumWeightBipartiteMatching maxWeightMatching =
|
||||
new MaximumWeightBipartiteMatching(graph,
|
||||
plateVtoAMap.keySet(),
|
||||
plateVtoBMap.keySet(),
|
||||
(i) -> new PairingHeap(Comparator.naturalOrder()));
|
||||
i -> new PairingHeap(Comparator.naturalOrder()));
|
||||
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
|
||||
if(verbose){System.out.println("Matching completed");}
|
||||
Instant stop = Instant.now();
|
||||
@@ -227,16 +193,15 @@ public class Simulator {
|
||||
header.add("Matched correctly?");
|
||||
header.add("P-value");
|
||||
|
||||
|
||||
//Results for csv file
|
||||
List<List<String>> allResults = new ArrayList<>();
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
MathContext mc = new MathContext(3);
|
||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
DefaultWeightedEdge e = null;
|
||||
DefaultWeightedEdge e;
|
||||
int trueCount = 0;
|
||||
int falseCount = 0;
|
||||
boolean check = false;
|
||||
boolean check;
|
||||
Map<Integer, Integer> matchMap = new HashMap<>();
|
||||
while(weightIter.hasNext()) {
|
||||
e = weightIter.next();
|
||||
@@ -269,31 +234,44 @@ public class Simulator {
|
||||
}
|
||||
|
||||
//Metadate comments for CSV file
|
||||
int min = alphaCount > betaCount ? betaCount : alphaCount;
|
||||
int min = Math.min(alphaCount, betaCount);
|
||||
double attemptRate = (double) (trueCount + falseCount) / min;
|
||||
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
|
||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
//get list of well concentrations
|
||||
List<Integer> wellConcentrations = Arrays.asList(data.getWellConcentrations());
|
||||
//make string out of concentrations list
|
||||
StringBuilder concentrationStringBuilder = new StringBuilder();
|
||||
for(Integer i: wellConcentrations){
|
||||
concentrationStringBuilder.append(i.toString());
|
||||
concentrationStringBuilder.append(" ");
|
||||
}
|
||||
String concentrationString = concentrationStringBuilder.toString();
|
||||
|
||||
List<String> comments = new ArrayList<>();
|
||||
comments.add("Source Sample Plate filename: " + data.getSourceFilename());
|
||||
comments.add("Source Graph and Data filename: " + dataFilename);
|
||||
comments.add("T cell counts in sample plate wells: " + concentrationString);
|
||||
comments.add("Total alphas found: " + alphaCount);
|
||||
comments.add("Total betas found: " + betaCount);
|
||||
comments.add("High overlap threshold: " + highThreshold);
|
||||
comments.add("Low overlap threshold: " + lowThreshold);
|
||||
comments.add("Minimum overlap percent: " + minOverlapPercent);
|
||||
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
|
||||
comments.add("Pairing attempt rate: " + attemptRateTrunc);
|
||||
comments.add("Correct pairings: " + trueCount);
|
||||
comments.add("Incorrect pairings: " + falseCount);
|
||||
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
|
||||
Duration time = Duration.between(start, stop);
|
||||
time = time.plus(data.getTime());
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
if(verbose){
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
|
||||
return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time);
|
||||
}
|
||||
|
||||
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
|
||||
@@ -313,13 +291,13 @@ public class Simulator {
|
||||
System.out.println("Previous match maps made");
|
||||
|
||||
System.out.println("Making cell maps");
|
||||
Map<Integer, Integer> alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
||||
Map<Integer, Integer> betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
||||
Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
|
||||
Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
|
||||
System.out.println("Cell maps made");
|
||||
|
||||
System.out.println("Making well maps");
|
||||
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices);
|
||||
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices);
|
||||
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
|
||||
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
|
||||
int CDR3Count = allCDR3s.size();
|
||||
System.out.println("all CDR3s count: " + CDR3Count);
|
||||
int CDR1Count = allCDR1s.size();
|
||||
@@ -353,11 +331,11 @@ public class Simulator {
|
||||
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
|
||||
Integer vertexStartValue = 0;
|
||||
//keys are sequential integer vertices, values are CDR3s
|
||||
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue);
|
||||
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
|
||||
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
|
||||
vertexStartValue += plateVtoCDR3Map.size();
|
||||
//keys are sequential integers vertices, values are CDR1s
|
||||
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue);
|
||||
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
|
||||
//keys are CDR3s, values are sequential integer vertices from previous map
|
||||
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
|
||||
//keys are CDR1s, values are sequential integer vertices from previous map
|
||||
@@ -374,7 +352,7 @@ public class Simulator {
|
||||
Map<Integer, Integer> wellNCDR3s = null;
|
||||
Map<Integer, Integer> wellNCDR1s = null;
|
||||
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
|
||||
countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
||||
countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
|
||||
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
|
||||
System.out.println("Matrix created");
|
||||
|
||||
@@ -383,11 +361,9 @@ public class Simulator {
|
||||
new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
|
||||
|
||||
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
|
||||
List<Integer> cdr3Vertices = new ArrayList<>();
|
||||
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.first(cdr3Vertices);
|
||||
List<Integer> cdr1Vertices = new ArrayList<>();
|
||||
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
|
||||
List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
|
||||
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
|
||||
graphGenerator.weights(weights);
|
||||
graphGenerator.generateGraph(graph);
|
||||
@@ -407,7 +383,7 @@ public class Simulator {
|
||||
//first processing run
|
||||
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
|
||||
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
|
||||
DefaultWeightedEdge e = null;
|
||||
DefaultWeightedEdge e;
|
||||
while(weightIter.hasNext()){
|
||||
e = weightIter.next();
|
||||
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
|
||||
@@ -531,7 +507,7 @@ public class Simulator {
|
||||
comments.add("Correct matching rate: " + correctRate);
|
||||
NumberFormat nf = NumberFormat.getInstance(Locale.US);
|
||||
Duration time = Duration.between(start, stop);
|
||||
time.plus(previousTime);
|
||||
time = time.plus(previousTime);
|
||||
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
|
||||
for(String s: comments){
|
||||
System.out.println(s);
|
||||
@@ -605,38 +581,38 @@ public class Simulator {
|
||||
|
||||
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
||||
//fills weights in the given 2D array
|
||||
private static void countPeptidesAndFillMatrix(Plate samplePlate,
|
||||
Map<Integer,Integer> allRowPeptides,
|
||||
Map<Integer,Integer> allColumnPeptides,
|
||||
Map<Integer,Integer> rowPeptideToVertexMap,
|
||||
Map<Integer,Integer> columnPeptideToVertexMap,
|
||||
int[] rowPeptideIndices,
|
||||
int[] colPeptideIndices,
|
||||
Map<Integer, Integer> rowPeptideCounts,
|
||||
Map<Integer,Integer> columnPeptideCounts,
|
||||
private static void countSequencesAndFillMatrix(Plate samplePlate,
|
||||
Map<Integer,Integer> allRowSequences,
|
||||
Map<Integer,Integer> allColumnSequences,
|
||||
Map<Integer,Integer> rowSequenceToVertexMap,
|
||||
Map<Integer,Integer> columnSequenceToVertexMap,
|
||||
int[] rowSequenceIndices,
|
||||
int[] colSequenceIndices,
|
||||
Map<Integer, Integer> rowSequenceCounts,
|
||||
Map<Integer,Integer> columnSequenceCounts,
|
||||
double[][] weights){
|
||||
Map<Integer, Integer> wellNRowPeptides = null;
|
||||
Map<Integer, Integer> wellNColumnPeptides = null;
|
||||
int vertexStartValue = rowPeptideToVertexMap.size();
|
||||
Map<Integer, Integer> wellNRowSequences = null;
|
||||
Map<Integer, Integer> wellNColumnSequences = null;
|
||||
int vertexStartValue = rowSequenceToVertexMap.size();
|
||||
int numWells = samplePlate.getSize();
|
||||
for (int n = 0; n < numWells; n++) {
|
||||
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices);
|
||||
for (Integer a : wellNRowPeptides.keySet()) {
|
||||
if(allRowPeptides.containsKey(a)){
|
||||
rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
wellNRowSequences = samplePlate.assayWellsSequenceS(n, rowSequenceIndices);
|
||||
for (Integer a : wellNRowSequences.keySet()) {
|
||||
if(allRowSequences.containsKey(a)){
|
||||
rowSequenceCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices);
|
||||
for (Integer b : wellNColumnPeptides.keySet()) {
|
||||
if(allColumnPeptides.containsKey(b)){
|
||||
columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
wellNColumnSequences = samplePlate.assayWellsSequenceS(n, colSequenceIndices);
|
||||
for (Integer b : wellNColumnSequences.keySet()) {
|
||||
if(allColumnSequences.containsKey(b)){
|
||||
columnSequenceCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
}
|
||||
}
|
||||
for (Integer i : wellNRowPeptides.keySet()) {
|
||||
if(allRowPeptides.containsKey(i)){
|
||||
for (Integer j : wellNColumnPeptides.keySet()) {
|
||||
if(allColumnPeptides.containsKey(j)){
|
||||
weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0;
|
||||
for (Integer i : wellNRowSequences.keySet()) {
|
||||
if(allRowSequences.containsKey(i)){
|
||||
for (Integer j : wellNColumnSequences.keySet()) {
|
||||
if(allColumnSequences.containsKey(j)){
|
||||
weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -666,19 +642,54 @@ public class Simulator {
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Integer, Integer> makePeptideToPeptideMap(List<Integer[]> cells, int keyPeptideIndex,
|
||||
int valuePeptideIndex){
|
||||
Map<Integer, Integer> keyPeptideToValuePeptideMap = new HashMap<>();
|
||||
for (Integer[] cell : cells) {
|
||||
keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]);
|
||||
//Remove edges for pairs with large occupancy discrepancy
|
||||
private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer maxOccupancyDifference) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
//Adjust this to something cleverer later
|
||||
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
return keyPeptideToValuePeptideMap;
|
||||
}
|
||||
|
||||
private static Map<Integer, Integer> makeVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) {
|
||||
//Remove edges for pairs where overlap size is significantly lower than the well occupancy
|
||||
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
|
||||
Map<Integer, Integer> alphaWellCounts,
|
||||
Map<Integer, Integer> betaWellCounts,
|
||||
Map<Integer, Integer> plateVtoAMap,
|
||||
Map<Integer, Integer> plateVtoBMap,
|
||||
Integer minOverlapPercent) {
|
||||
for (DefaultWeightedEdge e : graph.edgeSet()) {
|
||||
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
|
||||
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
|
||||
double weight = graph.getEdgeWeight(e);
|
||||
double min = minOverlapPercent / 100.0;
|
||||
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
|
||||
graph.setEdgeWeight(e, 0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
|
||||
int valueSequenceIndex){
|
||||
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
|
||||
for (Integer[] cell : cells) {
|
||||
keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]);
|
||||
}
|
||||
return keySequenceToValueSequenceMap;
|
||||
}
|
||||
|
||||
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
|
||||
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
|
||||
Integer index = startValue;
|
||||
for (Integer k: peptides.keySet()) {
|
||||
for (Integer k: sequences.keySet()) {
|
||||
map.put(index, k);
|
||||
index++;
|
||||
}
|
||||
@@ -693,9 +704,4 @@ public class Simulator {
|
||||
return inverse;
|
||||
}
|
||||
|
||||
private static int bigDecimalComparator(BigDecimal bd, Integer i) {
|
||||
return bd.compareTo(BigDecimal.valueOf(i));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
17
src/main/java/Vertex.java
Normal file
17
src/main/java/Vertex.java
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
 * Immutable pairing of a peptide identifier with its well-occupancy count, used as a
 * graph-vertex payload.
 */
public class Vertex {

    // Peptide identifier carried by this vertex.
    private final Integer peptide;
    // Number of wells the peptide was observed in (occupancy count).
    private final Integer occupancy;

    /**
     * @param peptide   peptide identifier
     * @param occupancy well-occupancy count for the peptide
     */
    public Vertex(Integer peptide, Integer occupancy) {
        this.peptide = peptide;
        this.occupancy = occupancy;
    }

    /** @return the peptide identifier supplied at construction */
    public Integer getPeptide() {
        return peptide;
    }

    /** @return the well-occupancy count supplied at construction */
    public Integer getOccupancy() {
        return occupancy;
    }
}
|
||||
Reference in New Issue
Block a user