57 Commits

Author SHA1 Message Date
4b053e6ec4 Remove artifacts from tracking to stop repo bloat. 2022-02-22 16:14:50 -06:00
44784b7976 Remove artifacts from tracking to stop repo bloat. 2022-02-22 16:10:22 -06:00
7c19896dc9 update readme 2022-02-22 16:09:50 -06:00
aec7e3016f Typos in documentation 2022-02-21 11:19:54 -06:00
5c75c1ac09 Update readme.md 2022-02-21 06:53:30 +00:00
cb1f7adece Change "peptide" references in code to "sequence", adding comments 2022-02-21 00:29:34 -06:00
370de79546 Add performance section to readme 2022-02-21 00:02:49 -06:00
a803336f56 Add performance section to readme 2022-02-21 00:01:20 -06:00
94b54b3416 Add performance section to readme 2022-02-20 23:31:25 -06:00
601e141fd0 Update readme 2022-02-20 22:51:49 -06:00
8f9c6b7d33 Update readme TODO 2022-02-20 20:59:05 -06:00
e5ddc73723 Finish reverting back to wells-as-rows 2022-02-20 20:54:44 -06:00
9b18fac74f Invoke garbage collection 2022-02-20 20:47:12 -06:00
63ef6aa7a0 Revert attempt to switch plate output format. It worked, but introduced a bug in graph filtering I don't want to chase down 2022-02-20 20:45:35 -06:00
7558455f39 Correct errors in output and documentation 2022-02-20 20:13:38 -06:00
410f0ae547 Remove testing code, add garbage collection calls 2022-02-20 20:06:45 -06:00
1bc6a11545 Change plate reader/writer to use columns as wells 2022-02-20 19:58:24 -06:00
2b13e10e95 Change plate reader/writer to use columns as wells 2022-02-20 19:48:09 -06:00
4fd5baeb98 Change plate reader/writer to use columns as wells 2022-02-20 19:41:06 -06:00
b2a4e9a42b Change plate reader/writer to use columns as wells 2022-02-20 19:17:56 -06:00
d1bb49b482 Change plate reader/writer to use columns as wells 2022-02-20 19:12:11 -06:00
9adb7dffb8 Change plate reader/writer to use columns as wells 2022-02-20 19:08:04 -06:00
2023bb9d7e Cleanup file output, add UI verbosity 2022-02-20 18:31:31 -06:00
405fbf17ff improve documentation 2022-02-20 17:11:39 -06:00
24519f4a52 improve documentation 2022-02-20 17:04:25 -06:00
2afd01eeef improve documentation 2022-02-20 15:48:11 -06:00
10d0b711bf improve documentation 2022-02-20 15:38:40 -06:00
8f98baf44e improve documentation 2022-02-20 15:37:39 -06:00
d6c7c40c96 improve documentation 2022-02-20 13:23:15 -06:00
61c14b2ecf improve documentation 2022-02-20 13:20:47 -06:00
22fc4aedfe improve documentation 2022-02-20 13:18:49 -06:00
5d24dc6f70 improve documentation 2022-02-20 13:15:32 -06:00
2c01a0211c move readme 2022-02-20 12:02:27 -06:00
f2b5d9e1b7 Rename and update readme 2022-02-20 11:58:12 -06:00
74c8cafd81 scan for filename 2022-02-20 03:08:31 -06:00
d1c37b5ccd Relocate overlap threshold filters 2022-02-20 03:05:56 -06:00
cb2c5a6024 Add plate well concentrations to output data 2022-02-20 02:29:42 -06:00
284a5b3a40 Add plate well concentrations to output data 2022-02-20 02:23:31 -06:00
52afb1edc2 Add plate well concentrations to output data 2022-02-20 02:17:36 -06:00
9c52bc878a Add plate well concentrations to output data 2022-02-20 02:13:13 -06:00
248fe4d662 Add plate well concentrations to output data 2022-02-20 02:09:22 -06:00
5d0e60708c Add plate well concentrations to output data 2022-02-20 01:53:34 -06:00
c96b7237e9 Add plate well concentrations to output data 2022-02-20 01:40:01 -06:00
0b28259800 Add plate well concentrations to output data 2022-02-20 01:13:22 -06:00
837ef7bfe4 UI cleanup, some code cleanup 2022-02-20 01:05:28 -06:00
0bebbc7602 Add missing filtering code 2022-02-19 22:56:38 -06:00
84f7ddb696 Fix interactive output 2022-02-19 22:49:50 -06:00
c4633da9eb Correct propogation of peptide counts 2022-02-19 22:33:38 -06:00
5b2ed165d0 Clean up interactive text, bugfix 2022-02-19 22:21:09 -06:00
0026d8cdfe Use buffered input/output streams 2022-02-19 22:04:41 -06:00
13fb7168bf Refactor to read/write files of graph and map data 2022-02-19 21:46:01 -06:00
568a6be3c7 Refactoring to allow graphs from file 2022-02-19 17:23:55 -06:00
cfa473c7ce Adding parameters to filter by occupancy difference and percent overlap 2022-02-19 14:06:11 -06:00
6faacd9a82 Adding parameters to filter by occupancy difference and percent overlap 2022-02-19 14:05:26 -06:00
ce88e170c1 Update readme with max memory flag 2022-02-18 17:48:25 -06:00
47e23addfa Do new filtering before matching 2022-02-18 17:42:05 -06:00
b9ee31b64c Do new filtering before matching 2022-02-18 17:28:24 -06:00
18 changed files with 1293 additions and 642 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/out/

View File

@@ -1,11 +1,11 @@
<component name="ArtifactManager"> <component name="ArtifactManager">
<artifact type="jar" name="TCellSim:jar"> <artifact type="jar" build-on-make="true" name="BiGpairSEQ_Sim:jar">
<output-path>$PROJECT_DIR$/out/artifacts/TCellSim_jar</output-path> <output-path>$PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar</output-path>
<root id="archive" name="TCellSim.jar"> <root id="archive" name="BiGpairSEQ_Sim.jar">
<element id="directory" name="META-INF"> <element id="directory" name="META-INF">
<element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" /> <element id="file-copy" path="$PROJECT_DIR$/src/main/java/META-INF/MANIFEST.MF" />
</element> </element>
<element id="module-output" name="TCellSim" /> <element id="module-output" name="BigPairSEQ" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" /> <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" /> <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar" path-in-jar="/" />
<element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" /> <element id="extracted-dir" path="$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.5.0/commons-cli-1.5.0.jar" path-in-jar="/" />

2
.idea/compiler.xml generated
View File

@@ -6,7 +6,7 @@
<sourceOutputDir name="target/generated-sources/annotations" /> <sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" /> <outputRelativeToContentRoot value="true" />
<module name="TCellSim" /> <module name="BigPairSEQ" />
</profile> </profile>
</annotationProcessing> </annotationProcessing>
</component> </component>

15
.idea/libraries/jgrapht_io.xml generated Normal file
View File

@@ -0,0 +1,15 @@
<component name="libraryTable">
<library name="jgrapht.io" type="repository">
<properties maven-id="org.jgrapht:jgrapht-io:1.5.1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-io/1.5.1/jgrapht-io-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jgrapht/jgrapht-core/1.5.1/jgrapht-core-1.5.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/jheaps/jheaps/0.13/jheaps-0.13.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.8-1/antlr4-runtime-4.8-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-text/1.8/commons-text-1.8.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@@ -1,4 +0,0 @@
Executable .jar file with interactive, command line UI
Usage: java -jar TCellSim.jar
Requires Java11 or higher (Openjdk-17 recommended)

278
readme.md Normal file
View File

@@ -0,0 +1,278 @@
# BiGpairSEQ SIMULATOR
## ABOUT
This program simulates BiGpairSEQ (Bipartite Graph pairSEQ), a graph theory-based adaptation
of the pairSEQ algorithm (Howie, et al. 2015) for pairing T cell receptor sequences.
## THEORY
Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares
against a null distribution, BiGpairSEQ does not do any statistical calculations
directly.
BiGpairSEQ creates a [simple bipartite weighted graph](https://en.wikipedia.org/wiki/Bipartite_graph) representing the sample plate.
The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that share a well
are connected by an edge, with the edge weight set to the number of wells in which both sequences appear.
(Sequences present in *all* wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.)
The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
This is a well-studied combinatorial optimization problem, with many known solutions.
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
Unfortunately, it's a fairly new algorithm, and not yet implemented by the graph theory library used in this simulator.
So this program instead uses the Fibonacci heap-based algorithm of Fredman and Tarjan (1987), which has a worst-case
runtime of **O(n (n log(n) + m))**. The algorithm is implemented as described in Mehlhorn and Näher (1999).
The current version of the program uses a pairing heap instead of a Fibonacci heap for its priority queue,
which has lower theoretical efficiency but also lower complexity overhead, and is often equivalently performant
in practice.
## USAGE
### RUNNING THE PROGRAM
BiGpairSEQ_Sim is an executable .jar file. Requires Java 11 or higher. [OpenJDK 17](https://jdk.java.net/17/)
recommended.
Run with the command:
`java -jar BiGpairSEQ_Sim.jar`
Processing sample plates with tens of thousands of sequences may require large amounts
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
For example, to run the program with 32 gigabytes of memory, use the command:
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
Once running, BiGpairSEQ_Sim has an interactive, menu-driven CLI for generating files and simulating TCR pairing. The
main menu looks like this:
```
--------BiGPairSEQ SIMULATOR--------
ALPHA/BETA T CELL RECEPTOR MATCHING
USING WEIGHTED BIPARTITE GRAPHS
------------------------------------
Please select an option:
1) Generate a population of distinct cells
2) Generate a sample plate of T cells
3) Generate CDR3 alpha/beta occupancy data and overlap graph
4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)
9) About/Acknowledgments
0) Exit
```
### OUTPUT
To run the simulation, the program reads and writes 4 kinds of files:
* Cell Sample files in CSV format
* Sample Plate files in CSV format
* Graph and Data files in binary object serialization format
* Matching Results files in CSV format
When entering filenames, it is not necessary to include the file extension (.csv or .ser). When reading or
writing files, the program will automatically add the correct extension to any filename without one.
#### Cell Sample Files
Cell Sample files consist of any number of distinct "T cells." Every cell contains
four sequences: Alpha CDR3, Beta CDR3, Alpha CDR1, Beta CDR1. The sequences are represented by
random integers. CDR3 Alpha and Beta sequences are all unique within a given Cell Sample file. CDR1 Alpha and Beta sequences
are not necessarily unique; the relative diversity can be set when making the file.
(Note: though cells still have CDR1 sequences, matching of CDR1s is currently awaiting re-implementation.)
Options when making a Cell Sample file:
* Number of T cells to generate
* Factor by which CDR3s are more diverse than CDR1s
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
Comments are preceded by `#`
Structure:
---
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
| Alpha CDR3 | Beta CDR3 | Alpha CDR1 | Beta CDR1 |
|---|---|---|---|
|unique number|unique number|number|number|
---
#### Sample Plate Files
Sample Plate files consist of any number of "wells" containing any number of T cells (as
described above). The wells are filled randomly from a Cell Sample file, according to a selected
frequency distribution. Additionally, every individual sequence within each cell may, with some
given dropout probability, be omitted from the file; this simulates the effect of amplification errors
prior to sequencing. Plates can also be partitioned into any number of sections, each of which can have a
different concentration of T cells per well.
Options when making a Sample Plate file:
* Cell Sample file to use
* Statistical distribution to apply to Cell Sample file
* Poisson
* Gaussian
* Standard deviation size
* Exponential
* Lambda value
* (Based on the slope of the graph in Figure 4C of the pairSEQ paper, the distribution of the original experiment was exponential with a lambda of approximately 0.6. (Howie, et al. 2015))
* Total number of wells on the plate
* Number of sections on plate
* Number of T cells per well
* per section, if more than one section
* Dropout rate
Files are in CSV format. There are no header labels. Every row represents a well.
Every column represents an individual cell, containing four sequences, depicted as an array string:
`[CDR3A, CDR3B, CDR1A, CDR1B]`. So a representative cell might look like this:
`[525902, 791533, -1, 866282]`
Notice that the CDR1 Alpha is missing in the cell above--sequence dropout from simulated amplification error.
Dropout sequences are replaced with the value `-1`. Comments are preceded by `#`
Structure:
---
```
# Cell source file name:
# Each row represents one well on the plate
# Plate size:
# Concentrations:
# Lambda -or- StdDev:
```
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|---|---|---|---|
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| **...** |
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| **...** |
| **...** | **...** | **...** | **...** |
---
#### Graph and Data Files
Graph and Data files are serialized binaries of a Java object containing the weighted bipartite graph representation of a
Sample Plate, along with the necessary metadata for matching and results output. Making them requires a Cell Sample file
(to construct a list of correct sequence pairs for checking the accuracy of BiGpairSEQ simulations) and a
Sample Plate file (to construct the associated occupancy graph). These files can be several gigabytes in size.
Writing them to a file lets us generate a graph and its metadata once, then use it for multiple different BiGpairSEQ simulations.
Options for creating a Graph and Data file:
* The Cell Sample file to use
* The Sample Plate file to use. (This must have been generated from the selected Cell Sample file.)
These files do not have a human-readable structure, and are not portable to other programs. (Export of graphs in a
portable data format may be implemented in the future. The tricky part is encoding the necessary metadata.)
---
#### Matching Results Files
Matching results files consist of the results of a BiGpairSEQ matching simulation.
Files are in CSV format. Rows are sequence pairings with extra relevant data. Columns are pairing-specific details.
Metadata about the matching simulation is included as comments. Comments are preceded by `#`.
Options when running a BiGpairSEQ simulation of CDR3 alpha/beta matching:
* The minimum number of alpha/beta overlap wells to attempt to match
* (must be >= 1)
* The maximum number of alpha/beta overlap wells to attempt to match
* (must be <= the number of wells on the plate - 1)
* The maximum difference in alpha/beta occupancy to attempt to match
* (Optional. To skip using this filter, enter a value >= the number of wells on the plate)
* The minimum overlap percentage--the percentage of a sequence's occupied wells shared by another sequence--to attempt to match. Given as value in range 0 - 100.
* (Optional. To skip using this filter, enter 0)
Example output:
---
```
# Source Sample Plate file: 4MilCellsPlate.csv
# Source Graph and Data file: 4MilCellsPlateGraph.ser
# T cell counts in sample plate wells: 30000
# Total alphas found: 11813
# Total betas found: 11808
# High overlap threshold: 94
# Low overlap threshold: 3
# Minimum overlap percent: 0
# Maximum occupancy difference: 96
# Pairing attempt rate: 0.438
# Correct pairings: 5151
# Incorrect pairings: 18
# Pairing error rate: 0.00348
# Simulation time: 862 seconds
```
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|---|---|---|---|---|---|---|
|5242972|17|1571520|18|17|true|1.41E-18|
|5161027|18|2072219|18|18|true|7.31E-20|
|4145198|33|1064455|30|29|true|2.65E-21|
|7700582|18|112748|18|18|true|7.31E-20|
|...|...|...|...|...|...|...|
---
**NOTE: The p-values in the output are not used for matching**—they aren't part of the BiGpairSEQ algorithm at all.
P-values are calculated *after* BiGpairSEQ matching is completed, for purposes of comparison only,
using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et al. 2015)
### PERFORMANCE
Performance details of the example excerpted above:
On a home computer with a Ryzen 5600X CPU, 64GB of 3200MHz DDR4 RAM (half of which was allocated to the Java Virtual Machine), and a PCIe 3.0 SSD, running Linux Mint 20.3 Edge (5.13 kernel),
the author ran a BiGpairSEQ simulation of a 96-well sample plate with 30,000 T cells/well comprising ~11,800 alphas and betas,
taken from a sample of 4,000,000 distinct cells with an exponential frequency distribution.
With min/max occupancy threshold of 3 and 94 wells for matching, and no other pre-filtering, BiGpairSEQ identified 5,151
correct pairings and 18 incorrect pairings, for an accuracy of 99.652%.
The simulation time was 14'22". If intermediate results were held in memory, this would be equivalent to the total elapsed time.
Since this implementation of BiGpairSEQ writes intermediate results to disk (to improve the efficiency of *repeated* simulations
with different filtering options), the actual elapsed time was greater. File I/O time was not measured, but took
slightly less time than the simulation itself. Real elapsed time from start to finish was under 30 minutes.
## TODO
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
* ~~Hold graph data in memory until another graph is read-in?~~ ABANDONED
* *No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.*
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows.
* ~~Problem is variable number of cells in a well~~
* ~~Apache Commons CSV library writes entries a row at a time~~
* _Got this working, but at the cost of a profoundly strange bug in graph occupancy filtering. Have reverted the repo until I can figure out what caused that. Given how easily the tidyverse transposes CSV matrices in R, might not even be worth fixing._
* Re-implement command line arguments, to enable scripting and statistical simulation studies
* Implement sample plates with random numbers of T cells per well.
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well population sizes on a sample plate; pairSEQ is not.
* preliminary data suggests that BiGpairSEQ behaves roughly as though the whole plate had whatever the *average* well concentration is, but that's still speculative.
* Enable GraphML output in addition to serialized object binaries, for data portability
* Custom vertex type with attribute for sequence occupancy?
* Re-implement CDR1 matching method
* Implement Duan and Su's maximum weight matching algorithm
* Add controllable algorithm-type parameter?
* Test whether pairing heap (currently used) or Fibonacci heap is more efficient for priority queue in current matching algorithm
* in theory Fibonacci heap should be more efficient, but complexity overhead may eliminate theoretical advantage
* Add controllable heap-type parameter?
## CITATIONS
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
* Duan, R., Su H. ["A Scaling Algorithm for Maximum Weight Matching in Bipartite Graphs."](https://web.eecs.umich.edu/~pettie/matching/Duan-Su-scaling-bipartite-matching.pdf) Proceedings of the Twenty-Third Annual ACM-SIAM Symposium on Discrete Algorithms, p. 1413-1424. (2012)
* Mehlhorn, K., Näher, St. [The LEDA Platform of Combinatorial and Geometric Computing.](https://people.mpi-inf.mpg.de/~mehlhorn/LEDAbook.html) Cambridge University Press. Chapter 7, Graph Algorithms; p. 132-162 (1999)
* Fredman, M., Tarjan, R. ["Fibonacci heaps and their uses in improved network optimization algorithms."](https://www.cl.cam.ac.uk/teaching/1011/AlgorithII/1987-FredmanTar-fibonacci.pdf) J. ACM, 34(3):596-615 (1987)
## EXTERNAL LIBRARIES USED
* [JGraphT](https://jgrapht.org) -- Graph theory data structures and algorithms
* [JHeaps](https://www.jheaps.org) -- For pairing heap priority queue used in maximum weight matching algorithm
* [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) -- For CSV file output
* [Apache Commons CLI](https://commons.apache.org/proper/commons-cli/) -- To enable command line arguments for scripting. (**Awaiting re-implementation**.)
## ACKNOWLEDGEMENTS
BiGpairSEQ was conceived in collaboration with Dr. Alice MacQueen, who brought the original
pairSEQ paper to the author's attention and explained all the biology terms he didn't know.
## AUTHOR
BiGpairSEQ algorithm and simulation by Eugene Fischer, 2021. UI improvements and documentation, 2022.

View File

@@ -8,6 +8,9 @@ public abstract class Equations {
return (int) ((Math.random() * (max - min)) + min); return (int) ((Math.random() * (max - min)) + min);
} }
//pValue calculation as described in original pairSEQ paper.
//Included for comparison with original results.
//Not used by BiGpairSEQ for matching.
public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) { public static double pValue(Integer w, Integer w_a, Integer w_b, double w_ab_d) {
int w_ab = (int) w_ab_d; int w_ab = (int) w_ab_d;
double pv = 0.0; double pv = 0.0;
@@ -18,6 +21,9 @@ public abstract class Equations {
return pv; return pv;
} }
//Implementation of the (corrected) probability equation from pairSEQ paper.
//Included for comparison with original results.
//Not used by BiGpairSEQ for matching.
private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){ private static double probPairedByChance(Integer w, Integer w_a, Integer w_b, Integer w_ab){
BigInteger numer1 = choose(w, w_ab); BigInteger numer1 = choose(w, w_ab);
BigInteger numer2 = choose(w - w_ab, w_a - w_ab); BigInteger numer2 = choose(w - w_ab, w_a - w_ab);
@@ -30,10 +36,9 @@ public abstract class Equations {
return prob.doubleValue(); return prob.doubleValue();
} }
/*
* This works because nC(k+1) = nCk * (n-k)/(k+1) //This works because nC(k+1) = nCk * (n-k)/(k+1)
* Since nC0 = 1, can start there and generate all the rest. //Since nC0 = 1, can start there and generate all the rest.
*/
public static BigInteger choose(final int N, final int K) { public static BigInteger choose(final int N, final int K) {
BigInteger nCk = BigInteger.ONE; BigInteger nCk = BigInteger.ONE;
for (int k = 0; k < K; k++) { for (int k = 0; k < K; k++) {

View File

@@ -0,0 +1,29 @@
import java.io.*;
/**
 * Reads a serialized {@link GraphWithMapData} object back from disk.
 * Appends a ".ser" extension to the filename when one is missing, mirroring
 * the naming convention used by GraphDataObjectWriter.
 */
public class GraphDataObjectReader {
    //Deserialized payload; stays null if the file was missing or the class could not be resolved.
    private GraphWithMapData data;
    private String filename;

    /**
     * Opens and deserializes the given file immediately on construction.
     *
     * @param filename path to a serialized graph/data file, with or without the ".ser" extension
     * @throws IOException on an I/O failure other than file-not-found (which is
     *                     caught and logged, leaving {@code data} null)
     */
    public GraphDataObjectReader(String filename) throws IOException {
        //String.endsWith is the idiomatic (and cheaper) extension check;
        //the old regex matches(".*\\.ser") behaved identically here.
        if (!filename.endsWith(".ser")) {
            filename = filename + ".ser";
        }
        this.filename = filename;
        try (//don't need to close these because of try-with-resources
             BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
             ObjectInputStream in = new ObjectInputStream(fileIn)) {
            data = (GraphWithMapData) in.readObject();
        } catch (FileNotFoundException | ClassNotFoundException ex) {
            //best-effort read: callers detect failure via getData() == null
            ex.printStackTrace();
        }
    }

    /** @return the deserialized graph/data bundle, or null if reading failed */
    public GraphWithMapData getData() {
        return data;
    }

    /** @return the filename actually read, including the ".ser" extension */
    public String getFilename() {
        return filename;
    }
}

View File

@@ -0,0 +1,28 @@
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
/**
 * Writes a {@link GraphWithMapData} object to disk via Java object serialization.
 * Appends a ".ser" extension to the filename when one is missing, so readers and
 * writers agree on file naming.
 */
public class GraphDataObjectWriter {
    private final GraphWithMapData data;
    private final String filename;

    /**
     * Stores the target filename and payload; nothing is written until
     * {@link #writeDataToFile()} is called.
     *
     * @param filename target path, with or without the ".ser" extension
     * @param data     the graph/data bundle to serialize
     */
    public GraphDataObjectWriter(String filename, GraphWithMapData data) {
        //String.endsWith is the idiomatic (and cheaper) extension check;
        //the old regex matches(".*\\.ser") behaved identically here.
        if (!filename.endsWith(".ser")) {
            filename = filename + ".ser";
        }
        this.filename = filename;
        this.data = data;
    }

    /**
     * Serializes the payload to the target file. Failures are logged rather
     * than thrown; writing is best-effort by design.
     */
    public void writeDataToFile() {
        try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new FileOutputStream(filename));
             ObjectOutputStream out = new ObjectOutputStream(bufferedOut)) {
            out.writeObject(data);
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

View File

@@ -0,0 +1,35 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.graphml.GraphMLImporter;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Imports GraphML data into a caller-supplied graph using JGraphT's importer.
 * Appends a ".graphml" extension to the filename when one is missing.
 */
public class GraphMLFileReader {
    private final String filename;
    //raw type preserved from the existing API; the importer fills this graph in place
    private final SimpleWeightedGraph graph;

    /**
     * Reads the GraphML file into {@code graph} immediately on construction.
     * A missing file is reported to stdout/stderr rather than thrown.
     *
     * @param filename path to a GraphML file, with or without the ".graphml" extension
     * @param graph    the (typically empty) graph to populate
     */
    public GraphMLFileReader(String filename, SimpleWeightedGraph graph) {
        //String.endsWith is the idiomatic (and cheaper) extension check;
        //the old regex matches(".*\\.graphml") behaved identically here.
        if (!filename.endsWith(".graphml")) {
            filename = filename + ".graphml";
        }
        this.filename = filename;
        this.graph = graph;
        try (//don't need to close reader bc of try-with-resources auto-closing
             BufferedReader reader = Files.newBufferedReader(Path.of(filename))) {
            GraphMLImporter<SimpleWeightedGraph, BufferedReader> importer = new GraphMLImporter<>();
            importer.importGraph(graph, reader);
        } catch (IOException ex) {
            System.out.println("Graph file " + filename + " not found.");
            System.err.println(ex);
        }
    }

    /** @return the populated graph (same instance the caller passed in) */
    public SimpleWeightedGraph getGraph() { return graph; }
}

View File

@@ -0,0 +1,35 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jgrapht.nio.dot.DOTExporter;
import org.jgrapht.nio.graphml.GraphMLExporter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * Exports a graph to a new GraphML file using JGraphT's exporter.
 * Appends a ".graphml" extension to the filename when one is missing.
 * Refuses to overwrite: CREATE_NEW fails if the file already exists.
 */
public class GraphMLFileWriter {
    //package-private visibility preserved from the existing API
    final String filename;
    final SimpleWeightedGraph graph;

    /**
     * Stores the target filename and graph; nothing is written until
     * {@link #writeGraphToFile()} is called.
     *
     * @param filename target path, with or without the ".graphml" extension
     * @param graph    the graph to export
     */
    public GraphMLFileWriter(String filename, SimpleWeightedGraph graph) {
        //String.endsWith is the idiomatic (and cheaper) extension check;
        //the old regex matches(".*\\.graphml") behaved identically here.
        if (!filename.endsWith(".graphml")) {
            filename = filename + ".graphml";
        }
        this.filename = filename;
        this.graph = graph;
    }

    /**
     * Writes the graph as GraphML. Fails (with a console message) if the
     * target file already exists, because of StandardOpenOption.CREATE_NEW.
     */
    public void writeGraphToFile() {
        try (BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW)) {
            GraphMLExporter<SimpleWeightedGraph, BufferedWriter> exporter = new GraphMLExporter<>();
            exporter.exportGraph(graph, writer);
        } catch (IOException ex) {
            System.out.println("Could not make new file named "+filename);
            System.err.println(ex);
        }
    }
}

View File

@@ -0,0 +1,106 @@
import org.jgrapht.graph.SimpleWeightedGraph;
import java.time.Duration;
import java.util.Map;
//Can't just write the graph, because I need the occupancy data too.
//Makes most sense to serialize object and write that to a file.
//Which means there's no reason to split map data and graph data up.
/**
 * Serializable bundle of a sample plate's bipartite occupancy graph plus all
 * the metadata needed to run matching and report results (vertex/sequence
 * translation maps, well counts, plate geometry, and build time).
 * Immutable except for {@code sourceFilename}, which is set after construction.
 */
public class GraphWithMapData implements java.io.Serializable {
    //NOTE(review): no explicit serialVersionUID. Adding one now would break
    //deserialization of previously written .ser files, so the JVM-computed
    //default is deliberately kept. New serializable classes should declare one.
    private String sourceFilename; //set later via setSourceFilename()
    //raw type preserved from the existing API; vertex/edge types are fixed by the builder
    private final SimpleWeightedGraph graph;
    private final Integer numWells;
    //distinct per-well T cell concentrations; getter exposes the array directly, so callers must not mutate it
    private final Integer[] wellConcentrations;
    private final Integer alphaCount;
    private final Integer betaCount;
    private final Map<Integer, Integer> distCellsMapAlphaKey;
    private final Map<Integer, Integer> plateVtoAMap; //graph vertex id -> alpha sequence
    private final Map<Integer, Integer> plateVtoBMap; //graph vertex id -> beta sequence
    private final Map<Integer, Integer> plateAtoVMap; //alpha sequence -> graph vertex id
    private final Map<Integer, Integer> plateBtoVMap; //beta sequence -> graph vertex id
    private final Map<Integer, Integer> alphaWellCounts; //alpha sequence -> number of wells it occupies
    private final Map<Integer, Integer> betaWellCounts;  //beta sequence -> number of wells it occupies
    private final Duration time; //how long graph construction took

    /**
     * Bundles a completed occupancy graph with its lookup maps and metadata.
     * All references are stored as-is (no defensive copies), matching the
     * original behavior; callers should not mutate the arguments afterward.
     */
    public GraphWithMapData(SimpleWeightedGraph graph, Integer numWells, Integer[] wellConcentrations,
                            Integer alphaCount, Integer betaCount,
                            Map<Integer, Integer> distCellsMapAlphaKey, Map<Integer, Integer> plateVtoAMap,
                            Map<Integer,Integer> plateVtoBMap, Map<Integer, Integer> plateAtoVMap,
                            Map<Integer, Integer> plateBtoVMap, Map<Integer, Integer> alphaWellCounts,
                            Map<Integer, Integer> betaWellCounts, Duration time) {
        this.graph = graph;
        this.numWells = numWells;
        this.wellConcentrations = wellConcentrations;
        this.alphaCount = alphaCount;
        this.betaCount = betaCount;
        this.distCellsMapAlphaKey = distCellsMapAlphaKey;
        this.plateVtoAMap = plateVtoAMap;
        this.plateVtoBMap = plateVtoBMap;
        this.plateAtoVMap = plateAtoVMap;
        this.plateBtoVMap = plateBtoVMap;
        this.alphaWellCounts = alphaWellCounts;
        this.betaWellCounts = betaWellCounts;
        this.time = time;
    }

    public SimpleWeightedGraph getGraph() {
        return graph;
    }

    public Integer getNumWells() {
        return numWells;
    }

    /** @return the internal array (not a copy) -- do not modify */
    public Integer[] getWellConcentrations() {
        return wellConcentrations;
    }

    public Integer getAlphaCount() {
        return alphaCount;
    }

    public Integer getBetaCount() {
        return betaCount;
    }

    public Map<Integer, Integer> getDistCellsMapAlphaKey() {
        return distCellsMapAlphaKey;
    }

    public Map<Integer, Integer> getPlateVtoAMap() {
        return plateVtoAMap;
    }

    public Map<Integer, Integer> getPlateVtoBMap() {
        return plateVtoBMap;
    }

    public Map<Integer, Integer> getPlateAtoVMap() {
        return plateAtoVMap;
    }

    public Map<Integer, Integer> getPlateBtoVMap() {
        return plateBtoVMap;
    }

    public Map<Integer, Integer> getAlphaWellCounts() {
        return alphaWellCounts;
    }

    public Map<Integer, Integer> getBetaWellCounts() {
        return betaWellCounts;
    }

    public Duration getTime() {
        return time;
    }

    /** Records which serialized file this bundle was read from/written to. */
    public void setSourceFilename(String filename) {
        this.sourceFilename = filename;
    }

    public String getSourceFilename() {
        return sourceFilename;
    }
}

View File

@@ -17,18 +17,28 @@ public class Plate {
boolean exponential = false; boolean exponential = false;
public Plate (int size, double error, Integer[] concentrations) { public Plate(int size, double error, Integer[] concentrations) {
this.size = size; this.size = size;
this.error = error; this.error = error;
this.concentrations = concentrations; this.concentrations = concentrations;
//this.stdDev = stdDev;
wells = new ArrayList<>(); wells = new ArrayList<>();
} }
public Plate(String sourceFileName, List<List<Integer[]>> wells){ public Plate(String sourceFileName, List<List<Integer[]>> wells) {
this.sourceFile = sourceFileName; this.sourceFile = sourceFileName;
this.wells = wells; this.wells = wells;
this.size = wells.size(); this.size = wells.size();
List<Integer> concentrations = new ArrayList<>();
for (List<Integer[]> w: wells) {
if(!concentrations.contains(w.size())){
concentrations.add(w.size());
}
}
this.concentrations = new Integer[concentrations.size()];
for (int i = 0; i < this.concentrations.length; i++) {
this.concentrations[i] = concentrations.get(i);
}
} }
public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){ public void fillWellsExponential(String sourceFileName, List<Integer[]> cells, double lambda){
@@ -45,6 +55,7 @@ public class Plate {
List<Integer[]> well = new ArrayList<>(); List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < concentrations[section]; j++) { for (int j = 0; j < concentrations[section]; j++) {
do { do {
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size()); m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
} while (m >= cells.size() || m < 0); } while (m >= cells.size() || m < 0);
n = (int) Math.floor(m); n = (int) Math.floor(m);
@@ -57,7 +68,7 @@ public class Plate {
} }
Integer[] cellToAdd = cells.get(n).clone(); Integer[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){ for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
cellToAdd[k] = -1; cellToAdd[k] = -1;
} }
} }
@@ -87,7 +98,7 @@ public class Plate {
n = (int) Math.floor(m); n = (int) Math.floor(m);
Integer[] cellToAdd = cells.get(n).clone(); Integer[] cellToAdd = cells.get(n).clone();
for(int k = 0; k < cellToAdd.length; k++){ for(int k = 0; k < cellToAdd.length; k++){
if(Math.abs(rand.nextDouble()) < error){//error applied to each peptide if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
cellToAdd[k] = -1; cellToAdd[k] = -1;
} }
} }
@@ -123,30 +134,30 @@ public class Plate {
return wells; return wells;
} }
//returns a map of the counts of the peptide at cell index pIndex, in all wells //returns a map of the counts of the sequence at cell index sIndex, in all wells
public Map<Integer, Integer> assayWellsPeptideP(int... pIndices){ public Map<Integer, Integer> assayWellsSequenceS(int... sIndices){
return this.assayWellsPeptideP(0, size, pIndices); return this.assayWellsSequenceS(0, size, sIndices);
} }
//returns a map of the counts of the peptide at cell index pIndex, in a specific well //returns a map of the counts of the sequence at cell index sIndex, in a specific well
public Map<Integer, Integer> assayWellsPeptideP(int n, int... pIndices) { return this.assayWellsPeptideP(n, n+1, pIndices);} public Map<Integer, Integer> assayWellsSequenceS(int n, int... sIndices) { return this.assayWellsSequenceS(n, n+1, sIndices);}
//returns a map of the counts of the peptide at cell index pIndex, in a range of wells //returns a map of the counts of the sequence at cell index sIndex, in a range of wells
public Map<Integer, Integer> assayWellsPeptideP(int start, int end, int... pIndices) { public Map<Integer, Integer> assayWellsSequenceS(int start, int end, int... sIndices) {
Map<Integer,Integer> assay = new HashMap<>(); Map<Integer,Integer> assay = new HashMap<>();
for(int pIndex: pIndices){ for(int pIndex: sIndices){
for(int i = start; i < end; i++){ for(int i = start; i < end; i++){
countPeptides(assay, wells.get(i), pIndex); countSequences(assay, wells.get(i), pIndex);
} }
} }
return assay; return assay;
} }
//For the peptides at cell indices pIndices, counts number of unique peptides in the given well into the given map //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
private void countPeptides(Map<Integer, Integer> wellMap, List<Integer[]> well, int... pIndices) { private void countSequences(Map<Integer, Integer> wellMap, List<Integer[]> well, int... sIndices) {
for(Integer[] cell : well) { for(Integer[] cell : well) {
for(int pIndex: pIndices){ for(int sIndex: sIndices){
if(cell[pIndex] != -1){ if(cell[sIndex] != -1){
wellMap.merge(cell[pIndex], 1, (oldValue, newValue) -> oldValue + newValue); wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
} }
} }
} }

View File

@@ -73,6 +73,7 @@ public class PlateFileWriter {
} }
rows.add(tmp); rows.add(tmp);
} }
//build string of well concentrations
StringBuilder concen = new StringBuilder(); StringBuilder concen = new StringBuilder();
for(Integer i: concentrations){ for(Integer i: concentrations){
concen.append(i.toString()); concen.append(i.toString());
@@ -80,7 +81,9 @@ public class PlateFileWriter {
} }
String concenString = concen.toString(); String concenString = concen.toString();
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build(); CSVFormat plateFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#')
.build();
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW); try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat); CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);

View File

@@ -1,84 +1,27 @@
//import org.jgrapht.Graph;
import org.jgrapht.alg.interfaces.MatchingAlgorithm; import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching; import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator; import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge; import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph; import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.AddressableHeap;
import org.jheaps.tree.PairingHeap; import org.jheaps.tree.PairingHeap;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext; import java.math.MathContext;
import java.text.NumberFormat; import java.text.NumberFormat;
import java.time.Instant; import java.time.Instant;
import java.time.Duration; import java.time.Duration;
import java.util.*; import java.util.*;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.IntStream; import java.util.stream.IntStream;
//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
public class Simulator { public class Simulator {
private static int cdr3AlphaIndex = 0; private static final int cdr3AlphaIndex = 0;
private static int cdr3BetaIndex = 1; private static final int cdr3BetaIndex = 1;
private static int cdr1AlphaIndex = 2; private static final int cdr1AlphaIndex = 2;
private static int cdr1BetaIndex = 3; private static final int cdr1BetaIndex = 3;
//Tested generating the cell sample file itself with a set distribution, but it wasn't working well
//realized I'd have to change matching algos as well, so abandoned it.
// public static CellSample generateCellSampleExp(Integer numDistinctCells, Integer cdr1Freq){
// //was told I=in real T cells, CDR1s have about one third the diversity of CDR3s
// //this could be wrong, though. could be less diversity than that.
// //previous sim was only CDR3s
// //Integer numDistinctCells = 1000000;
// //int cdr1Freq = 3;
//
// List<Integer> numbersCDR3 = new ArrayList<>();
// List<Integer> numbersCDR1 = new ArrayList<>();
// Integer numDistCDR3s = 2 * numDistinctCells + 1;
// IntStream.range(1, numDistCDR3s + 1).forEach(i -> numbersCDR3.add(i));
// IntStream.range(numDistCDR3s + 1, numDistCDR3s + 1 + (numDistCDR3s / cdr1Freq) + 1).forEach(i -> numbersCDR1.add(i));
// Collections.shuffle(numbersCDR3);
// Collections.shuffle(numbersCDR1);
//
// //Each cell represented by 4 values
// //two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s
// List<Integer[]> distinctCells = new ArrayList<>();
// for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
// Integer tmpCDR3a = numbersCDR3.get(i);
// Integer tmpCDR3b = numbersCDR3.get(i+1);
// Integer tmpCDR1a = numbersCDR1.get(i % numbersCDR1.size());
// Integer tmpCDR1b = numbersCDR1.get((i+1) % numbersCDR1.size());
// Integer[] tmp = {tmpCDR3a, tmpCDR3b, tmpCDR1a, tmpCDR1b};
// distinctCells.add(tmp);
// }
// List<Integer[]>sampleCells = new ArrayList<>();
// int count;
// int lastFactor = 0;
// int factor = 10;
// int index = 0;
// while(numDistinctCells / factor >= 1){
// int i = index;
// while(i < index + factor - lastFactor){
// count = 0;
// while(count < (numDistinctCells/(factor * 10))){
// sampleCells.add(distinctCells.get(i));
// count++;
// }
// i++;
// }
// index = i;
// lastFactor = factor;
// factor *=10;
// }
// System.out.println("Total cells in sample: " + sampleCells.size());
// System.out.println("Distinct cells in sample: " + index);
// return new CellSample(sampleCells, cdr1Freq);
// }
public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) { public static CellSample generateCellSample(Integer numDistinctCells, Integer cdr1Freq) {
//In real T cells, CDR1s have about one third the diversity of CDR3s //In real T cells, CDR1s have about one third the diversity of CDR3s
//previous sim was only CDR3s
List<Integer> numbersCDR3 = new ArrayList<>(); List<Integer> numbersCDR3 = new ArrayList<>();
List<Integer> numbersCDR1 = new ArrayList<>(); List<Integer> numbersCDR1 = new ArrayList<>();
Integer numDistCDR3s = 2 * numDistinctCells + 1; Integer numDistCDR3s = 2 * numDistinctCells + 1;
@@ -88,7 +31,7 @@ public class Simulator {
Collections.shuffle(numbersCDR1); Collections.shuffle(numbersCDR1);
//Each cell represented by 4 values //Each cell represented by 4 values
//two CDR3s, and two CDR1s. First two values are CDR3s, second two are CDR1s //two CDR3s, and two CDR1s. First two values are CDR3s (alpha, beta), second two are CDR1s (alpha, beta)
List<Integer[]> distinctCells = new ArrayList<>(); List<Integer[]> distinctCells = new ArrayList<>();
for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){ for(int i = 0; i < numbersCDR3.size() - 1; i = i + 2){
Integer tmpCDR3a = numbersCDR3.get(i); Integer tmpCDR3a = numbersCDR3.get(i);
@@ -101,63 +44,62 @@ public class Simulator {
return new CellSample(distinctCells, cdr1Freq); return new CellSample(distinctCells, cdr1Freq);
} }
public static MatchingResult matchCDR3s(List<Integer[]> distinctCells, //Make the graph needed for matching CDR3s
Plate samplePlate, Integer lowThreshold, public static GraphWithMapData makeGraph(List<Integer[]> distinctCells, Plate samplePlate, boolean verbose) {
Integer highThreshold, boolean verbose){
if(verbose){System.out.println("Cells: " + distinctCells.size());}
Instant start = Instant.now(); Instant start = Instant.now();
int numWells = samplePlate.getSize();
int[] alphaIndex = {cdr3AlphaIndex}; int[] alphaIndex = {cdr3AlphaIndex};
int[] betaIndex = {cdr3BetaIndex}; int[] betaIndex = {cdr3BetaIndex};
int numWells = samplePlate.getSize();
if(verbose){System.out.println("Making cell maps");} if(verbose){System.out.println("Making cell maps");}
//HashMap keyed to Alphas, values Betas //HashMap keyed to Alphas, values Betas
Map<Integer, Integer> distCellsMapAlphaKey = makePeptideToPeptideMap(distinctCells, 0, 1); Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
if(verbose){System.out.println("Cell maps made");} if(verbose){System.out.println("Cell maps made");}
if(verbose){System.out.println("Making well maps");} if(verbose){System.out.println("Making well maps");}
Map<Integer, Integer> allAlphas = samplePlate.assayWellsPeptideP(alphaIndex); Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndex);
Map<Integer, Integer> allBetas = samplePlate.assayWellsPeptideP(betaIndex); Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndex);
int alphaCount = allAlphas.size(); int alphaCount = allAlphas.size();
if(verbose){System.out.println("all alphas count: " + alphaCount);} if(verbose){System.out.println("All alphas count: " + alphaCount);}
int betaCount = allBetas.size(); int betaCount = allBetas.size();
if(verbose){System.out.println("all betas count: " + betaCount);} if(verbose){System.out.println("All betas count: " + betaCount);}
if(verbose){System.out.println("Well maps made");} if(verbose){System.out.println("Well maps made");}
//Remove saturating-occupancy peptides because they have no signal value. //Remove saturating-occupancy sequences because they have no signal value.
//Remove below-minimum-overlap-threshold peptides because they can't possibly have an overlap with another //Remove sequences with total occupancy below minimum pair overlap threshold
//peptide that's above the threshold. if(verbose){System.out.println("Removing sequences present in all wells.");}
if(verbose){System.out.println("Removing peptides present in all wells.");} //if(verbose){System.out.println("Removing sequences with occupancy below the minimum overlap threshold");}
if(verbose){System.out.println("Removing peptides with occupancy below the minimum overlap threshold");} filterByOccupancyThreshold(allAlphas, 1, numWells - 1);
filterByOccupancyThreshold(allAlphas, lowThreshold, numWells - 1); filterByOccupancyThreshold(allBetas, 1, numWells - 1);
filterByOccupancyThreshold(allBetas, lowThreshold, numWells - 1); if(verbose){System.out.println("Sequences removed");}
if(verbose){System.out.println("Peptides removed");}
int pairableAlphaCount = allAlphas.size(); int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alpha count: " + pairableAlphaCount);} if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
int pairableBetaCount = allBetas.size(); int pairableBetaCount = allBetas.size();
if(verbose){System.out.println("Remaining beta count: " + pairableBetaCount);} if(verbose){System.out.println("Remaining betas count: " + pairableBetaCount);}
if(verbose){System.out.println("Making vertex maps");} if(verbose){System.out.println("Making vertex maps");}
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// distinct numbers associated with them. Since I'm using a 2D array, that means //distinct numbers associated with them. Since I'm using a 2D array, that means
// distinct indices between the rows and columns. vertexStartValue lets me track where I switch //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// from numbering rows to columns, so I can assign unique numbers to every vertex, and then //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// subtract the vertexStartValue from betas to use their vertex labels as array indices //subtract the vertexStartValue from betas to use their vertex labels as array indices
Integer vertexStartValue = 0; Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are alphas //keys are sequential integer vertices, values are alphas
Map<Integer, Integer> plateVtoAMap = makeVertexToPeptideMap(allAlphas, vertexStartValue); Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, vertexStartValue);
//New start value for vertex to beta map should be one more than final vertex value in alpha map //new start value for vertex to beta map should be one more than final vertex value in alpha map
vertexStartValue += plateVtoAMap.size(); vertexStartValue += plateVtoAMap.size();
//keys are sequential integers vertices, values are betas //keys are sequential integers vertices, values are betas
Map<Integer, Integer> plateVtoBMap = makeVertexToPeptideMap(allBetas, vertexStartValue); Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, vertexStartValue);
//keys are alphas, values are sequential integer vertices from previous map //keys are alphas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap); Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
//keys are betas, values are sequential integer vertices from previous map //keys are betas, values are sequential integer vertices from previous map
Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap); Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
if(verbose){System.out.println("Vertex maps made");} if(verbose){System.out.println("Vertex maps made");}
//make adjacency matrix for bipartite graph generator
//(technically this is only 1/4 of an adjacency matrix, but that's all you need
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
if(verbose){System.out.println("Creating adjacency matrix");} if(verbose){System.out.println("Creating adjacency matrix");}
//Count how many wells each alpha appears in //Count how many wells each alpha appears in
Map<Integer, Integer> alphaWellCounts = new HashMap<>(); Map<Integer, Integer> alphaWellCounts = new HashMap<>();
@@ -165,54 +107,78 @@ public class Simulator {
Map<Integer, Integer> betaWellCounts = new HashMap<>(); Map<Integer, Integer> betaWellCounts = new HashMap<>();
//the adjacency matrix to be used by the graph generator //the adjacency matrix to be used by the graph generator
double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()]; double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
countPeptidesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap, countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights); plateBtoVMap, alphaIndex, betaIndex, alphaWellCounts, betaWellCounts, weights);
if(verbose){System.out.println("matrix created");} if(verbose){System.out.println("Matrix created");}
/** //create bipartite graph
* This is where the bipartite graph is created if(verbose){System.out.println("Creating graph");}
*/
if(verbose){System.out.println("creating graph");}
//the graph object //the graph object
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class); new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator //the graph generator
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//the list of alpha vertices //the list of alpha vertices
List<Integer> alphaVertices = new ArrayList<>(); List<Integer> alphaVertices = new ArrayList<>(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
alphaVertices.addAll(plateVtoAMap.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(alphaVertices); graphGenerator.first(alphaVertices);
//the list of beta vertices //the list of beta vertices
List<Integer> betaVertices = new ArrayList<>(); List<Integer> betaVertices = new ArrayList<>(plateVtoBMap.keySet());
betaVertices.addAll(plateVtoBMap.keySet());
graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry graphGenerator.second(betaVertices); //This will work because LinkedHashMap preserves order of entry
//use adjacency matrix of weight created previously
graphGenerator.weights(weights); graphGenerator.weights(weights);
graphGenerator.generateGraph(graph); graphGenerator.generateGraph(graph);
if(verbose){System.out.println("Graph created");} if(verbose){System.out.println("Graph created");}
if(verbose){System.out.println("Eliminating edges with weights outside threshold values");} Instant stop = Instant.now();
Duration time = Duration.between(start, stop);
//return GraphWithMapData object
return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount,
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
}
//match CDR3s.
public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
int numWells = data.getNumWells();
Integer alphaCount = data.getAlphaCount();
Integer betaCount = data.getBetaCount();
Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();
Map<Integer, Integer> plateVtoAMap = data.getPlateVtoAMap();
Map<Integer, Integer> plateVtoBMap = data.getPlateVtoBMap();
Map<Integer, Integer> alphaWellCounts = data.getAlphaWellCounts();
Map<Integer, Integer> betaWellCounts = data.getBetaWellCounts();
SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph = data.getGraph();
//remove weights outside given overlap thresholds
if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
filterByOccupancyThreshold(graph, lowThreshold, highThreshold); filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
if(verbose){System.out.println("Over- and under-weight edges set to 0.0");} if(verbose){System.out.println("Over- and under-weight edges set to 0.0");}
//Filter by overlap size
if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
" percent of vertex occupancy value.");}
filterByOverlapSize(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap, minOverlapPercent);
if(verbose){System.out.println("Edges with weights too far below vertex occupancy values set to 0.0");}
/** //Filter by relative occupancy
* This is where the maximum weighted matching is found if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
*/ + maxOccupancyDifference);}
// if(verbose){System.out.println("Finding maximum weighted matching");} filterByRelativeOccupancy(graph, alphaWellCounts, betaWellCounts, plateVtoAMap, plateVtoBMap,
// MaximumWeightBipartiteMatching maxWeightMatching = maxOccupancyDifference);
// new MaximumWeightBipartiteMatching(graph, plateVtoAMap.keySet(), plateVtoBMap.keySet()); if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching(); "set to 0.0");}
// if(verbose){System.out.println("Matching completed");} //Find Maximum Weighted Matching
// Instant stop = Instant.now(); //using jheaps library class PairingHeap for improved efficiency
//trying with jheaps now
if(verbose){System.out.println("Finding maximum weighted matching");} if(verbose){System.out.println("Finding maximum weighted matching");}
//Attempting to use addressable heap to improve performance //Attempting to use addressable heap to improve performance
MaximumWeightBipartiteMatching maxWeightMatching = MaximumWeightBipartiteMatching maxWeightMatching =
new MaximumWeightBipartiteMatching(graph, new MaximumWeightBipartiteMatching(graph,
plateVtoAMap.keySet(), plateVtoAMap.keySet(),
plateVtoBMap.keySet(), plateVtoBMap.keySet(),
(i) -> new PairingHeap(Comparator.naturalOrder())); i -> new PairingHeap(Comparator.naturalOrder()));
MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching(); MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
if(verbose){System.out.println("Matching completed");} if(verbose){System.out.println("Matching completed");}
Instant stop = Instant.now(); Instant stop = Instant.now();
@@ -227,16 +193,15 @@ public class Simulator {
header.add("Matched correctly?"); header.add("Matched correctly?");
header.add("P-value"); header.add("P-value");
//Results for csv file //Results for csv file
List<List<String>> allResults = new ArrayList<>(); List<List<String>> allResults = new ArrayList<>();
NumberFormat nf = NumberFormat.getInstance(Locale.US); NumberFormat nf = NumberFormat.getInstance(Locale.US);
MathContext mc = new MathContext(3); MathContext mc = new MathContext(3);
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator(); Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null; DefaultWeightedEdge e;
int trueCount = 0; int trueCount = 0;
int falseCount = 0; int falseCount = 0;
boolean check = false; boolean check;
Map<Integer, Integer> matchMap = new HashMap<>(); Map<Integer, Integer> matchMap = new HashMap<>();
while(weightIter.hasNext()) { while(weightIter.hasNext()) {
e = weightIter.next(); e = weightIter.next();
@@ -269,31 +234,44 @@ public class Simulator {
} }
//Metadata comments for CSV file //Metadata comments for CSV file
int min = alphaCount > betaCount ? betaCount : alphaCount; int min = Math.min(alphaCount, betaCount);
double attemptRate = (double) (trueCount + falseCount) / min; double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc); BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
double pairingErrorRate = (double) falseCount / (trueCount + falseCount); double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc); BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
//get list of well concentrations
List<Integer> wellConcentrations = Arrays.asList(data.getWellConcentrations());
//make string out of concentrations list
StringBuilder concentrationStringBuilder = new StringBuilder();
for(Integer i: wellConcentrations){
concentrationStringBuilder.append(i.toString());
concentrationStringBuilder.append(" ");
}
String concentrationString = concentrationStringBuilder.toString();
List<String> comments = new ArrayList<>(); List<String> comments = new ArrayList<>();
comments.add("Source Sample Plate filename: " + data.getSourceFilename());
comments.add("Source Graph and Data filename: " + dataFilename);
comments.add("T cell counts in sample plate wells: " + concentrationString);
comments.add("Total alphas found: " + alphaCount); comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount); comments.add("Total betas found: " + betaCount);
comments.add("High overlap threshold: " + highThreshold); comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold); comments.add("Low overlap threshold: " + lowThreshold);
comments.add("Minimum overlap percent: " + minOverlapPercent);
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
comments.add("Pairing attempt rate: " + attemptRateTrunc); comments.add("Pairing attempt rate: " + attemptRateTrunc);
comments.add("Correct pairings: " + trueCount); comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount); comments.add("Incorrect pairings: " + falseCount);
comments.add("Pairing error rate: " + pairingErrorRateTrunc); comments.add("Pairing error rate: " + pairingErrorRateTrunc);
Duration time = Duration.between(start, stop); Duration time = Duration.between(start, stop);
time = time.plus(data.getTime());
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
if(verbose){ if(verbose){
for(String s: comments){ for(String s: comments){
System.out.println(s); System.out.println(s);
} }
} }
return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time);
return new MatchingResult(samplePlate.getSourceFileName(), comments, header, allResults, matchMap, time);
} }
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s. //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
@@ -313,13 +291,13 @@ public class Simulator {
System.out.println("Previous match maps made"); System.out.println("Previous match maps made");
System.out.println("Making cell maps"); System.out.println("Making cell maps");
Map<Integer, Integer> alphaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex); Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
Map<Integer, Integer> betaCDR3toCDR1Map = makePeptideToPeptideMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex); Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
System.out.println("Cell maps made"); System.out.println("Cell maps made");
System.out.println("Making well maps"); System.out.println("Making well maps");
Map<Integer, Integer> allCDR3s = samplePlate.assayWellsPeptideP(cdr3Indices); Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
Map<Integer, Integer> allCDR1s = samplePlate.assayWellsPeptideP(cdr1Indices); Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
int CDR3Count = allCDR3s.size(); int CDR3Count = allCDR3s.size();
System.out.println("all CDR3s count: " + CDR3Count); System.out.println("all CDR3s count: " + CDR3Count);
int CDR1Count = allCDR1s.size(); int CDR1Count = allCDR1s.size();
@@ -353,11 +331,11 @@ public class Simulator {
// subtract the vertexStartValue from CDR1s to use their vertex labels as array indices // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
Integer vertexStartValue = 0; Integer vertexStartValue = 0;
//keys are sequential integer vertices, values are CDR3s //keys are sequential integer vertices, values are CDR3s
Map<Integer, Integer> plateVtoCDR3Map = makeVertexToPeptideMap(allCDR3s, vertexStartValue); Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
//New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
vertexStartValue += plateVtoCDR3Map.size(); vertexStartValue += plateVtoCDR3Map.size();
//keys are sequential integers vertices, values are CDR1s //keys are sequential integers vertices, values are CDR1s
Map<Integer, Integer> plateVtoCDR1Map = makeVertexToPeptideMap(allCDR1s, vertexStartValue); Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
//keys are CDR3s, values are sequential integer vertices from previous map //keys are CDR3s, values are sequential integer vertices from previous map
Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map); Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
//keys are CDR1s, values are sequential integer vertices from previous map //keys are CDR1s, values are sequential integer vertices from previous map
@@ -374,7 +352,7 @@ public class Simulator {
Map<Integer, Integer> wellNCDR3s = null; Map<Integer, Integer> wellNCDR3s = null;
Map<Integer, Integer> wellNCDR1s = null; Map<Integer, Integer> wellNCDR1s = null;
double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()]; double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
countPeptidesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap, countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights); cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
System.out.println("Matrix created"); System.out.println("Matrix created");
@@ -383,11 +361,9 @@ public class Simulator {
new SimpleWeightedGraph<>(DefaultWeightedEdge.class); new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
List<Integer> cdr3Vertices = new ArrayList<>(); List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
cdr3Vertices.addAll(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
graphGenerator.first(cdr3Vertices); graphGenerator.first(cdr3Vertices);
List<Integer> cdr1Vertices = new ArrayList<>(); List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
cdr1Vertices.addAll(plateVtoCDR1Map.keySet());
graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
graphGenerator.weights(weights); graphGenerator.weights(weights);
graphGenerator.generateGraph(graph); graphGenerator.generateGraph(graph);
@@ -407,7 +383,7 @@ public class Simulator {
//first processing run //first processing run
Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>(); Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator(); Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
DefaultWeightedEdge e = null; DefaultWeightedEdge e;
while(weightIter.hasNext()){ while(weightIter.hasNext()){
e = weightIter.next(); e = weightIter.next();
// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) { // if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
@@ -531,7 +507,7 @@ public class Simulator {
comments.add("Correct matching rate: " + correctRate); comments.add("Correct matching rate: " + correctRate);
NumberFormat nf = NumberFormat.getInstance(Locale.US); NumberFormat nf = NumberFormat.getInstance(Locale.US);
Duration time = Duration.between(start, stop); Duration time = Duration.between(start, stop);
time.plus(previousTime); time = time.plus(previousTime);
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds"); comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
for(String s: comments){ for(String s: comments){
System.out.println(s); System.out.println(s);
@@ -605,38 +581,38 @@ public class Simulator {
//Counts the well occupancy of the row peptides and column peptides into given maps, and //Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array //fills weights in the given 2D array
private static void countPeptidesAndFillMatrix(Plate samplePlate, private static void countSequencesAndFillMatrix(Plate samplePlate,
Map<Integer,Integer> allRowPeptides, Map<Integer,Integer> allRowSequences,
Map<Integer,Integer> allColumnPeptides, Map<Integer,Integer> allColumnSequences,
Map<Integer,Integer> rowPeptideToVertexMap, Map<Integer,Integer> rowSequenceToVertexMap,
Map<Integer,Integer> columnPeptideToVertexMap, Map<Integer,Integer> columnSequenceToVertexMap,
int[] rowPeptideIndices, int[] rowSequenceIndices,
int[] colPeptideIndices, int[] colSequenceIndices,
Map<Integer, Integer> rowPeptideCounts, Map<Integer, Integer> rowSequenceCounts,
Map<Integer,Integer> columnPeptideCounts, Map<Integer,Integer> columnSequenceCounts,
double[][] weights){ double[][] weights){
Map<Integer, Integer> wellNRowPeptides = null; Map<Integer, Integer> wellNRowSequences = null;
Map<Integer, Integer> wellNColumnPeptides = null; Map<Integer, Integer> wellNColumnSequences = null;
int vertexStartValue = rowPeptideToVertexMap.size(); int vertexStartValue = rowSequenceToVertexMap.size();
int numWells = samplePlate.getSize(); int numWells = samplePlate.getSize();
for (int n = 0; n < numWells; n++) { for (int n = 0; n < numWells; n++) {
wellNRowPeptides = samplePlate.assayWellsPeptideP(n, rowPeptideIndices); wellNRowSequences = samplePlate.assayWellsSequenceS(n, rowSequenceIndices);
for (Integer a : wellNRowPeptides.keySet()) { for (Integer a : wellNRowSequences.keySet()) {
if(allRowPeptides.containsKey(a)){ if(allRowSequences.containsKey(a)){
rowPeptideCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue); rowSequenceCounts.merge(a, 1, (oldValue, newValue) -> oldValue + newValue);
} }
} }
wellNColumnPeptides = samplePlate.assayWellsPeptideP(n, colPeptideIndices); wellNColumnSequences = samplePlate.assayWellsSequenceS(n, colSequenceIndices);
for (Integer b : wellNColumnPeptides.keySet()) { for (Integer b : wellNColumnSequences.keySet()) {
if(allColumnPeptides.containsKey(b)){ if(allColumnSequences.containsKey(b)){
columnPeptideCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue); columnSequenceCounts.merge(b, 1, (oldValue, newValue) -> oldValue + newValue);
} }
} }
for (Integer i : wellNRowPeptides.keySet()) { for (Integer i : wellNRowSequences.keySet()) {
if(allRowPeptides.containsKey(i)){ if(allRowSequences.containsKey(i)){
for (Integer j : wellNColumnPeptides.keySet()) { for (Integer j : wellNColumnSequences.keySet()) {
if(allColumnPeptides.containsKey(j)){ if(allColumnSequences.containsKey(j)){
weights[rowPeptideToVertexMap.get(i)][columnPeptideToVertexMap.get(j) - vertexStartValue] += 1.0; weights[rowSequenceToVertexMap.get(i)][columnSequenceToVertexMap.get(j) - vertexStartValue] += 1.0;
} }
} }
} }
@@ -666,19 +642,54 @@ public class Simulator {
} }
} }
private static Map<Integer, Integer> makePeptideToPeptideMap(List<Integer[]> cells, int keyPeptideIndex, //Remove edges for pairs with large occupancy discrepancy
int valuePeptideIndex){ private static void filterByRelativeOccupancy(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> keyPeptideToValuePeptideMap = new HashMap<>(); Map<Integer, Integer> alphaWellCounts,
for (Integer[] cell : cells) { Map<Integer, Integer> betaWellCounts,
keyPeptideToValuePeptideMap.put(cell[keyPeptideIndex], cell[valuePeptideIndex]); Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
Integer maxOccupancyDifference) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
//Adjust this to something cleverer later
if (Math.abs(alphaOcc - betaOcc) >= maxOccupancyDifference) {
graph.setEdgeWeight(e, 0.0);
}
} }
return keyPeptideToValuePeptideMap;
} }
private static Map<Integer, Integer> makeVertexToPeptideMap(Map<Integer, Integer> peptides, Integer startValue) { //Remove edges for pairs where overlap size is significantly lower than the well occupancy
private static void filterByOverlapSize(SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
Map<Integer, Integer> alphaWellCounts,
Map<Integer, Integer> betaWellCounts,
Map<Integer, Integer> plateVtoAMap,
Map<Integer, Integer> plateVtoBMap,
Integer minOverlapPercent) {
for (DefaultWeightedEdge e : graph.edgeSet()) {
Integer alphaOcc = alphaWellCounts.get(plateVtoAMap.get(graph.getEdgeSource(e)));
Integer betaOcc = betaWellCounts.get(plateVtoBMap.get(graph.getEdgeTarget(e)));
double weight = graph.getEdgeWeight(e);
double min = minOverlapPercent / 100.0;
if ((weight / alphaOcc < min) || (weight / betaOcc < min)) {
graph.setEdgeWeight(e, 0.0);
}
}
}
private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
int valueSequenceIndex){
Map<Integer, Integer> keySequenceToValueSequenceMap = new HashMap<>();
for (Integer[] cell : cells) {
keySequenceToValueSequenceMap.put(cell[keySequenceIndex], cell[valueSequenceIndex]);
}
return keySequenceToValueSequenceMap;
}
private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry Map<Integer, Integer> map = new LinkedHashMap<>(); //LinkedHashMap to preserve order of entry
Integer index = startValue; Integer index = startValue;
for (Integer k: peptides.keySet()) { for (Integer k: sequences.keySet()) {
map.put(index, k); map.put(index, k);
index++; index++;
} }
@@ -693,9 +704,4 @@ public class Simulator {
return inverse; return inverse;
} }
private static int bigDecimalComparator(BigDecimal bd, Integer i) {
return bd.compareTo(BigDecimal.valueOf(i));
}
} }

File diff suppressed because it is too large Load Diff

17
src/main/java/Vertex.java Normal file
View File

@@ -0,0 +1,17 @@
//Immutable pairing of a sequence identifier with its well-occupancy count,
//used as a graph vertex payload. NOTE(review): the rest of the codebase was
//renamed from "peptide" to "sequence"; getSequence() is the accessor
//consistent with that rename, while getPeptide() is retained (deprecated)
//for backward compatibility with existing callers.
public class Vertex {
    //Identifier of the sequence this vertex represents (historically "peptide")
    private final Integer peptide;
    //Number of wells this sequence was observed in
    private final Integer occupancy;

    public Vertex(Integer peptide, Integer occupancy) {
        this.peptide = peptide;
        this.occupancy = occupancy;
    }

    //Preferred accessor, consistent with the peptide->sequence rename elsewhere.
    public Integer getSequence() {
        return peptide;
    }

    //Retained for backward compatibility with the original naming.
    @Deprecated
    public Integer getPeptide() {
        return peptide;
    }

    public Integer getOccupancy() {
        return occupancy;
    }
}