diff --git a/.idea/artifacts/TCellSim_jar.xml b/.idea/artifacts/BiGpairSEQ_Sim_jar.xml similarity index 77% rename from .idea/artifacts/TCellSim_jar.xml rename to .idea/artifacts/BiGpairSEQ_Sim_jar.xml index a083f93..6372f62 100644 --- a/.idea/artifacts/TCellSim_jar.xml +++ b/.idea/artifacts/BiGpairSEQ_Sim_jar.xml @@ -1,11 +1,11 @@ - - $PROJECT_DIR$/out/artifacts/TCellSim_jar - + + $PROJECT_DIR$/out/artifacts/BiGpairSEQ_Sim_jar + - + diff --git a/.idea/compiler.xml b/.idea/compiler.xml index c3728c7..13fb425 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -6,7 +6,7 @@ - + diff --git a/out/artifacts/TCellSim_jar/TCellSim.jar b/out/artifacts/BiGpairSEQ_Sim_jar/BiGpairSEQ_Sim.jar similarity index 94% rename from out/artifacts/TCellSim_jar/TCellSim.jar rename to out/artifacts/BiGpairSEQ_Sim_jar/BiGpairSEQ_Sim.jar index b83a18c..18913c8 100644 Binary files a/out/artifacts/TCellSim_jar/TCellSim.jar and b/out/artifacts/BiGpairSEQ_Sim_jar/BiGpairSEQ_Sim.jar differ diff --git a/out/artifacts/BiGpairSEQ_Sim_jar/readme.md b/out/artifacts/BiGpairSEQ_Sim_jar/readme.md new file mode 100644 index 0000000..31902f5 --- /dev/null +++ b/out/artifacts/BiGpairSEQ_Sim_jar/readme.md @@ -0,0 +1,32 @@ +BiGpairSEQ SIMULATOR + +ABOUT: +This program simulates BiGpairSEQ, a graph theory based adaptation +of the pairSEQ algorithm for pairing T cell receptor sequences. + +Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares +against a null distribution, BiGpairSEQ does not do any statistical calculations +directly. Instead, BiGpairSEQ creates a simple bipartite weighted graph representing the sample plate. +The distinct TCRA and TCRB sequences form the two sets of vertices. Every TCRA/TCRB pair that share a well +are connected by an edge, with the edge weight set to the number of wells in which both sequences appear. 
+(Sequences in all wells are filtered out prior to creating the graph, as there is no signal in their occupancy pattern.) +The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment problem" of finding a maximum weight +matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value. + +USAGE +Released as an executable .jar file with interactive, command line UI +Usage: java -jar BiGpairSEQ_Sim.jar + +Large cell sample or sample plate files may require large amounts of RAM. +It is often desirable to increase the JVM memory allocation with the -Xmx flag. +For example, to run the program with 32 gigabytes of memory, use command: +java -Xmx32G -jar BiGpairSEQ_Sim.jar + +Requires Java11 or higher (Openjdk-17 recommended) + +pairSEQ citation: +Howie, B., Sherwood, A. M., et al. +"High-throughput pairing of T cell receptor alpha and beta sequences." +Sci. Transl. Med. 7, 301ra131 (2015) + +Simulation by Eugene Fischer, 2021-2022 \ No newline at end of file diff --git a/out/artifacts/TCellSim_jar/readme.md b/out/artifacts/TCellSim_jar/readme.md deleted file mode 100644 index d929fd5..0000000 --- a/out/artifacts/TCellSim_jar/readme.md +++ /dev/null @@ -1,7 +0,0 @@ -Executable .jar file with interactive, command line UI -Usage: java -jar TCellSim.jar -To increase memory allocation, use -Xmx flag. 
-For example, to give the program 20 gigabytes of memory: -java -Xmx20G -jar TCellSim.jar - -Requires Java11 or higher (Openjdk-17 recommended) \ No newline at end of file diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 3588f33..f33d84d 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -55,6 +55,7 @@ public class Plate { List well = new ArrayList<>(); for (int j = 0; j < concentrations[section]; j++) { do { + //inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda) m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size()); } while (m >= cells.size() || m < 0); n = (int) Math.floor(m); diff --git a/src/main/java/UserInterface.java b/src/main/java/UserInterface.java index 0c54472..3a67326 100644 --- a/src/main/java/UserInterface.java +++ b/src/main/java/UserInterface.java @@ -269,7 +269,7 @@ public class UserInterface { //Need to re-do the CDR3/CDR1 matching to correspond to new pattern //System.out.println("5) Generate CDR3/CDR1 occupancy graph"); //System.out.println("6) Simulate CDR3/CDR1 T cell matching"); - System.out.println("9) Acknowledgements"); + System.out.println("9) About/Acknowledgments"); System.out.println("0) Exit"); try { input = sc.nextInt(); @@ -384,8 +384,8 @@ public class UserInterface { System.out.println("1) Poisson"); System.out.println("2) Gaussian"); System.out.println("3) Exponential"); - System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6.)"); - System.out.println("(approximated from slope of log-log graph in figure 4c)"); + System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)"); + System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)"); System.out.println("(Note: wider distributions are more memory intensive to match)"); System.out.print("Enter selection value: "); input = sc.nextInt(); @@ -664,6 +664,17 @@ public class 
UserInterface { System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation"); System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences."); System.out.println(""); + System.out.println("Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares"); + System.out.println("against a null distribution, BiGpairSEQ does not do any statistical calculations"); + System.out.println("directly. Instead, BiGpairSEQ creates a simple bipartite weighted graph representing"); + System.out.println("the sample plate. The distinct TCRA and TCRB sequences form the two sets of vertices."); + System.out.println("Every TCRA/TCRB pair that share a well are connected by an edge, with the edge weight"); + System.out.println("set to the number of wells in which both sequences appear. (Sequences in all wells are"); + System.out.println("filtered out prior to creating the graph, as there is no signal in their occupancy"); + System.out.println("pattern.) The problem of pairing TCRA/TCRB sequences thus reduces to the \"assignment"); + System.out.println("problem\" of finding a maximum weight matching on a bipartite graph--the subset of"); + System.out.println("vertex-disjoint edges whose weights sum to the maximum possible value."); + System.out.println(""); System.out.println("pairSEQ citation:"); System.out.println("Howie, B., Sherwood, A. M., et. al."); System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");