Compare commits
16 Commits
v2.0
...
578bdc0fbf
| Author | SHA1 | Date | |
|---|---|---|---|
| 578bdc0fbf | |||
| 8275cf7740 | |||
| 64209691f0 | |||
| 1886800873 | |||
| bedf0894bc | |||
| 2ac3451842 | |||
| 67ec3f3764 | |||
| b5a8b7e2d5 | |||
| 9fb3095f0f | |||
| 25acf920c2 | |||
| f301327693 | |||
| e04d2d6777 | |||
| 3e41afaa64 | |||
| bc5d67680d | |||
| f2347e8fc2 | |||
| c8364d8a6e |
19
readme.md
19
readme.md
@@ -20,8 +20,8 @@ The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment probl
|
|||||||
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
||||||
|
|
||||||
This is a well-studied combinatorial optimization problem, with many known solutions.
|
This is a well-studied combinatorial optimization problem, with many known solutions.
|
||||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
|
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral
|
||||||
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
weights is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||||
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
||||||
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
||||||
|
|
||||||
@@ -43,13 +43,13 @@ Run with the command:
|
|||||||
`java -jar BiGpairSEQ_Sim.jar`
|
`java -jar BiGpairSEQ_Sim.jar`
|
||||||
|
|
||||||
Processing sample plates with tens of thousands of sequences may require large amounts
|
Processing sample plates with tens of thousands of sequences may require large amounts
|
||||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
|
of RAM. It is often desirable to increase the JVM maximum heap allocation with the `-Xmx` flag.
|
||||||
For example, to run the program with 32 gigabytes of memory, use the command:
|
For example, to run the program with 32 gigabytes of memory, use the command:
|
||||||
|
|
||||||
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
||||||
|
|
||||||
There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
|
There are a number of command line options, to allow the program to be used in shell scripts. For a full list,
|
||||||
use the -help flag:
|
use the `-help` flag:
|
||||||
|
|
||||||
`java -jar BiGpairSEQ_Sim.jar -help`
|
`java -jar BiGpairSEQ_Sim.jar -help`
|
||||||
|
|
||||||
@@ -108,7 +108,7 @@ device-specific.)
|
|||||||
|
|
||||||
The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
|
The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
|
||||||
|
|
||||||
The program can optionally output Graph/Data files in .GraphML format (.graphml) for data portability. This can be
|
The program can optionally output Graph/Data files in GraphML format (.graphml) for data portability. This can be
|
||||||
turned on in the Options menu. By default, GraphML output is OFF.
|
turned on in the Options menu. By default, GraphML output is OFF.
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -203,8 +203,13 @@ Options for creating a Graph/Data file:
|
|||||||
|
|
||||||
These files do not have a human-readable structure, and are not portable to other programs.
|
These files do not have a human-readable structure, and are not portable to other programs.
|
||||||
|
|
||||||
(For portability to other software, turn on GraphML output in the Options menu. This will produce a .graphml file
|
*Optional GraphML output*
|
||||||
for the weighted graph, with vertex attributes sequence, type, and occupancy data.)
|
|
||||||
|
For portability of graph data to other software, turn on [GraphML](http://graphml.graphdrawing.org/index.html) output
|
||||||
|
in the Options menu in interactive mode, or use the `-graphml` command line argument. This will produce a .graphml file
|
||||||
|
for the weighted graph, with vertex attributes for sequence, type, and occupancy data. This graph contains all the data
|
||||||
|
necessary for the BiGpairSEQ matching algorithm. It does not include the data to measure pairing accuracy; for that,
|
||||||
|
compare the matching results to the original Cell Sample .csv file.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ public class BiGpairSEQ {
|
|||||||
private static String priorityQueueHeapType = "FIBONACCI";
|
private static String priorityQueueHeapType = "FIBONACCI";
|
||||||
private static boolean outputBinary = true;
|
private static boolean outputBinary = true;
|
||||||
private static boolean outputGraphML = false;
|
private static boolean outputGraphML = false;
|
||||||
|
private static final String version = "version 2.0";
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
if (args.length == 0) {
|
if (args.length == 0) {
|
||||||
@@ -172,5 +173,5 @@ public class BiGpairSEQ {
|
|||||||
|
|
||||||
public static boolean outputGraphML() {return outputGraphML;}
|
public static boolean outputGraphML() {return outputGraphML;}
|
||||||
public static void setOutputGraphML(boolean b) {outputGraphML = b;}
|
public static void setOutputGraphML(boolean b) {outputGraphML = b;}
|
||||||
|
public static String getVersion() { return version; }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -62,15 +62,18 @@ public class CommandLineInterface {
|
|||||||
|
|
||||||
if (line.hasOption("help")) {
|
if (line.hasOption("help")) {
|
||||||
HelpFormatter formatter = new HelpFormatter();
|
HelpFormatter formatter = new HelpFormatter();
|
||||||
formatter.printHelp("BiGpairSEQ_Sim", mainOptions);
|
formatter.printHelp("BiGpairSEQ_Sim.jar", mainOptions);
|
||||||
System.out.println();
|
System.out.println();
|
||||||
formatter.printHelp("BiGpairSEQ_SIM -cells", cellOptions);
|
formatter.printHelp("BiGpairSEQ_Sim.jar -cells", cellOptions);
|
||||||
System.out.println();
|
System.out.println();
|
||||||
formatter.printHelp("BiGpairSEQ_Sim -plate", plateOptions);
|
formatter.printHelp("BiGpairSEQ_Sim.jar -plate", plateOptions);
|
||||||
System.out.println();
|
System.out.println();
|
||||||
formatter.printHelp("BiGpairSEQ_Sim -graph", graphOptions);
|
formatter.printHelp("BiGpairSEQ_Sim.jar -graph", graphOptions);
|
||||||
System.out.println();
|
System.out.println();
|
||||||
formatter.printHelp("BiGpairSEQ_Sim -match", matchOptions);
|
formatter.printHelp("BiGpairSEQ_Sim.jar -match", matchOptions);
|
||||||
|
}
|
||||||
|
else if (line.hasOption("version")) {
|
||||||
|
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||||
}
|
}
|
||||||
else if (line.hasOption("cells")) {
|
else if (line.hasOption("cells")) {
|
||||||
line = parser.parse(cellOptions, Arrays.copyOfRange(args, 1, args.length));
|
line = parser.parse(cellOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||||
@@ -153,17 +156,24 @@ public class CommandLineInterface {
|
|||||||
else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two
|
else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two
|
||||||
line = parser.parse(matchOptions, Arrays.copyOfRange(args, 1, args.length));
|
line = parser.parse(matchOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||||
String graphFilename = line.getOptionValue("g");
|
String graphFilename = line.getOptionValue("g");
|
||||||
String outputFilename = line.getOptionValue("o");
|
|
||||||
|
String outputFilename;
|
||||||
|
if(line.hasOption("o")) {
|
||||||
|
outputFilename = line.getOptionValue("o");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
outputFilename = null;
|
||||||
|
}
|
||||||
Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
|
Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
|
||||||
Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
|
Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
|
||||||
Integer minOverlapPct;
|
int minOverlapPct;
|
||||||
if (line.hasOption("minpct")) { //see if this filter is being used
|
if (line.hasOption("minpct")) { //see if this filter is being used
|
||||||
minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
|
minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
minOverlapPct = 0;
|
minOverlapPct = 0;
|
||||||
}
|
}
|
||||||
Integer maxOccupancyDiff;
|
int maxOccupancyDiff;
|
||||||
if (line.hasOption("maxdiff")) { //see if this filter is being used
|
if (line.hasOption("maxdiff")) { //see if this filter is being used
|
||||||
maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
|
maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
|
||||||
}
|
}
|
||||||
@@ -173,10 +183,17 @@ public class CommandLineInterface {
|
|||||||
GraphWithMapData graph = getGraph(graphFilename);
|
GraphWithMapData graph = getGraph(graphFilename);
|
||||||
MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
|
MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
|
||||||
maxOccupancyDiff, minOverlapPct, false);
|
maxOccupancyDiff, minOverlapPct, false);
|
||||||
MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
|
if(outputFilename != null){
|
||||||
writer.writeResultsToFile();
|
MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
|
||||||
|
writer.writeResultsToFile();
|
||||||
|
}
|
||||||
//can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
|
//can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
|
||||||
//after I put those flags in the matchOptions
|
//after I put those flags in the matchOptions
|
||||||
|
if(line.hasOption("print-metadata")) {
|
||||||
|
for (String k : result.getMetadata().keySet()) {
|
||||||
|
System.out.println(k + ": " + result.getMetadata().get(k));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (ParseException exp) {
|
catch (ParseException exp) {
|
||||||
@@ -216,8 +233,11 @@ public class CommandLineInterface {
|
|||||||
.longOpt("match-cdr3")
|
.longOpt("match-cdr3")
|
||||||
.desc("Matches CDR3s. Requires a graph/data file.")
|
.desc("Matches CDR3s. Requires a graph/data file.")
|
||||||
.build();
|
.build();
|
||||||
|
Option printVersion = Option.builder("version")
|
||||||
|
.desc("Prints the program version number to stdout").build();
|
||||||
OptionGroup mainGroup = new OptionGroup();
|
OptionGroup mainGroup = new OptionGroup();
|
||||||
mainGroup.addOption(help);
|
mainGroup.addOption(help);
|
||||||
|
mainGroup.addOption(printVersion);
|
||||||
mainGroup.addOption(makeCells);
|
mainGroup.addOption(makeCells);
|
||||||
mainGroup.addOption(makePlate);
|
mainGroup.addOption(makePlate);
|
||||||
mainGroup.addOption(makeGraph);
|
mainGroup.addOption(makeGraph);
|
||||||
@@ -297,7 +317,7 @@ public class CommandLineInterface {
|
|||||||
.desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
|
.desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
|
||||||
.hasArgs()
|
.hasArgs()
|
||||||
.numberOfArgs(2)
|
.numberOfArgs(2)
|
||||||
.argName("minimum maximum")
|
.argName("min> <max")
|
||||||
.build();
|
.build();
|
||||||
Option specificWellPopulations = Option.builder("pop")
|
Option specificWellPopulations = Option.builder("pop")
|
||||||
.desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
|
.desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
|
||||||
@@ -326,22 +346,22 @@ public class CommandLineInterface {
|
|||||||
Options graphOptions = new Options();
|
Options graphOptions = new Options();
|
||||||
Option cellFilename = Option.builder("c")
|
Option cellFilename = Option.builder("c")
|
||||||
.longOpt("cell-file")
|
.longOpt("cell-file")
|
||||||
.desc("Cell sample file to use for checking accuracy")
|
.desc("Cell sample file to use for checking pairing accuracy")
|
||||||
.hasArg()
|
.hasArg()
|
||||||
.argName("filename")
|
.argName("filename")
|
||||||
.required().build();
|
.required().build();
|
||||||
Option plateFilename = Option.builder("p")
|
Option plateFilename = Option.builder("p")
|
||||||
.longOpt("plate-filename")
|
.longOpt("plate-filename")
|
||||||
.desc("Sample plate file (made from given cell sample file) to construct graph from")
|
.desc("Sample plate file from which to construct graph")
|
||||||
.hasArg()
|
.hasArg()
|
||||||
.argName("filename")
|
.argName("filename")
|
||||||
.required().build();
|
.required().build();
|
||||||
Option outputGraphML = Option.builder("graphml")
|
Option outputGraphML = Option.builder("graphml")
|
||||||
.desc("Output GraphML file")
|
.desc("(Optional) Output GraphML file")
|
||||||
.build();
|
.build();
|
||||||
Option outputSerializedBinary = Option.builder("nb")
|
Option outputSerializedBinary = Option.builder("nb")
|
||||||
.longOpt("no-binary")
|
.longOpt("no-binary")
|
||||||
.desc("Don't output serialized binary file")
|
.desc("(Optional) Don't output serialized binary file")
|
||||||
.build();
|
.build();
|
||||||
graphOptions.addOption(cellFilename);
|
graphOptions.addOption(cellFilename);
|
||||||
graphOptions.addOption(plateFilename);
|
graphOptions.addOption(plateFilename);
|
||||||
@@ -379,15 +399,36 @@ public class CommandLineInterface {
|
|||||||
.hasArg()
|
.hasArg()
|
||||||
.argName("number")
|
.argName("number")
|
||||||
.build();
|
.build();
|
||||||
matchCDR3options.addOption(graphFilename);
|
Option outputFile = Option.builder("o") //can't call the method this time, because this one's optional
|
||||||
matchCDR3options.addOption(minOccupancyOverlap);
|
.longOpt("output-file")
|
||||||
matchCDR3options.addOption(maxOccupancyOverlap);
|
.hasArg()
|
||||||
matchCDR3options.addOption(minOverlapPercent);
|
.argName("filename")
|
||||||
matchCDR3options.addOption(maxOccupancyDifference);
|
.desc("(Optional) Name of output the output file. If not present, no file will be written.")
|
||||||
matchCDR3options.addOption(outputFileOption());
|
.build();
|
||||||
//options for output to System.out
|
matchCDR3options.addOption(graphFilename)
|
||||||
//Option printPairingErrorRate = Option.builder()
|
.addOption(minOccupancyOverlap)
|
||||||
|
.addOption(maxOccupancyOverlap)
|
||||||
|
.addOption(minOverlapPercent)
|
||||||
|
.addOption(maxOccupancyDifference)
|
||||||
|
.addOption(outputFile);
|
||||||
|
|
||||||
|
//options for output to System.out
|
||||||
|
// Option printErrorRate = Option.builder().longOpt("print-error")
|
||||||
|
// .desc("(Optional) Print the pairing error rate to stdout").build();
|
||||||
|
// Option printAttempt = Option.builder().longOpt("print-attempt")
|
||||||
|
// .desc("(Optional) Print the pairing attempt rate to stdout").build();
|
||||||
|
// Option printCorrect = Option.builder().longOpt("print-correct")
|
||||||
|
// .desc("(Optional) Print the number of correct pairs to stdout").build();
|
||||||
|
// Option printIncorrect = Option.builder().longOpt("print-incorrect")
|
||||||
|
// .desc("(Optional) Print the number of incorrect pairs to stdout").build();
|
||||||
|
Option printMetadata = Option.builder().longOpt("print-metadata")
|
||||||
|
.desc("(Optional) Print summary of matching results to stdout.").build();
|
||||||
|
matchCDR3options
|
||||||
|
// .addOption(printErrorRate)
|
||||||
|
// .addOption(printAttempt)
|
||||||
|
// .addOption(printCorrect)
|
||||||
|
// .addOption(printIncorrect)
|
||||||
|
.addOption(printMetadata);
|
||||||
return matchCDR3options;
|
return matchCDR3options;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ public class GraphDataObjectReader {
|
|||||||
|
|
||||||
private GraphWithMapData data;
|
private GraphWithMapData data;
|
||||||
private String filename;
|
private String filename;
|
||||||
private boolean verbose = true;
|
|
||||||
|
|
||||||
public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
|
public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
|
||||||
if(!filename.matches(".*\\.ser")){
|
if(!filename.matches(".*\\.ser")){
|
||||||
@@ -15,8 +15,10 @@ public class GraphDataObjectReader {
|
|||||||
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
||||||
ObjectInputStream in = new ObjectInputStream(fileIn))
|
ObjectInputStream in = new ObjectInputStream(fileIn))
|
||||||
{
|
{
|
||||||
System.out.println("Reading graph data from file. This may take some time");
|
if (verbose) {
|
||||||
System.out.println("File I/O time is not included in results");
|
System.out.println("Reading graph data from file. This may take some time");
|
||||||
|
System.out.println("File I/O time is not included in results");
|
||||||
|
}
|
||||||
data = (GraphWithMapData) in.readObject();
|
data = (GraphWithMapData) in.readObject();
|
||||||
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
||||||
ex.printStackTrace();
|
ex.printStackTrace();
|
||||||
|
|||||||
@@ -570,6 +570,8 @@ public class InteractiveInterface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static void acknowledge(){
|
private static void acknowledge(){
|
||||||
|
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||||
|
System.out.println();
|
||||||
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
||||||
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
||||||
System.out.println();
|
System.out.println();
|
||||||
|
|||||||
@@ -245,11 +245,11 @@ public class Simulator implements GraphModificationFunctions {
|
|||||||
//rate of pairing error
|
//rate of pairing error
|
||||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||||
BigDecimal pairingErrorRateTrunc;
|
BigDecimal pairingErrorRateTrunc;
|
||||||
if(pairingErrorRate == NaN || pairingErrorRate == POSITIVE_INFINITY || pairingErrorRate == NEGATIVE_INFINITY) {
|
if(Double.isFinite(pairingErrorRate)) {
|
||||||
pairingErrorRateTrunc = new BigDecimal(-1, mc);
|
pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
pairingErrorRateTrunc = new BigDecimal(-1, mc);
|
||||||
}
|
}
|
||||||
//get list of well populations
|
//get list of well populations
|
||||||
Integer[] wellPopulations = data.getWellPopulations();
|
Integer[] wellPopulations = data.getWellPopulations();
|
||||||
|
|||||||
Reference in New Issue
Block a user