Compare commits
18 Commits
v2.0
...
4c872ed48e
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c872ed48e | |||
| 3fc39302c7 | |||
| 578bdc0fbf | |||
| 8275cf7740 | |||
| 64209691f0 | |||
| 1886800873 | |||
| bedf0894bc | |||
| 2ac3451842 | |||
| 67ec3f3764 | |||
| b5a8b7e2d5 | |||
| 9fb3095f0f | |||
| 25acf920c2 | |||
| f301327693 | |||
| e04d2d6777 | |||
| 3e41afaa64 | |||
| bc5d67680d | |||
| f2347e8fc2 | |||
| c8364d8a6e |
19
readme.md
19
readme.md
@@ -20,8 +20,8 @@ The problem of pairing TCRA/TCRB sequences thus reduces to the "assignment probl
|
||||
matching on a bipartite graph--the subset of vertex-disjoint edges whose weights sum to the maximum possible value.
|
||||
|
||||
This is a well-studied combinatorial optimization problem, with many known solutions.
|
||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral weights
|
||||
is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||
The most efficient algorithm known to the author for maximum weight matching of a bipartite graph with strictly integral
|
||||
weights is from Duan and Su (2012). For a graph with m edges, n vertices per side, and maximum integer edge weight N,
|
||||
their algorithm runs in **O(m sqrt(n) log(N))** time. As the graph representation of a pairSEQ experiment is
|
||||
bipartite with integer weights, this algorithm is ideal for BiGpairSEQ.
|
||||
|
||||
@@ -43,13 +43,13 @@ Run with the command:
|
||||
`java -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
Processing sample plates with tens of thousands of sequences may require large amounts
|
||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the -Xmx flag.
|
||||
of RAM. It is often desirable to increase the JVM maximum heap allocation with the `-Xmx` flag.
|
||||
For example, to run the program with 32 gigabytes of memory, use the command:
|
||||
|
||||
`java -Xmx32G -jar BiGpairSEQ_Sim.jar`
|
||||
|
||||
A number of command line options are available so that the program can be used in shell scripts. For a full list,
|
||||
use the -help flag:
|
||||
use the `-help` flag:
|
||||
|
||||
`java -jar BiGpairSEQ_Sim.jar -help`
|
||||
|
||||
@@ -108,7 +108,7 @@ device-specific.)
|
||||
|
||||
The program's caching behavior can be controlled in the Options menu. By default, all caching is OFF.
|
||||
|
||||
The program can optionally output Graph/Data files in .GraphML format (.graphml) for data portability. This can be
|
||||
The program can optionally output Graph/Data files in GraphML format (.graphml) for data portability. This can be
|
||||
turned on in the Options menu. By default, GraphML output is OFF.
|
||||
|
||||
---
|
||||
@@ -203,8 +203,13 @@ Options for creating a Graph/Data file:
|
||||
|
||||
These files do not have a human-readable structure, and are not portable to other programs.
|
||||
|
||||
(For portability to other software, turn on GraphML output in the Options menu. This will produce a .graphml file
|
||||
for the weighted graph, with vertex attributes sequence, type, and occupancy data.)
|
||||
*Optional GraphML output*
|
||||
|
||||
For portability of graph data to other software, turn on [GraphML](http://graphml.graphdrawing.org/index.html) output
|
||||
in the Options menu in interactive mode, or use the `-graphml` command line argument. This will produce a .graphml file
|
||||
for the weighted graph, with vertex attributes for sequence, type, and occupancy data. This graph contains all the data
|
||||
necessary for the BiGpairSEQ matching algorithm. It does not include the data to measure pairing accuracy; for that,
|
||||
compare the matching results to the original Cell Sample .csv file.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ public class BiGpairSEQ {
|
||||
private static String priorityQueueHeapType = "FIBONACCI";
|
||||
private static boolean outputBinary = true;
|
||||
private static boolean outputGraphML = false;
|
||||
private static final String version = "version 2.0";
|
||||
|
||||
public static void main(String[] args) {
|
||||
if (args.length == 0) {
|
||||
@@ -172,5 +173,5 @@ public class BiGpairSEQ {
|
||||
|
||||
public static boolean outputGraphML() {return outputGraphML;}
|
||||
public static void setOutputGraphML(boolean b) {outputGraphML = b;}
|
||||
|
||||
public static String getVersion() { return version; }
|
||||
}
|
||||
|
||||
@@ -62,15 +62,18 @@ public class CommandLineInterface {
|
||||
|
||||
if (line.hasOption("help")) {
|
||||
HelpFormatter formatter = new HelpFormatter();
|
||||
formatter.printHelp("BiGpairSEQ_Sim", mainOptions);
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar", mainOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_SIM -cells", cellOptions);
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -cells", cellOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim -plate", plateOptions);
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -plate", plateOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim -graph", graphOptions);
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -graph", graphOptions);
|
||||
System.out.println();
|
||||
formatter.printHelp("BiGpairSEQ_Sim -match", matchOptions);
|
||||
formatter.printHelp("BiGpairSEQ_Sim.jar -match", matchOptions);
|
||||
}
|
||||
else if (line.hasOption("version")) {
|
||||
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||
}
|
||||
else if (line.hasOption("cells")) {
|
||||
line = parser.parse(cellOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
@@ -153,17 +156,24 @@ public class CommandLineInterface {
|
||||
else if (line.hasOption("match")) { //can add a flag for which match type in future, spit this in two
|
||||
line = parser.parse(matchOptions, Arrays.copyOfRange(args, 1, args.length));
|
||||
String graphFilename = line.getOptionValue("g");
|
||||
String outputFilename = line.getOptionValue("o");
|
||||
|
||||
String outputFilename;
|
||||
if(line.hasOption("o")) {
|
||||
outputFilename = line.getOptionValue("o");
|
||||
}
|
||||
else {
|
||||
outputFilename = null;
|
||||
}
|
||||
Integer minThreshold = Integer.parseInt(line.getOptionValue("min"));
|
||||
Integer maxThreshold = Integer.parseInt(line.getOptionValue("max"));
|
||||
Integer minOverlapPct;
|
||||
int minOverlapPct;
|
||||
if (line.hasOption("minpct")) { //see if this filter is being used
|
||||
minOverlapPct = Integer.parseInt(line.getOptionValue("minpct"));
|
||||
}
|
||||
else {
|
||||
minOverlapPct = 0;
|
||||
}
|
||||
Integer maxOccupancyDiff;
|
||||
int maxOccupancyDiff;
|
||||
if (line.hasOption("maxdiff")) { //see if this filter is being used
|
||||
maxOccupancyDiff = Integer.parseInt(line.getOptionValue("maxdiff"));
|
||||
}
|
||||
@@ -173,10 +183,38 @@ public class CommandLineInterface {
|
||||
GraphWithMapData graph = getGraph(graphFilename);
|
||||
MatchingResult result = Simulator.matchCDR3s(graph, graphFilename, minThreshold, maxThreshold,
|
||||
maxOccupancyDiff, minOverlapPct, false);
|
||||
if(outputFilename != null){
|
||||
MatchingFileWriter writer = new MatchingFileWriter(outputFilename, result);
|
||||
writer.writeResultsToFile();
|
||||
}
|
||||
//can put a bunch of ifs for outputting various things from the MatchingResult to System.out here
|
||||
//after I put those flags in the matchOptions
|
||||
if(line.hasOption("print-metadata")) {
|
||||
for (String k : result.getMetadata().keySet()) {
|
||||
System.out.println(k + ": " + result.getMetadata().get(k));
|
||||
}
|
||||
}
|
||||
if(line.hasOption("print-error")) {
|
||||
System.out.println("pairing error rate: " + result.getPairingErrorRate());
|
||||
}
|
||||
if(line.hasOption("print-attempt")) {
|
||||
System.out.println("pairing attempt rate: " +result.getPairingAttemptRate());
|
||||
}
|
||||
if(line.hasOption("print-correct")) {
|
||||
System.out.println("correct pairings: " + result.getCorrectPairingCount());
|
||||
}
|
||||
if(line.hasOption("print-incorrect")) {
|
||||
System.out.println("incorrect pairings: " + result.getIncorrectPairingCount());
|
||||
}
|
||||
if(line.hasOption("print-alphas")) {
|
||||
System.out.println("total alphas found: " + result.getAlphaCount());
|
||||
}
|
||||
if(line.hasOption("print-betas")) {
|
||||
System.out.println("total betas found: " + result.getBetaCount());
|
||||
}
|
||||
if(line.hasOption("print-time")) {
|
||||
System.out.println("simulation time (seconds): " + result.getSimulationTime());
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (ParseException exp) {
|
||||
@@ -216,8 +254,11 @@ public class CommandLineInterface {
|
||||
.longOpt("match-cdr3")
|
||||
.desc("Matches CDR3s. Requires a graph/data file.")
|
||||
.build();
|
||||
Option printVersion = Option.builder("version")
|
||||
.desc("Prints the program version number to stdout").build();
|
||||
OptionGroup mainGroup = new OptionGroup();
|
||||
mainGroup.addOption(help);
|
||||
mainGroup.addOption(printVersion);
|
||||
mainGroup.addOption(makeCells);
|
||||
mainGroup.addOption(makePlate);
|
||||
mainGroup.addOption(makeGraph);
|
||||
@@ -297,7 +338,7 @@ public class CommandLineInterface {
|
||||
.desc("Randomize well populations on sample plate. Takes two arguments: the minimum possible population and the maximum possible population.")
|
||||
.hasArgs()
|
||||
.numberOfArgs(2)
|
||||
.argName("minimum maximum")
|
||||
.argName("min> <max")
|
||||
.build();
|
||||
Option specificWellPopulations = Option.builder("pop")
|
||||
.desc("The well populations for each section of the sample plate. There will be as many sections as there are populations given.")
|
||||
@@ -326,22 +367,22 @@ public class CommandLineInterface {
|
||||
Options graphOptions = new Options();
|
||||
Option cellFilename = Option.builder("c")
|
||||
.longOpt("cell-file")
|
||||
.desc("Cell sample file to use for checking accuracy")
|
||||
.desc("Cell sample file to use for checking pairing accuracy")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option plateFilename = Option.builder("p")
|
||||
.longOpt("plate-filename")
|
||||
.desc("Sample plate file (made from given cell sample file) to construct graph from")
|
||||
.desc("Sample plate file from which to construct graph")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.required().build();
|
||||
Option outputGraphML = Option.builder("graphml")
|
||||
.desc("Output GraphML file")
|
||||
.desc("(Optional) Output GraphML file")
|
||||
.build();
|
||||
Option outputSerializedBinary = Option.builder("nb")
|
||||
.longOpt("no-binary")
|
||||
.desc("Don't output serialized binary file")
|
||||
.desc("(Optional) Don't output serialized binary file")
|
||||
.build();
|
||||
graphOptions.addOption(cellFilename);
|
||||
graphOptions.addOption(plateFilename);
|
||||
@@ -379,15 +420,46 @@ public class CommandLineInterface {
|
||||
.hasArg()
|
||||
.argName("number")
|
||||
.build();
|
||||
matchCDR3options.addOption(graphFilename);
|
||||
matchCDR3options.addOption(minOccupancyOverlap);
|
||||
matchCDR3options.addOption(maxOccupancyOverlap);
|
||||
matchCDR3options.addOption(minOverlapPercent);
|
||||
matchCDR3options.addOption(maxOccupancyDifference);
|
||||
matchCDR3options.addOption(outputFileOption());
|
||||
//options for output to System.out
|
||||
//Option printPairingErrorRate = Option.builder()
|
||||
Option outputFile = Option.builder("o") //can't call the method this time, because this one's optional
|
||||
.longOpt("output-file")
|
||||
.hasArg()
|
||||
.argName("filename")
|
||||
.desc("(Optional) Name of output the output file. If not present, no file will be written.")
|
||||
.build();
|
||||
matchCDR3options.addOption(graphFilename)
|
||||
.addOption(minOccupancyOverlap)
|
||||
.addOption(maxOccupancyOverlap)
|
||||
.addOption(minOverlapPercent)
|
||||
.addOption(maxOccupancyDifference)
|
||||
.addOption(outputFile);
|
||||
|
||||
//options for output to System.out
|
||||
Option printAlphaCount = Option.builder().longOpt("print-alphas")
|
||||
.desc("(Optional) Print the number of distinct alpha sequences to stdout.").build();
|
||||
Option printBetaCount = Option.builder().longOpt("print-betas")
|
||||
.desc("(Optional) Print the number of distinct beta sequences to stdout.").build();
|
||||
Option printTime = Option.builder().longOpt("print-time")
|
||||
.desc("(Optional) Print the total simulation time to stdout.").build();
|
||||
Option printErrorRate = Option.builder().longOpt("print-error")
|
||||
.desc("(Optional) Print the pairing error rate to stdout").build();
|
||||
Option printAttempt = Option.builder().longOpt("print-attempt")
|
||||
.desc("(Optional) Print the pairing attempt rate to stdout").build();
|
||||
Option printCorrect = Option.builder().longOpt("print-correct")
|
||||
.desc("(Optional) Print the number of correct pairs to stdout").build();
|
||||
Option printIncorrect = Option.builder().longOpt("print-incorrect")
|
||||
.desc("(Optional) Print the number of incorrect pairs to stdout").build();
|
||||
Option printMetadata = Option.builder().longOpt("print-metadata")
|
||||
.desc("(Optional) Print a full summary of the matching results to stdout.").build();
|
||||
|
||||
matchCDR3options
|
||||
.addOption(printErrorRate)
|
||||
.addOption(printAttempt)
|
||||
.addOption(printCorrect)
|
||||
.addOption(printIncorrect)
|
||||
.addOption(printMetadata)
|
||||
.addOption(printAlphaCount)
|
||||
.addOption(printBetaCount)
|
||||
.addOption(printTime);
|
||||
return matchCDR3options;
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ public class GraphDataObjectReader {
|
||||
|
||||
private GraphWithMapData data;
|
||||
private String filename;
|
||||
private boolean verbose = true;
|
||||
|
||||
|
||||
public GraphDataObjectReader(String filename, boolean verbose) throws IOException {
|
||||
if(!filename.matches(".*\\.ser")){
|
||||
@@ -15,10 +15,13 @@ public class GraphDataObjectReader {
|
||||
BufferedInputStream fileIn = new BufferedInputStream(new FileInputStream(filename));
|
||||
ObjectInputStream in = new ObjectInputStream(fileIn))
|
||||
{
|
||||
if (verbose) {
|
||||
System.out.println("Reading graph data from file. This may take some time");
|
||||
System.out.println("File I/O time is not included in results");
|
||||
}
|
||||
data = (GraphWithMapData) in.readObject();
|
||||
} catch (FileNotFoundException | ClassNotFoundException ex) {
|
||||
System.out.println("Graph/data file " + filename + " not found.");
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -570,6 +570,8 @@ public class InteractiveInterface {
|
||||
}
|
||||
|
||||
private static void acknowledge(){
|
||||
System.out.println("BiGpairSEQ_Sim " + BiGpairSEQ.getVersion());
|
||||
System.out.println();
|
||||
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
|
||||
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
|
||||
System.out.println();
|
||||
|
||||
@@ -245,11 +245,11 @@ public class Simulator implements GraphModificationFunctions {
|
||||
//rate of pairing error
|
||||
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
|
||||
BigDecimal pairingErrorRateTrunc;
|
||||
if(pairingErrorRate == NaN || pairingErrorRate == POSITIVE_INFINITY || pairingErrorRate == NEGATIVE_INFINITY) {
|
||||
pairingErrorRateTrunc = new BigDecimal(-1, mc);
|
||||
if(Double.isFinite(pairingErrorRate)) {
|
||||
pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
}
|
||||
else{
|
||||
pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
|
||||
pairingErrorRateTrunc = new BigDecimal(-1, mc);
|
||||
}
|
||||
//get list of well populations
|
||||
Integer[] wellPopulations = data.getWellPopulations();
|
||||
|
||||
Reference in New Issue
Block a user