Clean up file output, add UI verbosity

This commit is contained in:
2022-02-20 18:31:31 -06:00
parent 405fbf17ff
commit 2023bb9d7e
5 changed files with 63 additions and 39 deletions

View File

@@ -190,24 +190,28 @@ Example output:
---
```
# T cell counts in sample plate wells: 5000
# Total alphas found: 3387
# Total betas found: 3396
# Source Sample Plate filename: 4MilCellsPlate.csv
# Source Graph and Data filename: 4MilCellsPlateGraph.ser
# T cell counts in sample plate wells: 30000
# Total alphas found: 11813
# Total betas found: 11808
# High overlap threshold: 94
# Low overlap threshold: 3
# Minimum overlap percent: 0
# Maximum occupancy difference: 50
# Pairing attempt rate: 0.488
# Correct pairings: 1650
# Incorrect pairings: 4
# Pairing error rate: 0.00242
# Simulation time: 19 seconds
# Maximum occupancy difference: 96
# Pairing attempt rate: 0.438
# Correct pairings: 5151
# Incorrect pairings: 18
# Pairing error rate: 0.00348
# Simulation time: 862 seconds
```
| Alpha | Alpha well count | Beta | Beta well count | Overlap count | Matched Correctly? | P-value |
|---|---|---|---|---|---|---|
|716809|31|20739|34|31.0|TRUE|4.99E-25|
|753685|28|733213|27|27.0|TRUE|5.26E-23|
|5242972|17|1571520|18|17|true|1.41E-18|
|5161027|18|2072219|18|18|true|7.31E-20|
|4145198|33|1064455|30|29|true|2.65E-21|
|7700582|18|112748|18|18|true|7.31E-20|
|...|...|...|...|...|...|...|
---

View File

@@ -73,6 +73,7 @@ public class PlateFileWriter {
}
rows.add(tmp);
}
//build string of well concentrations
StringBuilder concen = new StringBuilder();
for(Integer i: concentrations){
concen.append(i.toString());
@@ -80,7 +81,9 @@ public class PlateFileWriter {
}
String concenString = concen.toString();
CSVFormat plateFileFormat = CSVFormat.Builder.create().setCommentMarker('#').build();
CSVFormat plateFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#')
.build();
try(BufferedWriter writer = Files.newBufferedWriter(Path.of(filename), StandardOpenOption.CREATE_NEW);
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
@@ -96,7 +99,7 @@ public class PlateFileWriter {
else {
printer.printComment("Std. dev.: " + stdDev);
}
printer.printRecords(wellsAsStrings);
printer.printRecords(rows);
} catch(IOException ex){
System.out.println("Could not make new file named "+filename);
System.err.println(ex);

View File

@@ -138,9 +138,9 @@ public class Simulator {
}
//match CDR3s.
public static MatchingResult matchCDR3s(GraphWithMapData data, Integer lowThreshold, Integer highThreshold,
Integer maxOccupancyDifference, Integer minOverlapPercent,
boolean verbose) {
public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
Integer highThreshold, Integer maxOccupancyDifference,
Integer minOverlapPercent, boolean verbose) {
Instant start = Instant.now();
int numWells = data.getNumWells();
Integer alphaCount = data.getAlphaCount();
@@ -250,6 +250,8 @@ public class Simulator {
String concentrationString = concentrationStringBuilder.toString();
List<String> comments = new ArrayList<>();
comments.add("Source Sample Plate filename: " + data.getSourceFilename());
comments.add("Source Graph and Data filename: " + dataFilename);
comments.add("T cell counts in sample plate wells: " + concentrationString);
comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount);

View File

@@ -256,7 +256,7 @@ public class UserInterface {
// }
// else {
while (!quit) {
System.out.println("");
System.out.println();
System.out.println("--------BiGPairSEQ SIMULATOR--------");
System.out.println("ALPHA/BETA T-CELL RECEPTOR MATCHING");
System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
@@ -316,6 +316,7 @@ public class UserInterface {
sc.next();
}
CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
assert filename != null;
CellFileWriter writer = new CellFileWriter(filename, sample);
writer.writeCellsToFile();
}
@@ -361,7 +362,7 @@ public class UserInterface {
String filename = null;
Double stdDev = 0.0;
Integer numWells = 0;
Integer numSections = 0;
Integer numSections;
Integer[] concentrations = {1};
Double dropOutRate = 0.0;
boolean poisson = false;
@@ -389,29 +390,28 @@ public class UserInterface {
System.out.println("(Note: wider distributions are more memory intensive to match)");
System.out.print("Enter selection value: ");
input = sc.nextInt();
switch(input) {
case 1:
poisson = true;
break;
case 2:
switch (input) {
case 1 -> poisson = true;
case 2 -> {
System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
System.out.println("(Note: wider distributions are more memory intensive to match)");
stdDev = sc.nextDouble();
if(stdDev <= 0.0){
if (stdDev <= 0.0) {
throw new InputMismatchException("Value must be positive.");
}
break;
case 3:
}
case 3 -> {
exponential = true;
System.out.println("Please enter lambda value for exponential distribution.");
lambda = sc.nextDouble();
if(lambda <= 0.0){
if (lambda <= 0.0) {
throw new InputMismatchException("Value must be positive.");
}
break;
default:
}
default -> {
System.out.println("Invalid input. Defaulting to exponential.");
exponential = true;
}
}
System.out.print("\nNumber of wells on plate: ");
numWells = sc.nextInt();
@@ -445,6 +445,8 @@ public class UserInterface {
System.out.println(ex);
sc.next();
}
System.out.println("Reading Cell Sample file: " + cellFile);
assert cellFile != null;
CellFileReader cellReader = new CellFileReader(cellFile);
if(exponential){
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
@@ -458,8 +460,11 @@ public class UserInterface {
}
Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
assert filename != null;
PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
System.out.println("Writing Sample Plate to file");
writer.writePlateFile();
System.out.println("Sample Plate written to file: " + filename);
}
}
@@ -484,7 +489,11 @@ public class UserInterface {
System.out.println(ex);
sc.next();
}
System.out.println("Reading Cell Sample file: " + cellFile);
assert cellFile != null;
CellFileReader cellReader = new CellFileReader(cellFile);
System.out.println("Reading Sample Plate file: " + plateFile);
assert plateFile != null;
PlateFileReader plateReader = new PlateFileReader(plateFile);
Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
if (cellReader.getCells().size() == 0){
@@ -496,20 +505,21 @@ public class UserInterface {
System.out.println("Returning to main menu.");
}
else{
List<Integer[]> cells = cellReader.getCells();
GraphWithMapData data = Simulator.makeGraph(cells, plate, true);
assert filename != null;
GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
System.out.println("Writing graph and occupancy data to file. This may take some time.");
System.out.println("File I/O time is not included in results.");
dataWriter.writeDataToFile();
System.out.println("Graph and Data file written to: " + filename);
}
}
//Simulate matching and output CSV file of results
private static void matchCDR3s() throws IOException {
String filename = null;
String dataFile = null;
String dataFilename = null;
Integer lowThreshold = 0;
Integer highThreshold = Integer.MAX_VALUE;
Integer maxOccupancyDiff = Integer.MAX_VALUE;
@@ -517,7 +527,7 @@ public class UserInterface {
try {
System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
System.out.println("Please enter name of an existing graph and occupancy data file: ");
dataFile = sc.next();
dataFilename = sc.next();
System.out.println("The matching results will be written to a file.");
System.out.print("Please enter a name for the output file: ");
filename = sc.next();
@@ -543,17 +553,20 @@ public class UserInterface {
//read object data from file
System.out.println("Reading graph data from file. This may take some time");
System.out.println("File I/O time is not included in results");
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFile);
assert dataFilename != null;
GraphDataObjectReader dataReader = new GraphDataObjectReader(dataFilename);
GraphWithMapData data = dataReader.getData();
//set source file name
data.setSourceFilename(dataFile);
data.setSourceFilename(dataFilename);
//simulate matching
MatchingResult results = Simulator.matchCDR3s(data, lowThreshold, highThreshold, maxOccupancyDiff,
MatchingResult results = Simulator.matchCDR3s(data, dataFilename, lowThreshold, highThreshold, maxOccupancyDiff,
minOverlapPercent, true);
//write results to file
assert filename != null;
MatchingFileWriter writer = new MatchingFileWriter(filename, results);
System.out.println("Writing results to file");
writer.writeResultsToFile();
System.out.println("Results written to file: " + filename);
}
///////
@@ -663,7 +676,7 @@ public class UserInterface {
private static void acknowledge(){
System.out.println("This program simulates BiGpairSEQ, a graph theory based adaptation");
System.out.println("of the pairSEQ algorithm for pairing T cell receptor sequences.");
System.out.println("");
System.out.println();
System.out.println("Unlike pairSEQ, which calculates p-values for every TCR alpha/beta overlap and compares");
System.out.println("against a null distribution, BiGpairSEQ does not do any statistical calculations");
System.out.println("directly. Instead, BiGpairSEQ creates a simple bipartite weighted graph representing");
@@ -674,12 +687,14 @@ public class UserInterface {
System.out.println("pattern.) The problem of pairing TCRA/TCRB sequences thus reduces to the \"assignment");
System.out.println("problem\" of finding a maximum weight matching on a bipartite graph--the subset of");
System.out.println("vertex-disjoint edges whose weights sum to the maximum possible value.");
System.out.println("");
System.out.println();
System.out.println("For full documentation, see: https://gitea.ejsf.synology.me/efischer/BiGpairSEQ");
System.out.println();
System.out.println("pairSEQ citation:");
System.out.println("Howie, B., Sherwood, A. M., et. al.");
System.out.println("High-throughput pairing of T cell receptor alpha and beta sequences.");
System.out.println("Sci. Transl. Med. 7, 301ra131 (2015)");
System.out.println("");
System.out.println("Simulation by Eugene Fischer, 2021-2022");
System.out.println();
System.out.println("BiGpairSEQ_Sim by Eugene Fischer, 2021-2022");
}
}