Revert attempt to switch plate output format. It worked, but introduced a bug in graph filtering that I don't want to chase down.
This commit is contained in:
Binary file not shown.
28
readme.md
28
readme.md
@@ -94,7 +94,7 @@ Options when making a Cell Sample file:
|
||||
Files are in CSV format. Rows are distinct T cells, columns are sequences within the cells.
|
||||
Comments are preceded by `#`.
|
||||
|
||||
Structure example:
|
||||
Structure:
|
||||
|
||||
---
|
||||
# Sample contains 1 unique CDR1 for every 4 unique CDR3s.
|
||||
@@ -136,20 +136,20 @@ Every column represents an individual cell, containing four sequences, represent
|
||||
Notice that the Alpha CDR1 is missing in the cell above, due to sequence dropout.
|
||||
Dropouts are represented by replacing sequences with the value `-1`. Comments are preceded by `#`.
|
||||
|
||||
Structure Example:
|
||||
Structure:
|
||||
|
||||
---
|
||||
```
|
||||
# Cell source file name: 4MilCells.csv
|
||||
# Plate size: 96
|
||||
# Error rate: 0.1
|
||||
# Concentrations: 10000 5000 500
|
||||
# Lambda: 0.6
|
||||
# Cell source file name:
|
||||
# Each row represents one well on the plate
|
||||
# Plate size:
|
||||
# Concentrations:
|
||||
# Lambda:
|
||||
```
|
||||
| well 1 | well 2 | well 3| ... |
|
||||
| Well 1, cell 1 | Well 1, cell 2 | Well 1, cell 3| ... |
|
||||
|---|---|---|---|
|
||||
| [105383, 786528, 959247, 925928] | [525902, 791533, -1, 866282] | [409236, 132303, 804465, 942261]| ... |
|
||||
| [249930, 301502, 970003, 881099] | [523787, 552952, 997194, 970507]| [425363, 417411, 845399, -1]| ... |
|
||||
| **Well 2, cell 1** | **Well 2, cell 2** | **Well 2, cell 3**| ... |
|
||||
| **Well 3, cell 1** | **Well 3, cell 2** | **Well 3, cell 3**| ... |
|
||||
| ... | ... | ... | ... |
|
||||
|
||||
---
|
||||
@@ -222,10 +222,9 @@ using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et a
|
||||
|
||||
## TODO
|
||||
|
||||
* ~~Try invoking GC at end of workloads to reduce paging to disk~~ DONE
|
||||
* Try invoking GC at end of workloads to reduce paging to disk
|
||||
* ~~Hold graph data in memory until another graph is read-in?~~
|
||||
* No, this won't work, because BiGpairSEQ simulations alter the underlying graph based on filtering constraints. Changes would cascade with multiple experiments.
|
||||
* ~~See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows~~ DONE
|
||||
* Enable GraphML output in addition to serialized object binaries, for data portability
|
||||
* Custom vertex type with attribute for sequence occupancy?
|
||||
* Re-implement CDR1 matching method
|
||||
@@ -238,7 +237,10 @@ using the (2021 corrected) formula from the original pairSEQ paper. (Howie, et a
|
||||
* Implement sample plates with random numbers of T cells per well
|
||||
* Possible BiGpairSEQ advantage over pairSEQ: BiGpairSEQ is resilient to variations in well populations; pairSEQ is not.
|
||||
* Preliminary data suggest that BiGpairSEQ behaves roughly as though every well on the plate had the *average* well concentration, but that's still speculative.
|
||||
|
||||
* See if there's a reasonable way to reformat Sample Plate files so that wells are columns instead of rows
|
||||
* The problem is the variable number of cells in a well
|
||||
* Apache Commons CSV library writes entries a row at a time
|
||||
* Can possibly sort the wells by length first, then construct entries
|
||||
|
||||
## CITATIONS
|
||||
* Howie, B., Sherwood, A. M., et al. ["High-throughput pairing of T cell receptor alpha and beta sequences."](https://pubmed.ncbi.nlm.nih.gov/26290413/) Sci. Transl. Med. 7, 301ra131 (2015)
|
||||
|
||||
@@ -31,54 +31,23 @@ public class PlateFileReader {
|
||||
BufferedReader reader = Files.newBufferedReader(Path.of(filename));
|
||||
CSVParser parser = new CSVParser(reader, plateFileFormat);
|
||||
){
|
||||
//old code for wells as rows
|
||||
// for(CSVRecord record: parser.getRecords()) {
|
||||
// List<Integer[]> well = new ArrayList<>();
|
||||
// for(String s: record) {
|
||||
// if(!"".equals(s)) {
|
||||
// String[] intString = s.replaceAll("\\[", "")
|
||||
// .replaceAll("]", "")
|
||||
// .replaceAll(" ", "")
|
||||
// .split(",");
|
||||
// //System.out.println(intString);
|
||||
// Integer[] arr = new Integer[intString.length];
|
||||
// for (int i = 0; i < intString.length; i++) {
|
||||
// arr[i] = Integer.valueOf(intString[i]);
|
||||
// }
|
||||
// well.add(arr);
|
||||
// }
|
||||
// }
|
||||
// wells.add(well);
|
||||
for(CSVRecord record: parser.getRecords()) {
|
||||
if (wells.size() == 0) {
|
||||
int num = 0;
|
||||
for (String s: record) {
|
||||
num++;
|
||||
}
|
||||
for (int i = 0; i < num; i++) {
|
||||
wells.add(new ArrayList<>());
|
||||
}
|
||||
} else {
|
||||
int i = 0;
|
||||
for (String s : record) {
|
||||
if (!"".equals(s)) { //if value isn't the empty string
|
||||
//get rid of brackets, split at commas into a string array
|
||||
String[] intsAsStrings = s.replaceAll("\\[", "")
|
||||
.replaceAll("]", "")
|
||||
.replaceAll(" ", "")
|
||||
.split(",");
|
||||
//Make Integer array with the same values
|
||||
Integer[] arr = new Integer[intsAsStrings.length];
|
||||
for (int j = 0; j < intsAsStrings.length; j++) {
|
||||
arr[j] = Integer.valueOf(intsAsStrings[j]);
|
||||
}
|
||||
//Add Integer array to the correct well
|
||||
wells.get(i).add(arr);
|
||||
i++;
|
||||
List<Integer[]> well = new ArrayList<>();
|
||||
for(String s: record) {
|
||||
if(!"".equals(s)) {
|
||||
String[] intString = s.replaceAll("\\[", "")
|
||||
.replaceAll("]", "")
|
||||
.replaceAll(" ", "")
|
||||
.split(",");
|
||||
//System.out.println(intString);
|
||||
Integer[] arr = new Integer[intString.length];
|
||||
for (int i = 0; i < intString.length; i++) {
|
||||
arr[i] = Integer.valueOf(intString[i]);
|
||||
}
|
||||
well.add(arr);
|
||||
}
|
||||
|
||||
}
|
||||
wells.add(well);
|
||||
}
|
||||
} catch(IOException ex){
|
||||
System.out.println("plate file " + filename + " not found.");
|
||||
|
||||
@@ -59,7 +59,7 @@ public class PlateFileWriter {
|
||||
}
|
||||
}
|
||||
|
||||
//this took forever, and I don't use it, because it makes reading data in a huge pain
|
||||
//this took forever
|
||||
List<List<String>> rows = new ArrayList<>();
|
||||
List<String> tmp = new ArrayList<>();
|
||||
for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
|
||||
@@ -89,6 +89,7 @@ public class PlateFileWriter {
|
||||
CSVPrinter printer = new CSVPrinter(writer, plateFileFormat);
|
||||
){
|
||||
printer.printComment("Cell source file name: " + sourceFileName);
|
||||
printer.printComment("Each row represents one well on the plate.");
|
||||
printer.printComment("Plate size: " + size);
|
||||
printer.printComment("Error rate: " + error);
|
||||
printer.printComment("Concentrations: " + concenString);
|
||||
|
||||
@@ -319,7 +319,6 @@ public class UserInterface {
|
||||
assert filename != null;
|
||||
CellFileWriter writer = new CellFileWriter(filename, sample);
|
||||
writer.writeCellsToFile();
|
||||
System.gc();
|
||||
}
|
||||
|
||||
// //for calling from command line
|
||||
@@ -466,7 +465,6 @@ public class UserInterface {
|
||||
System.out.println("Writing Sample Plate to file");
|
||||
writer.writePlateFile();
|
||||
System.out.println("Sample Plate written to file: " + filename);
|
||||
System.gc();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -515,7 +513,6 @@ public class UserInterface {
|
||||
System.out.println("File I/O time is not included in results.");
|
||||
dataWriter.writeDataToFile();
|
||||
System.out.println("Graph and Data file written to: " + filename);
|
||||
System.gc();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -570,7 +567,6 @@ public class UserInterface {
|
||||
System.out.println("Writing results to file");
|
||||
writer.writeResultsToFile();
|
||||
System.out.println("Results written to file: " + filename);
|
||||
System.gc();
|
||||
}
|
||||
|
||||
///////
|
||||
|
||||
Reference in New Issue
Block a user