Add comments

This commit is contained in:
eugenefischer
2022-09-27 11:51:51 -05:00
parent 810abdb705
commit 2bf2a9f5f7

View File

@@ -178,7 +178,7 @@ public class Plate {
} }
} }
//returns a map of the counts of the sequence at cell index sIndex, in a specific of wells //returns a map of the counts of the sequence at cell index sIndex, in all wells
//Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap, public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
int readDepth, double readErrorProb, double errorCollisionProb, int... sIndices) { int readDepth, double readErrorProb, double errorCollisionProb, int... sIndices) {
@@ -203,16 +203,26 @@ public class Plate {
} }
} }
} }
//For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
//Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
//NOTE: this function changes the content of the well, adding spurious cells to contain the misread sequences
//(this is necessary because, in the simulation, the plate is read multiple times, but random misreads can only
//be simulated once).
//(Possibly I should refactor all of this to only require a single plate assay, to speed things up. Or at least
//to see if it would speed things up.)
private void countSequencesWithReadDepth(Map<String, Integer> distinctMisreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap, private void countSequencesWithReadDepth(Map<String, Integer> distinctMisreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
int readDepth, double readErrorProb, double errorCollisionProb, int readDepth, double readErrorProb, double errorCollisionProb,
List<String[]> well, int... sIndices) { List<String[]> well, int... sIndices) {
//list of spurious cells to add to well after counting
List<String[]> spuriousCells = new ArrayList<>(); List<String[]> spuriousCells = new ArrayList<>();
for(String[] cell : well) { for(String[] cell : well) {
//new potential spurious cell for each cell that gets read
String[] spuriousCell = new String[SequenceType.values().length]; String[] spuriousCell = new String[SequenceType.values().length];
//initialize spurious cell with all dropout sequences
Arrays.fill(spuriousCell, "-1"); Arrays.fill(spuriousCell, "-1");
Boolean readError = false; //has a read error occurred?
boolean readError = false;
for(int sIndex: sIndices){ for(int sIndex: sIndices){
//skip dropout sequences, which have value "-1" //skip dropout sequences, which have value "-1"
if(!"-1".equals(cell[sIndex])){ if(!"-1".equals(cell[sIndex])){
@@ -220,21 +230,26 @@ public class Plate {
for(int i = 0; i < readDepth; i++) { for(int i = 0; i < readDepth; i++) {
if (rand.nextDouble() <= readErrorProb) { if (rand.nextDouble() <= readErrorProb) {
readError = true; readError = true;
//Read errors are represented by appending "*"s to the end of the sequence some number of times
StringBuilder spurious = new StringBuilder(cell[sIndex]); StringBuilder spurious = new StringBuilder(cell[sIndex]);
//if this sequence hasn't been misread before, or the read error is unique,
//append one more "*" than has been appended before
if (!distinctMisreadCounts.containsKey(cell[sIndex]) || rand.nextDouble() > errorCollisionProb) { if (!distinctMisreadCounts.containsKey(cell[sIndex]) || rand.nextDouble() > errorCollisionProb) {
distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
for (int j = 0; j < distinctMisreadCounts.get(cell[sIndex]); j++) { for (int j = 0; j < distinctMisreadCounts.get(cell[sIndex]); j++) {
spurious.append("*"); spurious.append("*");
} }
} }
//if this is a read error collision, randomly choose a number of "*"s that has been appended before
else { else {
int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex])); int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex]));
for (int j = 0; j < starCount; j++) { for (int j = 0; j < starCount; j++) {
spurious.append("*"); spurious.append("*");
} }
} }
sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue); sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue);
spuriousCell[sIndex] = spurious.toString(); //add spurious sequence to spurious cell
spuriousCell[sIndex] = spurious.toString();
} }
else { else {
sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
@@ -246,19 +261,14 @@ public class Plate {
} }
} }
} }
if (readError) { if (readError) { //only add a new spurious cell if there was a read error
spuriousCells.add(spuriousCell); spuriousCells.add(spuriousCell);
} }
} }
//add all spurious cells to the well
well.addAll(spuriousCells); well.addAll(spuriousCells);
} }
private String getRandomErrorKeyFromMap(Map<String, Integer> map) {
//Only want to choose from index 1 to n, since index 0 is not an error sequence
int index = rand.nextInt(map.size() - 1);
return (String) map.keySet().toArray()[index + 1];
}
public String getSourceFileName() { public String getSourceFileName() {
return sourceFile; return sourceFile;
} }