Rework read depth simulation to allow edge weight calculations to work as expected. (This changes sample plate in memory, so caching the sample plate is incompatible)

This commit is contained in:
eugenefischer
2022-09-26 23:03:23 -05:00
parent 13a1af1f71
commit db99c74810
2 changed files with 32 additions and 21 deletions

View File

@@ -180,56 +180,61 @@ public class Plate {
//returns a map of the counts of the sequence at cell index sIndex, in a specific of wells
//Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
int readDepth, double readErrorProb, double errorCollisionProb, int... sIndices) {
this.assayWellsSequenceSWithReadDepth(occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, 0, size, sIndices);
this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, 0, size, sIndices);
}
//returns a map of the counts of the sequence at cell index sIndex, in a specific of wells
//Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
int readDepth, double readErrorProb, double errorCollisionProb,
int n, int... sIndices) {
this.assayWellsSequenceSWithReadDepth(occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, n, n+1, sIndices);
this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, n, n+1, sIndices);
}
//returns a map of the counts of the sequence at cell index sIndex, in a range of wells
//Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
int readDepth, double readErrorProb, double errorCollisionProb,
int start, int end, int... sIndices) {
for(int sIndex: sIndices){
for(int i = start; i < end; i++){
countSequencesWithReadDepth(occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, wells.get(i), sIndex);
countSequencesWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, wells.get(i), sIndex);
}
}
}
//For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
//Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
private void countSequencesWithReadDepth(Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
private void countSequencesWithReadDepth(Map<String, Integer> distinctMisreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
int readDepth, double readErrorProb, double errorCollisionProb,
List<String[]> well, int... sIndices) {
List<String[]> spuriousCells = new ArrayList<>();
for(String[] cell : well) {
String[] spuriousCell = new String[SequenceType.values().length];
Arrays.fill(spuriousCell, "-1");
Boolean readError = false;
for(int sIndex: sIndices){
//skip dropout sequences, which have value "-1"
if(!"-1".equals(cell[sIndex])){
Map<String, Integer> sequencesWithReadCounts = new LinkedHashMap<>();
int nextErrorStarCount = 1;
sequencesWithReadCounts.put(cell[sIndex], 0);
for(int i = 0; i < readDepth; i++) {
if (rand.nextDouble() <= readErrorProb) {
if (rand.nextDouble() <= errorCollisionProb && nextErrorStarCount > 1) {
String spurious;
spurious = getRandomErrorKeyFromMap(sequencesWithReadCounts);
sequencesWithReadCounts.merge(spurious, 1, (oldValue, newValue) -> oldValue + newValue);
}
else {
StringBuilder spurious = new StringBuilder(cell[sIndex]);
for (int j = 0; j < nextErrorStarCount; j++) {
readError = true;
StringBuilder spurious = new StringBuilder(cell[sIndex]);
if (rand.nextDouble() > errorCollisionProb) {
distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
for (int j = 0; j < distinctMisreadCounts.get(cell[sIndex]); j++) {
spurious.append("*");
}
nextErrorStarCount++;
sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue);
}
else {
int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex]));
for (int j = 0; j < starCount; j++) {
spurious.append("*");
}
}
sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue);
spuriousCell[sIndex] = spurious.toString();
}
else {
sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
@@ -241,7 +246,11 @@ public class Plate {
}
}
}
if (readError) {
spuriousCells.add(spuriousCell);
}
}
well.addAll(spuriousCells);
}
private String getRandomErrorKeyFromMap(Map<String, Integer> map) {

View File

@@ -27,6 +27,7 @@ public class Simulator implements GraphModificationFunctions {
Map<String, Integer> allBetas;
Map<String, Integer> alphaReadCounts = null;
Map<String, Integer> betaReadCounts = null;
Map<String, Integer> misreadCounts;
List<String[]> distinctCells = cellSample.getCells();
int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()};
int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()};
@@ -46,12 +47,13 @@ public class Simulator implements GraphModificationFunctions {
samplePlate.assayWellsSequenceS(allBetas, betaIndices);
}
else {
misreadCounts = new HashMap<>();
allAlphas = new HashMap<>();
alphaReadCounts = new HashMap<>();
samplePlate.assayWellsSequenceSWithReadDepth(allAlphas, alphaReadCounts, readDepth, readErrorRate, errorCollisionRate, alphaIndices);
samplePlate.assayWellsSequenceSWithReadDepth(misreadCounts, allAlphas, alphaReadCounts, readDepth, readErrorRate, errorCollisionRate, alphaIndices);
allBetas = new HashMap<>();
betaReadCounts = new HashMap<>();
samplePlate.assayWellsSequenceSWithReadDepth(allBetas, betaReadCounts, readDepth, readErrorRate, errorCollisionRate, betaIndices);
samplePlate.assayWellsSequenceSWithReadDepth(misreadCounts, allBetas, betaReadCounts, readDepth, readErrorRate, errorCollisionRate, betaIndices);
}
int alphaCount = allAlphas.size();