From db99c7481097840872dfebbaf52eca39a9b2cede Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Mon, 26 Sep 2022 23:03:23 -0500 Subject: [PATCH] Rework read depth simulation to allow edge weight calculations to work as expected. (This changes sample plate in memory, so caching the sample plate is incompatible) --- src/main/java/Plate.java | 47 +++++++++++++++++++++--------------- src/main/java/Simulator.java | 6 +++-- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index de7d872..19ff86f 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -180,56 +180,61 @@ public class Plate { //returns a map of the counts of the sequence at cell index sIndex, in a specific of wells //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. - public void assayWellsSequenceSWithReadDepth(Map occupancyMap, Map readCountMap, + public void assayWellsSequenceSWithReadDepth(Map misreadCounts, Map occupancyMap, Map readCountMap, int readDepth, double readErrorProb, double errorCollisionProb, int... sIndices) { - this.assayWellsSequenceSWithReadDepth(occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, 0, size, sIndices); + this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, 0, size, sIndices); } //returns a map of the counts of the sequence at cell index sIndex, in a specific of wells //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. - public void assayWellsSequenceSWithReadDepth(Map occupancyMap, Map readCountMap, + public void assayWellsSequenceSWithReadDepth(Map misreadCounts, Map occupancyMap, Map readCountMap, int readDepth, double readErrorProb, double errorCollisionProb, int n, int... sIndices) { - this.assayWellsSequenceSWithReadDepth(occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, n, n+1, sIndices); + this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, n, n+1, sIndices); } //returns a map of the counts of the sequence at cell index sIndex, in a range of wells //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. - public void assayWellsSequenceSWithReadDepth(Map occupancyMap, Map readCountMap, + public void assayWellsSequenceSWithReadDepth(Map misreadCounts, Map occupancyMap, Map readCountMap, int readDepth, double readErrorProb, double errorCollisionProb, int start, int end, int... sIndices) { for(int sIndex: sIndices){ for(int i = start; i < end; i++){ - countSequencesWithReadDepth(occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, wells.get(i), sIndex); + countSequencesWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, wells.get(i), sIndex); } } } //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. - private void countSequencesWithReadDepth(Map occupancyMap, Map readCountMap, + private void countSequencesWithReadDepth(Map distinctMisreadCounts, Map occupancyMap, Map readCountMap, int readDepth, double readErrorProb, double errorCollisionProb, List well, int... sIndices) { + List spuriousCells = new ArrayList<>(); for(String[] cell : well) { + String[] spuriousCell = new String[SequenceType.values().length]; + Arrays.fill(spuriousCell, "-1"); + Boolean readError = false; for(int sIndex: sIndices){ //skip dropout sequences, which have value "-1" if(!"-1".equals(cell[sIndex])){ Map sequencesWithReadCounts = new LinkedHashMap<>(); - int nextErrorStarCount = 1; - sequencesWithReadCounts.put(cell[sIndex], 0); for(int i = 0; i < readDepth; i++) { if (rand.nextDouble() <= readErrorProb) { - if (rand.nextDouble() <= errorCollisionProb && nextErrorStarCount > 1) { - String spurious; - spurious = getRandomErrorKeyFromMap(sequencesWithReadCounts); - sequencesWithReadCounts.merge(spurious, 1, (oldValue, newValue) -> oldValue + newValue); - } - else { - StringBuilder spurious = new StringBuilder(cell[sIndex]); - for (int j = 0; j < nextErrorStarCount; j++) { + readError = true; + StringBuilder spurious = new StringBuilder(cell[sIndex]); + if (rand.nextDouble() > errorCollisionProb) { + distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); + for (int j = 0; j < distinctMisreadCounts.get(cell[sIndex]); j++) { spurious.append("*"); } - nextErrorStarCount++; - sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue); } + else { + int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex])); + for (int j = 0; j < starCount; j++) { + spurious.append("*"); + } + } + sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue); + spuriousCell[sIndex] = spurious.toString(); } else { sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); @@ -241,7 +246,11 @@ public class Plate { } } } + if (readError) { + spuriousCells.add(spuriousCell); + } } + well.addAll(spuriousCells); } private String getRandomErrorKeyFromMap(Map map) { diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 9b30d7c..5b31a81 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -27,6 +27,7 @@ public class Simulator implements GraphModificationFunctions { Map allBetas; Map alphaReadCounts = null; Map betaReadCounts = null; + Map misreadCounts; List distinctCells = cellSample.getCells(); int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()}; int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()}; @@ -46,12 +47,13 @@ public class Simulator implements GraphModificationFunctions { samplePlate.assayWellsSequenceS(allBetas, betaIndices); } else { + misreadCounts = new HashMap<>(); allAlphas = new HashMap<>(); alphaReadCounts = new HashMap<>(); - samplePlate.assayWellsSequenceSWithReadDepth(allAlphas, alphaReadCounts, readDepth, readErrorRate, errorCollisionRate, alphaIndices); + samplePlate.assayWellsSequenceSWithReadDepth(misreadCounts, allAlphas, alphaReadCounts, readDepth, readErrorRate, errorCollisionRate, alphaIndices); allBetas = new HashMap<>(); betaReadCounts = new HashMap<>(); - samplePlate.assayWellsSequenceSWithReadDepth(allBetas, betaReadCounts, readDepth, readErrorRate, errorCollisionRate, betaIndices); + samplePlate.assayWellsSequenceSWithReadDepth(misreadCounts, allBetas, betaReadCounts, readDepth, readErrorRate, errorCollisionRate, betaIndices); } int alphaCount = allAlphas.size();