From 36c628cde5c464bd00d1ffe292011fe645cb0b99 Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Mon, 26 Sep 2022 16:52:56 -0500 Subject: [PATCH] Add code to simulate read depth --- src/main/java/InteractiveInterface.java | 8 ++-- src/main/java/Plate.java | 60 ++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/main/java/InteractiveInterface.java b/src/main/java/InteractiveInterface.java index ef79319..8982d2d 100644 --- a/src/main/java/InteractiveInterface.java +++ b/src/main/java/InteractiveInterface.java @@ -252,9 +252,9 @@ public class InteractiveInterface { String plateFile = null; Boolean simulateReadDepth = false; //number of times to read each sequence in a well - Integer readDepth = 1; - Double readErrorRate = 0.0; - Double errorCollisionRate = 0.0; + int readDepth = 1; + double readErrorRate = 0.0; + double errorCollisionRate = 0.0; try { String str = "\nGenerating bipartite weighted graph encoding occupancy overlap data "; str = str.concat("\nrequires a cell sample file and a sample plate file."); @@ -283,7 +283,7 @@ public class InteractiveInterface { System.out.print("(0.0 to 1.0): "); errorCollisionRate = sc.nextDouble(); if(errorCollisionRate < 0.0 || errorCollisionRate > 1.0) { - throw new InputMismatchException("The error collision rate must be in the range [0.0, 1.0]"); + throw new InputMismatchException("The error collision probability must be an in the range [0.0, 1.0]"); } } System.out.println("\nThe graph and occupancy data will be written to a file."); diff --git a/src/main/java/Plate.java b/src/main/java/Plate.java index 6af544b..5d4ea60 100644 --- a/src/main/java/Plate.java +++ b/src/main/java/Plate.java @@ -5,6 +5,7 @@ TODO: Implement exponential distribution using inversion method - DONE TODO: Implement discrete frequency distributions using Vose's Alias Method */ + import java.util.*; public class Plate { @@ -83,7 +84,7 @@ public class Plate { n = (int) Math.floor(m); String[] cellToAdd = cells.get(n).clone(); for(int k = 0; k < cellToAdd.length; k++){ - if(Math.abs(rand.nextDouble()) < error){//error applied to each seqeunce + if(Math.abs(rand.nextDouble()) <= error){//error applied to each sequence cellToAdd[k] = "-1"; } } @@ -179,8 +180,63 @@ public class Plate { } } - private void countSequencesWithReadDepth(Map wellMap, List well, int... sIndices) { + //returns a map of the counts of the sequence at cell index sIndex, in a range of wells + //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. + public Map assayWellsSequenceSWithReadDepth(int start, int end, int... sIndices) { + Map assay = new HashMap<>(); + for(int sIndex: sIndices){ + for(int i = start; i < end; i++){ + countSequences(assay, wells.get(i), sIndex); + } + } + return assay; + } + //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map + //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map. + private void countSequencesWithReadDepth(Map occupancyMap, Map readCountMap, + int readDepth, double readErrorProb, double errorCollisionProb, + List well, int... sIndices) { + for(String[] cell : well) { + for(int sIndex: sIndices){ + //skip dropout sequences, which have value "-1" + if(!"-1".equals(cell[sIndex])){ + Map sequencesWithReadCounts = new LinkedHashMap<>(); + int nextErrorStarCount = 1; + sequencesWithReadCounts.put(cell[sIndex], 0); + for(int i = 0; i < readDepth; i++) { + if (rand.nextDouble() <= readErrorProb) { + if (rand.nextDouble() <= errorCollisionProb && nextErrorStarCount > 1) { + String spurious; + spurious = getRandomErrorKeyFromMap(sequencesWithReadCounts); + sequencesWithReadCounts.merge(spurious, 1, (oldValue, newValue) -> oldValue + newValue); + } + else { + StringBuilder spurious = new StringBuilder(cell[sIndex]); + for (int j = 0; j < nextErrorStarCount; j++) { + spurious.append("*"); + } + nextErrorStarCount++; + sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue); + } + } + else { + sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue); + } + } + for(String seq : sequencesWithReadCounts.keySet()) { + occupancyMap.merge(seq, 1, (oldValue, newValue) -> oldValue + newValue); + readCountMap.merge(seq, sequencesWithReadCounts.get(seq), (oldValue, newValue) -> oldValue + newValue); + } + } + } + } + } + + private String getRandomErrorKeyFromMap(Map map) { + //Only want to choose from index 1 to n, since index 0 is not an error sequence + int index = rand.nextInt(map.size() - 1); + return (String) map.keySet().toArray()[index + 1]; } public String getSourceFileName() {