Rewrite countSequences to allow for collision with real sequences on misreads
This commit is contained in:
@@ -2,6 +2,13 @@
|
||||
|
||||
/*
|
||||
TODO: Implement exponential distribution using inversion method - DONE
|
||||
TODO: Implement collisions with real sequences by having the counting function keep a map of all sequences it's read,
|
||||
with values of all misreads. Can then have a spurious/real collision rate, which will have the counting function randomly select a sequence
|
||||
it's already read at least once, and put that into the list of spurious sequences for the given real sequence. Will let me get rid
|
||||
of the distinctMisreadCounts map, and use this new map instead. Doing it this way, once a sequence has been misread as another
|
||||
sequence once, it is more likely to be misread that way again, as future read error collisions can also be real sequence collisions.
|
||||
Prob A: a read error occurs. Prob B: it's a new error (otherwise it's a repeated error). Prob C: if new error, prob that it's
|
||||
a real sequence collision (otherwise it's a new spurious sequence) - DONE
|
||||
TODO: Implement discrete frequency distributions using Vose's Alias Method
|
||||
*/
|
||||
|
||||
@@ -148,49 +155,76 @@ public class Plate {
|
||||
return wells;
|
||||
}
|
||||
|
||||
//For the sequences at cell indices sIndices, counts number of unique sequences in all wells into the given map
|
||||
//For the sequences at cell indices sIndices, counts number of unique sequences in all wells.
|
||||
//Also simulates sequence read errors with given probabilities.
|
||||
//Returns a map of SequenceRecords containing plate data for all sequences read.
|
||||
//TODO actually implement usage of misreadSequences - DONE
|
||||
public Map<String, SequenceRecord> countSequences(Integer readDepth, Double readErrorRate,
|
||||
Double errorCollisionRate, int... sIndices) {
|
||||
Double errorCollisionRate, Double realSequenceCollisionRate, int... sIndices) {
|
||||
SequenceType[] sequenceTypes = EnumSet.allOf(SequenceType.class).toArray(new SequenceType[0]);
|
||||
Map<String, Integer> distinctMisreadCounts = new HashMap<>();
|
||||
//Map of all sequences read. Keys are sequences, values are ways sequence has been misread.
|
||||
Map<String, List<String>> sequencesAndMisreads = new HashMap<>();
|
||||
Map<String, SequenceRecord> sequenceMap = new LinkedHashMap<>();
|
||||
for (int well = 0; well < size; well++) {
|
||||
for (String[] cell : wells.get(well)) {
|
||||
for (int sIndex : sIndices) {
|
||||
String currentSequence = cell[sIndex];
|
||||
//skip dropout sequences, which have value -1
|
||||
if (!"-1".equals(cell[sIndex])) {
|
||||
if (!"-1".equals(currentSequence)) {
|
||||
for (int j = 0; j < readDepth; j++) {
|
||||
//Misread sequence
|
||||
//The sequence is misread
|
||||
if (rand.nextDouble() < readErrorRate) {
|
||||
StringBuilder spurious = new StringBuilder(cell[sIndex]);
|
||||
//if this sequence hasn't been misread before, or the read error is unique,
|
||||
//append one more "*" than has been appended before
|
||||
if (rand.nextDouble() > errorCollisionRate || !distinctMisreadCounts.containsKey(cell[sIndex])) {
|
||||
distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
||||
for (int k = 0; k < distinctMisreadCounts.get(cell[sIndex]); k++) {
|
||||
//The sequence hasn't been read or misread before
|
||||
if (!sequencesAndMisreads.containsKey(currentSequence)) {
|
||||
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
|
||||
}
|
||||
//The specific misread hasn't happened before
|
||||
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) {
|
||||
//The misread doesn't collide with a real sequence already on the plate
|
||||
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
|
||||
StringBuilder spurious = new StringBuilder(currentSequence);
|
||||
for (int k = 0; k <= sequencesAndMisreads.get(currentSequence).size(); k++) {
|
||||
spurious.append("*");
|
||||
}
|
||||
//New sequence record for the spurious sequence
|
||||
SequenceRecord tmp = new SequenceRecord(spurious.toString(), sequenceTypes[sIndex]);
|
||||
tmp.addRead(well);
|
||||
sequenceMap.put(spurious.toString(), tmp);
|
||||
//add spurious sequence to list of misreads for the real sequence
|
||||
sequencesAndMisreads.get(currentSequence).add(spurious.toString());
|
||||
}
|
||||
//if this is a read error collision, randomly choose a number of "*"s that has been appended before
|
||||
//The misread collides with a real sequence already read from plate
|
||||
else {
|
||||
int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex]));
|
||||
for (int k = 0; k < starCount; k++) {
|
||||
spurious.append("*");
|
||||
}
|
||||
sequenceMap.get(spurious.toString()).addRead(well);
|
||||
String wrongSequence;
|
||||
do{
|
||||
//get a random real sequence that's been read from the plate before
|
||||
int index = rand.nextInt(sequencesAndMisreads.size());
|
||||
wrongSequence = sequencesAndMisreads.keySet().toArray(new String[0])[index];
|
||||
//make sure it's not accidentally the *right* sequence
|
||||
//Also that it's not a wrong sequence already in the misread list
|
||||
} while(cell[sIndex].equals(wrongSequence) || sequencesAndMisreads.get(currentSequence).contains(wrongSequence));
|
||||
sequenceMap.get(wrongSequence).addRead(well);
|
||||
|
||||
}
|
||||
}
|
||||
//sequence is read correctly
|
||||
}
|
||||
//The sequence is read correctly
|
||||
else {
|
||||
if (!sequenceMap.containsKey(cell[sIndex])) {
|
||||
SequenceRecord tmp = new SequenceRecord(cell[sIndex], sequenceTypes[sIndex]);
|
||||
//the sequence hasn't been read before
|
||||
if (!sequenceMap.containsKey(currentSequence)) {
|
||||
//create new record for the sequence
|
||||
SequenceRecord tmp = new SequenceRecord(currentSequence, sequenceTypes[sIndex]);
|
||||
//add this read to the sequence record
|
||||
tmp.addRead(well);
|
||||
sequenceMap.put(cell[sIndex], tmp);
|
||||
} else {
|
||||
sequenceMap.get(cell[sIndex]).addRead(well);
|
||||
//add the sequence and its record to the sequence map
|
||||
sequenceMap.put(currentSequence, tmp);
|
||||
//add the sequence to the sequences and misreads map
|
||||
sequencesAndMisreads.put(currentSequence, new ArrayList<>());
|
||||
}
|
||||
//the sequence has been read before
|
||||
else {
|
||||
//get the sequence's record and add this read to it
|
||||
sequenceMap.get(currentSequence).addRead(well);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user