332 lines
16 KiB
Java
332 lines
16 KiB
Java
|
|
|
|
/*
|
|
TODO: Implement exponential distribution using inversion method - DONE
|
|
TODO: Implement discrete frequency distributions using Vose's Alias Method
|
|
*/
|
|
|
|
|
|
import java.util.*;
|
|
|
|
public class Plate {
|
|
private CellSample cells;
|
|
private String sourceFile;
|
|
private String filename;
|
|
private List<List<String[]>> wells;
|
|
private final Random rand = BiGpairSEQ.getRand();
|
|
private int size;
|
|
private double error;
|
|
private Integer[] populations;
|
|
private double stdDev;
|
|
private double lambda;
|
|
boolean exponential = false;
|
|
|
|
public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
|
|
double dropoutRate, double stdDev_or_lambda, boolean exponential){
|
|
this.cells = cells;
|
|
this.sourceFile = cellFilename;
|
|
this.size = numWells;
|
|
this.wells = new ArrayList<>();
|
|
this.error = dropoutRate;
|
|
this.populations = populations;
|
|
this.exponential = exponential;
|
|
if (this.exponential) {
|
|
this.lambda = stdDev_or_lambda;
|
|
fillWellsExponential(cells.getCells(), this.lambda);
|
|
}
|
|
else {
|
|
this.stdDev = stdDev_or_lambda;
|
|
fillWells(cells.getCells(), this.stdDev);
|
|
}
|
|
}
|
|
|
|
|
|
public Plate(int size, double error, Integer[] populations) {
|
|
this.size = size;
|
|
this.error = error;
|
|
this.populations = populations;
|
|
wells = new ArrayList<>();
|
|
}
|
|
|
|
//constructor for returning a Plate from a PlateFileReader
|
|
public Plate(String filename, List<List<String[]>> wells) {
|
|
this.filename = filename;
|
|
this.wells = wells;
|
|
this.size = wells.size();
|
|
|
|
List<Integer> concentrations = new ArrayList<>();
|
|
for (List<String[]> w: wells) {
|
|
if(!concentrations.contains(w.size())){
|
|
concentrations.add(w.size());
|
|
}
|
|
}
|
|
this.populations = new Integer[concentrations.size()];
|
|
for (int i = 0; i < this.populations.length; i++) {
|
|
this.populations[i] = concentrations.get(i);
|
|
}
|
|
}
|
|
|
|
private void fillWellsExponential(List<String[]> cells, double lambda){
|
|
this.lambda = lambda;
|
|
exponential = true;
|
|
int numSections = populations.length;
|
|
int section = 0;
|
|
double m;
|
|
int n;
|
|
while (section < numSections){
|
|
for (int i = 0; i < (size / numSections); i++) {
|
|
List<String[]> well = new ArrayList<>();
|
|
for (int j = 0; j < populations[section]; j++) {
|
|
do {
|
|
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
|
|
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
|
|
} while (m >= cells.size() || m < 0);
|
|
n = (int) Math.floor(m);
|
|
String[] cellToAdd = cells.get(n).clone();
|
|
for(int k = 0; k < cellToAdd.length; k++){
|
|
if(Math.abs(rand.nextDouble()) <= error){//error applied to each sequence
|
|
cellToAdd[k] = "-1";
|
|
}
|
|
}
|
|
well.add(cellToAdd);
|
|
}
|
|
wells.add(well);
|
|
}
|
|
section++;
|
|
}
|
|
}
|
|
|
|
private void fillWells( List<String[]> cells, double stdDev) {
|
|
this.stdDev = stdDev;
|
|
int numSections = populations.length;
|
|
int section = 0;
|
|
double m;
|
|
int n;
|
|
while (section < numSections){
|
|
for (int i = 0; i < (size / numSections); i++) {
|
|
List<String[]> well = new ArrayList<>();
|
|
for (int j = 0; j < populations[section]; j++) {
|
|
do {
|
|
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
|
|
} while (m >= cells.size() || m < 0);
|
|
n = (int) Math.floor(m);
|
|
String[] cellToAdd = cells.get(n).clone();
|
|
for(int k = 0; k < cellToAdd.length; k++){
|
|
if(Math.abs(rand.nextDouble()) < error){//error applied to each sequence
|
|
cellToAdd[k] = "-1";
|
|
}
|
|
}
|
|
well.add(cellToAdd);
|
|
}
|
|
wells.add(well);
|
|
}
|
|
section++;
|
|
}
|
|
}
|
|
|
|
public Integer[] getPopulations(){
|
|
return populations;
|
|
}
|
|
|
|
public int getSize(){
|
|
return size;
|
|
}
|
|
|
|
public double getStdDev() {
|
|
return stdDev;
|
|
}
|
|
|
|
public boolean isExponential(){return exponential;}
|
|
|
|
public double getLambda(){return lambda;}
|
|
|
|
public double getError() {
|
|
return error;
|
|
}
|
|
|
|
public List<List<String[]>> getWells() {
|
|
return wells;
|
|
}
|
|
|
|
// //returns a map of the counts of the sequence at cell index sIndex, in all wells
|
|
// public void assayWellsSequenceS(Map<String, Integer> sequences, int... sIndices){
|
|
// this.assayWellsSequenceS(sequences, 0, size, sIndices);
|
|
// }
|
|
//
|
|
// //returns a map of the counts of the sequence at cell index sIndex, in a specific well
|
|
// public void assayWellsSequenceS(Map<String, Integer> sequences, int n, int... sIndices) {
|
|
// this.assayWellsSequenceS(sequences, n, n+1, sIndices);
|
|
// }
|
|
//
|
|
// //returns a map of the counts of the sequence at cell index sIndex, in a range of wells
|
|
// public void assayWellsSequenceS(Map<String, Integer> sequences, int start, int end, int... sIndices) {
|
|
// for(int sIndex: sIndices){
|
|
// for(int i = start; i < end; i++){
|
|
// countSequences(sequences, wells.get(i), sIndex);
|
|
// }
|
|
// }
|
|
// }
|
|
// //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
|
|
// private void countSequences(Map<String, Integer> wellMap, List<String[]> well, int... sIndices) {
|
|
// for(String[] cell : well) {
|
|
// for(int sIndex: sIndices){
|
|
// //skip dropout sequences, which have value -1
|
|
// if(!"-1".equals(cell[sIndex])){
|
|
// wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
|
|
//For the sequences at cell indices sIndices, counts number of unique sequences in all well into the given map
|
|
public Map<String, SequenceRecord> countSequences(Integer readDepth, Double readErrorRate,
|
|
Double errorCollisionRate, int... sIndices) {
|
|
SequenceType[] sequenceTypes = EnumSet.allOf(SequenceType.class).toArray(new SequenceType[0]);
|
|
Map<String, Integer> distinctMisreadCounts = new HashMap<>();
|
|
Map<String, SequenceRecord> sequenceMap = new LinkedHashMap<>();
|
|
for (int well = 0; well < size; well++) {
|
|
for (String[] cell : wells.get(well)) {
|
|
for (int sIndex : sIndices) {
|
|
//skip dropout sequences, which have value -1
|
|
if (!"-1".equals(cell[sIndex])) {
|
|
for (int j = 0; j < readDepth; j++) {
|
|
//Misread sequence
|
|
if (rand.nextDouble() < readErrorRate) {
|
|
StringBuilder spurious = new StringBuilder(cell[sIndex]);
|
|
//if this sequence hasn't been misread before, or the read error is unique,
|
|
//append one more "*" than has been appended before
|
|
if (rand.nextDouble() > errorCollisionRate || !distinctMisreadCounts.containsKey(cell[sIndex])) {
|
|
distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
|
for (int k = 0; k < distinctMisreadCounts.get(cell[sIndex]); k++) {
|
|
spurious.append("*");
|
|
}
|
|
SequenceRecord tmp = new SequenceRecord(spurious.toString(), sequenceTypes[sIndex]);
|
|
tmp.addRead(well);
|
|
sequenceMap.put(spurious.toString(), tmp);
|
|
}
|
|
//if this is a read error collision, randomly choose a number of "*"s that has been appended before
|
|
else {
|
|
int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex]));
|
|
for (int k = 0; k < starCount; k++) {
|
|
spurious.append("*");
|
|
}
|
|
sequenceMap.get(spurious.toString()).addRead(well);
|
|
}
|
|
}
|
|
//sequence is read correctly
|
|
else {
|
|
if (!sequenceMap.containsKey(cell[sIndex])) {
|
|
SequenceRecord tmp = new SequenceRecord(cell[sIndex], sequenceTypes[sIndex]);
|
|
tmp.addRead(well);
|
|
sequenceMap.put(cell[sIndex], tmp);
|
|
} else {
|
|
sequenceMap.get(cell[sIndex]).addRead(well);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return sequenceMap;
|
|
}
|
|
|
|
|
|
// //returns a map of the counts of the sequence at cell index sIndex, in all wells
|
|
// //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
|
|
// public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
|
|
// int readDepth, double readErrorProb, double errorCollisionProb, int... sIndices) {
|
|
// this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, 0, size, sIndices);
|
|
// }
|
|
// //returns a map of the counts of the sequence at cell index sIndex, in a specific of wells
|
|
// //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
|
|
// public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
|
|
// int readDepth, double readErrorProb, double errorCollisionProb,
|
|
// int n, int... sIndices) {
|
|
// this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, n, n+1, sIndices);
|
|
// }
|
|
//
|
|
// //returns a map of the counts of the sequence at cell index sIndex, in a range of wells
|
|
// //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
|
|
// public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
|
|
// int readDepth, double readErrorProb, double errorCollisionProb,
|
|
// int start, int end, int... sIndices) {
|
|
// for(int sIndex: sIndices){
|
|
// for(int i = start; i < end; i++){
|
|
// countSequencesWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, wells.get(i), sIndex);
|
|
// }
|
|
// }
|
|
// }
|
|
//
|
|
// //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
|
|
// //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
|
|
// //NOTE: this function changes the content of the well, adding spurious cells to contain the misread sequences
|
|
// //(this is necessary because, in the simulation, the plate is read multiple times, but random misreads can only
|
|
// //be simulated once).
|
|
// //(Possibly I should refactor all of this to only require a single plate assay, to speed things up. Or at least
|
|
// //to see if it would speed things up.)
|
|
// private void countSequencesWithReadDepth(Map<String, Integer> distinctMisreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
|
|
// int readDepth, double readErrorProb, double errorCollisionProb,
|
|
// List<String[]> well, int... sIndices) {
|
|
// //list of spurious cells to add to well after counting
|
|
// List<String[]> spuriousCells = new ArrayList<>();
|
|
// for(String[] cell : well) {
|
|
// //new potential spurious cell for each cell that gets read
|
|
// String[] spuriousCell = new String[SequenceType.values().length];
|
|
// //initialize spurious cell with all dropout sequences
|
|
// Arrays.fill(spuriousCell, "-1");
|
|
// //has a read error occurred?
|
|
// boolean readError = false;
|
|
// for(int sIndex: sIndices){
|
|
// //skip dropout sequences, which have value "-1"
|
|
// if(!"-1".equals(cell[sIndex])){
|
|
// Map<String, Integer> sequencesWithReadCounts = new LinkedHashMap<>();
|
|
// for(int i = 0; i < readDepth; i++) {
|
|
// if (rand.nextDouble() <= readErrorProb) {
|
|
// readError = true;
|
|
// //Read errors are represented by appending "*"s to the end of the sequence some number of times
|
|
// StringBuilder spurious = new StringBuilder(cell[sIndex]);
|
|
// //if this sequence hasn't been misread before, or the read error is unique,
|
|
// //append one more "*" than has been appended before
|
|
// if (!distinctMisreadCounts.containsKey(cell[sIndex]) || rand.nextDouble() > errorCollisionProb) {
|
|
// distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
|
// for (int j = 0; j < distinctMisreadCounts.get(cell[sIndex]); j++) {
|
|
// spurious.append("*");
|
|
// }
|
|
// }
|
|
// //if this is a read error collision, randomly choose a number of "*"s that has been appended before
|
|
// else {
|
|
// int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex]));
|
|
// for (int j = 0; j < starCount; j++) {
|
|
// spurious.append("*");
|
|
// }
|
|
// }
|
|
// sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue);
|
|
// //add spurious sequence to spurious cell
|
|
// spuriousCell[sIndex] = spurious.toString();
|
|
// }
|
|
// else {
|
|
// sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
|
|
// }
|
|
// }
|
|
// for(String seq : sequencesWithReadCounts.keySet()) {
|
|
// occupancyMap.merge(seq, 1, (oldValue, newValue) -> oldValue + newValue);
|
|
// readCountMap.merge(seq, sequencesWithReadCounts.get(seq), (oldValue, newValue) -> oldValue + newValue);
|
|
// }
|
|
// }
|
|
// }
|
|
// if (readError) { //only add a new spurious cell if there was a read error
|
|
// spuriousCells.add(spuriousCell);
|
|
// }
|
|
// }
|
|
// //add all spurious cells to the well
|
|
// well.addAll(spuriousCells);
|
|
// }
|
|
|
|
public String getSourceFileName() {
|
|
return sourceFile;
|
|
}
|
|
|
|
public String getFilename() { return filename; }
|
|
}
|