BiGpairSEQ/src/main/java/Plate.java



/*
TODO: Implement exponential distribution using inversion method - DONE
TODO: Implement discrete frequency distributions using Vose's Alias Method
 */


import java.util.*;

/**
 * A plate of wells, where each well holds a list of cells and each cell is a
 * {@code String[]} of sequences (one entry per sequence index).
 *
 * <p>A plate is either filled by sampling cells from a {@link CellSample} (Gaussian or
 * exponential index distribution), or wrapped around wells that were already read from a file.
 * A sequence that dropped out of a sampled cell is stored as the string {@code "-1"}.
 *
 * <p>NOTE(review): the fill methods create {@code size / populations.length} wells per
 * population, so when {@code size} is not divisible by the number of populations the plate
 * holds fewer wells than {@link #getSize()} reports. Confirm whether the remainder wells
 * should be allocated somewhere.
 */
public class Plate {
    /** Marker stored in place of a sequence that dropped out of a cell. */
    private static final String DROPOUT = "-1";

    // Source cell sample; stored by the sampling constructor but not read in this class.
    private CellSample cells;
    // Name of the cell sample file this plate was generated from (sampling constructor only).
    private String sourceFile;
    // Name of the plate file this plate was read from (file-reader constructor only).
    private String filename;
    // wells.get(w) is the list of cells in well w; each cell is an array of sequences.
    private List<List<String[]>> wells;
    private final Random rand = BiGpairSEQ.getRand();
    // Nominal number of wells (see class note: may exceed wells.size()).
    private int size;
    // Probability that any single sequence of a sampled cell is replaced by the dropout marker.
    private double error;
    // Number of cells per well, one entry per plate section.
    private Integer[] populations;
    // Standard deviation of the Gaussian index distribution (unused when exponential).
    private double stdDev;
    // Rate parameter of the exponential index distribution (unused when Gaussian).
    private double lambda;
    // Left package-private on purpose: other classes in the package may read it directly.
    boolean exponential = false;

    /**
     * Creates a plate and immediately fills its wells by sampling from {@code cells}.
     *
     * @param cells            cell sample to draw from
     * @param cellFilename     name of the file the cell sample came from
     * @param numWells         nominal number of wells on the plate
     * @param populations      cells per well, one entry per plate section
     * @param dropoutRate      probability that each individual sequence of a sampled cell drops out
     * @param stdDev_or_lambda standard deviation (Gaussian) or rate lambda (exponential)
     * @param exponential      {@code true} to sample cell indices exponentially, {@code false} for Gaussian
     * @throws IllegalArgumentException if cells must be sampled from an empty cell list, or if
     *                                  {@code exponential} is set and lambda is not positive
     */
    public Plate(CellSample cells, String cellFilename, int numWells, Integer[] populations,
                 double dropoutRate, double stdDev_or_lambda, boolean exponential){
        this.cells = cells;
        this.sourceFile = cellFilename;
        this.size = numWells;
        this.wells = new ArrayList<>();
        this.error = dropoutRate;
        this.populations = populations;
        this.exponential = exponential;
        if (this.exponential) {
            fillWellsExponential(cells.getCells(), stdDev_or_lambda);
        }
        else {
            fillWells(cells.getCells(), stdDev_or_lambda);
        }
    }


    /**
     * Creates a plate with the given parameters and no wells; nothing is sampled.
     *
     * @param size        nominal number of wells
     * @param error       dropout probability per sequence
     * @param populations cells per well, one entry per plate section
     */
    public Plate(int size, double error, Integer[] populations) {
        this.size = size;
        this.error = error;
        this.populations = populations;
        wells = new ArrayList<>();
    }

    /**
     * Constructor for returning a Plate from a PlateFileReader.
     *
     * <p>The plate size is the number of wells given, and the populations are the distinct
     * well sizes in order of first appearance.
     *
     * @param filename name of the plate file
     * @param wells    wells read from the file; stored as-is (not copied)
     */
    public Plate(String filename, List<List<String[]>> wells) {
        this.filename = filename;
        this.wells = wells;
        this.size = wells.size();

        // LinkedHashSet keeps first-seen order while de-duplicating in O(1) per well.
        Set<Integer> distinctWellSizes = new LinkedHashSet<>();
        for (List<String[]> well : wells) {
            distinctWellSizes.add(well.size());
        }
        this.populations = distinctWellSizes.toArray(new Integer[0]);
    }

    /**
     * Fills the wells by sampling cell indices from an exponential distribution.
     *
     * @param cells  cells to sample from
     * @param lambda rate parameter of the exponential distribution; must be positive
     * @throws IllegalArgumentException if {@code lambda} is not positive (the rejection loop
     *                                  could otherwise never terminate)
     */
    private void fillWellsExponential(List<String[]> cells, double lambda){
        if (!(lambda > 0)) {
            throw new IllegalArgumentException("lambda must be positive, but was " + lambda);
        }
        this.lambda = lambda;
        exponential = true;
        populateWells(cells, true);
    }

    /**
     * Fills the wells by sampling cell indices from a Gaussian distribution centered on the
     * middle of the cell list.
     *
     * @param cells  cells to sample from
     * @param stdDev standard deviation of the Gaussian, in units of cell index
     */
    private void fillWells( List<String[]> cells, double stdDev) {
        this.stdDev = stdDev;
        populateWells(cells, false);
    }

    /**
     * Shared well-filling loop: for every population, creates {@code size / populations.length}
     * wells and fills each with that many sampled cells (with per-sequence dropout applied).
     *
     * @param cells          cells to sample from
     * @param useExponential {@code true} for exponential index sampling, {@code false} for Gaussian
     * @throws IllegalArgumentException if a cell has to be sampled but {@code cells} is empty
     */
    private void populateWells(List<String[]> cells, boolean useExponential) {
        final int numSections = populations.length;
        if (numSections == 0) {
            return; // nothing to fill; also avoids dividing by zero below
        }
        final int wellsPerSection = size / numSections;
        final int cellCount = cells.size();
        for (int section = 0; section < numSections; section++) {
            final int population = populations[section];
            if (cellCount == 0 && population > 0 && wellsPerSection > 0) {
                // With no cells every sampled index is rejected, so sampling would loop forever.
                throw new IllegalArgumentException("Cannot fill wells from an empty cell list");
            }
            for (int i = 0; i < wellsPerSection; i++) {
                List<String[]> well = new ArrayList<>();
                for (int j = 0; j < population; j++) {
                    int index = useExponential
                            ? sampleExponentialIndex(cellCount)
                            : sampleGaussianIndex(cellCount);
                    well.add(copyWithDropout(cells.get(index)));
                }
                wells.add(well);
            }
        }
    }

    /**
     * Draws a cell index in {@code [0, cellCount)} from an exponential distribution, using
     * rejection to discard out-of-range values.
     */
    private int sampleExponentialIndex(int cellCount) {
        final double scale = Math.sqrt(cellCount);
        double m;
        do {
            //inverse transform sampling: for random number u in [0,1), x = ln(1-u) / (-lambda)
            //(natural log; log10 would silently scale the rate by ln(10))
            m = (Math.log(1 - rand.nextDouble()) / (-lambda)) * scale;
        } while (m >= cellCount || m < 0);
        return (int) Math.floor(m);
    }

    /**
     * Draws a cell index in {@code [0, cellCount)} from a Gaussian centered on
     * {@code cellCount / 2} (integer division), using rejection to discard out-of-range values.
     */
    private int sampleGaussianIndex(int cellCount) {
        final int mean = cellCount / 2;
        double m;
        do {
            m = (rand.nextGaussian() * stdDev) + mean;
        } while (m >= cellCount || m < 0);
        return (int) Math.floor(m);
    }

    /**
     * Returns a copy of {@code cell} in which each sequence has independently been replaced
     * by the dropout marker with probability {@code error}. The source cell is not modified.
     */
    private String[] copyWithDropout(String[] cell) {
        String[] copy = cell.clone();
        for (int k = 0; k < copy.length; k++) {
            if (rand.nextDouble() < error) {//error applied to each sequence
                copy[k] = DROPOUT;
            }
        }
        return copy;
    }

    /** Returns the cells-per-well values, one per plate section (the internal array, not a copy). */
    public Integer[] getPopulations(){
        return populations;
    }

    /** Returns the nominal number of wells on the plate. */
    public int getSize(){
        return size;
    }

    /** Returns the Gaussian standard deviation used for sampling (0 if never set). */
    public double getStdDev() {
        return stdDev;
    }

    /** Returns whether the wells were filled using the exponential distribution. */
    public boolean isExponential(){return exponential;}

    /** Returns the exponential rate parameter used for sampling (0 if never set). */
    public double getLambda(){return lambda;}

    /** Returns the per-sequence dropout probability. */
    public double getError() {
        return error;
    }

    /** Returns the wells of this plate (the internal list, not a copy). */
    public List<List<String[]>> getWells() {
        return wells;
    }

//    //returns a map of the counts of the sequence at cell index sIndex, in all wells
//    public void assayWellsSequenceS(Map<String, Integer> sequences, int... sIndices){
//        this.assayWellsSequenceS(sequences, 0, size, sIndices);
//    }
//
//    //returns a map of the counts of the sequence at cell index sIndex, in a specific well
//    public void assayWellsSequenceS(Map<String, Integer> sequences, int n, int... sIndices) {
//        this.assayWellsSequenceS(sequences, n, n+1, sIndices);
//    }
//
//    //returns a map of the counts of the sequence at cell index sIndex, in a range of wells
//    public void assayWellsSequenceS(Map<String, Integer> sequences, int start, int end, int... sIndices) {
//        for(int sIndex: sIndices){
//            for(int i = start; i < end; i++){
//                countSequences(sequences, wells.get(i), sIndex);
//            }
//        }
//    }
//    //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
//    private void countSequences(Map<String, Integer> wellMap, List<String[]> well, int... sIndices) {
//        for(String[] cell : well) {
//            for(int sIndex: sIndices){
//                //skip dropout sequences, which have value -1
//                if(!"-1".equals(cell[sIndex])){
//                    wellMap.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
//                }
//            }
//        }
//    }

    /**
     * Simulates reading the sequences at cell indices {@code sIndices} in every well and
     * collects one {@link SequenceRecord} per distinct sequence read.
     *
     * <p>Each non-dropout sequence is read {@code readDepth} times. A read is a misread with
     * probability {@code readErrorRate}; misreads are represented by the original sequence
     * with one or more {@code "*"} appended. A misread either produces a new, never-seen
     * variant (one more star than any previous misread of that sequence, on any well) or,
     * with probability {@code errorCollisionRate}, repeats one of the variants already produced.
     *
     * @param readDepth          number of reads per sequence per cell
     * @param readErrorRate      probability that a single read is a misread
     * @param errorCollisionRate probability that a misread coincides with an earlier misread
     *                           of the same sequence
     * @param sIndices           indices into each cell's sequence array to read
     * @return map from sequence string to its record, in order of first appearance
     * @throws NullPointerException if any of the boxed rate/depth arguments is {@code null}
     */
    public Map<String, SequenceRecord> countSequences(Integer readDepth, Double readErrorRate,
                                                       Double errorCollisionRate, int... sIndices) {
        final int depth = Objects.requireNonNull(readDepth, "readDepth");
        final double errorRate = Objects.requireNonNull(readErrorRate, "readErrorRate");
        final double collisionRate = Objects.requireNonNull(errorCollisionRate, "errorCollisionRate");

        final SequenceType[] sequenceTypes = SequenceType.values();
        // Number of distinct misread variants generated so far for each true sequence.
        Map<String, Integer> distinctMisreadCounts = new HashMap<>();
        Map<String, SequenceRecord> sequenceMap = new LinkedHashMap<>();
        // Iterate over the wells that actually exist: this can be fewer than `size`
        // (size not divisible by populations.length, or a plate created without wells).
        for (int well = 0; well < wells.size(); well++) {
            final int wellIndex = well;
            for (String[] cell : wells.get(well)) {
                for (int sIndex : sIndices) {
                    final String sequence = cell[sIndex];
                    //skip dropout sequences, which have value -1
                    if (DROPOUT.equals(sequence)) {
                        continue;
                    }
                    final SequenceType type = sequenceTypes[sIndex];
                    for (int read = 0; read < depth; read++) {
                        if (rand.nextDouble() < errorRate) {
                            recordMisread(sequenceMap, distinctMisreadCounts, sequence, type,
                                    wellIndex, collisionRate);
                        }
                        else {
                            //sequence is read correctly
                            sequenceMap.computeIfAbsent(sequence, s -> new SequenceRecord(s, type))
                                    .addRead(wellIndex);
                        }
                    }
                }
            }
        }
        return sequenceMap;
    }

    /**
     * Records one misread of {@code sequence} on the given well, either as a brand-new starred
     * variant or as a repeat (collision) of a variant generated earlier.
     */
    private void recordMisread(Map<String, SequenceRecord> sequenceMap,
                               Map<String, Integer> distinctMisreadCounts,
                               String sequence, SequenceType type, int well, double collisionRate) {
        // The random draw comes first so the generator is consumed identically on every misread.
        boolean distinct = rand.nextDouble() > collisionRate
                || !distinctMisreadCounts.containsKey(sequence);
        if (distinct) {
            //if this sequence hasn't been misread before, or the read error is unique,
            //append one more "*" than has been appended before
            int starCount = distinctMisreadCounts.merge(sequence, 1, Integer::sum);
            String misread = appendStars(sequence, starCount);
            SequenceRecord record = new SequenceRecord(misread, type);
            record.addRead(well);
            sequenceMap.put(misread, record);
        }
        else {
            //read error collision: reuse a star count that has been appended before.
            //Valid counts are 1..n; nextInt(n) alone yields 0..n-1, where 0 stars is the
            //unmodified sequence (never a misread, and possibly absent from the map).
            int starCount = rand.nextInt(distinctMisreadCounts.get(sequence)) + 1;
            sequenceMap.get(appendStars(sequence, starCount)).addRead(well);
        }
    }

    /** Returns {@code sequence} followed by {@code starCount} {@code '*'} characters. */
    private static String appendStars(String sequence, int starCount) {
        StringBuilder spurious = new StringBuilder(sequence);
        for (int k = 0; k < starCount; k++) {
            spurious.append("*");
        }
        return spurious.toString();
    }


//    //returns a map of the counts of the sequence at cell index sIndex, in all wells
//    //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
//    public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
//                                                 int readDepth, double readErrorProb, double errorCollisionProb, int... sIndices) {
//        this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, 0, size, sIndices);
//    }
//    //returns a map of the counts of the sequence at cell index sIndex, in a specific of wells
//    //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
//    public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
//                                                 int readDepth, double readErrorProb, double errorCollisionProb,
//                                                 int n, int... sIndices) {
//       this.assayWellsSequenceSWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, n, n+1, sIndices);
//    }
//
//    //returns a map of the counts of the sequence at cell index sIndex, in a range of wells
//    //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
//    public void assayWellsSequenceSWithReadDepth(Map<String, Integer> misreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
//                                                 int readDepth, double readErrorProb, double errorCollisionProb,
//                                                 int start, int end, int... sIndices) {
//        for(int sIndex: sIndices){
//            for(int i = start; i < end; i++){
//                countSequencesWithReadDepth(misreadCounts, occupancyMap, readCountMap, readDepth, readErrorProb, errorCollisionProb, wells.get(i), sIndex);
//            }
//        }
//    }
//
//    //For the sequences at cell indices sIndices, counts number of unique sequences in the given well into the given map
//    //Simulates read depth and read errors, counts the number of reads of a unique sequence into the given map.
//    //NOTE: this function changes the content of the well, adding spurious cells to contain the misread sequences
//    //(this is necessary because, in the simulation, the plate is read multiple times, but random misreads can only
//    //be simulated once).
//    //(Possibly I should refactor all of this to only require a single plate assay, to speed things up. Or at least
//    //to see if it would speed things up.)
//    private void countSequencesWithReadDepth(Map<String, Integer> distinctMisreadCounts, Map<String, Integer> occupancyMap, Map<String, Integer> readCountMap,
//                                             int readDepth, double readErrorProb, double errorCollisionProb,
//                                             List<String[]> well, int... sIndices) {
//        //list of spurious cells to add to well after counting
//        List<String[]> spuriousCells = new ArrayList<>();
//        for(String[] cell : well) {
//            //new potential spurious cell for each cell that gets read
//            String[] spuriousCell = new String[SequenceType.values().length];
//            //initialize spurious cell with all dropout sequences
//            Arrays.fill(spuriousCell, "-1");
//            //has a read error occurred?
//            boolean readError = false;
//            for(int sIndex: sIndices){
//                //skip dropout sequences, which have value "-1"
//                if(!"-1".equals(cell[sIndex])){
//                    Map<String, Integer> sequencesWithReadCounts = new LinkedHashMap<>();
//                    for(int i = 0; i < readDepth; i++) {
//                        if (rand.nextDouble() <= readErrorProb) {
//                            readError = true;
//                            //Read errors are represented by appending "*"s to the end of the sequence some number of times
//                            StringBuilder spurious = new StringBuilder(cell[sIndex]);
//                            //if this sequence hasn't been misread before, or the read error is unique,
//                            //append one more "*" than has been appended before
//                            if (!distinctMisreadCounts.containsKey(cell[sIndex]) || rand.nextDouble() > errorCollisionProb) {
//                                distinctMisreadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
//                                for (int j = 0; j < distinctMisreadCounts.get(cell[sIndex]); j++) {
//                                    spurious.append("*");
//                                }
//                            }
//                            //if this is a read error collision, randomly choose a number of "*"s that has been appended before
//                            else {
//                                int starCount = rand.nextInt(distinctMisreadCounts.get(cell[sIndex]));
//                                for (int j = 0; j < starCount; j++) {
//                                    spurious.append("*");
//                                }
//                            }
//                            sequencesWithReadCounts.merge(spurious.toString(), 1, (oldValue, newValue) -> oldValue + newValue);
//                            //add spurious sequence to spurious cell
//                            spuriousCell[sIndex] = spurious.toString();
//                        }
//                        else {
//                            sequencesWithReadCounts.merge(cell[sIndex], 1, (oldValue, newValue) -> oldValue + newValue);
//                        }
//                    }
//                    for(String seq : sequencesWithReadCounts.keySet()) {
//                        occupancyMap.merge(seq, 1, (oldValue, newValue) -> oldValue + newValue);
//                        readCountMap.merge(seq, sequencesWithReadCounts.get(seq), (oldValue, newValue) -> oldValue + newValue);
//                    }
//                }
//            }
//            if (readError) { //only add a new spurious cell if there was a read error
//                spuriousCells.add(spuriousCell);
//            }
//        }
//        //add all spurious cells to the well
//        well.addAll(spuriousCells);
//    }

    /** Returns the name of the cell sample file this plate was generated from, or {@code null} if not set. */
    public String getSourceFileName() {
        return sourceFile;
    }

    /** Returns the name of the plate file this plate was read from, or {@code null} if not set. */
    public String getFilename() { return filename; }
}