Add filtering based on occupancy/read count discrepancy
This commit is contained in:
@@ -63,10 +63,22 @@ public class Simulator implements GraphModificationFunctions {
|
|||||||
if(verbose){System.out.println("Well maps made");}
|
if(verbose){System.out.println("Well maps made");}
|
||||||
|
|
||||||
//ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value
|
//ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value
|
||||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
if (readDepth == 1) {
|
||||||
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||||
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
||||||
if(verbose){System.out.println("Sequences removed");}
|
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
||||||
|
if(verbose){System.out.println("Sequences removed");}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||||
|
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
||||||
|
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
||||||
|
if(verbose){System.out.println("Sequences removed");}
|
||||||
|
if(verbose){System.out.println("Removing sequences with disparate occupancies and read counts");}
|
||||||
|
filterByOccupancyAndReadCount(allAlphas, alphaReadCounts, readDepth);
|
||||||
|
filterByOccupancyAndReadCount(allBetas, betaReadCounts, readDepth);
|
||||||
|
if(verbose){System.out.println("Sequences removed");}
|
||||||
|
}
|
||||||
int pairableAlphaCount = allAlphas.size();
|
int pairableAlphaCount = allAlphas.size();
|
||||||
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
||||||
int pairableBetaCount = allBetas.size();
|
int pairableBetaCount = allBetas.size();
|
||||||
@@ -676,6 +688,21 @@ public class Simulator implements GraphModificationFunctions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void filterByOccupancyAndReadCount(Map<String, Integer> sequences,
|
||||||
|
Map<String, Integer> sequenceReadCounts, int readDepth) {
|
||||||
|
List<String> noise = new ArrayList<>();
|
||||||
|
for(String k : sequences.keySet()){
|
||||||
|
//occupancy times read depth should be more than half the sequence read count if the read error rate is low
|
||||||
|
Integer threshold = (sequences.get(k) * readDepth) / 2;
|
||||||
|
if(sequenceReadCounts.get(k) < threshold) {
|
||||||
|
noise.add(k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(String k : noise) {
|
||||||
|
sequences.remove(k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
||||||
//fills weights in the given 2D array
|
//fills weights in the given 2D array
|
||||||
private static void countSequencesAndFillMatrix(Plate samplePlate,
|
private static void countSequencesAndFillMatrix(Plate samplePlate,
|
||||||
|
|||||||
Reference in New Issue
Block a user