Add filtering based on occupancy/read count discrepancy
This commit is contained in:
@@ -63,10 +63,22 @@ public class Simulator implements GraphModificationFunctions {
|
||||
if(verbose){System.out.println("Well maps made");}
|
||||
|
||||
//ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value
|
||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
if (readDepth == 1) {
|
||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
}
|
||||
else {
|
||||
if(verbose){System.out.println("Removing sequences present in all wells.");}
|
||||
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
|
||||
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
if(verbose){System.out.println("Removing sequences with disparate occupancies and read counts");}
|
||||
filterByOccupancyAndReadCount(allAlphas, alphaReadCounts, readDepth);
|
||||
filterByOccupancyAndReadCount(allBetas, betaReadCounts, readDepth);
|
||||
if(verbose){System.out.println("Sequences removed");}
|
||||
}
|
||||
int pairableAlphaCount = allAlphas.size();
|
||||
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
|
||||
int pairableBetaCount = allBetas.size();
|
||||
@@ -676,6 +688,21 @@ public class Simulator implements GraphModificationFunctions {
|
||||
}
|
||||
}
|
||||
|
||||
public static void filterByOccupancyAndReadCount(Map<String, Integer> sequences,
|
||||
Map<String, Integer> sequenceReadCounts, int readDepth) {
|
||||
List<String> noise = new ArrayList<>();
|
||||
for(String k : sequences.keySet()){
|
||||
//occupancy times read depth should be more than half the sequence read count if the read error rate is low
|
||||
Integer threshold = (sequences.get(k) * readDepth) / 2;
|
||||
if(sequenceReadCounts.get(k) < threshold) {
|
||||
noise.add(k);
|
||||
}
|
||||
}
|
||||
for(String k : noise) {
|
||||
sequences.remove(k);
|
||||
}
|
||||
}
|
||||
|
||||
//Counts the well occupancy of the row peptides and column peptides into given maps, and
|
||||
//fills weights in the given 2D array
|
||||
private static void countSequencesAndFillMatrix(Plate samplePlate,
|
||||
|
||||
Reference in New Issue
Block a user