Add filtering based on occupancy/read count discrepancy

This commit is contained in:
eugenefischer
2022-09-26 23:39:18 -05:00
parent 14fcfe1ff3
commit f7b3c133bf

View File

@@ -63,10 +63,22 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Well maps made");}
//ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value
if (readDepth == 1) {
if(verbose){System.out.println("Removing sequences present in all wells.");}
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
if(verbose){System.out.println("Sequences removed");}
}
else {
if(verbose){System.out.println("Removing sequences present in all wells.");}
filterByOccupancyThresholds(allAlphas, 1, numWells - 1);
filterByOccupancyThresholds(allBetas, 1, numWells - 1);
if(verbose){System.out.println("Sequences removed");}
if(verbose){System.out.println("Removing sequences with disparate occupancies and read counts");}
filterByOccupancyAndReadCount(allAlphas, alphaReadCounts, readDepth);
filterByOccupancyAndReadCount(allBetas, betaReadCounts, readDepth);
if(verbose){System.out.println("Sequences removed");}
}
int pairableAlphaCount = allAlphas.size();
if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);}
int pairableBetaCount = allBetas.size();
@@ -676,6 +688,21 @@ public class Simulator implements GraphModificationFunctions {
}
}
/**
 * Removes, in place, every sequence whose total read count is too low relative to its
 * well occupancy.
 *
 * <p>For each entry of {@code sequences} the threshold is
 * {@code (occupancy * readDepth) / 2} using truncating integer division. The entry is
 * removed when its read count is strictly less than that threshold, so a read count
 * equal to the threshold is kept.
 *
 * <p>A sequence with no entry in {@code sequenceReadCounts} is treated as having zero
 * reads rather than causing a {@link NullPointerException}.
 *
 * @param sequences          map from sequence to its occupancy; modified in place
 * @param sequenceReadCounts map from sequence to its read count; only read, never modified
 * @param readDepth          multiplier applied to the occupancy when computing the threshold
 */
public static void filterByOccupancyAndReadCount(Map<String, Integer> sequences,
                                                 Map<String, Integer> sequenceReadCounts, int readDepth) {
    sequences.entrySet().removeIf(entry -> {
        //the read count should be at least half of (occupancy * read depth) if the read error rate is low.
        //long arithmetic keeps the product from overflowing for large occupancies or read depths.
        long threshold = ((long) entry.getValue() * readDepth) / 2;
        //a sequence missing from the read count map is counted as having zero reads
        long readCount = sequenceReadCounts.getOrDefault(entry.getKey(), 0);
        return readCount < threshold;
    });
}
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array
private static void countSequencesAndFillMatrix(Plate samplePlate,