From f7b3c133bf949ca177f9202d5b52bc2fdd873ddd Mon Sep 17 00:00:00 2001 From: eugenefischer <66030419+eugenefischer@users.noreply.github.com> Date: Mon, 26 Sep 2022 23:39:18 -0500 Subject: [PATCH] Add filtering based on occupancy/read count discrepancy --- src/main/java/Simulator.java | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/main/java/Simulator.java b/src/main/java/Simulator.java index 5b31a81..4ce0a06 100644 --- a/src/main/java/Simulator.java +++ b/src/main/java/Simulator.java @@ -63,10 +63,22 @@ public class Simulator implements GraphModificationFunctions { if(verbose){System.out.println("Well maps made");} //ideally we wouldn't do any graph pre-filtering. But sequences present in all wells add a huge number of edges to the graph and don't carry any signal value - if(verbose){System.out.println("Removing sequences present in all wells.");} - filterByOccupancyThresholds(allAlphas, 1, numWells - 1); - filterByOccupancyThresholds(allBetas, 1, numWells - 1); - if(verbose){System.out.println("Sequences removed");} + if (readDepth == 1) { + if(verbose){System.out.println("Removing sequences present in all wells.");} + filterByOccupancyThresholds(allAlphas, 1, numWells - 1); + filterByOccupancyThresholds(allBetas, 1, numWells - 1); + if(verbose){System.out.println("Sequences removed");} + } + else { + if(verbose){System.out.println("Removing sequences present in all wells.");} + filterByOccupancyThresholds(allAlphas, 1, numWells - 1); + filterByOccupancyThresholds(allBetas, 1, numWells - 1); + if(verbose){System.out.println("Sequences removed");} + if(verbose){System.out.println("Removing sequences with disparate occupancies and read counts");} + filterByOccupancyAndReadCount(allAlphas, alphaReadCounts, readDepth); + filterByOccupancyAndReadCount(allBetas, betaReadCounts, readDepth); + if(verbose){System.out.println("Sequences removed");} + } int pairableAlphaCount = allAlphas.size(); if(verbose){System.out.println("Remaining alphas count: " + pairableAlphaCount);} int pairableBetaCount = allBetas.size(); @@ -676,6 +688,21 @@ public class Simulator implements GraphModificationFunctions { } } + public static void filterByOccupancyAndReadCount(Map sequences, + Map sequenceReadCounts, int readDepth) { + List noise = new ArrayList<>(); + for(String k : sequences.keySet()){ + //occupancy times read depth should be more than half the sequence read count if the read error rate is low + Integer threshold = (sequences.get(k) * readDepth) / 2; + if(sequenceReadCounts.get(k) < threshold) { + noise.add(k); + } + } + for(String k : noise) { + sequences.remove(k); + } + } + //Counts the well occupancy of the row peptides and column peptides into given maps, and //fills weights in the given 2D array private static void countSequencesAndFillMatrix(Plate samplePlate,