BiGpairSEQ/src/main/java/Simulator.java

import org.jgrapht.alg.interfaces.MatchingAlgorithm;
import org.jgrapht.alg.matching.MaximumWeightBipartiteMatching;
import org.jgrapht.generate.SimpleWeightedBipartiteGraphMatrixGenerator;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.jgrapht.graph.SimpleWeightedGraph;
import org.jheaps.tree.FibonacciHeap;
import org.jheaps.tree.PairingHeap;

import java.math.BigDecimal;
import java.math.MathContext;
import java.text.NumberFormat;
import java.time.Instant;
import java.time.Duration;
import java.util.*;
import java.util.stream.IntStream;

import static java.lang.Float.*;

//NOTE: "sequence" in method and variable names refers to a peptide sequence from a simulated T cell
public class Simulator implements GraphModificationFunctions {


    //Make the graph needed for matching sequences.
    //The CDR3 alpha sequences and the CDR3 beta sequences found on the sample plate are used as the two sets
    //of vertices in the bipartite graph. ("Source" and "target" are JGraphT terms for the two vertices an edge
    //touches, even if the edge is not directed.)
    /**
     * Builds the weighted bipartite graph of the CDR3 alpha and CDR3 beta sequences found on a sample plate.
     * One partition holds a {@link Vertex} per alpha sequence, the other a {@link Vertex} per beta sequence.
     * Edge weights come from the matrix filled in by {@code countSequencesAndFillMatrix}.
     *
     * @param cellSample  the cell sample whose distinct cells provide the reference alpha-to-beta map that is
     *                    stored with the graph
     * @param samplePlate the plate whose wells are assayed for alpha and beta sequences
     * @param verbose     if true, progress messages are printed to standard output
     * @return the graph bundled with the well count, well populations, the alpha-keyed cell map, the time
     *         spent building the graph, and the sample plate's filename as source filename
     */
    public static GraphWithMapData makeGraph(CellSample cellSample, Plate samplePlate, boolean verbose) {
        Instant start = Instant.now();
        List<Integer[]> distinctCells = cellSample.getCells();
        int[] alphaIndices = {SequenceType.CDR3_ALPHA.ordinal()};
        int[] betaIndices = {SequenceType.CDR3_BETA.ordinal()};

        int numWells = samplePlate.getSize();

        if(verbose){System.out.println("Making cell maps");}
        //Map keyed to alphas, values betas.
        //NOTE(review): the literal indices 0 and 1 presumably correspond to the CDR3 alpha and CDR3 beta
        //ordinals used for alphaIndices/betaIndices above - confirm against SequenceType.
        Map<Integer, Integer> distCellsMapAlphaKey = makeSequenceToSequenceMap(distinctCells, 0, 1);
        if(verbose){System.out.println("Cell maps made");}

        if(verbose){System.out.println("Making well maps");}
        Map<Integer, Integer> allAlphas = samplePlate.assayWellsSequenceS(alphaIndices);
        Map<Integer, Integer> allBetas = samplePlate.assayWellsSequenceS(betaIndices);
        if(verbose){System.out.println("All alphas count: " + allAlphas.size());}
        if(verbose){System.out.println("All betas count: " + allBetas.size());}
        if(verbose){System.out.println("Well maps made");}

        //Occupancy filtering is currently disabled, so the "remaining" counts printed below are the same
        //as the "all" counts printed above.
//        if(verbose){System.out.println("Removing singleton sequences and sequences present in all wells.");}
//        filterByOccupancyThresholds(allAlphas, 2, numWells - 1);
//        filterByOccupancyThresholds(allBetas, 2, numWells - 1);
//        if(verbose){System.out.println("Sequences removed");}
        if(verbose){System.out.println("Remaining alphas count: " + allAlphas.size());}
        if(verbose){System.out.println("Remaining betas count: " + allBetas.size());}

        if(verbose){System.out.println("Making vertex maps");}
        //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
        //distinct numbers associated with them. Alphas are numbered starting from 0 and betas are numbered
        //starting from the number of alphas, so every vertex gets a unique number and the beta start value
        //can be subtracted from a beta's vertex number to get its column index in the weight matrix.
        int alphaVertexStartValue = 0;
        //keys are sequential integer vertices, values are alphas
        Map<Integer, Integer> plateVtoAMap = makeVertexToSequenceMap(allAlphas, alphaVertexStartValue);
        //start value for the vertex to beta map is one more than the final vertex value in the alpha map
        int betaVertexStartValue = alphaVertexStartValue + plateVtoAMap.size();
        //keys are sequential integer vertices, values are betas
        Map<Integer, Integer> plateVtoBMap = makeVertexToSequenceMap(allBetas, betaVertexStartValue);
        //keys are alphas, values are sequential integer vertices from previous map
        Map<Integer, Integer> plateAtoVMap = invertVertexMap(plateVtoAMap);
        //keys are betas, values are sequential integer vertices from previous map
        Map<Integer, Integer> plateBtoVMap = invertVertexMap(plateVtoBMap);
        if(verbose){System.out.println("Vertex maps made");}

        //make adjacency matrix for bipartite graph generator
        //(technically this is only 1/4 of an adjacency matrix, but that's all you need
        //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
        if(verbose){System.out.println("Creating adjacency matrix");}
        //Count how many wells each alpha sequence appears in
        Map<Integer, Integer> alphaWellCounts = new HashMap<>();
        //count how many wells each beta sequence appears in
        Map<Integer, Integer> betaWellCounts = new HashMap<>();
        //the adjacency matrix to be used by the graph generator: one row per alpha, one column per beta
        double[][] weights = new double[plateVtoAMap.size()][plateVtoBMap.size()];
        countSequencesAndFillMatrix(samplePlate, allAlphas, allBetas, plateAtoVMap,
                plateBtoVMap, alphaIndices, betaIndices, alphaWellCounts, betaWellCounts, weights);
        if(verbose){System.out.println("Matrix created");}

        //create bipartite graph
        if(verbose){System.out.println("Creating graph");}
        SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
        //parameterized (rather than raw) generator so the vertex lists and target graph are type checked
        SimpleWeightedBipartiteGraphMatrixGenerator<Vertex, DefaultWeightedEdge> graphGenerator =
                new SimpleWeightedBipartiteGraphMatrixGenerator<>();
        //alpha vertices, with occupancy taken from the alphaWellCounts map
        graphGenerator.first(makeVerticesInMatrixOrder(SequenceType.CDR3_ALPHA, plateAtoVMap, alphaWellCounts));
        //beta vertices, with occupancy taken from the betaWellCounts map
        graphGenerator.second(makeVerticesInMatrixOrder(SequenceType.CDR3_BETA, plateBtoVMap, betaWellCounts));
        //use adjacency matrix of weights created previously
        graphGenerator.weights(weights);
        graphGenerator.generateGraph(graph);
        if(verbose){System.out.println("Graph created");}

        Instant stop = Instant.now();
        Duration time = Duration.between(start, stop);

        //create GraphWithMapData object
        GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), distCellsMapAlphaKey, time);
        //Set source file name in graph to name of sample plate
        output.setSourceFilename(samplePlate.getFilename());
        return output;
    }

    //Creates one Vertex per sequence in sequenceToVertexMap, ordered by ascending vertex number.
    //The graph generator pairs list position i with row (or column) i of the weight matrix, so the list
    //order must follow the vertex numbering and must not depend on the iteration order of the map.
    //(Assumes countSequencesAndFillMatrix indexes the matrix by vertex number, offset by the beta start
    //value for betas, as the numbering scheme in makeGraph intends.)
    private static List<Vertex> makeVerticesInMatrixOrder(SequenceType type,
                                                          Map<Integer, Integer> sequenceToVertexMap,
                                                          Map<Integer, Integer> wellCounts) {
        List<Map.Entry<Integer, Integer>> entries = new ArrayList<>(sequenceToVertexMap.entrySet());
        entries.sort(Map.Entry.comparingByValue());
        List<Vertex> vertices = new ArrayList<>(entries.size());
        for (Map.Entry<Integer, Integer> entry : entries) {
            Integer seq = entry.getKey();
            vertices.add(new Vertex(type, seq, wellCounts.get(seq), entry.getValue()));
        }
        return vertices;
    }

    //match CDR3s.
    /**
     * Pairs CDR3 alpha and CDR3 beta sequences by finding a maximum weight matching on the bipartite graph
     * in {@code data}, after removing edges that fail the given overlap and occupancy filters.
     * <p>
     * If {@code BiGpairSEQ.cacheGraph()} is true, the edges removed by the filters are put back on the graph
     * before this method exits, including when it exits with an exception, so that the cached graph is
     * not left in a filtered state.
     *
     * @param data                   the graph and associated map data produced by {@code makeGraph}
     * @param dataFilename           name of the graph file, recorded in the result metadata
     * @param lowThreshold           low overlap threshold passed to the overlap filter
     * @param highThreshold          high overlap threshold passed to the overlap filter
     * @param maxOccupancyDifference maximum occupancy difference passed to the relative occupancy filter
     * @param minOverlapPercent      minimum overlap percent passed to the overlap percent filter
     * @param verbose                if true, progress messages and the result comments are printed
     * @return the matching result: metadata, CSV header, one row per matched pair, the map of all matched
     *         alpha sequences to beta sequences (correct or not), and the total simulation time.
     *         The "pairing attempt rate" and "pairing error rate" metadata values are -1 when the rate is
     *         undefined (no possible pairs, or no pairs attempted, respectively).
     */
    public static MatchingResult matchCDR3s(GraphWithMapData data, String dataFilename, Integer lowThreshold,
                                            Integer highThreshold, Integer maxOccupancyDifference,
                                            Integer minOverlapPercent, boolean verbose) {
        Instant start = Instant.now();
        SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = data.getGraph();
        Map<Vertex[], Integer> removedEdges = new HashMap<>();
        boolean saveEdges = BiGpairSEQ.cacheGraph();
        int numWells = data.getNumWells();
        Map<Integer, Integer> distCellsMapAlphaKey = data.getDistCellsMapAlphaKey();

        //split the vertices into the two partitions of the bipartite graph
        Set<Vertex> alphas = new HashSet<>();
        Set<Vertex> betas = new HashSet<>();
        for(Vertex v: graph.vertexSet()) {
            if (SequenceType.CDR3_ALPHA.equals(v.getType())){
                alphas.add(v);
            }
            else {
                betas.add(v);
            }
        }
        int alphaCount = alphas.size();
        int betaCount = betas.size();

        try {
            //remove edges with weights outside given overlap thresholds, add those to removed edge list
            if(verbose){System.out.println("Eliminating edges with weights outside overlap threshold values");}
            removedEdges.putAll(GraphModificationFunctions.filterByOverlapThresholds(graph, lowThreshold, highThreshold, saveEdges));
            if(verbose){System.out.println("Over- and under-weight edges removed");}

            //remove edges between vertices with too small an overlap size, add those to removed edge list
            if(verbose){System.out.println("Eliminating edges with weights less than " + minOverlapPercent.toString() +
                    " percent of vertex occupancy value.");}
            removedEdges.putAll(GraphModificationFunctions.filterByOverlapPercent(graph, minOverlapPercent, saveEdges));
            if(verbose){System.out.println("Edges with weights too far below a vertex occupancy value removed");}

            //Filter by relative occupancy
            if(verbose){System.out.println("Eliminating edges between vertices with occupancy difference > "
                    + maxOccupancyDifference);}
            removedEdges.putAll(GraphModificationFunctions.filterByRelativeOccupancy(graph, maxOccupancyDifference, saveEdges));
            if(verbose){System.out.println("Edges between vertices of with excessively different occupancy values " +
                    "removed");}

            //Find Maximum Weighted Matching, using the configured heap type for the priority queue
            if(verbose){System.out.println("Finding maximum weighted matching");}
            String heapType = BiGpairSEQ.getPriorityQueueHeapType();
            MaximumWeightBipartiteMatching<Vertex, DefaultWeightedEdge> maxWeightMatching =
                    makeMaxWeightMatching(graph, alphas, betas, heapType);
            MatchingAlgorithm.Matching<Vertex, DefaultWeightedEdge> graphMatching = maxWeightMatching.getMatching();
            if(verbose){System.out.println("Matching completed");}
            Instant stop = Instant.now();

            //Header for CSV file
            List<String> header = new ArrayList<>(List.of(
                    "Alpha",
                    "Alpha well count",
                    "Beta",
                    "Beta well count",
                    "Overlap well count",
                    "Matched correctly?",
                    "P-value"));

            //Results for csv file
            List<List<String>> allResults = new ArrayList<>();
            NumberFormat nf = NumberFormat.getInstance(Locale.US);
            MathContext mc = new MathContext(3);
            int trueCount = 0;
            int falseCount = 0;
            //The match map is all matches found, not just true matches!
            Map<Integer, Integer> matchMap = new HashMap<>();
            for(DefaultWeightedEdge e: graphMatching) {
                Vertex alpha = graph.getEdgeSource(e);
                Vertex beta = graph.getEdgeTarget(e);
                //The graph is undirected, so which endpoint is reported as the "source" is an artifact of
                //how the edge was added. Normalize so the alpha vertex is always treated as the alpha.
                if(!SequenceType.CDR3_ALPHA.equals(alpha.getType())) {
                    Vertex swap = alpha;
                    alpha = beta;
                    beta = swap;
                }
                matchMap.put(alpha.getSequence(), beta.getSequence());
                //a match is correct if the simulated cells actually pair this alpha with this beta
                boolean check = beta.getSequence().equals(distCellsMapAlphaKey.get(alpha.getSequence()));
                if(check) {
                    trueCount++;
                }
                else {
                    falseCount++;
                }
                double overlap = graph.getEdgeWeight(e);
                double pValue = Equations.pValue(numWells, alpha.getOccupancy(), beta.getOccupancy(), overlap);
                List<String> result = new ArrayList<>();
                //alpha sequence
                result.add(alpha.getSequence().toString());
                //alpha well count
                result.add(alpha.getOccupancy().toString());
                //beta sequence
                result.add(beta.getSequence().toString());
                //beta well count
                result.add(beta.getOccupancy().toString());
                //overlap count
                result.add(Double.toString(overlap));
                result.add(Boolean.toString(check));
                result.add(new BigDecimal(pValue, mc).toString());
                allResults.add(result);
            }

            //Metadata comments for CSV file
            String algoType = "LEDA book with heap: " + heapType;
            int attempted = trueCount + falseCount;
            //matching weight
            BigDecimal totalMatchingWeight = maxWeightMatching.getMatchingWeight();
            //rate of attempted matching; undefined (NaN) if either partition is empty
            BigDecimal attemptRateTrunc = truncateRateOrSentinel(
                    (double) attempted / Math.min(alphaCount, betaCount), mc);
            //rate of pairing error; undefined (NaN) if no pairs were attempted
            BigDecimal pairingErrorRateTrunc = truncateRateOrSentinel((double) falseCount / attempted, mc);
            //make comma separated string out of the list of well populations (empty if there are none)
            StringJoiner populationsJoiner = new StringJoiner(", ");
            for(Integer population: data.getWellPopulations()) {
                populationsJoiner.add(String.valueOf(population));
            }
            //total simulation time: matching time plus the time recorded with the graph data
            Duration time = Duration.between(start, stop).plus(data.getTime());

            Map<String, String> metadata = new LinkedHashMap<>();
            metadata.put("sample plate filename", data.getSourceFilename());
            metadata.put("graph filename", dataFilename);
            metadata.put("algorithm type", algoType);
            metadata.put("matching weight", totalMatchingWeight.toString());
            metadata.put("well populations", populationsJoiner.toString());
            metadata.put("total alphas found", Integer.toString(alphaCount));
            metadata.put("total betas found", Integer.toString(betaCount));
            metadata.put("high overlap threshold", highThreshold.toString());
            metadata.put("low overlap threshold", lowThreshold.toString());
            metadata.put("minimum overlap percent", minOverlapPercent.toString());
            metadata.put("maximum occupancy difference", maxOccupancyDifference.toString());
            metadata.put("pairing attempt rate", attemptRateTrunc.toString());
            metadata.put("correct pairing count", Integer.toString(trueCount));
            metadata.put("incorrect pairing count", Integer.toString(falseCount));
            metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
            metadata.put("simulation time (seconds)", nf.format(time.toSeconds()));
            //create MatchingResult object
            MatchingResult output = new MatchingResult(metadata, header, allResults, matchMap, time);
            if(verbose){
                for(String s: output.getComments()){
                    System.out.println(s);
                }
            }
            return output;
        }
        finally {
            if(saveEdges) {
                //put the removed edges back on the graph, even if matching failed part way through
                System.out.println("Restoring removed edges to graph.");
                GraphModificationFunctions.addRemovedEdges(graph, removedEdges);
            }
        }
    }

    //Creates the maximum weight matching algorithm for the graph, using the named heap implementation for
    //its priority queue. Any name other than "PAIRING" or "FIBONACCI" uses the algorithm's default heap.
    private static MaximumWeightBipartiteMatching<Vertex, DefaultWeightedEdge> makeMaxWeightMatching(
            SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph, Set<Vertex> alphas, Set<Vertex> betas,
            String heapType) {
        return switch (heapType) {
            case "PAIRING" -> new MaximumWeightBipartiteMatching<>(graph, alphas, betas,
                    unused -> new PairingHeap<BigDecimal, Vertex>(Comparator.naturalOrder()));
            case "FIBONACCI" -> new MaximumWeightBipartiteMatching<>(graph, alphas, betas,
                    unused -> new FibonacciHeap<BigDecimal, Vertex>(Comparator.naturalOrder()));
            default -> new MaximumWeightBipartiteMatching<>(graph, alphas, betas);
        };
    }

    //Rounds a rate to the given precision. A rate that is not finite (e.g. 0/0) cannot be represented as a
    //BigDecimal, so -1 is returned as the "undefined" marker instead of throwing NumberFormatException.
    private static BigDecimal truncateRateOrSentinel(double rate, MathContext mc) {
        if(Double.isFinite(rate)) {
            return new BigDecimal(rate, mc);
        }
        return new BigDecimal(-1, mc);
    }

    //Commented out CDR1 matching until it's time to re-implement it
//    //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
//    public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
//                                  Plate samplePlate, Integer lowThreshold,
//                                            Integer highThreshold, MatchingResult priorResult){
//        Instant start = Instant.now();
//        Duration previousTime = priorResult.getTime();
//        Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
//        int numWells = samplePlate.getSize();
//        int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
//        int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
//
//        System.out.println("Making previous match maps");
//        Map<Integer, Integer> cdr3AtoBMap = previousMatches;
//        Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
//        System.out.println("Previous match maps made");
//
//        System.out.println("Making cell maps");
//        Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
//        Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
//        System.out.println("Cell maps made");
//
//        System.out.println("Making well maps");
//        Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
//        Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
//        int CDR3Count = allCDR3s.size();
//        System.out.println("all CDR3s count: " + CDR3Count);
//        int CDR1Count = allCDR1s.size();
//        System.out.println("all CDR1s count: " + CDR1Count);
//        System.out.println("Well maps made");
//
//        System.out.println("Removing unpaired CDR3s from well maps");
//        List<Integer> unpairedCDR3s = new ArrayList<>();
//        for(Integer i: allCDR3s.keySet()){
//            if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
//                unpairedCDR3s.add(i);
//            }
//        }
//        for(Integer i: unpairedCDR3s){
//            allCDR3s.remove(i);
//        }
//        System.out.println("Unpaired CDR3s removed.");
//        System.out.println("Remaining CDR3 count: " + allCDR3s.size());
//
//        System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
//        filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
//        System.out.println("CDR1s removed.");
//        System.out.println("Remaining CDR1 count: " + allCDR1s.size());
//
//        System.out.println("Making vertex maps");
//
//        //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
//        // distinct numbers associated with them. Since I'm using a 2D array, that means
//        // distinct indices between the rows and columns. vertexStartValue lets me track where I switch
//        // from numbering rows to columns, so I can assign unique numbers to every vertex, and then
//        // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
//        Integer vertexStartValue = 0;
//        //keys are sequential integer vertices, values are CDR3s
//        Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
//        //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
//        vertexStartValue += plateVtoCDR3Map.size();
//        //keys are sequential integers vertices, values are CDR1s
//        Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
//        //keys are CDR3s, values are sequential integer vertices from previous map
//        Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
//        //keys are CDR1s, values are sequential integer vertices from previous map
//        Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
//        System.out.println("Vertex maps made");
//
//        System.out.println("Creating adjacency matrix");
//        //Count how many wells each CDR3 appears in
//        Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
//        //count how many wells each CDR1 appears in
//        Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
//        //add edges, where weights are number of wells the peptides share in common.
//        //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
//        Map<Integer, Integer> wellNCDR3s = null;
//        Map<Integer, Integer> wellNCDR1s = null;
//        double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
//        countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
//                cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
//        System.out.println("Matrix created");
//
//        System.out.println("Creating graph");
//        SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
//                new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//
//        SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
//        List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
//        graphGenerator.first(cdr3Vertices);
//        List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
//        graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
//        graphGenerator.weights(weights);
//        graphGenerator.generateGraph(graph);
//        System.out.println("Graph created");
//
//        System.out.println("Removing edges outside of weight thresholds");
//        filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
//        System.out.println("Over- and under-weight edges set to 0.0");
//
//        System.out.println("Finding first maximum weighted matching");
//        MaximumWeightBipartiteMatching firstMaxWeightMatching =
//                new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
//        MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
//        System.out.println("First maximum weighted matching found");
//
//
//        //first processing run
//        Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
//        Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
//        DefaultWeightedEdge e;
//        while(weightIter.hasNext()){
//            e = weightIter.next();
////            if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
////                continue;
////            }
//            Integer source = graph.getEdgeSource(e);
//            Integer target = graph.getEdgeTarget(e);
//            firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
//        }
//        System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
//
//        System.out.println("Removing edges from first maximum weighted matching");
//        //zero out the edge weights in the matching
//        weightIter = graphMatching.iterator();
//        while(weightIter.hasNext()){
//            graph.removeEdge(weightIter.next());
//        }
//        System.out.println("Edges removed");
//
//        //Generate a new matching
//        System.out.println("Finding second maximum weighted matching");
//        MaximumWeightBipartiteMatching secondMaxWeightMatching =
//                new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
//        graphMatching = secondMaxWeightMatching.getMatching();
//        System.out.println("Second maximum weighted matching found");
//
//
//        //second processing run
//        Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
//        weightIter = graphMatching.iterator();
//        while(weightIter.hasNext()){
//            e = weightIter.next();
////            if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
////                continue;
////            }
//            Integer source = graph.getEdgeSource(e);
////            if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
////                continue;
////            }
//            Integer target = graph.getEdgeTarget(e);
//            secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
//        }
//        System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
//
//        System.out.println("Mapping first pass CDR3 alpha/beta pairs");
//        //get linked map for first matching attempt
//        Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
//        for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
//            if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
//                continue;
//            }
//            Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
//            if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
//                continue;
//            }
//            firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
//            firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
//        }
//        System.out.println("First pass CDR3 alpha/beta pairs mapped");
//
//        System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
//        System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
//        //Look for matches that simply swapped already-matched alpha and beta CDR3s
//        Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
//        for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
//            if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
//                continue;
//            }
//            Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
//            if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
//                continue;
//            }
//            if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
//                if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
//                    dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
//                    dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
//                }
//            }
//        }
//        System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
//
//        Instant stop = Instant.now();
//        //results for first map
//        System.out.println("RESULTS FOR FIRST PASS MATCHING");
//        List<List<String>> allResults = new ArrayList<>();
//        Integer trueCount = 0;
//        Iterator iter = firstMatchesMap.keySet().iterator();
//
//        while(iter.hasNext()){
//            Boolean proven = false;
//            List<String> tmp = new ArrayList<>();
//            tmp.add(iter.next().toString());
//            tmp.add(iter.next().toString());
//            tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
//            tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
//            if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
//                if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
//                    proven = true;
//                }
//            }
//            else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
//                if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
//                    proven = true;
//                }
//            }
//            tmp.add(proven.toString());
//            allResults.add(tmp);
//            if(proven){
//                trueCount++;
//            }
//        }
//
//        List<String> comments = new ArrayList<>();
//        comments.add("Plate size: " + samplePlate.getSize() + " wells");
//        comments.add("Previous pairs found: " + previousMatches.size());
//        comments.add("CDR1 matches attempted: " + allResults.size());
//        double attemptRate = (double) allResults.size() / previousMatches.size();
//        comments.add("Matching attempt rate: " + attemptRate);
//        comments.add("Number of correct matches: " + trueCount);
//        double correctRate = (double) trueCount / allResults.size();
//        comments.add("Correct matching rate: " + correctRate);
//        NumberFormat nf = NumberFormat.getInstance(Locale.US);
//        Duration time = Duration.between(start, stop);
//        time = time.plus(previousTime);
//        comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
//        for(String s: comments){
//            System.out.println(s);
//        }
//
//
//
//        List<String> headers = new ArrayList<>();
//        headers.add("CDR3 alpha");
//        headers.add("CDR3 beta");
//        headers.add("first matched CDR1");
//        headers.add("second matched CDR1");
//        headers.add("Correct match?");
//
//        MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
//                comments, headers, allResults, dualMatchesMap, time);
//
//        //results for dual map
//        System.out.println("RESULTS FOR SECOND PASS MATCHING");
//        allResults = new ArrayList<>();
//        trueCount = 0;
//        iter = dualMatchesMap.keySet().iterator();
//        while(iter.hasNext()){
//            Boolean proven = false;
//            List<String> tmp = new ArrayList<>();
//            tmp.add(iter.next().toString());
//            tmp.add(iter.next().toString());
//            tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
//            tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
//            if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
//                if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
//                    proven = true;
//                }
//            }
//            else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
//                if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
//                    proven = true;
//                }
//            }
//            tmp.add(proven.toString());
//            allResults.add(tmp);
//            if(proven){
//                trueCount++;
//            }
//        }
//
//        comments = new ArrayList<>();
//        comments.add("Plate size: " + samplePlate.getSize() + " wells");
//        comments.add("Previous pairs found: " + previousMatches.size());
//        comments.add("High overlap threshold: " + highThreshold);
//        comments.add("Low overlap threshold: " + lowThreshold);
//        comments.add("CDR1 matches attempted: " + allResults.size());
//        attemptRate = (double) allResults.size() / previousMatches.size();
//        comments.add("Matching attempt rate: " + attemptRate);
//        comments.add("Number of correct matches: " + trueCount);
//        correctRate = (double) trueCount / allResults.size();
//        comments.add("Correct matching rate: " + correctRate);
//        comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
//
//        for(String s: comments){
//            System.out.println(s);
//        }
//
//        System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
//        MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
//                allResults, dualMatchesMap, time);
//        MatchingResult[] output = {firstTest, dualTest};
//        return output;
//    }

    //Drops every entry of wellMap whose value (the occupancy) is greater than high or less than low.
    //Entries whose value lies in the inclusive range [low, high] are kept.
    //The map is modified in place; nothing is returned.
    public static void filterByOccupancyThresholds(Map<Integer, Integer> wellMap, int low, int high){
        //Gather the offending keys first and delete afterwards, so the map is never
        //structurally modified while it is being iterated.
        List<Integer> rejectedKeys = new ArrayList<>();
        for (Map.Entry<Integer, Integer> entry : wellMap.entrySet()) {
            int occupancy = entry.getValue();
            boolean withinThresholds = (occupancy >= low) && (occupancy <= high);
            if (!withinThresholds) {
                rejectedKeys.add(entry.getKey());
            }
        }
        rejectedKeys.forEach(wellMap::remove);
    }

    //Walks every well of the plate and, per well:
    //  - adds 1 to rowSequenceCounts for each row sequence in the well that is a key of allRowSequences
    //  - adds 1 to columnSequenceCounts for each column sequence in the well that is a key of allColumnSequences
    //  - adds 1.0 to the weights cell of every (row sequence, column sequence) pair from the well
    //    where both sequences passed the checks above
    //The weights cell is addressed as [row vertex][column vertex - rowSequenceToVertexMap.size()],
    //so column vertex numbers are presumably expected to start right after the row vertex
    //numbers (confirm against the caller).
    private static void countSequencesAndFillMatrix(Plate samplePlate,
                                                    Map<Integer,Integer> allRowSequences,
                                                    Map<Integer,Integer> allColumnSequences,
                                                    Map<Integer,Integer> rowSequenceToVertexMap,
                                                    Map<Integer,Integer> columnSequenceToVertexMap,
                                                    int[] rowSequenceIndices,
                                                    int[] colSequenceIndices,
                                                    Map<Integer, Integer> rowSequenceCounts,
                                                    Map<Integer,Integer> columnSequenceCounts,
                                                    double[][] weights){
        final int columnVertexOffset = rowSequenceToVertexMap.size();
        final int wellCount = samplePlate.getSize();
        for (int well = 0; well < wellCount; well++) {
            //Occupancy tally for the row sequences seen in this well
            Set<Integer> rowsInWell = samplePlate.assayWellsSequenceS(well, rowSequenceIndices).keySet();
            for (Integer rowSeq : rowsInWell) {
                if (!allRowSequences.containsKey(rowSeq)) {
                    continue;
                }
                rowSequenceCounts.merge(rowSeq, 1, Integer::sum);
            }

            //Occupancy tally for the column sequences seen in this well
            Set<Integer> colsInWell = samplePlate.assayWellsSequenceS(well, colSequenceIndices).keySet();
            for (Integer colSeq : colsInWell) {
                if (!allColumnSequences.containsKey(colSeq)) {
                    continue;
                }
                columnSequenceCounts.merge(colSeq, 1, Integer::sum);
            }

            //One more shared well for every row/column pair present together in this well
            for (Integer rowSeq : rowsInWell) {
                if (!allRowSequences.containsKey(rowSeq)) {
                    continue;
                }
                for (Integer colSeq : colsInWell) {
                    if (!allColumnSequences.containsKey(colSeq)) {
                        continue;
                    }
                    int rowIndex = rowSequenceToVertexMap.get(rowSeq);
                    int columnIndex = columnSequenceToVertexMap.get(colSeq) - columnVertexOffset;
                    weights[rowIndex][columnIndex] += 1.0;
                }
            }
        }
    }

    //Builds a lookup from one sequence of each cell to another sequence of the same cell.
    //Each element of cells is one cell's array of sequences; keySequenceIndex and valueSequenceIndex
    //select which array positions supply the key and the value.
    //When several cells share the same key sequence, the cell that comes last in the list
    //determines the stored value.
    private static Map<Integer, Integer> makeSequenceToSequenceMap(List<Integer[]> cells, int keySequenceIndex,
                                                                   int valueSequenceIndex){
        Map<Integer, Integer> lookup = new HashMap<>();
        cells.forEach(cell -> lookup.put(cell[keySequenceIndex], cell[valueSequenceIndex]));
        return lookup;
    }

    //Assigns consecutive vertex numbers, beginning at startValue, to the keys of sequences
    //in the order the key set iterates them. Returns a map of vertex number -> sequence.
    //The values of sequences are not read.
    private static Map<Integer, Integer> makeVertexToSequenceMap(Map<Integer, Integer> sequences, Integer startValue) {
        //LinkedHashMap so the result iterates in the order the vertices were assigned
        Map<Integer, Integer> vertexToSequence = new LinkedHashMap<>();
        int offset = 0;
        for (Integer sequence : sequences.keySet()) {
            vertexToSequence.put(startValue + offset, sequence);
            offset++;
        }
        return vertexToSequence;
    }

    //Returns a new HashMap with the roles of key and value swapped: every value of the given map
    //becomes a key that maps to its former key. The given map is not modified.
    //If the same value occurs under several keys, only the key visited last is kept for it.
    private static Map<Integer, Integer> invertVertexMap(Map<Integer, Integer> map) {
        Map<Integer, Integer> swapped = new HashMap<>();
        map.forEach((originalKey, originalValue) -> swapped.put(originalValue, originalKey));
        return swapped;
    }

}