2 Commits

8 changed files with 719 additions and 656 deletions

View File

@@ -0,0 +1,14 @@
//main class. Only job is to choose which interface to use.
public class BiGpairSEQ {

    /**
     * Program entry point.
     *
     * With no arguments the interactive menu is started. With arguments a notice is printed,
     * because the command line interface is still being re-implemented.
     *
     * The method must be public: the Java launcher only recognises a
     * {@code public static void main(String[])} entry point, so a private main cannot be started.
     *
     * @param args command line arguments passed to the program
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            InteractiveInterface.startInteractive();
            return;
        }
        //This will be uncommented when command line arguments are fixed.
        //CommandLineInterface.startCLI(args);
        System.out.println("Command line arguments are still being re-implemented.");
    }
}

View File

@@ -0,0 +1,289 @@
import org.apache.commons.cli.*;
//Class for parsing options passed to program from command line
public class CommandLineInterface {

    /**
     * Parses the command line and runs the selected mode.
     *
     * Exactly one of the mode flags (-cells, -plates, -graph, -match) must be given.
     * Any problem with the arguments (unknown option, missing value, value that is not a
     * number) is reported on standard error as "Parsing failed. Reason: ..." and the method
     * returns without doing any work.
     *
     * @param args the raw command line arguments
     */
    public static void startCLI(String[] args) {
        Options mainOptions = buildOptions();
        CommandLineParser parser = new DefaultParser();
        try {
            CommandLine line = parser.parse(mainOptions, args);
            if (line.hasOption("match")) {
                runMatch(line);
            }
            else if (line.hasOption("cells")) {
                runMakeCells(line);
            }
            else if (line.hasOption("plates")) {
                runMakePlates(line);
            }
            else if (line.hasOption("graph")) {
                //The flag is accepted, but there is no command line code path for it yet.
                //Say so instead of silently doing nothing.
                System.err.println("Making graphs from the command line is not implemented yet.");
            }
        }
        catch (ParseException exp) {
            System.err.println("Parsing failed. Reason: " + exp.getMessage());
        }
    }

    /**
     * Builds the full set of options understood by the program.
     *
     * @return the options for all modes, combined in one set
     */
    private static Options buildOptions() {
        //These command line options are a big mess
        //Really, I don't think command line tools are expected to work in this many different modes
        //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
        //should be three separate programs.
        //There might be a way to do it with option parameters?
        Options mainOptions = new Options();

        //main options set: the mode flags are mutually exclusive and one is required
        OptionGroup mainGroup = new OptionGroup();
        mainGroup.addOption(modeOption("cells", "make-cells", "Makes a file of distinct cells"));
        mainGroup.addOption(modeOption("plates", "make-plates", "Makes a sample plate file"));
        //short name was misspelled "graoh", which made "-graph" an unknown option
        mainGroup.addOption(modeOption("graph", "make-graph", "Makes a graph and data file"));
        mainGroup.addOption(modeOption("match", "match-cdr3",
                "Match CDR3s. Requires a cell sample file and any number of plate files."));
        mainGroup.setRequired(true);
        mainOptions.addOptionGroup(mainGroup);

        //shared by the modes that write a file
        mainOptions.addOption(valueOption("o", "output-file", "filename", "Name of output file"));

        //cell generation options
        mainOptions.addOption(valueOption("nc", "num-cells", "number",
                "The number of distinct cells to generate"));
        mainOptions.addOption(valueOption("d", "peptide-diversity-factor", "number",
                "Number of distinct CDR3s for every CDR1"));

        //plate generation options
        //(registered once; the match mode no longer reads the cell file)
        mainOptions.addOption(valueOption("c", "cell-file", "file",
                "The cell sample file used for filling wells"));
        mainOptions.addOption(valueOption("w", "num-wells", "number",
                "The number of wells on each plate"));
        mainOptions.addOption(valueOption("np", "num-plates", "number",
                "The number of plate files to output"));
        mainOptions.addOption(valueOption("err", "drop-out-rate", "number",
                "Well drop-out rate. (Probability between 0 and 1)"));
        mainOptions.addOption(Option.builder("t")
                .longOpt("t-cells-per-well")
                .hasArgs()
                .argName("number 1, number 2, ...")
                .desc("Number of T cells per well for each plate section")
                .build());

        //different distributions, mutually exclusive
        OptionGroup plateDistributions = new OptionGroup();
        plateDistributions.addOption(Option.builder("exponential")
                .desc("Sample from distinct cells with exponential frequency distribution")
                .build());
        plateDistributions.addOption(Option.builder("gaussian")
                .desc("Sample from distinct cells with gaussian frequency distribution")
                .build());
        plateDistributions.addOption(Option.builder("poisson")
                .desc("Sample from distinct cells with poisson frequency distribution")
                .build());
        mainOptions.addOptionGroup(plateDistributions);
        mainOptions.addOption(Option.builder("stddev")
                .desc("Standard deviation for gaussian distribution")
                .hasArg()
                .argName("number")
                .build());
        mainOptions.addOption(Option.builder("lambda")
                .desc("Lambda for exponential distribution")
                .hasArg()
                .argName("number")
                .build());

        //matching options
        //"g" is read by the match mode but was never registered, so the graph file was always null
        mainOptions.addOption(valueOption("g", "graph-file", "file",
                "The graph and data file to be used for matching"));
        mainOptions.addOption(valueOption("low", "low-threshold", "number",
                "Sets the minimum occupancy overlap to attempt matching"));
        mainOptions.addOption(valueOption("high", "high-threshold", "number",
                "Sets the maximum occupancy overlap to attempt matching"));
        mainOptions.addOption(valueOption("occdiff", "occupancy-difference", "Number",
                "Maximum difference in alpha/beta occupancy to attempt matching"));
        mainOptions.addOption(valueOption("ovper", "overlap-percent", "Percent",
                "Minimum overlap percent to attempt matching (0 -100)"));
        //Still accepted so existing command lines keep parsing; see runMatch.
        mainOptions.addOption(Option.builder("p")
                .longOpt("plate-files")
                .hasArgs()
                .desc("Plate files to match")
                .build());
        return mainOptions;
    }

    //Builds a flag (no argument) with a short and a long name.
    private static Option modeOption(String opt, String longOpt, String description) {
        return Option.builder(opt)
                .longOpt(longOpt)
                .desc(description)
                .build();
    }

    //Builds an option that takes exactly one argument.
    private static Option valueOption(String opt, String longOpt, String argName, String description) {
        return Option.builder(opt)
                .longOpt(longOpt)
                .hasArg()
                .argName(argName)
                .desc(description)
                .build();
    }

    //Handles -match: reads the graph file and the four matching parameters.
    private static void runMatch(CommandLine line) throws ParseException {
        String graphFile = requireValue(line, "g");
        Integer lowThreshold = requireInteger(line, "low");
        Integer highThreshold = requireInteger(line, "high");
        Integer occupancyDifference = requireInteger(line, "occdiff");
        Integer overlapPercent = requireInteger(line, "ovper");
        //The old code looped over the -p plate files but never used the loop variable
        //(matchCDR3s takes no plate argument), and crashed when -p was absent.
        //NOTE(review): presumably the graph file now stands in for the plate - confirm.
        matchCDR3s(graphFile, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
    }

    //Handles -cells: writes one file of distinct cells.
    private static void runMakeCells(CommandLine line) throws ParseException {
        String filename = requireValue(line, "o");
        Integer numDistCells = requireInteger(line, "nc");
        Integer freq = requireInteger(line, "d");
        makeCells(filename, numDistCells, freq);
    }

    //Handles -plates: writes numbered plate files (prefix1, prefix2, ...) using the chosen distribution.
    private static void runMakePlates(CommandLine line) throws ParseException {
        String cellFile = requireValue(line, "c");
        String filenamePrefix = requireValue(line, "o");
        Integer numWellsOnPlate = requireInteger(line, "w");
        Integer numPlatesToMake = requireInteger(line, "np");
        String[] concentrationStrings = line.getOptionValues("t");
        if (concentrationStrings == null) {
            throw new ParseException("Missing required option: -t");
        }
        Integer[] concentrationsToUse = new Integer[concentrationStrings.length];
        for (int i = 0; i < concentrationStrings.length; i++) {
            concentrationsToUse[i] = parseInteger("t", concentrationStrings[i]);
        }
        Double dropOutRate = requireDouble(line, "err");

        if (line.hasOption("exponential")) {
            Double lambda = requireDouble(line, "lambda");
            for (int i = 1; i <= numPlatesToMake; i++) {
                makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
                        concentrationsToUse, dropOutRate);
            }
        }
        else if (line.hasOption("gaussian")) {
            //the option is registered as "stddev"; the old lookup of "std-dev" always returned null
            Double stdDev = requireDouble(line, "stddev");
            for (int i = 1; i <= numPlatesToMake; i++) {
                makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
                        concentrationsToUse, dropOutRate);
            }
        }
        else if (line.hasOption("poisson")) {
            for (int i = 1; i <= numPlatesToMake; i++) {
                makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
                        concentrationsToUse, dropOutRate);
            }
        }
        else {
            //previously this case silently produced no output
            throw new ParseException(
                    "No distribution selected. Use one of -exponential, -gaussian, -poisson");
        }
    }

    //Returns the value of an option, or fails with a parse error when the option was not given.
    private static String requireValue(CommandLine line, String opt) throws ParseException {
        String value = line.getOptionValue(opt);
        if (value == null) {
            throw new ParseException("Missing required option: -" + opt);
        }
        return value;
    }

    //Returns the value of an option as an Integer, with parse errors instead of runtime exceptions.
    private static Integer requireInteger(CommandLine line, String opt) throws ParseException {
        return parseInteger(opt, requireValue(line, opt));
    }

    //Converts one option value to an Integer; opt is only used for the error message.
    private static Integer parseInteger(String opt, String value) throws ParseException {
        try {
            return Integer.valueOf(value);
        }
        catch (NumberFormatException e) {
            ParseException failure =
                    new ParseException("Option -" + opt + " expects an integer but got: " + value);
            failure.initCause(e);
            throw failure;
        }
    }

    //Returns the value of an option as a Double, with parse errors instead of runtime exceptions.
    private static Double requireDouble(CommandLine line, String opt) throws ParseException {
        String value = requireValue(line, opt);
        try {
            return Double.valueOf(value);
        }
        catch (NumberFormatException e) {
            ParseException failure =
                    new ParseException("Option -" + opt + " expects a number but got: " + value);
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Generates a cell sample and writes it to a file. For calling from command line.
     *
     * @param filename name of the cell file to write
     * @param numCells number of cells passed to Simulator.generateCellSample
     * @param cdr1Freq CDR1 frequency passed to Simulator.generateCellSample
     */
    public static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
        CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
        CellFileWriter writer = new CellFileWriter(filename, sample);
        writer.writeCellsToFile();
    }

    /**
     * Fills a plate with Plate.fillWellsExponential and writes it to a plate file.
     *
     * @param cellFile       cell sample file to read
     * @param filename       name of the plate file to write
     * @param lambda         lambda passed to fillWellsExponential
     * @param numWells       number of wells on the plate
     * @param concentrations per-section values passed to the Plate constructor
     * @param dropOutRate    drop-out rate passed to the Plate constructor
     */
    public static void makePlateExp(String cellFile, String filename, Double lambda,
                                    Integer numWells, Integer[] concentrations, Double dropOutRate){
        CellFileReader cellReader = new CellFileReader(cellFile);
        Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
        samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        writer.writePlateFile();
    }

    //Same as makePlate, with the standard deviation set to the square root of the cell count.
    private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
                                         Integer[] concentrations, Double dropOutRate){
        CellFileReader cellReader = new CellFileReader(cellFile);
        double stdDev = Math.sqrt(cellReader.getCellCount());
        writeFilledPlate(cellReader, filename, stdDev, numWells, concentrations, dropOutRate);
    }

    //Fills a plate with Plate.fillWells using the given standard deviation and writes it to a file.
    private static void makePlate(String cellFile, String filename, Double stdDev,
                                  Integer numWells, Integer[] concentrations, Double dropOutRate){
        CellFileReader cellReader = new CellFileReader(cellFile);
        writeFilledPlate(cellReader, filename, stdDev, numWells, concentrations, dropOutRate);
    }

    //Shared tail of makePlate and makePlatePoisson: build the plate, fill it, write it.
    private static void writeFilledPlate(CellFileReader cellReader, String filename, double stdDev,
                                         Integer numWells, Integer[] concentrations, Double dropOutRate){
        Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
        samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
        PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
        writer.writePlateFile();
    }

    //Placeholder for matching from the command line. Intentionally empty: nothing is implemented yet.
    private static void matchCDR3s(String graphFile, Integer lowThreshold, Integer highThreshold,
                                   Integer occupancyDifference, Integer overlapPercent) {
    }
}

View File

@@ -1,260 +1,17 @@
import org.apache.commons.cli.*;
import java.io.IOException;
import java.util.List;
import java.util.Scanner;
import java.util.InputMismatchException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//
public class UserInterface {
public class InteractiveInterface {
final static Scanner sc = new Scanner(System.in);
static int input;
static boolean quit = false;
public static void main(String[] args) {
//for now, commenting out all the command line argument stuff.
// Refactoring to output files of graphs, so it would all need to change anyway.
public static void startInteractive() {
// if(args.length != 0){
// //These command line options are a big mess
// //Really, I don't think command line tools are expected to work in this many different modes
// //making cells, making plates, and matching are the sort of thing that UNIX philosophy would say
// //should be three separate programs.
// //There might be a way to do it with option parameters?
//
// Options mainOptions = new Options();
// Option makeCells = Option.builder("cells")
// .longOpt("make-cells")
// .desc("Makes a file of distinct cells")
// .build();
// Option makePlate = Option.builder("plates")
// .longOpt("make-plates")
// .desc("Makes a sample plate file")
// .build();
// Option matchCDR3 = Option.builder("match")
// .longOpt("match-cdr3")
// .desc("Match CDR3s. Requires a cell sample file and any number of plate files.")
// .build();
// OptionGroup mainGroup = new OptionGroup();
// mainGroup.addOption(makeCells);
// mainGroup.addOption(makePlate);
// mainGroup.addOption(matchCDR3);
// mainGroup.setRequired(true);
// mainOptions.addOptionGroup(mainGroup);
//
// //Reuse clones of this for other options groups, rather than making it lots of times
// Option outputFile = Option.builder("o")
// .longOpt("output-file")
// .hasArg()
// .argName("filename")
// .desc("Name of output file")
// .build();
// mainOptions.addOption(outputFile);
//
// //Options cellOptions = new Options();
// Option numCells = Option.builder("nc")
// .longOpt("num-cells")
// .desc("The number of distinct cells to generate")
// .hasArg()
// .argName("number")
// .build();
// mainOptions.addOption(numCells);
// Option cdr1Freq = Option.builder("d")
// .longOpt("peptide-diversity-factor")
// .hasArg()
// .argName("number")
// .desc("Number of distinct CDR3s for every CDR1")
// .build();
// mainOptions.addOption(cdr1Freq);
// //Option cellOutput = (Option) outputFile.clone();
// //cellOutput.setRequired(true);
// //mainOptions.addOption(cellOutput);
//
// //Options plateOptions = new Options();
// Option inputCells = Option.builder("c")
// .longOpt("cell-file")
// .hasArg()
// .argName("file")
// .desc("The cell sample file used for filling wells")
// .build();
// mainOptions.addOption(inputCells);
// Option numWells = Option.builder("w")
// .longOpt("num-wells")
// .hasArg()
// .argName("number")
// .desc("The number of wells on each plate")
// .build();
// mainOptions.addOption(numWells);
// Option numPlates = Option.builder("np")
// .longOpt("num-plates")
// .hasArg()
// .argName("number")
// .desc("The number of plate files to output")
// .build();
// mainOptions.addOption(numPlates);
// //Option plateOutput = (Option) outputFile.clone();
// //plateOutput.setRequired(true);
// //plateOutput.setDescription("Prefix for plate output filenames");
// //mainOptions.addOption(plateOutput);
// Option plateErr = Option.builder("err")
// .longOpt("drop-out-rate")
// .hasArg()
// .argName("number")
// .desc("Well drop-out rate. (Probability between 0 and 1)")
// .build();
// mainOptions.addOption(plateErr);
// Option plateConcentrations = Option.builder("t")
// .longOpt("t-cells-per-well")
// .hasArgs()
// .argName("number 1, number 2, ...")
// .desc("Number of T cells per well for each plate section")
// .build();
// mainOptions.addOption(plateConcentrations);
//
////different distributions, mutually exclusive
// OptionGroup plateDistributions = new OptionGroup();
// Option plateExp = Option.builder("exponential")
// .desc("Sample from distinct cells with exponential frequency distribution")
// .build();
// plateDistributions.addOption(plateExp);
// Option plateGaussian = Option.builder("gaussian")
// .desc("Sample from distinct cells with gaussain frequency distribution")
// .build();
// plateDistributions.addOption(plateGaussian);
// Option platePoisson = Option.builder("poisson")
// .desc("Sample from distinct cells with poisson frequency distribution")
// .build();
// plateDistributions.addOption(platePoisson);
// mainOptions.addOptionGroup(plateDistributions);
//
// Option plateStdDev = Option.builder("stddev")
// .desc("Standard deviation for gaussian distribution")
// .hasArg()
// .argName("number")
// .build();
// mainOptions.addOption(plateStdDev);
//
// Option plateLambda = Option.builder("lambda")
// .desc("Lambda for exponential distribution")
// .hasArg()
// .argName("number")
// .build();
// mainOptions.addOption(plateLambda);
//
//
//
////
//// String cellFile, String filename, Double stdDev,
//// Integer numWells, Integer numSections,
//// Integer[] concentrations, Double dropOutRate
////
//
// //Options matchOptions = new Options();
// inputCells.setDescription("The cell sample file to be used for matching.");
// mainOptions.addOption(inputCells);
// Option lowThresh = Option.builder("low")
// .longOpt("low-threshold")
// .hasArg()
// .argName("number")
// .desc("Sets the minimum occupancy overlap to attempt matching")
// .build();
// mainOptions.addOption(lowThresh);
// Option highThresh = Option.builder("high")
// .longOpt("high-threshold")
// .hasArg()
// .argName("number")
// .desc("Sets the maximum occupancy overlap to attempt matching")
// .build();
// mainOptions.addOption(highThresh);
// Option occDiff = Option.builder("occdiff")
// .longOpt("occupancy-difference")
// .hasArg()
// .argName("Number")
// .desc("Maximum difference in alpha/beta occupancy to attempt matching")
// .build();
// mainOptions.addOption(occDiff);
// Option overlapPer = Option.builder("ovper")
// .longOpt("overlap-percent")
// .hasArg()
// .argName("Percent")
// .desc("Minimum overlap percent to attempt matching (0 -100)")
// .build();
// mainOptions.addOption(overlapPer);
// Option inputPlates = Option.builder("p")
// .longOpt("plate-files")
// .hasArgs()
// .desc("Plate files to match")
// .build();
// mainOptions.addOption(inputPlates);
//
//
//
// CommandLineParser parser = new DefaultParser();
// try {
// CommandLine line = parser.parse(mainOptions, args);
// if(line.hasOption("match")){
// //line = parser.parse(mainOptions, args);
// String cellFile = line.getOptionValue("c");
// Integer lowThreshold = Integer.valueOf(line.getOptionValue(lowThresh));
// Integer highThreshold = Integer.valueOf(line.getOptionValue(highThresh));
// Integer occupancyDifference = Integer.valueOf(line.getOptionValue(occDiff));
// Integer overlapPercent = Integer.valueOf(line.getOptionValue(overlapPer));
// for(String plate: line.getOptionValues("p")) {
// matchCDR3s(cellFile, plate, lowThreshold, highThreshold, occupancyDifference, overlapPercent);
// }
// }
// else if(line.hasOption("cells")){
// //line = parser.parse(mainOptions, args);
// String filename = line.getOptionValue("o");
// Integer numDistCells = Integer.valueOf(line.getOptionValue("nc"));
// Integer freq = Integer.valueOf(line.getOptionValue("d"));
// makeCells(filename, numDistCells, freq);
// }
// else if(line.hasOption("plates")){
// //line = parser.parse(mainOptions, args);
// String cellFile = line.getOptionValue("c");
// String filenamePrefix = line.getOptionValue("o");
// Integer numWellsOnPlate = Integer.valueOf(line.getOptionValue("w"));
// Integer numPlatesToMake = Integer.valueOf(line.getOptionValue("np"));
// String[] concentrationsToUseString = line.getOptionValues("t");
// Integer numSections = concentrationsToUseString.length;
//
// Integer[] concentrationsToUse = new Integer[numSections];
// for(int i = 0; i <numSections; i++){
// concentrationsToUse[i] = Integer.valueOf(concentrationsToUseString[i]);
// }
// Double dropOutRate = Double.valueOf(line.getOptionValue("err"));
// if(line.hasOption("exponential")){
// Double lambda = Double.valueOf(line.getOptionValue("lambda"));
// for(int i = 1; i <= numPlatesToMake; i++){
// makePlateExp(cellFile, filenamePrefix + i, lambda, numWellsOnPlate,
// concentrationsToUse,dropOutRate);
// }
// }
// else if(line.hasOption("gaussian")){
// Double stdDev = Double.valueOf(line.getOptionValue("std-dev"));
// for(int i = 1; i <= numPlatesToMake; i++){
// makePlate(cellFile, filenamePrefix + i, stdDev, numWellsOnPlate,
// concentrationsToUse,dropOutRate);
// }
//
// }
// else if(line.hasOption("poisson")){
// for(int i = 1; i <= numPlatesToMake; i++){
// makePlatePoisson(cellFile, filenamePrefix + i, numWellsOnPlate,
// concentrationsToUse,dropOutRate);
// }
// }
// }
// }
// catch (ParseException exp) {
// System.err.println("Parsing failed. Reason: " + exp.getMessage());
// }
// }
// else {
while (!quit) {
System.out.println();
System.out.println("--------BiGPairSEQ SIMULATOR--------");
@@ -274,10 +31,10 @@ public class UserInterface {
try {
input = sc.nextInt();
switch (input) {
case 1 -> makeCells();
case 2 -> makePlate();
case 3 -> makeCDR3Graph();
case 4 -> matchCDR3s();
case 1 -> makeCellsInteractive();
case 2 -> makePlateInteractive();
case 3 -> makeCDR3GraphInteractive();
case 4 -> matchCDR3sInteractive();
//case 6 -> matchCellsCDR1();
case 9 -> acknowledge();
case 0 -> quit = true;
@@ -289,10 +46,9 @@ public class UserInterface {
}
}
sc.close();
// }
}
private static void makeCells() {
private static void makeCellsInteractive() {
String filename = null;
Integer numCells = 0;
Integer cdr1Freq = 1;
@@ -322,43 +78,8 @@ public class UserInterface {
System.gc();
}
// //for calling from command line
// private static void makeCells(String filename, Integer numCells, Integer cdr1Freq){
// CellSample sample = Simulator.generateCellSample(numCells, cdr1Freq);
// CellFileWriter writer = new CellFileWriter(filename, sample);
// writer.writeCellsToFile();
// }
//
// private static void makePlateExp(String cellFile, String filename, Double lambda,
// Integer numWells, Integer[] concentrations, Double dropOutRate){
// CellFileReader cellReader = new CellFileReader(cellFile);
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
// samplePlate.fillWellsExponential(cellReader.getFilename(), cellReader.getCells(), lambda);
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
// writer.writePlateFile();
// }
//
// private static void makePlatePoisson(String cellFile, String filename, Integer numWells,
// Integer[] concentrations, Double dropOutRate){
// CellFileReader cellReader = new CellFileReader(cellFile);
// Double stdDev = Math.sqrt(cellReader.getCellCount());
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
// samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
// writer.writePlateFile();
// }
//
// private static void makePlate(String cellFile, String filename, Double stdDev,
// Integer numWells, Integer[] concentrations, Double dropOutRate){
// CellFileReader cellReader = new CellFileReader(cellFile);
// Plate samplePlate = new Plate(numWells, dropOutRate, concentrations);
// samplePlate.fillWells(cellReader.getFilename(), cellReader.getCells(), stdDev);
// PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
// writer.writePlateFile();
// }
//Output a CSV of sample plate
private static void makePlate() {
private static void makePlateInteractive() {
String cellFile = null;
String filename = null;
Double stdDev = 0.0;
@@ -471,7 +192,7 @@ public class UserInterface {
}
//Output serialized binary of GraphAndMapData object
private static void makeCDR3Graph() {
private static void makeCDR3GraphInteractive() {
String filename = null;
String cellFile = null;
String plateFile = null;
@@ -502,7 +223,7 @@ public class UserInterface {
System.out.println("No cell sample found.");
System.out.println("Returning to main menu.");
}
else if(plate.getWells().size() == 0 || plate.getConcentrations().length == 0){
else if(plate.getWells().size() == 0 || plate.getPopulations().length == 0){
System.out.println("No sample plate found.");
System.out.println("Returning to main menu.");
}
@@ -520,7 +241,7 @@ public class UserInterface {
}
//Simulate matching and output CSV file of results
private static void matchCDR3s() throws IOException {
private static void matchCDR3sInteractive() throws IOException {
String filename = null;
String dataFilename = null;
Integer lowThreshold = 0;

View File

@@ -7,8 +7,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.regex.Pattern;
public class MatchingFileWriter {

View File

@@ -1,18 +1,42 @@
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class MatchingResult {
private String sourceFile;
private List<String> comments;
private List<String> headers;
private List<List<String>> allResults;
private Map<Integer, Integer> matchMap;
private Duration time;
private final String sourceFile;
private final Map<String, String> metadata;
private final List<String> comments;
private final List<String> headers;
private final List<List<String>> allResults;
private final Map<Integer, Integer> matchMap;
private final Duration time;
public MatchingResult(String sourceFileName, List<String> comments, List<String> headers, List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
public MatchingResult(String sourceFileName, Map<String, String> metadata, List<String> headers,
List<List<String>> allResults, Map<Integer, Integer>matchMap, Duration time){
this.sourceFile = sourceFileName;
this.comments = comments;
/*
* POSSIBLE KEYS FOR METADATA MAP ARE:
* sample plate filename
* graph filename
* well populations
* total alphas found
* total betas found
* high overlap threshold
* low overlap threshold
* maximum occupancy difference
* minimum overlap percent
* pairing attempt rate
* correct pairing count
* incorrect pairing count
* pairing error rate
* simulation time
*/
this.metadata = metadata;
this.comments = new ArrayList<>();
for (String key : metadata.keySet()) {
comments.add(key +": " + metadata.get(key));
}
this.headers = headers;
this.allResults = allResults;
this.matchMap = matchMap;
@@ -20,6 +44,8 @@ public class MatchingResult {
}
public Map<String, String> getMetadata() {return metadata;}
public List<String> getComments() {
return comments;
}

View File

@@ -1,26 +1,28 @@
import java.util.*;
/*
TODO: Implement exponential distribution using inversion method - DONE
TODO: Implement discrete frequency distributions using Vose's Alias Method
*/
import java.util.*;
public class Plate {
private String sourceFile;
private List<List<Integer[]>> wells;
private Random rand = new Random();
private int size;
private double error;
private Integer[] concentrations;
private Integer[] populations;
private double stdDev;
private double lambda;
boolean exponential = false;
public Plate(int size, double error, Integer[] concentrations) {
public Plate(int size, double error, Integer[] populations) {
this.size = size;
this.error = error;
this.concentrations = concentrations;
this.populations = populations;
wells = new ArrayList<>();
}
@@ -35,9 +37,9 @@ public class Plate {
concentrations.add(w.size());
}
}
this.concentrations = new Integer[concentrations.size()];
for (int i = 0; i < this.concentrations.length; i++) {
this.concentrations[i] = concentrations.get(i);
this.populations = new Integer[concentrations.size()];
for (int i = 0; i < this.populations.length; i++) {
this.populations[i] = concentrations.get(i);
}
}
@@ -45,7 +47,7 @@ public class Plate {
this.lambda = lambda;
exponential = true;
sourceFile = sourceFileName;
int numSections = concentrations.length;
int numSections = populations.length;
int section = 0;
double m;
int n;
@@ -53,7 +55,7 @@ public class Plate {
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < concentrations[section]; j++) {
for (int j = 0; j < populations[section]; j++) {
do {
//inverse transform sampling: for random number u in [0,1), x = log(1-u) / (-lambda)
m = (Math.log10((1 - rand.nextDouble()))/(-lambda)) * Math.sqrt(cells.size());
@@ -84,14 +86,14 @@ public class Plate {
public void fillWells(String sourceFileName, List<Integer[]> cells, double stdDev) {
this.stdDev = stdDev;
sourceFile = sourceFileName;
int numSections = concentrations.length;
int numSections = populations.length;
int section = 0;
double m;
int n;
while (section < numSections){
for (int i = 0; i < (size / numSections); i++) {
List<Integer[]> well = new ArrayList<>();
for (int j = 0; j < concentrations[section]; j++) {
for (int j = 0; j < populations[section]; j++) {
do {
m = (rand.nextGaussian() * stdDev) + (cells.size() / 2);
} while (m >= cells.size() || m < 0);
@@ -110,8 +112,8 @@ public class Plate {
}
}
public Integer[] getConcentrations(){
return concentrations;
public Integer[] getPopulations(){
return populations;
}
public int getSize(){

View File

@@ -7,7 +7,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.regex.Pattern;
public class PlateFileWriter {
private int size;
@@ -18,7 +17,7 @@ public class PlateFileWriter {
private String filename;
private String sourceFileName;
private String[] headers;
private List<Integer> concentrations;
private Integer[] concentrations;
private boolean isExponential = false;
public PlateFileWriter(String filename, Plate plate) {
@@ -37,8 +36,8 @@ public class PlateFileWriter {
}
this.error = plate.getError();
this.wells = plate.getWells();
this.concentrations = Arrays.asList(plate.getConcentrations());
concentrations.sort(Comparator.reverseOrder());
this.concentrations = plate.getPopulations();
Arrays.sort(concentrations);
}
public void writePlateFile(){
@@ -59,7 +58,7 @@ public class PlateFileWriter {
}
}
//this took forever
//this took forever and I don't use it
List<List<String>> rows = new ArrayList<>();
List<String> tmp = new ArrayList<>();
for(int i = 0; i < wellsAsStrings.size(); i++){//List<Integer[]> w: wells){
@@ -73,14 +72,19 @@ public class PlateFileWriter {
}
rows.add(tmp);
}
//build string of well concentrations
StringBuilder concen = new StringBuilder();
for(Integer i: concentrations){
concen.append(i.toString());
concen.append(" ");
}
String concenString = concen.toString();
//get list of well populations
List<Integer> wellPopulations = Arrays.asList(concentrations);
//make string out of populations list
StringBuilder populationsStringBuilder = new StringBuilder();
populationsStringBuilder.append(wellPopulations.remove(0).toString());
for(Integer i: wellPopulations){
populationsStringBuilder.append(", ");
populationsStringBuilder.append(i.toString());
}
String wellPopulationsString = populationsStringBuilder.toString();
//set CSV format
CSVFormat plateFileFormat = CSVFormat.Builder.create()
.setCommentMarker('#')
.build();
@@ -92,7 +96,7 @@ public class PlateFileWriter {
printer.printComment("Each row represents one well on the plate.");
printer.printComment("Plate size: " + size);
printer.printComment("Error rate: " + error);
printer.printComment("Concentrations: " + concenString);
printer.printComment("Well populations: " + wellPopulationsString);
if(isExponential){
printer.printComment("Lambda: " + lambda);
}

View File

@@ -131,10 +131,14 @@ public class Simulator {
Instant stop = Instant.now();
Duration time = Duration.between(start, stop);
//return GraphWithMapData object
return new GraphWithMapData(graph, numWells, samplePlate.getConcentrations(), alphaCount, betaCount,
//create GraphWithMapData object
GraphWithMapData output = new GraphWithMapData(graph, numWells, samplePlate.getPopulations(), alphaCount, betaCount,
distCellsMapAlphaKey, plateVtoAMap, plateVtoBMap, plateAtoVMap,
plateBtoVMap, alphaWellCounts, betaWellCounts, time);
output.setSourceFilename(samplePlate.getSourceFileName());
//return GraphWithMapData object
return output;
}
//match CDR3s.
@@ -233,351 +237,356 @@ public class Simulator {
allResults.add(result);
}
//Metadate comments for CSV file
//Metadata comments for CSV file
int min = Math.min(alphaCount, betaCount);
double attemptRate = (double) (trueCount + falseCount) / min;
BigDecimal attemptRateTrunc = new BigDecimal(attemptRate, mc);
double pairingErrorRate = (double) falseCount / (trueCount + falseCount);
BigDecimal pairingErrorRateTrunc = new BigDecimal(pairingErrorRate, mc);
//get list of well concentrations
List<Integer> wellConcentrations = Arrays.asList(data.getWellConcentrations());
List<Integer> wellPopulations = Arrays.asList(data.getWellConcentrations());
//make string out of concentrations list
StringBuilder concentrationStringBuilder = new StringBuilder();
for(Integer i: wellConcentrations){
concentrationStringBuilder.append(i.toString());
concentrationStringBuilder.append(" ");
StringBuilder populationsStringBuilder = new StringBuilder();
populationsStringBuilder.append(wellPopulations.remove(0).toString());
for(Integer i: wellPopulations){
populationsStringBuilder.append(", ");
populationsStringBuilder.append(i.toString());
}
String concentrationString = concentrationStringBuilder.toString();
List<String> comments = new ArrayList<>();
comments.add("Source Sample Plate filename: " + data.getSourceFilename());
comments.add("Source Graph and Data filename: " + dataFilename);
comments.add("T cell counts in sample plate wells: " + concentrationString);
comments.add("Total alphas found: " + alphaCount);
comments.add("Total betas found: " + betaCount);
comments.add("High overlap threshold: " + highThreshold);
comments.add("Low overlap threshold: " + lowThreshold);
comments.add("Minimum overlap percent: " + minOverlapPercent);
comments.add("Maximum occupancy difference: " + maxOccupancyDifference);
comments.add("Pairing attempt rate: " + attemptRateTrunc);
comments.add("Correct pairings: " + trueCount);
comments.add("Incorrect pairings: " + falseCount);
comments.add("Pairing error rate: " + pairingErrorRateTrunc);
String wellPopulationsString = populationsStringBuilder.toString();
//total simulation time
Duration time = Duration.between(start, stop);
time = time.plus(data.getTime());
comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
Map<String, String> metadata = new LinkedHashMap<>();
metadata.put("sample plate filename", data.getSourceFilename());
metadata.put("graph filename", dataFilename);
metadata.put("well populations", wellPopulationsString);
metadata.put("total alphas found", alphaCount.toString());
metadata.put("total betas found", betaCount.toString());
metadata.put("high overlap threshold", highThreshold.toString());
metadata.put("low overlap threshold", lowThreshold.toString());
metadata.put("maximum occupancy difference", maxOccupancyDifference.toString());
metadata.put("minimum overlap percent", minOverlapPercent.toString());
metadata.put("pairing attempt rate", attemptRateTrunc.toString());
metadata.put("correct pairing count", Integer.toString(trueCount));
metadata.put("incorrect pairing count", Integer.toString(falseCount));
metadata.put("pairing error rate", pairingErrorRateTrunc.toString());
metadata.put("simulation time", nf.format(time.toSeconds()));
MatchingResult output = new MatchingResult(data.getSourceFilename(), metadata, header, allResults, matchMap, time);
if(verbose){
for(String s: comments){
for(String s: output.getComments()){
System.out.println(s);
}
}
return new MatchingResult(data.getSourceFilename(), comments, header, allResults, matchMap, time);
}
//Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
//
//Builds a weighted bipartite graph between the previously paired CDR3s and the CDR1s assayed from
//the sample plate, finds a maximum weight matching, removes the matched edges from the graph, and
//then finds a second maximum weight matching on what is left.
//
//distinctCells: cells used to look up which CDR1 truly belongs to each alpha/beta CDR3
//samplePlate: plate whose wells are assayed for CDR3 and CDR1 sequences
//lowThreshold: lower bound passed to filterByOccupancyThreshold for CDR1 occupancy and edge weight
//highThreshold: upper bound passed to filterByOccupancyThreshold for edge weight
//priorResult: supplies the previous match map (used here as alpha CDR3 -> beta CDR3) and the time
//             already spent, which is added to the time reported by this method
//
//Returns a two element array {first pass result, dual match result}:
//  first pass result - every previously paired alpha/beta CDR3 where both were matched to a CDR1
//                      in the first matching
//  dual match result - only the pairs whose two CDR1 matches swapped between the first and second
//                      matching
//
//NOTE(review): the first pass result now carries firstMatchesMap as its match map. It was previously
//given dualMatchesMap, which does not correspond to the rows it reports - confirm no caller relied
//on that.
public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
                                          Plate samplePlate, Integer lowThreshold,
                                          Integer highThreshold, MatchingResult priorResult){
    Instant start = Instant.now();
    Duration previousTime = priorResult.getTime();
    Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
    int numWells = samplePlate.getSize();
    int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
    int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};

    System.out.println("Making previous match maps");
    Map<Integer, Integer> cdr3AtoBMap = previousMatches;
    Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
    System.out.println("Previous match maps made");

    System.out.println("Making cell maps");
    Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
    Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
    System.out.println("Cell maps made");

    System.out.println("Making well maps");
    Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
    Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
    int CDR3Count = allCDR3s.size();
    System.out.println("all CDR3s count: " + CDR3Count);
    int CDR1Count = allCDR1s.size();
    System.out.println("all CDR1s count: " + CDR1Count);
    System.out.println("Well maps made");

    System.out.println("Removing unpaired CDR3s from well maps");
    //a CDR3 is kept only if the prior run paired it, as either the alpha or the beta of a pair
    allCDR3s.keySet().removeIf(cdr3 -> !(cdr3AtoBMap.containsKey(cdr3) || cdr3BtoAMap.containsKey(cdr3)));
    System.out.println("Unpaired CDR3s removed.");
    System.out.println("Remaining CDR3 count: " + allCDR3s.size());

    System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
    filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
    System.out.println("CDR1s removed.");
    System.out.println("Remaining CDR1 count: " + allCDR1s.size());

    System.out.println("Making vertex maps");
    //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
    // distinct numbers associated with them. Since I'm using a 2D array, that means
    // distinct indices between the rows and columns. vertexStartValue lets me track where I switch
    // from numbering rows to columns, so I can assign unique numbers to every vertex, and then
    // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
    Integer vertexStartValue = 0;
    //keys are sequential integer vertices, values are CDR3s
    Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
    //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
    vertexStartValue += plateVtoCDR3Map.size();
    //keys are sequential integers vertices, values are CDR1s
    Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
    //keys are CDR3s, values are sequential integer vertices from previous map
    Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
    //keys are CDR1s, values are sequential integer vertices from previous map
    Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
    System.out.println("Vertex maps made");

    System.out.println("Creating adjacency matrix");
    //Count how many wells each CDR3 appears in
    Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
    //count how many wells each CDR1 appears in
    Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
    //weights are filled in by countSequencesAndFillMatrix; rows are CDR3 vertices, columns are CDR1 vertices
    double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
    countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
            cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
    System.out.println("Matrix created");

    System.out.println("Creating graph");
    SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
            new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
    SimpleWeightedBipartiteGraphMatrixGenerator<Integer, DefaultWeightedEdge> graphGenerator =
            new SimpleWeightedBipartiteGraphMatrixGenerator<>();
    //The vertex lists must be in the same order as the matrix rows/columns. The original author
    //notes this works because the vertex maps are LinkedHashMaps, which preserve order of entry.
    List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet());
    graphGenerator.first(cdr3Vertices);
    List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
    graphGenerator.second(cdr1Vertices);
    graphGenerator.weights(weights);
    graphGenerator.generateGraph(graph);
    System.out.println("Graph created");

    System.out.println("Removing edges outside of weight thresholds");
    filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
    System.out.println("Over- and under-weight edges set to 0.0");

    System.out.println("Finding first maximum weighted matching");
    MaximumWeightBipartiteMatching<Integer, DefaultWeightedEdge> firstMaxWeightMatching =
            new MaximumWeightBipartiteMatching<>(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
    MatchingAlgorithm.Matching<Integer, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
    System.out.println("First maximum weighted matching found");

    //first processing run
    Map<Integer, Integer> firstMatchCDR3toCDR1Map =
            mapMatchedSequences(graph, graphMatching, plateVtoCDR3Map, plateVtoCDR1Map);
    System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());

    System.out.println("Removing edges from first maximum weighted matching");
    //remove the matched edges from the graph so the second matching cannot reuse them
    for(DefaultWeightedEdge matchedEdge: graphMatching){
        graph.removeEdge(matchedEdge);
    }
    System.out.println("Edges removed");

    //Generate a new matching
    System.out.println("Finding second maximum weighted matching");
    MaximumWeightBipartiteMatching<Integer, DefaultWeightedEdge> secondMaxWeightMatching =
            new MaximumWeightBipartiteMatching<>(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
    graphMatching = secondMaxWeightMatching.getMatching();
    System.out.println("Second maximum weighted matching found");

    //second processing run
    Map<Integer, Integer> secondMatchCDR3toCDR1Map =
            mapMatchedSequences(graph, graphMatching, plateVtoCDR3Map, plateVtoCDR1Map);
    System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());

    System.out.println("Mapping first pass CDR3 alpha/beta pairs");
    //get linked map for first matching attempt: keeps a pair only if both its CDR3s were matched
    Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
    for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
        if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
            continue;
        }
        Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
        if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
            continue;
        }
        firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
        firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
    }
    System.out.println("First pass CDR3 alpha/beta pairs mapped");

    System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
    System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
    //Look for matches that simply swapped already-matched alpha and beta CDR3s
    Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
    for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
        if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
            continue;
        }
        Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
        if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
            continue;
        }
        boolean alphaMatchMovedToBeta =
                firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3));
        boolean betaMatchMovedToAlpha =
                firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3));
        if(alphaMatchMovedToBeta && betaMatchMovedToAlpha){
            dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
            dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
        }
    }
    System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");

    //timing stops here; building the result tables below is not counted
    Instant stop = Instant.now();

    //results for first map
    System.out.println("RESULTS FOR FIRST PASS MATCHING");
    List<List<String>> allResults =
            tabulateCDR1Matches(cdr3AtoBMap, firstMatchesMap, alphaCDR3toCDR1Map, betaCDR3toCDR1Map);
    int trueCount = countCorrectCDR1Matches(allResults);

    List<String> comments = new ArrayList<>();
    comments.add("Plate size: " + samplePlate.getSize() + " wells");
    comments.add("Previous pairs found: " + previousMatches.size());
    comments.add("CDR1 matches attempted: " + allResults.size());
    double attemptRate = (double) allResults.size() / previousMatches.size();
    comments.add("Matching attempt rate: " + attemptRate);
    comments.add("Number of correct matches: " + trueCount);
    double correctRate = (double) trueCount / allResults.size();
    comments.add("Correct matching rate: " + correctRate);
    NumberFormat nf = NumberFormat.getInstance(Locale.US);
    Duration time = Duration.between(start, stop);
    time = time.plus(previousTime);
    comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
    for(String s: comments){
        System.out.println(s);
    }

    List<String> headers = new ArrayList<>();
    headers.add("CDR3 alpha");
    headers.add("CDR3 beta");
    headers.add("first matched CDR1");
    headers.add("second matched CDR1");
    headers.add("Correct match?");

    //the first pass result reports the first pass matches, so it carries firstMatchesMap
    MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
            comments, headers, allResults, firstMatchesMap, time);

    //results for dual map
    System.out.println("RESULTS FOR SECOND PASS MATCHING");
    allResults = tabulateCDR1Matches(cdr3AtoBMap, dualMatchesMap, alphaCDR3toCDR1Map, betaCDR3toCDR1Map);
    trueCount = countCorrectCDR1Matches(allResults);

    comments = new ArrayList<>();
    comments.add("Plate size: " + samplePlate.getSize() + " wells");
    comments.add("Previous pairs found: " + previousMatches.size());
    comments.add("High overlap threshold: " + highThreshold);
    comments.add("Low overlap threshold: " + lowThreshold);
    comments.add("CDR1 matches attempted: " + allResults.size());
    attemptRate = (double) allResults.size() / previousMatches.size();
    comments.add("Matching attempt rate: " + attemptRate);
    comments.add("Number of correct matches: " + trueCount);
    correctRate = (double) trueCount / allResults.size();
    comments.add("Correct matching rate: " + correctRate);
    comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
    for(String s: comments){
        System.out.println(s);
    }
    System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");

    MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
            allResults, dualMatchesMap, time);
    MatchingResult[] output = {firstTest, dualTest};
    return output;
}

//Translates every edge of a matching into a CDR3 -> CDR1 entry, using the edge source as the
//CDR3 vertex and the edge target as the CDR1 vertex.
private static Map<Integer, Integer> mapMatchedSequences(
        SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph,
        MatchingAlgorithm.Matching<Integer, DefaultWeightedEdge> matching,
        Map<Integer, Integer> plateVtoCDR3Map,
        Map<Integer, Integer> plateVtoCDR1Map){
    Map<Integer, Integer> matchedCDR3toCDR1Map = new HashMap<>();
    for(DefaultWeightedEdge e: matching){
        Integer source = graph.getEdgeSource(e);
        Integer target = graph.getEdgeTarget(e);
        matchedCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
    }
    return matchedCDR3toCDR1Map;
}

//Builds one result row per alpha/beta CDR3 pair whose two CDR3s both have an entry in
//cdr3ToCDR1Matches. Each row is {CDR3 alpha, CDR3 beta, CDR1 matched to alpha, CDR1 matched to beta,
//"true"/"false"}. A row is "true" when the two matched CDR1s are the true CDR1s of the pair, in
//either order. Pairs are walked through cdr3AtoBMap rather than by reading keys two at a time, so
//the result does not depend on key adjacency, and a CDR3 missing from the cell maps yields "false"
//instead of a NullPointerException.
private static List<List<String>> tabulateCDR1Matches(Map<Integer, Integer> cdr3AtoBMap,
                                                      Map<Integer, Integer> cdr3ToCDR1Matches,
                                                      Map<Integer, Integer> alphaCDR3toCDR1Map,
                                                      Map<Integer, Integer> betaCDR3toCDR1Map){
    List<List<String>> rows = new ArrayList<>();
    for(Map.Entry<Integer, Integer> pair: cdr3AtoBMap.entrySet()){
        Integer alphaCDR3 = pair.getKey();
        Integer betaCDR3 = pair.getValue();
        Integer firstCDR1 = cdr3ToCDR1Matches.get(alphaCDR3);
        Integer secondCDR1 = cdr3ToCDR1Matches.get(betaCDR3);
        if(firstCDR1 == null || secondCDR1 == null){
            //this pair was not (fully) matched, so no match was attempted for it
            continue;
        }
        Integer trueAlphaCDR1 = alphaCDR3toCDR1Map.get(alphaCDR3);
        Integer trueBetaCDR1 = betaCDR3toCDR1Map.get(betaCDR3);
        //equals is called on the known non-null matched values so a missing true CDR1 is just a mismatch
        boolean matchedInOrder = firstCDR1.equals(trueAlphaCDR1) && secondCDR1.equals(trueBetaCDR1);
        boolean matchedSwapped = secondCDR1.equals(trueAlphaCDR1) && firstCDR1.equals(trueBetaCDR1);

        List<String> row = new ArrayList<>();
        row.add(alphaCDR3.toString());
        row.add(betaCDR3.toString());
        row.add(firstCDR1.toString());
        row.add(secondCDR1.toString());
        row.add(Boolean.toString(matchedInOrder || matchedSwapped));
        rows.add(row);
    }
    return rows;
}

//Counts the rows produced by tabulateCDR1Matches whose final column is "true".
private static int countCorrectCDR1Matches(List<List<String>> rows){
    int correct = 0;
    for(List<String> row: rows){
        if(Boolean.parseBoolean(row.get(row.size() - 1))){
            correct++;
        }
    }
    return correct;
}
//Commented out CDR1 matching until it's time to re-implement it
// //Simulated matching of CDR1s to CDR3s. Requires MatchingResult from prior run of matchCDR3s.
// public static MatchingResult[] matchCDR1s(List<Integer[]> distinctCells,
// Plate samplePlate, Integer lowThreshold,
// Integer highThreshold, MatchingResult priorResult){
// Instant start = Instant.now();
// Duration previousTime = priorResult.getTime();
// Map<Integer, Integer> previousMatches = priorResult.getMatchMap();
// int numWells = samplePlate.getSize();
// int[] cdr3Indices = {cdr3AlphaIndex, cdr3BetaIndex};
// int[] cdr1Indices = {cdr1AlphaIndex, cdr1BetaIndex};
//
// System.out.println("Making previous match maps");
// Map<Integer, Integer> cdr3AtoBMap = previousMatches;
// Map<Integer, Integer> cdr3BtoAMap = invertVertexMap(cdr3AtoBMap);
// System.out.println("Previous match maps made");
//
// System.out.println("Making cell maps");
// Map<Integer, Integer> alphaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3AlphaIndex, cdr1AlphaIndex);
// Map<Integer, Integer> betaCDR3toCDR1Map = makeSequenceToSequenceMap(distinctCells, cdr3BetaIndex, cdr1BetaIndex);
// System.out.println("Cell maps made");
//
// System.out.println("Making well maps");
// Map<Integer, Integer> allCDR3s = samplePlate.assayWellsSequenceS(cdr3Indices);
// Map<Integer, Integer> allCDR1s = samplePlate.assayWellsSequenceS(cdr1Indices);
// int CDR3Count = allCDR3s.size();
// System.out.println("all CDR3s count: " + CDR3Count);
// int CDR1Count = allCDR1s.size();
// System.out.println("all CDR1s count: " + CDR1Count);
// System.out.println("Well maps made");
//
// System.out.println("Removing unpaired CDR3s from well maps");
// List<Integer> unpairedCDR3s = new ArrayList<>();
// for(Integer i: allCDR3s.keySet()){
// if(!(cdr3AtoBMap.containsKey(i) || cdr3BtoAMap.containsKey(i))){
// unpairedCDR3s.add(i);
// }
// }
// for(Integer i: unpairedCDR3s){
// allCDR3s.remove(i);
// }
// System.out.println("Unpaired CDR3s removed.");
// System.out.println("Remaining CDR3 count: " + allCDR3s.size());
//
// System.out.println("Removing below-minimum-overlap-threshold and saturating-occupancy CDR1s");
// filterByOccupancyThreshold(allCDR1s, lowThreshold, numWells - 1);
// System.out.println("CDR1s removed.");
// System.out.println("Remaining CDR1 count: " + allCDR1s.size());
//
// System.out.println("Making vertex maps");
//
// //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
// // distinct numbers associated with them. Since I'm using a 2D array, that means
// // distinct indices between the rows and columns. vertexStartValue lets me track where I switch
// // from numbering rows to columns, so I can assign unique numbers to every vertex, and then
// // subtract the vertexStartValue from CDR1s to use their vertex labels as array indices
// Integer vertexStartValue = 0;
// //keys are sequential integer vertices, values are CDR3s
// Map<Integer, Integer> plateVtoCDR3Map = makeVertexToSequenceMap(allCDR3s, vertexStartValue);
// //New start value for vertex to CDR1 map should be one more than final vertex value in CDR3 map
// vertexStartValue += plateVtoCDR3Map.size();
// //keys are sequential integers vertices, values are CDR1s
// Map<Integer, Integer> plateVtoCDR1Map = makeVertexToSequenceMap(allCDR1s, vertexStartValue);
// //keys are CDR3s, values are sequential integer vertices from previous map
// Map<Integer, Integer> plateCDR3toVMap = invertVertexMap(plateVtoCDR3Map);
// //keys are CDR1s, values are sequential integer vertices from previous map
// Map<Integer, Integer> plateCDR1toVMap = invertVertexMap(plateVtoCDR1Map);
// System.out.println("Vertex maps made");
//
// System.out.println("Creating adjacency matrix");
// //Count how many wells each CDR3 appears in
// Map<Integer, Integer> cdr3WellCounts = new HashMap<>();
// //count how many wells each CDR1 appears in
// Map<Integer, Integer> cdr1WellCounts = new HashMap<>();
// //add edges, where weights are number of wells the peptides share in common.
// //If this is too slow, can make a 2d array and use the SimpleWeightedGraphMatrixGenerator class
// Map<Integer, Integer> wellNCDR3s = null;
// Map<Integer, Integer> wellNCDR1s = null;
// double[][] weights = new double[plateVtoCDR3Map.size()][plateVtoCDR1Map.size()];
// countSequencesAndFillMatrix(samplePlate, allCDR3s, allCDR1s, plateCDR3toVMap, plateCDR1toVMap,
// cdr3Indices, cdr1Indices, cdr3WellCounts, cdr1WellCounts, weights);
// System.out.println("Matrix created");
//
// System.out.println("Creating graph");
// SimpleWeightedGraph<Integer, DefaultWeightedEdge> graph =
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
// List<Integer> cdr3Vertices = new ArrayList<>(plateVtoCDR3Map.keySet()); //This will work because LinkedHashMap preserves order of entry
// graphGenerator.first(cdr3Vertices);
// List<Integer> cdr1Vertices = new ArrayList<>(plateVtoCDR1Map.keySet());
// graphGenerator.second(cdr1Vertices); //This will work because LinkedHashMap preserves order of entry
// graphGenerator.weights(weights);
// graphGenerator.generateGraph(graph);
// System.out.println("Graph created");
//
// System.out.println("Removing edges outside of weight thresholds");
// filterByOccupancyThreshold(graph, lowThreshold, highThreshold);
// System.out.println("Over- and under-weight edges set to 0.0");
//
// System.out.println("Finding first maximum weighted matching");
// MaximumWeightBipartiteMatching firstMaxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
// MatchingAlgorithm.Matching<String, DefaultWeightedEdge> graphMatching = firstMaxWeightMatching.getMatching();
// System.out.println("First maximum weighted matching found");
//
//
// //first processing run
// Map<Integer, Integer> firstMatchCDR3toCDR1Map = new HashMap<>();
// Iterator<DefaultWeightedEdge> weightIter = graphMatching.iterator();
// DefaultWeightedEdge e;
// while(weightIter.hasNext()){
// e = weightIter.next();
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
//// continue;
//// }
// Integer source = graph.getEdgeSource(e);
// Integer target = graph.getEdgeTarget(e);
// firstMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
// }
// System.out.println("First pass matches: " + firstMatchCDR3toCDR1Map.size());
//
// System.out.println("Removing edges from first maximum weighted matching");
// //zero out the edge weights in the matching
// weightIter = graphMatching.iterator();
// while(weightIter.hasNext()){
// graph.removeEdge(weightIter.next());
// }
// System.out.println("Edges removed");
//
// //Generate a new matching
// System.out.println("Finding second maximum weighted matching");
// MaximumWeightBipartiteMatching secondMaxWeightMatching =
// new MaximumWeightBipartiteMatching(graph, plateVtoCDR3Map.keySet(), plateVtoCDR1Map.keySet());
// graphMatching = secondMaxWeightMatching.getMatching();
// System.out.println("Second maximum weighted matching found");
//
//
// //second processing run
// Map<Integer, Integer> secondMatchCDR3toCDR1Map = new HashMap<>();
// weightIter = graphMatching.iterator();
// while(weightIter.hasNext()){
// e = weightIter.next();
//// if(graph.getEdgeWeight(e) < lowThreshold || graph.getEdgeWeight(e) > highThreshold) {
//// continue;
//// }
// Integer source = graph.getEdgeSource(e);
//// if(!(CDR3AtoBMap.containsKey(source) || CDR3BtoAMap.containsKey(source))){
//// continue;
//// }
// Integer target = graph.getEdgeTarget(e);
// secondMatchCDR3toCDR1Map.put(plateVtoCDR3Map.get(source), plateVtoCDR1Map.get(target));
// }
// System.out.println("Second pass matches: " + secondMatchCDR3toCDR1Map.size());
//
// System.out.println("Mapping first pass CDR3 alpha/beta pairs");
// //get linked map for first matching attempt
// Map<Integer, Integer> firstMatchesMap = new LinkedHashMap<>();
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
// continue;
// }
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
// continue;
// }
// firstMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
// firstMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
// }
// System.out.println("First pass CDR3 alpha/beta pairs mapped");
//
// System.out.println("Mapping second pass CDR3 alpha/beta pairs.");
// System.out.println("Finding CDR3 pairs that swapped CDR1 matches between first pass and second pass.");
// //Look for matches that simply swapped already-matched alpha and beta CDR3s
// Map<Integer, Integer> dualMatchesMap = new LinkedHashMap<>();
// for(Integer alphaCDR3: cdr3AtoBMap.keySet()) {
// if (!(firstMatchCDR3toCDR1Map.containsKey(alphaCDR3) && secondMatchCDR3toCDR1Map.containsKey(alphaCDR3))) {
// continue;
// }
// Integer betaCDR3 = cdr3AtoBMap.get(alphaCDR3);
// if (!(firstMatchCDR3toCDR1Map.containsKey(betaCDR3) && secondMatchCDR3toCDR1Map.containsKey(betaCDR3))) {
// continue;
// }
// if(firstMatchCDR3toCDR1Map.get(alphaCDR3).equals(secondMatchCDR3toCDR1Map.get(betaCDR3))){
// if(firstMatchCDR3toCDR1Map.get(betaCDR3).equals(secondMatchCDR3toCDR1Map.get(alphaCDR3))){
// dualMatchesMap.put(alphaCDR3, firstMatchCDR3toCDR1Map.get(alphaCDR3));
// dualMatchesMap.put(betaCDR3, firstMatchCDR3toCDR1Map.get(betaCDR3));
// }
// }
// }
// System.out.println("Second pass mapping made. Dual CDR3/CDR1 pairings found.");
//
// Instant stop = Instant.now();
// //results for first map
// System.out.println("RESULTS FOR FIRST PASS MATCHING");
// List<List<String>> allResults = new ArrayList<>();
// Integer trueCount = 0;
// Iterator iter = firstMatchesMap.keySet().iterator();
//
// while(iter.hasNext()){
// Boolean proven = false;
// List<String> tmp = new ArrayList<>();
// tmp.add(iter.next().toString());
// tmp.add(iter.next().toString());
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
// tmp.add(firstMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
// proven = true;
// }
// }
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
// proven = true;
// }
// }
// tmp.add(proven.toString());
// allResults.add(tmp);
// if(proven){
// trueCount++;
// }
// }
//
// List<String> comments = new ArrayList<>();
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
// comments.add("Previous pairs found: " + previousMatches.size());
// comments.add("CDR1 matches attempted: " + allResults.size());
// double attemptRate = (double) allResults.size() / previousMatches.size();
// comments.add("Matching attempt rate: " + attemptRate);
// comments.add("Number of correct matches: " + trueCount);
// double correctRate = (double) trueCount / allResults.size();
// comments.add("Correct matching rate: " + correctRate);
// NumberFormat nf = NumberFormat.getInstance(Locale.US);
// Duration time = Duration.between(start, stop);
// time = time.plus(previousTime);
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
// for(String s: comments){
// System.out.println(s);
// }
//
//
//
// List<String> headers = new ArrayList<>();
// headers.add("CDR3 alpha");
// headers.add("CDR3 beta");
// headers.add("first matched CDR1");
// headers.add("second matched CDR1");
// headers.add("Correct match?");
//
// MatchingResult firstTest = new MatchingResult(samplePlate.getSourceFileName(),
// comments, headers, allResults, dualMatchesMap, time);
//
// //results for dual map
// System.out.println("RESULTS FOR SECOND PASS MATCHING");
// allResults = new ArrayList<>();
// trueCount = 0;
// iter = dualMatchesMap.keySet().iterator();
// while(iter.hasNext()){
// Boolean proven = false;
// List<String> tmp = new ArrayList<>();
// tmp.add(iter.next().toString());
// tmp.add(iter.next().toString());
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(0))).toString());
// tmp.add(dualMatchesMap.get(Integer.valueOf(tmp.get(1))).toString());
// if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(2)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(3)))){
// proven = true;
// }
// }
// else if(alphaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(0))).equals(Integer.valueOf(tmp.get(3)))){
// if(betaCDR3toCDR1Map.get(Integer.valueOf(tmp.get(1))).equals(Integer.valueOf(tmp.get(2)))){
// proven = true;
// }
// }
// tmp.add(proven.toString());
// allResults.add(tmp);
// if(proven){
// trueCount++;
// }
// }
//
// comments = new ArrayList<>();
// comments.add("Plate size: " + samplePlate.getSize() + " wells");
// comments.add("Previous pairs found: " + previousMatches.size());
// comments.add("High overlap threshold: " + highThreshold);
// comments.add("Low overlap threshold: " + lowThreshold);
// comments.add("CDR1 matches attempted: " + allResults.size());
// attemptRate = (double) allResults.size() / previousMatches.size();
// comments.add("Matching attempt rate: " + attemptRate);
// comments.add("Number of correct matches: " + trueCount);
// correctRate = (double) trueCount / allResults.size();
// comments.add("Correct matching rate: " + correctRate);
// comments.add("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
//
// for(String s: comments){
// System.out.println(s);
// }
//
// System.out.println("Simulation time: " + nf.format(time.toSeconds()) + " seconds");
// MatchingResult dualTest = new MatchingResult(samplePlate.getSourceFileName(), comments, headers,
// allResults, dualMatchesMap, time);
// MatchingResult[] output = {firstTest, dualTest};
// return output;
// }
//Counts the well occupancy of the row peptides and column peptides into given maps, and
//fills weights in the given 2D array