Files
BiGpairSEQ/src/main/java/InteractiveInterface.java
2022-02-26 10:36:00 -06:00

590 lines
29 KiB
Java

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Console front end for the BiGpairSEQ simulator.
 *
 * All state is static: a single Scanner over standard input is shared by every menu, and
 * each menu method reads its parameters from that scanner and prints prompts to standard output.
 */
public class InteractiveInterface {
// Random source obtained from BiGpairSEQ.getRand(); used by makePlate() to draw random well populations.
private static final Random rand = BiGpairSEQ.getRand();
// Scanner over System.in shared by all menus; closed when startInteractive() leaves its loop.
private static final Scanner sc = new Scanner(System.in);
// Most recent integer menu selection read from the scanner.
private static int input;
// Set to true by the main menu's "0) Exit" option to end the loop in startInteractive().
private static boolean quit = false;
/**
 * Runs the top-level menu loop until the user selects "0) Exit" or standard input is exhausted,
 * then closes the shared scanner.
 *
 * Each iteration prints the menu, reads an integer selection and dispatches to the matching
 * action. A selection that is not an integer is reported and discarded. An IOException raised
 * by an action is reported and the menu is shown again.
 */
public static void startInteractive() {
    while (!quit) {
        System.out.println();
        System.out.println("--------BiGPairSEQ SIMULATOR--------");
        System.out.println("ALPHA/BETA T CELL RECEPTOR MATCHING");
        System.out.println(" USING WEIGHTED BIPARTITE GRAPHS ");
        System.out.println("------------------------------------");
        System.out.println("Please select an option:");
        System.out.println("1) Generate a population of distinct cells");
        System.out.println("2) Generate a sample plate of T cells");
        System.out.println("3) Generate CDR3 alpha/beta occupancy data and overlap graph");
        System.out.println("4) Simulate bipartite graph CDR3 alpha/beta matching (BiGpairSEQ)");
        //Need to re-do the CDR3/CDR1 matching to correspond to new pattern
        //System.out.println("5) Generate CDR3/CDR1 occupancy graph");
        //System.out.println("6) Simulate CDR3/CDR1 T cell matching");
        System.out.println("8) Options");
        System.out.println("9) About/Acknowledgments");
        System.out.println("0) Exit");
        // hasNext() returns false only when the input stream has ended (e.g. piped input ran out).
        // Without this check nextInt() would throw an uncaught NoSuchElementException.
        if (!sc.hasNext()) {
            System.out.println("No more input available. Exiting.");
            quit = true;
            break;
        }
        try {
            input = sc.nextInt();
            switch (input) {
                case 1 -> makeCells();
                case 2 -> makePlate();
                case 3 -> makeCDR3Graph();
                case 4 -> matchCDR3s();
                //case 6 -> matchCellsCDR1();
                case 8 -> mainOptions();
                case 9 -> acknowledge();
                case 0 -> quit = true;
                default -> System.out.println("Invalid input.");
            }
        } catch (InputMismatchException ex) {
            // nextInt() leaves the offending token in the scanner; discard it so the next
            // read does not fail on the same token again.
            System.out.println(ex);
            sc.next();
        } catch (IOException ex) {
            // An I/O failure inside a menu action leaves no bad token behind, so nothing is
            // consumed here; calling sc.next() would block and throw away the user's next entry.
            System.out.println(ex);
        }
    }
    sc.close();
}
/**
 * Prompts for an output file name, the CDR3/CDR1 diversity factor and a cell count, then
 * generates a CellSample, writes it to the named file and, if cell caching is enabled,
 * keeps it in memory.
 *
 * If a numeric answer cannot be parsed, or the cell count is not positive, the problem is
 * reported and the method returns without generating or writing anything.
 */
private static void makeCells() {
    String filename;
    int cdr1Freq;
    int numCells;
    try {
        System.out.println("\nSimulated T-Cells consist of integer values representing:\n" +
                "* a pair of alpha and beta CDR3 peptides (unique within simulated population)\n" +
                "* a pair of alpha and beta CDR1 peptides (not necessarily unique).");
        System.out.println("\nThe cells will be written to a CSV file.");
        System.out.print("Please enter a file name: ");
        filename = sc.next();
        System.out.println("\nCDR3 sequences are more diverse than CDR1 sequences.");
        System.out.println("Please enter the factor by which distinct CDR3s outnumber CDR1s: ");
        // NOTE(review): this factor is passed to CellSample unvalidated; confirm whether
        // zero or negative values are meaningful there.
        cdr1Freq = sc.nextInt();
        System.out.print("\nPlease enter the number of T-cells to generate: ");
        numCells = sc.nextInt();
    } catch (InputMismatchException ex) {
        // The unparsable token is still in the scanner; drop it before returning to the menu.
        System.out.println(ex);
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    }
    // Checked outside the try block: the token was already consumed by nextInt(), so no
    // token may be discarded here.
    if (numCells <= 0) {
        System.out.println("Number of cells must be a positive integer.");
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample sample = new CellSample(numCells, cdr1Freq);
    System.out.println("Writing cells to file");
    CellFileWriter writer = new CellFileWriter(filename, sample);
    writer.writeCellsToFile();
    System.out.println("Cell sample written to: " + filename);
    if (BiGpairSEQ.cacheCells()) {
        BiGpairSEQ.setCellSampleInMemory(sample, filename);
    }
}
//Output a CSV of sample plate
/**
 * Prompts for the parameters of a sample plate, fills the plate from a cell sample and writes
 * it to a file.
 *
 * The user supplies: the cell sample file, the output file, a frequency distribution
 * (Poisson, Gaussian or exponential), the number of wells, the number of cells per well
 * (either random within a range, one value per well, or one value per plate section) and the
 * sequence dropout rate. The cell sample is taken from memory when the cached sample has the
 * same file name, otherwise it is read from the file. If plate caching is enabled the new
 * plate is kept in memory.
 *
 * If any answer cannot be parsed or fails validation, the problem is reported and the method
 * returns without building or writing a plate.
 */
private static void makePlate() {
    String cellFile;
    String filename;
    double stdDev = 0.0;
    int numWells;
    Integer[] populations;
    double dropOutRate;
    boolean poisson = false;
    boolean exponential = false;
    double lambda = 1.5;
    try {
        System.out.println("\nSimulated sample plates consist of:");
        System.out.println("* a number of wells");
        System.out.println(" * separated into one or more sections");
        System.out.println(" * each of which has a set quantity of cells per well");
        System.out.println(" * selected from a statistical distribution of distinct cells");
        System.out.println(" * with a set dropout rate for individual sequences within a cell");
        System.out.println("\nMaking a sample plate requires a population of distinct cells");
        System.out.print("Please enter name of an existing cell sample file: ");
        cellFile = sc.next();
        System.out.println("\nThe sample plate will be written to a CSV file");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("\nSelect T-cell frequency distribution function");
        System.out.println("1) Poisson");
        System.out.println("2) Gaussian");
        System.out.println("3) Exponential");
        System.out.println("(Note: approximate distribution in original paper is exponential, lambda = 0.6)");
        System.out.println("(lambda value approximated from slope of log-log graph in figure 4c)");
        System.out.println("(Note: wider distributions are more memory intensive to match)");
        System.out.print("Enter selection value: ");
        input = sc.nextInt();
        switch (input) {
            case 1 -> poisson = true;
            case 2 -> {
                System.out.println("How many distinct T-cells within one standard deviation of peak frequency?");
                System.out.println("(Note: wider distributions are more memory intensive to match)");
                stdDev = sc.nextDouble();
                if (stdDev <= 0.0) {
                    throw new IllegalArgumentException("Value must be positive.");
                }
            }
            case 3 -> {
                exponential = true;
                System.out.print("Please enter lambda value for exponential distribution: ");
                lambda = sc.nextDouble();
                if (lambda <= 0.0) {
                    lambda = 0.6;
                    System.out.println("Value must be positive. Defaulting to 0.6.");
                }
            }
            default -> {
                System.out.println("Invalid input. Defaulting to exponential.");
                exponential = true;
            }
        }
        System.out.print("\nNumber of wells on plate: ");
        numWells = sc.nextInt();
        if (numWells < 1) {
            throw new IllegalArgumentException("No wells on plate");
        }
        //choose whether to make T cell population/well random
        System.out.println("Randomize number of T cells in each well? (y/n)");
        String ans = sc.next();
        boolean randomWellPopulations = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE)
                .matcher(ans)
                .matches();
        if (randomWellPopulations) { //one independently drawn population per well
            System.out.print("Please enter minimum number of T cells in a well: ");
            int minPop = sc.nextInt();
            if (minPop < 1) {
                throw new IllegalArgumentException("Minimum well population must be positive");
            }
            System.out.println("Please enter maximum number of T cells in a well: ");
            int maxPop = sc.nextInt();
            if (maxPop < minPop) {
                throw new IllegalArgumentException("Max well population must be greater than min well population");
            }
            //maximum should be inclusive, so need to add one to max of randomly generated values
            populations = rand.ints(minPop, maxPop + 1)
                    .limit(numWells)
                    .boxed()
                    .toArray(Integer[]::new);
            System.out.print("Populations: ");
            System.out.println(Arrays.toString(populations));
        } else { //one user-supplied population per plate section
            System.out.println("\nThe plate can be evenly sectioned to allow different numbers of T cells per well.");
            System.out.println("How many sections would you like to make (minimum 1)?");
            int numSections = sc.nextInt();
            if (numSections < 1) {
                throw new IllegalArgumentException("Too few sections.");
            } else if (numSections > numWells) {
                throw new IllegalArgumentException("Cannot have more sections than wells.");
            }
            populations = new Integer[numSections];
            for (int section = 0; section < numSections; section++) {
                System.out.print("Enter number of T cells per well in section " + (section + 1) + ": ");
                int cellsPerWell = sc.nextInt();
                // Same lower bound the randomized path enforces on its minimum.
                if (cellsPerWell < 1) {
                    throw new IllegalArgumentException("Number of T cells per well must be positive");
                }
                populations[section] = cellsPerWell;
            }
        }
        System.out.println("\nErrors in amplification can induce a well dropout rate for sequences");
        System.out.print("Enter well dropout rate (0.0 to 1.0): ");
        dropOutRate = sc.nextDouble();
        if (dropOutRate < 0.0 || dropOutRate > 1.0) {
            throw new IllegalArgumentException("The well dropout rate must be in the range [0.0, 1.0]");
        }
    } catch (InputMismatchException ex) {
        // Thrown only by the scanner: the unparsable token is still buffered, so discard it.
        System.out.println(ex);
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    } catch (IllegalArgumentException ex) {
        // Validation failure on a value that was already consumed: nothing to discard.
        System.out.println(ex.getMessage());
        System.out.println("Returning to main menu.");
        return;
    }
    CellSample cells;
    // Reuse the cached sample only if one is actually held for this file name.
    if (cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
        cells = BiGpairSEQ.getCellSampleInMemory();
    } else {
        System.out.println("Reading Cell Sample file: " + cellFile);
        CellFileReader cellReader = new CellFileReader(cellFile);
        cells = cellReader.getCellSample();
        if (BiGpairSEQ.cacheCells()) {
            BiGpairSEQ.setCellSampleInMemory(cells, cellFile);
        }
    }
    Plate samplePlate = new Plate(numWells, dropOutRate, populations);
    if (exponential) {
        samplePlate.fillWellsExponential(cellFile, cells.getCells(), lambda);
    } else {
        if (poisson) {
            stdDev = Math.sqrt(cells.getCellCount()); //gaussian with square root of elements approximates poisson
        }
        samplePlate.fillWells(cellFile, cells.getCells(), stdDev);
    }
    PlateFileWriter writer = new PlateFileWriter(filename, samplePlate);
    System.out.println("Writing Sample Plate to file");
    writer.writePlateFile();
    System.out.println("Sample Plate written to file: " + filename);
    if (BiGpairSEQ.cachePlate()) {
        BiGpairSEQ.setPlateInMemory(samplePlate, filename);
    }
}
//Output a serialized binary and/or GraphML file of a GraphWithMapData object
/**
 * Prompts for a cell sample file, a sample plate file and an output file name, builds the
 * occupancy/overlap graph with Simulator.makeGraph and writes it out in each enabled format
 * (serialized binary and/or GraphML).
 *
 * The cell sample and the plate are each taken from memory when a cached object exists under
 * the same file name, otherwise they are read from file (and cached if caching is enabled).
 * If the cell sample or the plate turns out to be empty, a message is printed and nothing is
 * generated. If graph caching is enabled the result is kept in memory.
 */
private static void makeCDR3Graph() {
    // Only Scanner.next() is used here, which never throws InputMismatchException, so no
    // input-mismatch handling is needed for these prompts.
    System.out.println("\nGenerating bipartite weighted graph encoding occupancy overlap data "
            + "\nrequires a cell sample file and a sample plate file.");
    System.out.print("\nPlease enter name of an existing cell sample file: ");
    String cellFile = sc.next();
    System.out.print("\nPlease enter name of an existing sample plate file: ");
    String plateFile = sc.next();
    System.out.println("\nThe graph and occupancy data will be written to a serialized binary file.");
    System.out.print("Please enter a name for the output file: ");
    String filename = sc.next();

    CellSample cellSample;
    //check if cells are already in memory
    if (cellFile.equals(BiGpairSEQ.getCellFilename()) && BiGpairSEQ.getCellSampleInMemory() != null) {
        cellSample = BiGpairSEQ.getCellSampleInMemory();
    } else {
        System.out.println("Reading Cell Sample file: " + cellFile);
        CellFileReader cellReader = new CellFileReader(cellFile);
        cellSample = cellReader.getCellSample();
        if (BiGpairSEQ.cacheCells()) {
            BiGpairSEQ.setCellSampleInMemory(cellSample, cellFile);
        }
    }

    Plate plate;
    //check if plate is already in memory; require an actual cached object, as for the cell sample
    if (plateFile.equals(BiGpairSEQ.getPlateFilename()) && BiGpairSEQ.getPlateInMemory() != null) {
        plate = BiGpairSEQ.getPlateInMemory();
    } else {
        System.out.println("Reading Sample Plate file: " + plateFile);
        PlateFileReader plateReader = new PlateFileReader(plateFile);
        plate = new Plate(plateReader.getFilename(), plateReader.getWells());
        if (BiGpairSEQ.cachePlate()) {
            BiGpairSEQ.setPlateInMemory(plate, plateFile);
        }
    }

    if (cellSample.getCells().isEmpty()) {
        System.out.println("No cell sample found.");
        System.out.println("Returning to main menu.");
        return;
    }
    if (plate.getWells().size() == 0 || plate.getPopulations().length == 0) {
        System.out.println("No sample plate found.");
        System.out.println("Returning to main menu.");
        return;
    }

    List<Integer[]> cells = cellSample.getCells();
    GraphWithMapData data = Simulator.makeGraph(cells, plate, true);
    // NOTE(review): both writers receive the same filename; confirm that they write to
    // distinct paths (e.g. by appending their own extension) when both outputs are enabled.
    if (BiGpairSEQ.outputBinary()) {
        GraphDataObjectWriter dataWriter = new GraphDataObjectWriter(filename, data);
        dataWriter.writeDataToFile();
        System.out.println("Serialized binary graph/data file written to: " + filename);
    }
    if (BiGpairSEQ.outputGraphML()) {
        GraphMLFileWriter graphMLWriter = new GraphMLFileWriter(filename, data);
        graphMLWriter.writeGraphToFile();
        System.out.println("GraphML file written to: " + filename);
    }
    if (BiGpairSEQ.cacheGraph()) {
        BiGpairSEQ.setGraphInMemory(data, filename);
    }
}
//Simulate matching and output CSV file of results
/**
 * Prompts for a graph/data file, an output file and the matching filters, runs
 * Simulator.matchCDR3s on the graph and writes the results to the output file.
 *
 * Out-of-range filter values are replaced with the nearest permitted value (or 0 for the
 * overlap percentage) and the replacement is reported. If a filter value cannot be parsed as
 * an integer, the error is reported and the method returns without running the matching.
 * The graph is taken from memory when a cached graph exists under the same file name,
 * otherwise it is read from file (and cached if graph caching is enabled).
 *
 * @throws IOException declared by the original signature; presumably raised while reading
 *                     the graph or writing results — confirm against the reader/writer classes
 */
private static void matchCDR3s() throws IOException {
    String filename;
    String graphFilename;
    int lowThreshold;
    int highThreshold;
    int maxOccupancyDiff;
    int minOverlapPercent;
    try {
        System.out.println("\nBiGpairSEQ simulation requires an occupancy data and overlap graph file");
        System.out.println("Please enter name of an existing graph and occupancy data file: ");
        graphFilename = sc.next();
        System.out.println("The matching results will be written to a file.");
        System.out.print("Please enter a name for the output file: ");
        filename = sc.next();
        System.out.println("\nWhat is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
        lowThreshold = sc.nextInt();
        if (lowThreshold < 1) {
            lowThreshold = 1;
            System.out.println("Value for low occupancy overlap threshold must be positive");
            System.out.println("Value for low occupancy overlap threshold set to 1");
        }
        System.out.println("\nWhat is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
        highThreshold = sc.nextInt();
        if (highThreshold < lowThreshold) {
            highThreshold = lowThreshold;
            System.out.println("Value for high occupancy overlap threshold must be >= low overlap threshold");
            System.out.println("Value for high occupancy overlap threshold set to " + lowThreshold);
        }
        System.out.println("What is the minimum percentage of a sequence's wells in alpha/beta overlap to attempt matching? (0 - 100)");
        minOverlapPercent = sc.nextInt();
        if (minOverlapPercent < 0 || minOverlapPercent > 100) {
            // Actually apply the reset that the message announces.
            minOverlapPercent = 0;
            System.out.println("Value outside range. Minimum occupancy overlap percentage set to 0");
        }
        System.out.println("\nWhat is the maximum difference in alpha/beta occupancy to attempt matching?");
        maxOccupancyDiff = sc.nextInt();
        if (maxOccupancyDiff < 0) {
            maxOccupancyDiff = 0;
            System.out.println("Maximum allowable difference in alpha/beta occupancy must be nonnegative");
            System.out.println("Maximum allowable difference in alpha/beta occupancy set to 0");
        }
    } catch (InputMismatchException ex) {
        // The unparsable token is still in the scanner; drop it and abandon this run rather
        // than matching with filters the user never confirmed.
        System.out.println(ex);
        sc.next();
        System.out.println("Returning to main menu.");
        return;
    }
    //check if this is the same graph we already have in memory.
    GraphWithMapData data;
    if (graphFilename.equals(BiGpairSEQ.getGraphFilename()) && BiGpairSEQ.getGraphInMemory() != null) {
        data = BiGpairSEQ.getGraphInMemory();
    } else {
        GraphDataObjectReader dataReader = new GraphDataObjectReader(graphFilename);
        data = dataReader.getData();
        if (BiGpairSEQ.cacheGraph()) {
            BiGpairSEQ.setGraphInMemory(data, graphFilename);
        }
    }
    //simulate matching
    MatchingResult results = Simulator.matchCDR3s(data, graphFilename, lowThreshold, highThreshold,
            maxOccupancyDiff, minOverlapPercent, true);
    //write results to file
    MatchingFileWriter writer = new MatchingFileWriter(filename, results);
    System.out.println("Writing results to file");
    writer.writeResultsToFile();
    System.out.println("Results written to file: " + filename);
}
///////
//Rewrite this to fit new matchCDR3 method with file I/O
///////
// public static void matchCellsCDR1(){
// /*
// The idea here is that we'll get the CDR3 alpha/beta matches first. Then we'll try to match CDR3s to CDR1s by
// looking at the top two matches for each CDR3. If CDR3s in the same cell simply swap CDR1s, we assume a correct
// match
// */
// String filename = null;
// String preliminaryResultsFilename = null;
// String cellFile = null;
// String plateFile = null;
// Integer lowThresholdCDR3 = 0;
// Integer highThresholdCDR3 = Integer.MAX_VALUE;
// Integer maxOccupancyDiffCDR3 = 96; //no filtering if max difference is all wells by default
// Integer minOverlapPercentCDR3 = 0; //no filtering if min percentage is zero by default
// Integer lowThresholdCDR1 = 0;
// Integer highThresholdCDR1 = Integer.MAX_VALUE;
// boolean outputCDR3Matches = false;
// try {
// System.out.println("\nSimulated experiment requires a cell sample file and a sample plate file.");
// System.out.print("Please enter name of an existing cell sample file: ");
// cellFile = sc.next();
// System.out.print("Please enter name of an existing sample plate file: ");
// plateFile = sc.next();
// System.out.println("The matching results will be written to a file.");
// System.out.print("Please enter a name for the output file: ");
// filename = sc.next();
// System.out.println("What is the minimum number of CDR3 alpha/beta overlap wells to attempt matching?");
// lowThresholdCDR3 = sc.nextInt();
// if(lowThresholdCDR3 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3 alpha/beta overlap wells to attempt matching?");
// highThresholdCDR3 = sc.nextInt();
// System.out.println("What is the maximum difference in CDR3 alpha/beta occupancy to attempt matching?");
// maxOccupancyDiffCDR3 = sc.nextInt();
// System.out.println("What is the minimum CDR3 overlap percentage to attempt matching? (0 - 100)");
// minOverlapPercentCDR3 = sc.nextInt();
// if (minOverlapPercentCDR3 < 0 || minOverlapPercentCDR3 > 100) {
// throw new InputMismatchException("Value outside range. Minimum percent set to 0");
// }
// System.out.println("What is the minimum number of CDR3/CDR1 overlap wells to attempt matching?");
// lowThresholdCDR1 = sc.nextInt();
// if(lowThresholdCDR1 < 1){
// throw new InputMismatchException("Minimum value for low threshold is 1");
// }
// System.out.println("What is the maximum number of CDR3/CDR1 overlap wells to attempt matching?");
// highThresholdCDR1 = sc.nextInt();
// System.out.println("Matching CDR3s to CDR1s requires first matching CDR3 alpha/betas.");
// System.out.println("Output a file for CDR3 alpha/beta match results as well?");
// System.out.print("Please enter y/n: ");
// String ans = sc.next();
// Pattern pattern = Pattern.compile("(?:yes|y)", Pattern.CASE_INSENSITIVE);
// Matcher matcher = pattern.matcher(ans);
// if(matcher.matches()){
// outputCDR3Matches = true;
// System.out.println("Please enter filename for CDR3 alpha/beta match results");
// preliminaryResultsFilename = sc.next();
// System.out.println("CDR3 alpha/beta matches will be output to file");
// }
// else{
// System.out.println("CDR3 alpha/beta matches will not be output to file");
// }
// } catch (InputMismatchException ex) {
// System.out.println(ex);
// sc.next();
// }
// CellFileReader cellReader = new CellFileReader(cellFile);
// PlateFileReader plateReader = new PlateFileReader(plateFile);
// Plate plate = new Plate(plateReader.getFilename(), plateReader.getWells());
// if (cellReader.getCells().size() == 0){
// System.out.println("No cell sample found.");
// System.out.println("Returning to main menu.");
// }
// else if(plate.getWells().size() == 0){
// System.out.println("No sample plate found.");
// System.out.println("Returning to main menu.");
//
// }
// else{
// if(highThresholdCDR3 >= plate.getSize()){
// highThresholdCDR3 = plate.getSize() - 1;
// }
// if(highThresholdCDR1 >= plate.getSize()){
// highThresholdCDR1 = plate.getSize() - 1;
// }
// List<Integer[]> cells = cellReader.getCells();
// MatchingResult preliminaryResults = Simulator.matchCDR3s(cells, plate, lowThresholdCDR3, highThresholdCDR3,
// maxOccupancyDiffCDR3, minOverlapPercentCDR3, true);
// MatchingResult[] results = Simulator.matchCDR1s(cells, plate, lowThresholdCDR1,
// highThresholdCDR1, preliminaryResults);
// MatchingFileWriter writer = new MatchingFileWriter(filename + "_FirstPass", results[0]);
// writer.writeResultsToFile();
// writer = new MatchingFileWriter(filename + "_SecondPass", results[1]);
// writer.writeResultsToFile();
// if(outputCDR3Matches){
// writer = new MatchingFileWriter(preliminaryResultsFilename, preliminaryResults);
// writer.writeResultsToFile();
// }
// }
// }
/**
 * Shows the options menu repeatedly until the user picks "0) Return to main menu".
 *
 * Entries 1-5 each flip one BiGpairSEQ boolean setting (the menu text names the state the
 * setting would be switched to); entry 6 opens the algorithm options menu. A non-integer
 * selection is reported and discarded.
 */
private static void mainOptions(){
    boolean stayInMenu = true;
    do {
        System.out.println("\n--------------OPTIONS---------------");
        System.out.println("1) Turn " + getOnOff(!BiGpairSEQ.cacheCells()) + " cell sample file caching");
        System.out.println("2) Turn " + getOnOff(!BiGpairSEQ.cachePlate()) + " plate file caching");
        System.out.println("3) Turn " + getOnOff(!BiGpairSEQ.cacheGraph()) + " graph/data file caching");
        System.out.println("4) Turn " + getOnOff(!BiGpairSEQ.outputBinary()) + " serialized binary graph output");
        System.out.println("5) Turn " + getOnOff(!BiGpairSEQ.outputGraphML()) + " GraphML graph output");
        System.out.println("6) Maximum weight matching algorithm options");
        System.out.println("0) Return to main menu");
        try {
            input = sc.nextInt();
            switch (input) {
                case 0:
                    stayInMenu = false;
                    break;
                case 1:
                    BiGpairSEQ.setCacheCells(!BiGpairSEQ.cacheCells());
                    break;
                case 2:
                    BiGpairSEQ.setCachePlate(!BiGpairSEQ.cachePlate());
                    break;
                case 3:
                    BiGpairSEQ.setCacheGraph(!BiGpairSEQ.cacheGraph());
                    break;
                case 4:
                    BiGpairSEQ.setOutputBinary(!BiGpairSEQ.outputBinary());
                    break;
                case 5:
                    BiGpairSEQ.setOutputGraphML(!BiGpairSEQ.outputGraphML());
                    break;
                case 6:
                    algorithmOptions();
                    break;
                default:
                    System.out.println("Invalid input");
                    break;
            }
        } catch (InputMismatchException badSelection) {
            // Report the mismatch and drop the token that could not be read as an integer.
            System.out.println(badSelection);
            sc.next();
        }
    } while (stayInMenu);
}
/**
 * Maps a boolean to the word used in the options menu entries printed by mainOptions().
 *
 * @param b - the flag to describe
 * @return String "on" when b is true, otherwise "off"
 */
private static String getOnOff(boolean b) {
    return b ? "on" : "off";
}
/**
 * Shows the maximum weight matching algorithm menu until a usable choice is made.
 *
 * Choosing 2 or 3 selects the Fibonacci heap or pairing heap variant through BiGpairSEQ and
 * leaves the menu; 0 leaves without changing anything; 1 is reported as not yet implemented
 * and the menu is shown again. A non-integer selection is reported and discarded.
 */
private static void algorithmOptions(){
    boolean keepAsking = true;
    do {
        System.out.println("\n---------ALGORITHM OPTIONS----------");
        System.out.println("1) Use scaling algorithm by Duan and Su.");
        System.out.println("2) Use LEDA book algorithm with Fibonacci heap priority queue");
        System.out.println("3) Use LEDA book algorithm with pairing heap priority queue");
        System.out.println("0) Return to Options menu");
        try {
            input = sc.nextInt();
            switch (input) {
                case 0:
                    keepAsking = false;
                    break;
                case 1:
                    System.out.println("This option is not yet implemented. Choose another.");
                    break;
                case 2:
                    BiGpairSEQ.setFibonacciHeap();
                    System.out.println("MWM algorithm set to LEDA with Fibonacci heap");
                    keepAsking = false;
                    break;
                case 3:
                    BiGpairSEQ.setPairingHeap();
                    System.out.println("MWM algorithm set to LEDA with pairing heap");
                    keepAsking = false;
                    break;
                default:
                    System.out.println("Invalid input");
                    break;
            }
        } catch (InputMismatchException badSelection) {
            // Report the mismatch and drop the token that could not be read as an integer.
            System.out.println(badSelection);
            sc.next();
        }
    } while (keepAsking);
}
/**
 * Prints the about/acknowledgments text: a short description of the program, where to find
 * the documentation, the pairSEQ citation and the author credit. One line is printed per
 * entry; empty entries produce blank lines.
 */
private static void acknowledge(){
    String[] aboutLines = {
            "This program simulates BiGpairSEQ, a graph theory based adaptation",
            "of the pairSEQ algorithm for pairing T cell receptor sequences.",
            "",
            "For full documentation, view readme.md file distributed with this code",
            "or visit https://gitea.ejsf.synology.me/efischer/BiGpairSEQ.",
            "",
            "pairSEQ citation:",
            "Howie, B., Sherwood, A. M., et. al.",
            "High-throughput pairing of T cell receptor alpha and beta sequences.",
            "Sci. Transl. Med. 7, 301ra131 (2015)",
            "",
            "BiGpairSEQ_Sim by Eugene Fischer, 2021-2022"
    };
    for (String line : aboutLines) {
        System.out.println(line);
    }
}
}