Refactor to construct the bipartite graph directly, rather than by using an adjacency matrix and a graph generator.

This commit is contained in:
eugenefischer
2025-04-10 11:47:15 -05:00
parent bcf5a4c749
commit e1888a99c6
2 changed files with 83 additions and 44 deletions

View File

@@ -241,7 +241,7 @@ public class Plate {
sequencesAndMisreads.put(currentSequence, new ArrayList<>()); sequencesAndMisreads.put(currentSequence, new ArrayList<>());
} }
//The specific misread hasn't happened before //The specific misread hasn't happened before
if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).size() == 0) { if (rand.nextDouble() >= errorCollisionRate || sequencesAndMisreads.get(currentSequence).isEmpty()) {
//The misread doesn't collide with a real sequence already on the plate and some sequences have already been read //The misread doesn't collide with a real sequence already on the plate and some sequences have already been read
if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){ if(rand.nextDouble() >= realSequenceCollisionRate || !sequenceMap.isEmpty()){
StringBuilder spurious = new StringBuilder(currentSequence); StringBuilder spurious = new StringBuilder(currentSequence);

View File

@@ -70,58 +70,97 @@ public class Simulator implements GraphModificationFunctions {
if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);} if(verbose){System.out.println("Total beta sequence wells removed: " + betaWellsRemoved);}
} }
//construct the graph. For simplicity, going to make /*
if(verbose){System.out.println("Making vertex maps");} * The commented out code below works beautifully for small enough graphs. However, after implementing a
//For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have * Zipf distribution and attempting to simulate Experiment 3 from the paper again, I discovered that
//distinct numbers associated with them. Since I'm using a 2D array, that means * this method uses too much memory. Even a 120GB heap is not enough to build this adjacency matrix.
//distinct indices between the rows and columns. vertexStartValue lets me track where I switch * So I'm going to attempt to build this graph directly and see if that is less memory intensive
//from numbering rows to columns, so I can assign unique numbers to every vertex, and then */
//subtract the vertexStartValue from betas to use their vertex labels as array indices // //construct the graph. For simplicity, going to make
int vertexStartValue = 0; // if(verbose){System.out.println("Making vertex maps");}
//keys are sequential integer vertices, values are alphas // //For the SimpleWeightedBipartiteGraphMatrixGenerator, all vertices must have
Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue); // //distinct numbers associated with them. Since I'm using a 2D array, that means
//new start value for vertex to beta map should be one more than final vertex value in alpha map // //distinct indices between the rows and columns. vertexStartValue lets me track where I switch
vertexStartValue += plateAtoVMap.size(); // //from numbering rows to columns, so I can assign unique numbers to every vertex, and then
//keys are betas, values are sequential integers // //subtract the vertexStartValue from betas to use their vertex labels as array indices
Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue); // int vertexStartValue = 0;
if(verbose){System.out.println("Vertex maps made");} // //keys are sequential integer vertices, values are alphas
//make adjacency matrix for bipartite graph generator // Map<String, Integer> plateAtoVMap = makeSequenceToVertexMap(alphaSequences, vertexStartValue);
//(technically this is only 1/4 of an adjacency matrix, but that's all you need // //new start value for vertex to beta map should be one more than final vertex value in alpha map
//for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.) // vertexStartValue += plateAtoVMap.size();
if(verbose){System.out.println("Making adjacency matrix");} // //keys are betas, values are sequential integers
double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()]; // Map<String, Integer> plateBtoVMap = makeSequenceToVertexMap(betaSequences, vertexStartValue);
fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap); // if(verbose){System.out.println("Vertex maps made");}
if(verbose){System.out.println("Adjacency matrix made");} // //make adjacency matrix for bipartite graph generator
// //(technically this is only 1/4 of an adjacency matrix, but that's all you need
// //for a bipartite graph, and all the SimpleWeightedBipartiteGraphMatrixGenerator class expects.)
// if(verbose){System.out.println("Making adjacency matrix");}
// double[][] weights = new double[plateAtoVMap.size()][plateBtoVMap.size()];
// fillAdjacencyMatrix(weights, vertexStartValue, alphaSequences, betaSequences, plateAtoVMap, plateBtoVMap);
// if(verbose){System.out.println("Adjacency matrix made");}
// //make bipartite graph
// if(verbose){System.out.println("Making bipartite weighted graph");}
// //the graph object
// SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
// new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
// //the graph generator
// SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator();
// //the list of alpha vertices
// List<Vertex> alphaVertices = new ArrayList<>();
// for (String seq : plateAtoVMap.keySet()) {
// Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq));
// alphaVertices.add(alphaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(alphaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.first(alphaVertices);
// //the list of beta vertices
// List<Vertex> betaVertices = new ArrayList<>();
// for (String seq : plateBtoVMap.keySet()) {
// Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq));
// betaVertices.add(betaVertex);
// }
// //Sort to make sure the order of vertices in list matches the order of the adjacency matrix
// Collections.sort(betaVertices);
// //Add ordered list of vertices to the graph
// graphGenerator.second(betaVertices);
// //use adjacency matrix of weight created previously
// graphGenerator.weights(weights);
// graphGenerator.generateGraph(graph);
//make bipartite graph //make bipartite graph
if(verbose){System.out.println("Making bipartite weighted graph");} if(verbose){System.out.println("Making bipartite weighted graph");}
//the graph object //the graph object
SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph = SimpleWeightedGraph<Vertex, DefaultWeightedEdge> graph =
new SimpleWeightedGraph<>(DefaultWeightedEdge.class); new SimpleWeightedGraph<>(DefaultWeightedEdge.class);
//the graph generator int vertexLabelValue = 0;
SimpleWeightedBipartiteGraphMatrixGenerator graphGenerator = new SimpleWeightedBipartiteGraphMatrixGenerator(); //create and add alpha sequence vertices
//the list of alpha vertices
List<Vertex> alphaVertices = new ArrayList<>(); List<Vertex> alphaVertices = new ArrayList<>();
for (String seq : plateAtoVMap.keySet()) { for (Map.Entry<String, SequenceRecord> entry: alphaSequences.entrySet()) {
Vertex alphaVertex = new Vertex(alphaSequences.get(seq), plateAtoVMap.get(seq)); alphaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
alphaVertices.add(alphaVertex); vertexLabelValue++;
} }
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix alphaVertices.forEach(graph::addVertex);
Collections.sort(alphaVertices); //add beta sequence vertices
//Add ordered list of vertices to the graph
graphGenerator.first(alphaVertices);
//the list of beta vertices
List<Vertex> betaVertices = new ArrayList<>(); List<Vertex> betaVertices = new ArrayList<>();
for (String seq : plateBtoVMap.keySet()) { for (Map.Entry<String, SequenceRecord> entry: betaSequences.entrySet()) {
Vertex betaVertex = new Vertex(betaSequences.get(seq), plateBtoVMap.get(seq)); betaVertices.add(new Vertex(entry.getValue(), vertexLabelValue));
betaVertices.add(betaVertex); vertexLabelValue++;
} }
//Sort to make sure the order of vertices in list matches the order of the adjacency matrix betaVertices.forEach(graph::addVertex);
Collections.sort(betaVertices); //add edges
//Add ordered list of vertices to the graph for(Vertex alpha: alphaVertices){
graphGenerator.second(betaVertices); for(Vertex beta: betaVertices) {
//use adjacency matrix of weight created previously Set<Integer> sharedWells = alpha.getRecord().getWells();
graphGenerator.weights(weights); sharedWells.retainAll(beta.getRecord().getWells());
graphGenerator.generateGraph(graph); if(!sharedWells.isEmpty()) {
DefaultWeightedEdge edge = graph.addEdge(alpha, beta);
graph.setEdgeWeight(edge, sharedWells.size());
}
}
}
if(verbose){System.out.println("Graph created");} if(verbose){System.out.println("Graph created");}
//stop timing //stop timing
Instant stop = Instant.now(); Instant stop = Instant.now();