org.geneweaver.io.writer.ExportBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gweaver-stream-io Show documentation
Show all versions of gweaver-stream-io Show documentation
The IO bundle for Geneweaver.
package org.geneweaver.io.writer;
import java.io.BufferedWriter;
import java.io.PrintStream;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.CountDownLatch;
import java.util.function.Function;
import java.util.stream.Stream;
import org.geneweaver.domain.Entity;
import org.geneweaver.io.DirectSave;
import org.geneweaver.io.Timer;
import org.geneweaver.io.connector.Connector;
import org.geneweaver.io.reader.ReaderException;
import org.geneweaver.io.reader.ReaderFactory;
import org.geneweaver.io.reader.ReaderRequest;
import org.geneweaver.io.reader.StreamReader;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* Builder for bulk exports.
*
* @author gerrim
*
*/
public class ExportBuilder implements AutoCloseable {
/**
* The directory in which to export
*/
private Path dir;
/**
* Iterable of paths from which we will export.
*/
private Iterable inputs;
/**
* If the connectors are different from the default connector.
* A connector must be a org.geneweaver.io.connector.Connector
* or a Function which returns a stream of entities when passed
* an entity.
*/
private Collection> connectors;
/**
* The chunk size if there is none on the command line.
*/
private int defaultChunkSize = 4096;
/**
* Consumer for running the export. By default the export does a
* simple save using the default connector.
*/
@JsonIgnore
private Export exporter = (builder, path) -> defaultExport(path, false);
/**
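* The raw value of the -c command line option (the chunk size as a string),
* or null if it was not supplied, in which case defaultChunkSize is used.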
*/
private String chunkProperty;
/**
* The species passed to the ReaderRequest when a reader is created for an input.
*/
private String species;
/**
* Set to always add the default connector as the first connector when
* making the connector list. If addConnector(..) has not been used,
* this setting does nothing as the default connector will be used anyway;
* however, if connectors have been added, the reader's default connector
* is placed ahead of them.
*/
private boolean alwaysUseDefaultConnector = false;
/**
* If there are multiple files, when calling export a
* parallel exporter will run each file with a separate thread.
*/
private boolean parallelFiles = false;
/**
* Stream for printing messages of each export run.
*/
@JsonIgnore
private PrintStream out = System.out;
private boolean verbose = false;
/**
* Map of writers cached while we write all the files.
*/
@JsonIgnore
private Map, Map> writers = Collections.synchronizedMap(new HashMap<>());
@JsonIgnore
private Map, Map> paths = Collections.synchronizedMap(new HashMap<>());
private Collection errors = new LinkedList<>();
/**
* Creates a builder with the default settings declared on the fields.
* Configure it through the setters before calling export().
*/
public ExportBuilder() {
}
/**
 * Runs the export over all inputs: one thread per input when parallelFiles
 * is set, otherwise sequentially on the calling thread.
 *
 * @throws Exception any exception escaping the chosen export path; it is
 *         recorded in errors before being rethrown
 */
public void export() throws Exception {
    try {
        if (!isParallelFiles()) {
            singleThreadExport();
            return;
        }
        parallelExport();
    } catch (Exception failure) {
        // Keep a record so status() can report it, then let the caller see it too.
        errors.add(failure);
        throw failure;
    }
}
/**
 * Exports each input in turn on the calling thread and prints the message
 * returned by the exporter for each one.
 *
 * @throws Exception the first exception thrown by the exporter; remaining
 *         inputs are not processed
 */
private void singleThreadExport() throws Exception {
    // Process one or more paths into the bulk file.
    for (Path input : inputs) {
        String message = exporter.export(this, input);
        // defaultExport(..) treats a null output stream as legal, so do the same here
        // rather than failing with a NullPointerException after a successful export.
        if (out != null) {
            out.println(message);
        }
    }
}
/**
 * Exports every input on its own thread and blocks until all of them have
 * finished. Failures of individual exports are not thrown from here;
 * exportQuietly(..) records them in errors.
 *
 * @throws InterruptedException if the calling thread is interrupted while
 *         waiting for the exporter threads
 */
private void parallelExport() throws InterruptedException {
    ThreadGroup pool = new ThreadGroup("Exporters");
    // inputs is only an Iterable and does not know its size, so one latch per
    // started thread is collected in a list.
    List<CountDownLatch> latches = new LinkedList<>();
    // Mostly the number of files is around 23, one per chromosome file.
    // 23 long running threads should be reasonably efficient without an
    // executor service; an executor would be the choice for many small tasks.
    for (Path input : inputs) {
        CountDownLatch latch = new CountDownLatch(1);
        latches.add(latch);
        Thread thread = new Thread(pool, () -> exportQuietly(input, latch));
        thread.setName("Export " + input.getFileName());
        thread.start();
    }
    // Each latch is released in exportQuietly(..)'s finally block, so this
    // returns once every thread has completed, successfully or not.
    for (CountDownLatch latch : latches) {
        latch.await();
    }
}
/**
 * Runs the exporter for one input without letting any exception escape;
 * failures are recorded in errors instead. Always counts the latch down so
 * the thread waiting in parallelExport() is released.
 *
 * @param input the file to export
 * @param count latch counted down once this export has finished or failed
 */
private void exportQuietly(Path input, CountDownLatch count) {
    try {
        exporter.export(this, input);
    } catch (Exception e) {
        // This method runs on several exporter threads at once and errors is a
        // plain LinkedList, so concurrent adds must be serialised.
        synchronized (errors) {
            errors.add(e);
        }
    } finally {
        count.countDown();
    }
}
/**
 * Summarises the outcome of the export runs so far.
 *
 * @return "Complete" if no errors were recorded; otherwise the message of
 *         every recorded error, each followed by a newline. As a side effect
 *         the stack trace of each error is printed to the output stream, if
 *         one is set.
 */
public String status() {
    if (errors.isEmpty()) {
        return "Complete";
    }
    StringBuilder message = new StringBuilder();
    for (Throwable err : errors) {
        // A null output stream is tolerated elsewhere in this class; skip the
        // trace rather than fail while reporting a failure.
        if (out != null) {
            err.printStackTrace(out);
        }
        message.append(err.getMessage()).append('\n');
    }
    return message.toString();
}
/**
* Default export: creates the reader for the input, resolves its connectors,
* chains them over the reader's stream and passes every resulting object to
* a DirectSave.
*
* @param input the file to read
* @param append passed through unchanged to DirectSave.save(..)
* @return a summary message containing the input file name, the timer's
*         formatted time and the number of objects counted
* @throws Exception propagated from reader creation or from the save pipeline
*/
protected String defaultExport(Path input, boolean append) throws Exception {
if (isVerbose() && getOut()!=null) {
getOut().println("Input file: "+input);
}
StreamReader reader = createReader(input);
Collection>> conns = getConnnectors(reader);
if (isVerbose() && getOut()!=null) {
// NOTE(review): "Input file" was already printed above under the same
// condition, so verbose output shows it twice - confirm this is intended.
getOut().println("Input file: "+input);
getOut().println("There are "+conns.size()+" connectors");
for (Function> c : conns) {
getOut().println("Connector type: "+c.getClass().getName());
boolean isConnector = (c instanceof Connector);
getOut().println("Conector instance of 'Connector' class: "+isConnector);
}
}
try (DirectSave saver = new DirectSave(getOut(), isVerbose())) {
Timer timer = createTimer();
Stream stream = reader.stream();
// Each connector expands the stream via flatMap, applied in list order.
for (Function> c : conns) {
boolean isConnector = (c instanceof Connector);
// Only in verbose mode is the Connector-specific stream(..) overload used,
// so that it receives the output stream; otherwise the plain apply(..) is used.
if (isVerbose() && isConnector) {
Connector conn = (Connector)c;
stream = stream.flatMap(g->conn.stream(g, null, getOut()));
} else {
stream = stream.flatMap(g->c.apply(g));
}
}
// The save happens as a side effect inside map(..); count() is the terminal
// operation that drives the pipeline and yields the number reported below.
// NOTE(review): Stream.count() may skip intermediate operations when the size
// is known from the source; presumably the flatMap stages prevent that - confirm.
long saved = stream.map(g->saver.save(g, paths, writers, dir, timer, append))
.count();
return "Wrote bulk file(s) for '"+input.getFileName()+"' in "+timer.getFormattedTime()+" parsed "+saved+" objects.";
}
}
/**
* Resolves the connectors to apply to a reader's stream.
* If no connectors have been added, the reader's default connector is the
* only one used. Otherwise the added connectors are used, preceded by the
* reader's default connector when alwaysUseDefaultConnector is set.
*
* @param reader the reader whose default connector may be used
* @return the connectors to apply
*/
private Collection>> getConnnectors(StreamReader reader) {
Collection>> conns = null;
if (this.connectors==null || this.connectors.isEmpty()) {
// Nothing registered: fall back to the reader's own connector.
Function> def = reader.getDefaultConnector();
conns = Arrays.asList(def);
} else {
conns = new LinkedList<>();
if (isAlwaysUseDefaultConnector()) {
conns.add(reader.getDefaultConnector());
}
for (Function, ?> function : this.connectors) {
@SuppressWarnings("unchecked")
// If you add a function which cannot be cast, this unchecked cast is not
// verified here. NOTE(review): presumably the failure then surfaces later,
// when the connector is applied to the stream - confirm.
Function> cast = (Function>)function;
conns.add(cast);
}
}
return conns;
}
/**
 * Creates a reader for the given input using the configured species and
 * sets its chunk size from chunkSize().
 *
 * @param input the file to read
 * @return the reader obtained from ReaderFactory
 * @throws ReaderException declared by this method; see ReaderFactory.getReader(..)
 */
public StreamReader createReader(Path input) throws ReaderException {
    ReaderRequest request = new ReaderRequest(species, input.toFile());
    StreamReader streamReader = ReaderFactory.getReader(request);
    // With direct streams, chunk size does little.
    streamReader.setChunkSize(chunkSize());
    return streamReader;
}
/**
 * Returns the directory in which to export.
 *
 * @return the export directory, or null if setDir(..) has not been called
 */
public Path getDir() {
    return this.dir;
}
/**
* Sets the directory in which to export.
*
* @param dir the directory handed to DirectSave.save(..) by the default export
* @return this builder, for chaining
*/
public ExportBuilder setDir(Path dir) {
this.dir = dir;
return this;
}
/**
 * Returns the paths from which we will export.
 *
 * @return the inputs, or null if none have been set
 */
public Iterable getInputs() {
    return this.inputs;
}
/**
* Sets the paths to export; export() iterates over them.
*
* @param inputs the inputs to set
* @return this builder, for chaining
*/
public ExportBuilder setInputs(Iterable inputs) {
this.inputs = inputs;
return this;
}
/**
* Sets a single path to export, replacing any inputs set before.
*
* @param input the one input to export; stored as a single-element list
* @return this builder, for chaining
*/
public ExportBuilder setInput(Path input) {
this.inputs = Arrays.asList(input);
return this;
}
/**
* Adds a connector to use instead of (or, with alwaysUseDefaultConnector,
* in addition to) the reader's default connector.
*
* @param conn the connector function to add
* @return this builder, for chaining
*/
public ExportBuilder addConnector(Function,?> conn) {
// The connector list is created lazily on first use.
if (this.connectors==null) this.connectors = new LinkedList<>();
this.connectors.add(conn);
return this;
}
/**
 * Returns the chunk size used when no chunk property has been set.
 *
 * @return the defaultChunkSize (4096 unless changed)
 */
public int getDefaultChunkSize() {
    return this.defaultChunkSize;
}
/**
* Sets the chunk size that chunkSize() returns when chunkProperty is null.
*
* @param defaultChunkSize the defaultChunkSize to set
* @return this builder, for chaining
*/
public ExportBuilder setDefaultChunkSize(int defaultChunkSize) {
this.defaultChunkSize = defaultChunkSize;
return this;
}
/**
 * Closes every cached writer. All writers are attempted even if some fail
 * to close, so one bad writer does not leak the rest.
 *
 * @throws Exception the first failure encountered while closing, with any
 *         later failures attached as suppressed exceptions
 */
@Override
public void close() throws Exception {
    Exception failure = null;
    // writers is a Collections.synchronizedMap; iterating one of its views
    // requires holding the map's own lock.
    synchronized (writers) {
        for (Map<?, BufferedWriter> brs : writers.values()) {
            for (BufferedWriter writer : brs.values()) {
                try {
                    writer.close();
                } catch (Exception e) {
                    // Remember the failure but keep closing the remaining writers.
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
        }
    }
    if (failure != null) {
        throw failure;
    }
}
/**
* Returns the map of writers cached while the files are written.
*
* @return the writers; this is the map held by the builder, not a copy
*/
@JsonIgnore
public Map, Map> getWriters() {
return writers;
}
/**
* Returns the paths map that the default export passes to DirectSave.save(..)
* alongside the writers.
*
* @return the paths; this is the map held by the builder, not a copy
*/
@JsonIgnore
public Map, Map> getPaths() {
return paths;
}
/**
 * Returns the exporter that is run for each input.
 *
 * @return the exporter; by default one that calls defaultExport(path, false)
 */
@JsonIgnore
public Export getExporter() {
    return this.exporter;
}
/**
* Replaces the exporter that is run for each input. The default exporter
* calls defaultExport(path, false).
*
* @param exporter the exporter to set
* @return this builder, for chaining
*/
@JsonIgnore
public ExportBuilder setExporter(Export exporter) {
this.exporter = exporter;
return this;
}
/**
 * Creates a new timer whose timed chunk size is set from chunkSize().
 *
 * @return the configured timer
 */
public Timer createTimer() {
    Timer result = new Timer(); // Just used as timer.
    result.setTimedChunkSize(chunkSize());
    return result;
}
/**
 * The effective chunk size: the parsed chunkProperty when one is set,
 * otherwise defaultChunkSize.
 *
 * @return the chunk size
 * @throws NumberFormatException if chunkProperty is set but is not a parsable integer
 */
public int chunkSize() {
    if (chunkProperty == null) {
        return defaultChunkSize;
    }
    return Integer.parseInt(chunkProperty);
}
/**
 * Returns the chunk size as supplied in string form.
 *
 * @return the chunkProperty, or null if it has not been set
 */
public String getChunkProperty() {
    return this.chunkProperty;
}
/**
* Sets the chunk size in string form. chunkSize() parses it with
* Integer.parseInt; when it is null, defaultChunkSize is used instead.
*
* @param chunkProperty the chunkProperty to set
* @return this builder, for chaining
*/
public ExportBuilder setChunkProperty(String chunkProperty) {
this.chunkProperty = chunkProperty;
return this;
}
/**
 * Returns the species used when creating readers.
 *
 * @return the species, or null if it has not been set
 */
public String getSpecies() {
    return this.species;
}
/**
* Sets the species that createReader(..) passes to the ReaderRequest.
*
* @param species the species to set
* @return this builder, for chaining
*/
public ExportBuilder setSpecies(String species) {
this.species = species;
return this;
}
/**
 * Hash over the same fields that equals(..) compares: chunkProperty,
 * defaultChunkSize, dir, inputs and species.
 */
@Override
public int hashCode() {
    Object[] identity = { chunkProperty, defaultChunkSize, dir, inputs, species };
    return Arrays.hashCode(identity);
}
/**
 * Two builders are equal when chunkProperty, defaultChunkSize, dir, inputs
 * and species are all equal; connectors, exporter, output stream and flags
 * are not compared.
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (!(obj instanceof ExportBuilder)) {
        return false;
    }
    ExportBuilder that = (ExportBuilder) obj;
    if (!Objects.equals(chunkProperty, that.chunkProperty)) {
        return false;
    }
    if (defaultChunkSize != that.defaultChunkSize) {
        return false;
    }
    if (!Objects.equals(dir, that.dir)) {
        return false;
    }
    if (!Objects.equals(inputs, that.inputs)) {
        return false;
    }
    return Objects.equals(species, that.species);
}
/**
 * Returns the stream used for printing messages of each export run.
 *
 * @return the output stream; System.out unless replaced via setOut(..)
 */
@JsonIgnore
public PrintStream getOut() {
    return this.out;
}
/**
* Sets the stream used for printing messages of each export run.
*
* @param out the out to set; defaultExport(..) skips its verbose messages when this is null
* @return this builder, for chaining
*/
@JsonIgnore
public ExportBuilder setOut(PrintStream out) {
this.out = out;
return this;
}
/**
 * Whether the reader's default connector is always placed first in the
 * connector list, even when connectors have been added.
 *
 * @return the alwaysUseDefaultConnector flag; false by default
 */
public boolean isAlwaysUseDefaultConnector() {
    return this.alwaysUseDefaultConnector;
}
/**
* Sets whether the reader's default connector is placed ahead of any
* connectors added through addConnector(..). Has no effect when no
* connectors were added, because the default connector is then used anyway.
*
* @param alwaysUseDefaultConnector the alwaysUseDefaultConnector to set
* @return this builder, for chaining
*/
public ExportBuilder setAlwaysUseDefaultConnector(boolean alwaysUseDefaultConnector) {
this.alwaysUseDefaultConnector = alwaysUseDefaultConnector;
return this;
}
/**
 * Whether export() runs each input on a separate thread.
 *
 * @return the parallelFiles flag; false by default
 */
public boolean isParallelFiles() {
    return this.parallelFiles;
}
/**
* Sets whether export() runs each input on a separate thread instead of
* processing the inputs one after another.
*
* @param parallel true to export files in parallel
* @return this builder, for chaining
*/
public ExportBuilder setParallelFiles(boolean parallel) {
this.parallelFiles = parallel;
return this;
}
/**
 * Whether verbose messages are printed during the default export.
 *
 * @return the verbose flag; false by default
 */
public boolean isVerbose() {
    return this.verbose;
}
/**
* Sets whether the default export prints extra messages about the input
* file and its connectors; the flag is also passed to the DirectSave it creates.
*
* @param verbose the verbose to set
* @return this builder, for chaining
*/
public ExportBuilder setVerbose(boolean verbose) {
this.verbose = verbose;
return this;
}
}