// org.broadinstitute.hellbender.utils.tsv.SimpleXSVWriter (Maven / Gradle / Ivy)
package org.broadinstitute.hellbender.utils.tsv;
import com.opencsv.CSVWriter;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.Utils;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
/**
 * A simple TSV/CSV/XSV writer with support for writing in the cloud with configurable delimiter.
 *
 * The expected use case for this class is that first {@link #setHeaderLine} is called with a list of the column names
 * which will be used to determine the number of columns per line as well as how the header is indexed. Then in order to
 * construct a new line call {@link #getNewLineBuilder} to get a line builder for each line, which then has convenient
 * methods for individually assigning column values based on the header line etc. Once a line is finished being mutated
 * one simply needs to call write() on the line to validate and finalize the line.
 *
 * Header lines are encoded in the same format as each row, a single row of delimited column titles as the first row in the table.
 *
 * Note: this class is intended for creating XSV files with loosely defined input types. If there exists a well defined object
 * that summarizes your table data points then consider using {@link TableWriter}.
 */
public class SimpleXSVWriter implements Closeable {
    // Number of columns every body line must have; fixed by setHeaderLine().
    private int expectedNumColumns;
    // Column name -> zero-based column index; null until the header has been set.
    private Map<String, Integer> headerMap = null;
    private final CSVWriter outputWriter;

    // The current incomplete line in the writer.
    private LineBuilder currentLineBuilder = null;

    /**
     * Creates a new table writer that writes UTF-8 encoded text to the given path.
     *
     * @param path         the destination path. This could be a cloud uri (ex. gs://...)
     * @param separator    separator to use for the XSV file
     * @throws IOException if one was raised when opening the destination file for writing.
     */
    public SimpleXSVWriter(final Path path, final char separator) throws IOException {
        this(openWriter(path, separator), separator);
    }

    /**
     * Creates a new table writer given an initialized writer.
     *
     * @param writer    the destination writer.
     * @param separator separator to use for the XSV file; a newline is rejected through {@link Utils#validate}
     */
    public SimpleXSVWriter(final Writer writer, final char separator) {
        validateSeparator(separator);
        outputWriter = new CSVWriter(writer, separator);
    }

    // Checks the arguments and only then opens the destination, so that an invalid separator
    // never leaves an already-opened output stream behind.
    private static Writer openWriter(final Path path, final char separator) throws IOException {
        Utils.nonNull(path, "The path cannot be null.");
        validateSeparator(separator);
        // Explicit charset: the single-argument OutputStreamWriter constructor uses the platform default.
        return new OutputStreamWriter(Files.newOutputStream(path), StandardCharsets.UTF_8);
    }

    // A newline separator would make columns indistinguishable from rows.
    private static void validateSeparator(final char separator) {
        Utils.validate(separator != '\n', "Column separator cannot be a newline character");
    }

    /**
     * Provides a header line to the XSV output file. Note that this will throw an exception if all header lines
     * are not unique as it attempts to create an index for the provided header lines for convenience when building
     * rows of the XSV.
     *
     * The header is fully validated before anything is written, so a rejected header leaves both the output and
     * this writer untouched and a corrected header may be supplied afterwards.
     *
     * NOTE: This can only be set once, XSV output files are expected to only have a single row as header.
     *
     * @param columns Ordered list of header lines to be built into the XSV
     * @throws GATKException if the header has already been set, or if a column name occurs more than once
     */
    public void setHeaderLine(List<String> columns) {
        if (headerMap != null) {
            throw new GATKException("Cannot modify header line once set");
        }
        Utils.nonNull(columns, "The header columns cannot be null.");

        // Build the name -> index mapping in a local map so a validation failure cannot leave
        // a half-populated index (or a bad header row in the output) behind.
        final Map<String, Integer> columnIndex = new HashMap<>();
        int position = 0;
        for (final String column : columns) {
            Utils.nonNull(column, "Provided header had null column at position: " + position);
            if (columnIndex.putIfAbsent(column, position) != null) {
                throw new GATKException("Column names must be unique, but found a duplicate name: " + column);
            }
            position++;
        }

        // Header is valid: emit it and commit the new state.
        outputWriter.writeNext(columns.toArray(new String[0]), false);
        expectedNumColumns = columns.size();
        headerMap = columnIndex;
    }

    // Emits a finished body line and clears the pending-builder slot.
    private void writeLine(String[] line) {
        outputWriter.writeNext(line, false);
        currentLineBuilder = null;
    }

    /**
     * Builds a new LineBuilder and writes out the previous line if it is still pending.
     *
     * @return a blank LineBuilder to allow for defining the next line
     * @throws GATKException if the header line has not been set yet
     */
    public LineBuilder getNewLineBuilder() {
        if (headerMap == null) {
            throw new GATKException("Cannot construct line without first setting the header line");
        }
        if (currentLineBuilder != null) {
            // Flush the line the caller left unfinished; this validates it as well.
            currentLineBuilder.write();
        }
        currentLineBuilder = new LineBuilder(expectedNumColumns);
        return currentLineBuilder;
    }

    /**
     * @param column header line to get index for
     * @return zero based index corresponding to that header string, throws an exception if the headerline doesn't exist
     */
    public Integer getIndexForColumn(String column) {
        Utils.nonNull(headerMap, "Cannot request column index if the header has not been specified");
        final Integer index = headerMap.get(column);
        Utils.nonNull(index, "Requested column " + column + " does not exist in the provided header");
        return index;
    }

    /**
     * Writes out the pending line, if there is one, and closes the underlying writer.
     *
     * The underlying writer is closed even when the pending line fails validation.
     *
     * @throws IOException if closing the underlying writer fails
     */
    @Override
    public void close() throws IOException {
        try {
            if (currentLineBuilder != null) {
                currentLineBuilder.write();
            }
        } finally {
            outputWriter.close();
        }
    }

    /**
     * Helper to allow for incremental construction of a body line using either indexes or column headings
     *
     * Calling write() will cause the line to be written out into the underlying CSV writer in its current state. Doing
     * so will result in a validation call where an exception will be thrown if any columns of the current line have
     * not been defined. fill() can be used to provide a default value for undefined columns.
     *
     * A line can be written only once; after that it can neither be altered nor written again.
     */
    public class LineBuilder {
        // One slot per header column; null marks a column that has not been assigned yet.
        final String[] lineToBuild;
        // Set once this line has been handed to the underlying writer.
        boolean hasBuilt = false;

        LineBuilder(int lineLength) {
            lineToBuild = new String[lineLength];
        }

        /**
         * @param row complete line corresponding to this row of the tsv
         * @return this builder, for chaining
         */
        public LineBuilder setRow(final String[] row) {
            checkAlterationAfterWrite();
            Utils.validate(row.length == lineToBuild.length, "Provided line must have the correct number of columns");
            System.arraycopy(row, 0, lineToBuild, 0, row.length);
            return this;
        }

        /**
         * @param row complete line corresponding to this row of the tsv
         * @return this builder, for chaining
         */
        public LineBuilder setRow(final List<String> row) {
            checkAlterationAfterWrite();
            Utils.validate(row.size() == lineToBuild.length, "Provided line must have the correct number of columns");
            int i = 0;
            for (final String value : row) {
                lineToBuild[i++] = value;
            }
            return this;
        }

        /**
         * @param index Column index to be set
         * @param value Value to be placed into the line
         * @return this builder, for chaining
         */
        public LineBuilder setColumn(final int index, final String value) {
            checkAlterationAfterWrite();
            lineToBuild[index] = value;
            return this;
        }

        /**
         * @param heading Column heading to be set
         * @param value Value to be placed into the line
         * @return this builder, for chaining
         */
        public LineBuilder setColumn(final String heading, final String value) {
            final int index = getIndexForColumn(heading);
            return setColumn(index, value);
        }

        /**
         * Fills in every empty column of the pending line with the provided value
         *
         * @param filling value to place into every column that has not been assigned yet
         * @return this builder, for chaining
         */
        public LineBuilder fill(final String filling) {
            checkAlterationAfterWrite();
            for (int i = 0; i < lineToBuild.length; i++) {
                if (lineToBuild[i] == null) {
                    lineToBuild[i] = filling;
                }
            }
            return this;
        }

        /**
         * Constructs the line and writes it out to the output.
         *
         * Validation fails if the line was already written or if any column is still unassigned.
         */
        public void write() {
            // A second write would duplicate the row and clear the writer's pending-builder slot,
            // which may by then refer to a newer line.
            Utils.validate(!hasBuilt, "Cannot write out a CSV line that has already been written");
            Utils.validate(Arrays.stream(lineToBuild).noneMatch(Objects::isNull), "Attempted to construct an incomplete line, make sure all columns are filled");
            writeLine(lineToBuild);
            hasBuilt = true;
        }

        // Throw an exception if we try to alter an already written out line
        private void checkAlterationAfterWrite() {
            Utils.validate(!hasBuilt, "Cannot make alterations to an already written out CSV line");
        }
    }
}