All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.tsv.TableWriter Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.tsv;

import com.opencsv.CSVWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.*;

/**
 * Class to write tab separated value files.
 * 

* The column (and they names) are passed in the constructor parameter along the output {@link Path path} * or {@link Writer writer}. *

*

* Extending classes must indicate how we can transcribe row record or type {@link R} to the corresponding * record data-line in the output by overriding {@link #composeLine(R,DataLine)}. *

*

* Example: *

 *         public class Person {
 *             public final String name;
 *             public final int age;
 *             public final double netWorth;
 *         }
 *
 *         public class PeopleTableWriter extends TableWriter<Person> {
 *
 *             public MyRecordWriter(final File file) {
 *                 super(file, new TableColumnCollection("name","age","net.worth"));
 *             }
 *
 *             @Override
 *             protected void dataLine(final Person person, final DataLine dataLine) {
 *                  dataLine.setAll(person.name, "" + person.age, "" + person.netWorth);
 *             }
 *         }
 *     
*

*

* You must use the {@link DataLine} instance passed and no other. *

*

* Instead of passing all the values as converted string in column order you may opt to use {@link DataLine#set} * method family to set values one by one using the column index or column name like so: *

*

* Example (using the column index): *

 *          @Override
 *          protected void composeLine(final Person person, final DataLine dataLine) {
 *              dataLine
 *                  .set(0,person.name)
 *                  .set(1,person.age)
 *                  .set(2,person.netWorth);
 *          }
 *      
*

*

* Example (using column names): *

 *          @Override
 *          protected void composeLine(final Person person, final DataLine dataLine) {
 *              dataLine
 *                  .set("name",person.name)
 *                  .set("age",person.age)
 *                  .set("net.worth",person.netWorth);
 *          }
 * 
* Notice that you don't need to explicitly convert neither the age nor the net-worth into a * string thanks to {@link DataLine#set set} various overloads. *

*

* Alternatively, if you know the column order, that should quite often the case, you can avoid * indexing all together using {@link DataLine#append append} operations instead: *

 *         @Override
 *          protected void composeLine(final Person person, final DataLine dataLine) {
 *              dataLine
 *                  .append(person.name)
 *                  .append(person.age)
 *                  .append(person.netWorth);
 *          }
 *     
*

*

* At any time the implementation can query the correspondence between column names and position within the data-line * by querying the {@link TableColumnCollection} object directly that can be obtained from the dataLine's {@link #columns} field. *

*

* Example (using column names): *

 *          @Override
 *          protected void composeLine(final Person person, final DataLine dataLine) {
 *              dataLine
 *                .set("name",person.name)
 *                .set("age",person.age);
 *
 *              if (dataLine.columns().contains("net.worth"))
 *                dataLine.set("net.worth",person.netWorth);
 *          }
 * 
*

* * @param the row record type. * @author Valentin Ruano-Rubio <[email protected]> */ public abstract class TableWriter implements Closeable { private long lineNumber; /** * Csv writer use to do the actual writing. */ private final CSVWriter writer; /** * The table column names. */ private final TableColumnCollection columns; /** * Whether the header column name line has been written or not. */ private boolean headerWritten = false; public static final String METADATA_TAG = ""; /** * Creates a new table writer given the file and column names. * * @param path the destination path. * @param tableColumns the table column names. * @throws IllegalArgumentException if either {@code file} or {@code tableColumns} are {@code null}. * @throws IOException if one was raised when opening the the destination file for writing. */ public TableWriter(final Path path, final TableColumnCollection tableColumns) throws IOException { this( new OutputStreamWriter( Files.newOutputStream(Utils.nonNull(path, "The path cannot be null."))), tableColumns); } /** * Creates a new table writer given the destination writer and column names. * * @param writer the destination writer. * @param columns the table column names. * @throws IllegalArgumentException if either {@code writer} or {@code columns} are {@code null}. * @throws IOException if one was raised when opening the the destination file for writing. */ public TableWriter(final Writer writer, final TableColumnCollection columns) throws IOException { this.columns = Utils.nonNull(columns, "The columns cannot be null."); this.writer = new CSVWriter(Utils.nonNull(writer, "the input writer cannot be null"), TableUtils.COLUMN_SEPARATOR, TableUtils.QUOTE_CHARACTER, TableUtils.ESCAPE_CHARACTER); } /** * Writes a comment into the output. *

* This can be invoked at any time; comment lines can be present anywhere in the file. *

*

* Comments written before any record, will be output *

* * @param comment the comment to write out. * @throws IllegalArgumentException if {@code comment} is {@code null}. * @throws IOException if any was raised by this operation. */ public final void writeComment(final String comment) throws IOException { Utils.nonNull(comment, "The comment cannot be null."); writer.writeNext(new String[]{TableUtils.COMMENT_PREFIX + comment}, false); lineNumber++; } // write a comment line of the form #key=value public final void writeMetadata(final String key, final String value) throws IOException { Utils.nonNull(key); Utils.nonNull(value); Utils.validateArg(!headerWritten, "Metadata must precede the header."); writeComment(METADATA_TAG + key + "=" + value); } /** * Writes a new record. * * @param record the record to write. * @throws IOException if it was raised when writing the record. * @throws ClassCastException if {@code record} is of the correct type * for this writer. * @throws IllegalArgumentException if {@code record} is {@code null} or it is not a valid record * as per the implementation of this writer (see {@link #composeLine}). */ public void writeRecord(final R record) throws IOException { Utils.nonNull(record, "The record cannot be null."); writeHeaderIfApplies(); final DataLine dataLine = new DataLine(lineNumber + 1, columns,IllegalArgumentException::new); composeLine(record,dataLine); writer.writeNext(dataLine.unpack(), false); lineNumber++; } /** * Write all the records in a {@link Iterable}. *

* Records are written in the order they appear in the input {@link Iterable}. *

* * @param records to write. * @throws IOException if any raised when writing any of the records. * @throws ClassCastException if {@code record} is of the correct type * for this writer. * @throws IllegalArgumentException if {@code records} is {@code null} or it contains * some values that would cause such an exception when {@link #writeRecord} is call on * that value. Previous record in the iterable would have been already written by then. */ public final void writeAllRecords(final Iterable records) throws IOException { Utils.nonNull(records, "the record iterable cannot be null"); for (final R record : records) { writeRecord(record); } } @Override public final void close() throws IOException { writeHeaderIfApplies(); writer.close(); } /** * Pushes in-memory buffered content to the output stream. * @throws IOException */ public final void flush() throws IOException { writeHeaderIfApplies(); writer.flush(); } /** * Writes the header if it has not been written already. *

* The header is written automatically before the first record is written or when the writer is closed * and no record was written. *

*

* Comments written using {@link #writeComment} before any record will precede the header * unless you invoke your method first. *

*

* Once the header line has been written, invoking this method does not have any effect. *

* * @throws IOException if any raised when writing into the destination writer. */ public void writeHeaderIfApplies() throws IOException { if (!headerWritten) { writer.writeNext(columns.names().toArray(new String[columns.columnCount()]), false); lineNumber++; } headerWritten = true; } /** * Composes the data-line to write into the output to represent a given record *

* Also the first element cannot contain the {@link TableUtils#COMMENT_PREFIX comment prefix}. * If that is a genuine valid value for the first column you shall consider to re-order the columns or * change the encoding of the first column to avoid this issue. *

*

* Both inputs, {@code record} and {@code dataLine} are guaranteed not to be {@code null}s. *

* * @param record the record to write into the data-line. * @param dataLine the destination data-line object. * @return never {@code null}. * @throws ClassCastException if {@code record} is of the correct type * for this writer. * @throws IllegalArgumentException if there is some conversion issue that does * not allow the current write to generate a valid string array to encode the record. */ protected abstract void composeLine(final R record, final DataLine dataLine); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy