All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.tsv.DataLine Maven / Gradle / Ivy

There is a newer version: 4.6.0.0
Show newest version
package org.broadinstitute.hellbender.utils.tsv;

import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.function.Function;

/**
 * Table data-line string array wrapper.
 * 

* This wrapper includes convenience methods to {@link #get get} and {@link #set} values on data-line string array from within * record type conversion methods: * {@link TableReader#createRecord(DataLine) TableReader.record} and {@link TableWriter#composeLine TableWriter.composeLine}. *

*

* Apart for {@link #set set} operations, it includes convenience methods to set column values in order * of appearance when this is known using {@link #append append}. *

*

* There are various method overloads for type conversion to and from primitive type {@code int} and {@code double}. *

*

* You can use {@link #columns()} to obtain the corresponding {@link TableColumnCollection} and query the presence of * and the index of columns. *

*/ public final class DataLine { /** * Constant to indicate that a data-line has no line number assigned. */ public static final long NO_LINE_NUMBER = -1; /** * The line number for this data-line. */ private final long lineNumber; /** * Holds the values for the data line in construction. */ private final String[] values; /** * Next appending index used by {@link #append append} methods. */ private int nextIndex = 0; /** * Reference to the enclosing table's columns. */ private final TableColumnCollection columns; /** * Reference to the format error exception factory. */ private Function formatErrorFactory; /** * String accepted as a "false" as parsed from a table file cell value. */ private static final String FALSE_STRING = Boolean.toString(false); /** * String accepted as a "true" as parsed from a table file cell value. */ private static final String TRUE_STRING = Boolean.toString(true); /** * Creates a new data-line instance. *

* The value array passed is not copied and will be used directly to store the data-line values. *

*

* Therefore later modification of its content after creating this data-line may result in * breaking the consistency of this instance. *

* * @param lineNumber the line number for this data-line, {@link #NO_LINE_NUMBER} when this is unspecified. * @param values the value array. * @param columns the columns of the table that will enclose this data-line instance. * @param formatErrorFactory to be used when there is a column formatting error based on the requested data-type. * @throws IllegalArgumentException if {@code columns} or {@code formatErrorFactory} are {@code null}. */ DataLine(final long lineNumber, final String[] values, final TableColumnCollection columns, final Function formatErrorFactory) { this.lineNumber = lineNumber; this.values = Utils.nonNull(values, "the value array cannot be null"); this.columns = Utils.nonNull(columns, "the columns cannot be null"); this.formatErrorFactory = Utils.nonNull(formatErrorFactory, "the format error factory cannot be null"); if (values.length != columns.columnCount()) { throw new IllegalArgumentException("mismatching value length and column count"); } } /** * Creates a new data-line instance. * * @param lineNumber the line number for this data-line, {@link #NO_LINE_NUMBER} when this is unspecified. * @param columns the columns of the table that will enclose this data-line instance. * @param formatErrorFactory to be used when there is a column formatting error based on the requested data-type. * @throws IllegalArgumentException if {@code columns} or {@code formatErrorFactory} are {@code null}. */ public DataLine(final long lineNumber, final TableColumnCollection columns, final Function formatErrorFactory) { this(lineNumber, new String[Utils.nonNull(columns, "the columns cannot be null").columnCount()], columns, formatErrorFactory); } /** * Creates a new data-line instance with no line-number. * * @param columns the columns of the table that will enclose this data-line instance. * @param formatErrorFactory to be used when there is a column formatting error based on the requested data-type. * @throws IllegalArgumentException if {@code columns} or {@code formatErrorFactory} are {@code null}. */ DataLine(final String[] fields, final TableColumnCollection columns, final Function formatErrorFactory) { this(NO_LINE_NUMBER, fields, columns, formatErrorFactory); } /** * Creates a new data-line instance with no line-number. * * @param columns the columns of the table that will enclose this data-line instance. * @param formatErrorFactory to be used when there is a column formatting error based on the requested data-type. * @throws IllegalArgumentException if {@code columns} or {@code formatErrorFactory} are {@code null}. */ public DataLine(final TableColumnCollection columns, final Function formatErrorFactory) { this(NO_LINE_NUMBER, columns, formatErrorFactory); } /** * Returns the column collection for this data-line instance. * * @return never {@code null}. */ public TableColumnCollection columns() { return columns; } /** * Returns a reference to the data-line values after making sure that they are all defined. *

* The value returned is a direct reference to the array and any modification of its contents * may result in invalidating the state of this {@link DataLine} object. *

* * @return never {@code null} and with no {@code null} elements. */ String[] unpack() { for (int i = 0; i < values.length; i++) { if (values[i] == null) { throw new IllegalStateException(String.format("some data line value remains undefined: e.g. column '%s' index %d", columns.nameAt(i), i)); } } return values; } /** * Sets the string value in the data-line that correspond to a column by its name. * * @param name the column name. * @param value the new value. * @return reference to this data-line. * @throws IllegalArgumentException if {@code name} is {@code null} or it does not match an actual column name. */ public DataLine set(final String name, final String value) { return set(columnIndex(name), value); } /** * Sets the boolean value in the data-line that correspond to a column by its name. * * @param name the column name. * @param value the new value. * @return reference to this data-line. * @throws IllegalArgumentException if {@code name} is {@code null} or it does not match an actual column name. */ public DataLine set(final String name, final boolean value) { return set(columnIndex(name), Boolean.toString(value)); } /** * Sets the int value in the data-line that correspond to a column by its name. * * @param name the column name. * @param value the new value. * @return reference to this data-line. * @throws IllegalArgumentException if {@code name} is {@code null} or it does not match an actual column name. */ public DataLine set(final String name, final int value) { return set(name, Integer.toString(value)); } /** * Sets the long value in the data-line that correspond to a column by its name. * * @param name the column name. * @param value the new value. * @return reference to this data-line. * @throws IllegalArgumentException if {@code name} is {@code null} or it does not match an actual column name. */ public DataLine set(final String name, final long value) { return set(name, Long.toString(value)); } /** * Sets the double value in the data-line that correspond to a column by its name. * * @param name the column name. * @param value the new value. * @return reference to this data-line. * @throws IllegalArgumentException if {@code name} is {@code null} or it does not match an actual column name. */ public DataLine set(final String name, final double value) { return set(columnIndex(name), value); } /** * Sets the double value in the data-line that correspond to a column by its name. * * @param name the column name. * @param value the new value. * @param numDecimals the number of decimals to print * @return reference to this data-line. * @throws IllegalArgumentException if {@code name} is {@code null} or it does not match an actual column name. */ public DataLine set(final String name, final double value, final int numDecimals) { return set(columnIndex(name), value, numDecimals); } /** * Sets the string value of a column given its index. * * @param index the target column index. * @param value the new value for that column. * @return reference to this data-line. * @throws IllegalArgumentException if {@code index} is not a valid column index. */ public DataLine set(final int index, final String value) { Utils.validIndex(index, values.length); if (index == 0 && value != null) { if (value.startsWith(TableUtils.COMMENT_PREFIX)) { throw new IllegalArgumentException("the value of the first column cannot start with the comment prefix: " + TableUtils.COMMENT_PREFIX); } } values[index] = value; return this; } /** * Sets the int value of a column given its index. * * @param index the target column index. * @param value the new value for that column. * @return reference to this data-line. * @throws IllegalArgumentException if {@code index} is not a valid column index. */ public DataLine set(final int index, final int value) { return set(index, Integer.toString(value)); } /** * Sets the long value of a column given its index. * * @param index the target column index. * @param value the new value for that column. * @return reference to this data-line. * @throws IllegalArgumentException if {@code index} is not a valid column index. */ public DataLine set(final int index, final long value) { return set(index, Long.toString(value)); } /** * Sets the value for a column to a double given its index. * * @param index the target column index. * @param value the new value for that column. * @return reference to this data-line. * @throws IllegalArgumentException if {@code index} is not a valid column index. */ public DataLine set(final int index, final double value) { final long rounded = Math.round(value); if (rounded == value) { set(index, Long.toString(rounded)); } else { set(index, Double.toString(value)); } return set(index, Double.toString(value)); } /** * Sets the value for a column to a double given its index. * * @param index the target column index. * @param value the new value for that column. * @param numDecimals the number of decimal places to print * @return reference to this data-line. * @throws IllegalArgumentException if {@code index} is not a valid column index. */ public DataLine set(final int index, final double value, final int numDecimals) { return set(index, MathUtils.roundToNDecimalPlaces(value, numDecimals)); } /** * Returns the string value in a column by its index. * * @param index target column index. * @return never {@code null}. * @throws IllegalArgumentException if {@code index} is not a valid column index. * @throws IllegalStateException if the value for that column is undefined ({@code null}). */ public String get(final int index) { Utils.validIndex(index, values.length); Utils.validate(values[index] != null, () -> "requested column value at " + index + " has not been initialized yet"); return values[index]; } /** * Returns the int value in a column by its index. * * @param index the target column index. * @return any int value. * @throws IllegalArgumentException if {@code index} is not valid. * @throws IllegalStateException if {@code index} has not been initialized and contains a {@code null}. * @throws RuntimeException if the value at that target column cannot be transform into an integer. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public int getInt(final int index) { try { return Integer.parseInt(get(index)); } catch (final NumberFormatException ex) { throw formatErrorFactory.apply(String.format("expected int value for column %s but found %s", columns.nameAt(index), get(index))); } } /** * Returns the long value in a column by its index. * * @param index the target column index. * @return any long value. * @throws IllegalArgumentException if {@code index} is not valid. * @throws IllegalStateException if {@code index} has not been initialized and contains a {@code null}. * @throws RuntimeException if the value at that target column cannot be transform into a long. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public long getLong(final int index) { try { return Long.parseLong(get(index)); } catch (final NumberFormatException ex) { throw formatErrorFactory.apply(String.format("expected long value for column %s but found %s", columns.nameAt(index), get(index))); } } /** * Returns the boolean value in a column by its index. * * @param index the target column index. * @return any boolean value. * @throws IllegalArgumentException if {@code index} is not valid. * @throws IllegalStateException if {@code index} has not been initialized and contains a {@code null}. * @throws RuntimeException if the value at that target column cannot be transform into a boolean. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public boolean getBoolean(final int index) { final String string = get(index); if (string.equals(TRUE_STRING)) { return true; } else if (string.equals(FALSE_STRING)) { return false; } else { throw formatErrorFactory.apply(String.format("Boolean value must be '%s' or '%s' (case sensitive) " + "for column %s but found %s", TRUE_STRING, FALSE_STRING, columns.nameAt(index), get(index))); } } /** * Returns the double value in a column by its index. * * @param index the target column index. * @return any double value. * @throws IllegalArgumentException if {@code index} is not valid. * @throws IllegalStateException if {@code index} has not been initialized and contains a {@code null}. * @throws RuntimeException if the value at that target column cannot be transform into a double. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public double getDouble(final int index) { return getDouble(index, null); } /** * Returns the double value in a column by its index. * * @param index the target column index. * @param formatErrorFactory format error factory. * @return any double value. * @throws IllegalArgumentException if {@code index} is not valid. * @throws IllegalStateException if {@code index} has not been initialized and contains a {@code null}. * @throws RuntimeException if the value at that target column cannot be transform into a double. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public double getDouble(final int index, final Function formatErrorFactory) { try { return Double.parseDouble(get(index)); } catch (final NumberFormatException ex) { if (formatErrorFactory != null) { throw formatErrorFactory.apply(String.format("expected int value for column %s but found %s", columns.nameAt(index), get(index))); } else { throw this.formatErrorFactory.apply(String.format("expected int value for column %s but found %s", columns.nameAt(index), get(index))); } } } /** * Returns the string value in a column by its index. * * @param columnName the target column name. * @return never {@code null}. * @throws IllegalArgumentException if {@code columnName} is {@code null} or an unknown column name. * @throws IllegalStateException if that column values is undefined ({@code null}). */ public String get(final String columnName) { final int index = columnIndex(columnName); Utils.validate(values[index] != null, () -> String.format("the value for column '%s' is undefined", columnName)); return values[index]; } /** * Returns the string value in a column by its index. If column name does not exist, returns the default value. * * @param columnName the target column name. * @param defaultValue default value to use if columnName not found. * @return {@code null} iff {@code defaultValue == null} and there is not such a column with name {@code columnName}. */ public String get(final String columnName, final String defaultValue) { final int index = columns.indexOf(columnName); if (index < 0) { return defaultValue; } else { return values[index]; } } /** * Returns the index of a column by its name or fails if invalid or unknown. * * @param columnName the column name. * @return a valid index from 0 to {@link #columns}.size() - 1 * @throws IllegalArgumentException if {@code columnName} is {@code null} or it not a known column name. */ private int columnIndex(final String columnName) { final int index = columns.indexOf(columnName); if (index < 0) { throw new IllegalArgumentException("there is no such column: " + columnName); } return index; } /** * Returns the int value in a column by its index. * * @param columnName the target column name. * @return any int value. * @throws IllegalArgumentException if {@code columnName} is {@code null} or an unknown column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into an integer. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public int getInt(final String columnName) { return getInt(columnIndex(columnName)); } /** * Returns the long value in a column by its index. * * @param columnName the target column name. * @return any long value. * @throws IllegalArgumentException if {@code columnName} is {@code null} or an unknown column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a long. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public long getLong(final String columnName) { return getLong(columnIndex(columnName)); } /** * Returns the long value in a column by its name expressed as * an enum constant. * * @param column the target column name. * @return any long value. * @throws IllegalArgumentException if {@code column} is {@code null} or an unknown column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a long. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public long getLong(final Enum column) { return getLong(Utils.nonNull(column).toString()); } /** * Returns the boolean value in a column by its index. * * @param columnName the target column name. * @return any boolean value. * @throws IllegalArgumentException if {@code columnName} is {@code null} or an unknown column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a boolean. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public boolean getBoolean(final String columnName) { return getBoolean(columnIndex(columnName)); } /** * Returns the double value in a column by its index. * * @param columnName the target column name. * @return any double value. * @throws IllegalArgumentException if {@code columnName} is {@code null} or an unknown column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a double. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public double getDouble(final String columnName) { return getDouble(columnIndex(columnName)); } /** * Returns the string value of a column by its name. *

* The target column name is resolved as the string returned by {@link Object#toString toString} applied * to the input enum. *

* * @param column the enum value that provides the name of the column. * @return never {@code null}. * @throws IllegalArgumentException if {@code column} is {@code null} or it does not point to a * known column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a double. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public String get(final Enum column) { return get(Utils.nonNull(column).toString()); } /** * Returns the int value of a column by its name. *

* The target column name is resolved as the string returned by {@link Object#toString toString} applied * to the input enum. *

* * @param column the enum value that provides the name of the column. * @return any int value. * @throws IllegalArgumentException if {@code column} is {@code null} or it does not point to a * known column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a double. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public int getInt(final Enum column) { return getInt(Utils.nonNull(column).toString()); } /** * Returns the double value of a column by its name. *

* The target column name is resolved as the string returned by {@link Object#toString toString} applied * to the input enum. *

* * @param column the enum value that provides the name of the column. * @return any double value. * @throws IllegalArgumentException if {@code column} is {@code null} or it does not point to a * known column name. * @throws IllegalStateException if that column values is undefined ({@code null}). * @throws RuntimeException if the value at that target column cannot be transform into a double. * The exact class of the exception will depend on the exception factory provided when creating this * {@link DataLine}. */ public double getDouble(final Enum column) { return getDouble(Utils.nonNull(column).toString()); } /** * Sets the next string value in the data-line that correspond to a column. *

* The next column index advances so that the following append operations will change the value of * the following columns and so forth. *

* * @param value the new value. * @return reference to this data-line. * @throws IllegalStateException if the next column to set is beyond the last column. */ public DataLine append(final String value) { Utils.validate(nextIndex < values.length, "gone beyond of the end of the data-line"); values[nextIndex++] = value; return this; } /** * Sets the next int value in the data-line that correspond to a column. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* * @param value the new value. * @return reference to this data-line. * @throws IllegalStateException if the next column to set is beyond the last column. */ public DataLine append(final int value) { return append(Integer.toString(value)); } /** * Sets the next long value in the data-line that correspond to a column. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* * @param value the new value. * @return reference to this data-line. * @throws IllegalStateException if the next column to set is beyond the last column. */ public DataLine append(final long value) { return append(Long.toString(value)); } /** * Sets the next long values in the data-line that correspond to next few columns. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* * @param values the new values. * @return reference to this data-line. * @throws IllegalStateException if this operation goes beyond the last column index. */ public DataLine append(final long... values) { for (final long l : Utils.nonNull(values, "the values cannot be null")) { append(l); } return this; } /** * Sets the next double value in the data-line that correspond to a column. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* * @param value the new value. * @return reference to this data-line. * @throws IllegalStateException if the next column to set is beyond the last column. */ public DataLine append(final double value) { final long rounded = Math.round(value); if (rounded == value) { return append(Long.toString(rounded)); } else { return append(Double.toString(value)); } } /** * Sets the next int values in the data-line that correspond to next few columns. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* * @param values the new values. * @return reference to this data-line. * @throws IllegalStateException if this operation goes beyond the last column index. */ public DataLine append(final int... values) { for (final int i : Utils.nonNull(values, "the values cannot be null")) { append(i); } return this; } /** * Sets the next string values in the data-line that correspond to next few columns. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* * @param values the new values. * @return reference to this data-line. * @throws IllegalStateException if this operation goes beyond the last column index. */ public DataLine append(final String... values) { for (final String v : Utils.nonNull(values, "the values cannot be null")) { append(v); } return this; } /** * Sets the next double values in the data-line that correspond to next few columns. *

* The next column index advances so that the following {@link #append append} will change the value of * the following column and so forth. *

* @param values the new values. * @return reference to this data-line. * @throws IllegalStateException if this operation goes beyond the last column index. */ public DataLine append(final double... values) { for (final double d : Utils.nonNull(values, "the values cannot be null")) { append(d); } return this; } /** * Sets all the data-line values at once. * @param values the new values. * @throws IllegalArgumentException if {@code values} is {@code null}, its length does not match * this data-line column count, or the first element starts like a comment line. * @return a reference to this data-line. */ public DataLine setAll(final String... values) { Utils.nonNull(values,"the input values cannot be null"); if (values.length != this.values.length) { throw new IllegalArgumentException("the input value length must be equal to the total number of columns "); } // No index error here this.values.length is guaranteed to be greater than 0, as 0 columns is not allowed. if (values[0] != null && values[0].startsWith(TableUtils.COMMENT_PREFIX)) { throw new IllegalArgumentException("first column value cannot start as a comment: " + TableUtils.COMMENT_PREFIX); } System.arraycopy(values,0,this.values,0,values.length); return this; } /** * Changes the index of the next value to set using {@link #append append} operations. * * @param index the new index. * @return a reference to this data-line. * @throws IllegalArgumentException if {@code index} is greater than the number of columns */ public DataLine seek(final int index) { // +1 as is valid to seek to the position after the last value. nextIndex = Utils.validIndex(index, values.length + 1); return this; } /** * Changes the index of the next value to set using {@link #append append} operations. * * @param columnName the name of the column to seek to. * @return a reference to this data-line. * @throws IllegalArgumentException if {@code columnName} is {@code null} or an unknown column name. */ public DataLine seek(final String columnName) { nextIndex = columnIndex(columnName); return this; } /** * Changes the index of the next value to set using {@link #append append} operations. *

* The input enum's string conversion using {@link Object#toString toString} determines the name * of the target column. *

* @param column the enum value that makes reference to the target column. * @throws IllegalArgumentException if {@code column} is {@code null} or does not make reference to * a known column. * */ public DataLine seek(final Enum column) { nextIndex = columnIndex(Utils.nonNull(column).toString()); return this; } /** * Returns the current values in a string array. *

* The returned array is a copy that can be modified without changing the state of the data line. *

* @return never {@code null}, but it can contain {@code null}s. */ public String[] toArray() { return values.clone(); } /** * Returns the line number for this data-line. *

{@link #NO_LINE_NUMBER}

must be used to mark data-lines that do not have * a line number assigned, e.g. because the actually aren't part of a file. * @return any long value. */ public long getLineNumber() { return lineNumber; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy