All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.codecs.table.TableCodec Maven / Gradle / Ivy

There is a newer version: 4.6.0.0
Show newest version
package org.broadinstitute.hellbender.utils.codecs.table;

import htsjdk.tribble.AsciiFeatureCodec;
import htsjdk.tribble.readers.LineIterator;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Reads tab deliminated tabular text files
 *
 * 

*

    *
  • Header: must begin with line HEADER or track (for IGV), followed by any number of column names, * separated by whitespace.
  • *
  • Header: Custom header delimiters can be provided, with a null header line being interpreted as having a non-delimeted * header which consists of one line.
  • *
  • Comment lines starting with # are ignored
  • *
  • Each non-header and non-comment line is split into parts by whitespace, * and these parts are assigned as a map to their corresponding column name in the header. * Note that the first element (corresponding to the HEADER column) must be a valid genome loc * such as 1, 1:1 or 1:1-10, which is the position of the Table element on the genome. TableCodec * requires that there be one value for each column in the header, and no more, on all lines.
  • *
*

* *

* *

File format example 1

*
 *     HEADER a b c
 *     1:1  1   2   3
 *     1:2  4   5   6
 *     1:3  7   8   9
 * 
* *

File format example 2

*
 *     a b c
 *     1:1  1   2   3
 *     1:2  4   5   6
 *     1:3  7   8   9
 * 
*/ public final class TableCodec extends AsciiFeatureCodec { protected static final String DEFAULT_HEADER_DELIMITER = "HEADER"; protected static final String IGV_HEADER_DELIMITER = "track"; protected static final String COMMENT_DELIMITER = "#"; private final String headerDelimiter; protected String delimiter_regex = "\\s+"; protected List header = new ArrayList<>(); private boolean havePassedHeader = false; /** * Create a TableCodec with a configured header line delimiter * * @param headerLineDelimiter the delimeter for comment header lines, or null if the header is a single commented line- */ public TableCodec(final String headerLineDelimiter) { super(TableFeature.class); if ( "".equals(headerLineDelimiter) ) { // Note, it is valid for headerLineDelimiter to be null, just not empty as the regex breaks in that case. throw new GATKException("HeaderLineDelimiter must either be a valid delimiter or null"); } headerDelimiter = headerLineDelimiter; } /** * Create a TableCodec for IGV track data. */ public TableCodec() { this(DEFAULT_HEADER_DELIMITER); } @Override public TableFeature decode(final String line) { if ((headerDelimiter != null && line.startsWith(headerDelimiter)) || (headerDelimiter == null && !havePassedHeader) || line.startsWith(COMMENT_DELIMITER) || line.startsWith(IGV_HEADER_DELIMITER)) { havePassedHeader = true; return null; } final String[] split = line.split(delimiter_regex); if (split.length < 1) { throw new IllegalArgumentException("TableCodec line = " + line + " is not a valid table format"); } return new TableFeature(new SimpleInterval(split[0]), Arrays.asList(split), header); } @Override public List readActualHeader(final LineIterator reader) { boolean isFirst = true; while (reader.hasNext()) { final String line = reader.peek(); // Peek to avoid reading non-header data if ( isFirst && ! line.startsWith(COMMENT_DELIMITER) && headerDelimiter != null && ! line.startsWith(headerDelimiter) ) { throw new UserException.MalformedFile("TableCodec file does not have a header"); } isFirst &= line.startsWith(COMMENT_DELIMITER); if (headerDelimiter == null || line.startsWith(headerDelimiter)) { reader.next(); // "Commit" the peek if (!header.isEmpty()) { throw new UserException.MalformedFile("Input table file seems to have two header lines. The second is = " + line); } final String[] spl = line.split(delimiter_regex); Collections.addAll(header, spl); return header; } else if (line.startsWith(COMMENT_DELIMITER)) { reader.next(); // "Commit" the peek } else { break; } } return header; } @Override public boolean canDecode(final String path) { return path.toLowerCase().endsWith(".table"); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy