All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.dragstr.DragstrParamUtils Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.dragstr;

import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.functional.IntToDoubleBiFunction;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;

/**
 * Utils to read and write {@link DragstrParams} instances from an to files and other resources.
 * 

Parameter matrices

*

* The DRAGstr model are composed of three matrices with the same dimensions (period x repeat-length): *

*
GOP
*
gap-opening-penalty table where we store the Phred scale score that we use for pairhmm match to insert or * delete transitions
. *
GCP
*
gap-continuation-penalty table where we store the Phred scale score that we use for pair-hmm insert to insert, and * deletion to deletion *
. *
API
*
adjusted prior probability of an indel occurring at a position in the reference with an STR.
*
*

*

Text file format

* If P is the maximum period length and L is the maximum repeat-length: *

*

 *         ############################
 *         # DragstrParams
 *         # -------------------------
 *         # annotation1 = value1
 *         # annotation2 = value2
 *         # ...
 *         ###########################
 *              1      2     3     4 ...     L
 *         GOP:
 *          gop11  gop12 gop13 gop14 ... gop1L
 *          gop21  gop22 ...
 *          ...
 *          gopP1  gopP2 ...             gopPL
 *         GCP:
 *          gcp11  gcp12 gcp13 ...
 *          ...
 *          gcpP1  apiP2 ...     ...     apiPL
 *         API:
 *          api11  api12 ...
 *          ..
 *          apiP1  apiP2 ...             apiPL
 *     
*

*

* Currently there is no fix/standard set of annotations but is just a mechanism to add some additional * metadata that might be useful for tracking purposes. *

*/ public final class DragstrParamUtils { public static final String GOP_TABLE_NAME = "GOP"; public static final String GCP_TABLE_NAME = "GCP"; public static final String API_TABLE_NAME = "API"; /** * Initial size of the buffer/builder used to compose lines in the output text format. */ private static int LINE_BUILDER_BUFFER_SIZE = 1024; public static DragstrParams parse(final GATKPath path) { if (path == null) { return null; } try (final BufferedReader reader = openBufferedReader(path.toString())) { return parse(reader, path.toString()); } catch (final IOException e) { throw new UserException.CouldNotReadInputFile(path.toString(), e); } } public static void print(final DragstrParams params, final GATKPath path, final Object ... annotations) { Utils.nonNull(params, "the input params cannot be null"); Utils.nonNull(path, "the input path cannot be null"); try (final PrintWriter pw = new PrintWriter(path.getOutputStream())) { print(params, pw, annotations); } params.setName(path.toString()); } private static void print(final DragstrParams params, final PrintWriter writer, final Object ... annotations) { writer.println("############################################################################################"); writer.println("# DragstrParams"); writer.println("# -------------------------"); for (int i = 0; i < annotations.length;) { final Object name = annotations[i++]; final Object value = i < annotations.length ? annotations[i++] : null; writer.println("# " + name + (value != null ? (" = " + value) : "")); } writer.println("############################################################################################"); printTables(params, writer); } private static BufferedReader openBufferedReader(final String path) { try { return Files.newBufferedReader(Paths.get(path)); } catch (final IOException ex) { throw new UserException.CouldNotReadInputFile(path, ex); } } private static DragstrParams parse(final BufferedReader reader, final String name) { try { String header; while ((header = reader.readLine()) != null) { if (!header.startsWith("#")) { break; } } if (header == null) { throw new UserException.BadInput("there is no content in the dragstr-params file " + name); } final String[] headerParts = header.split("\\s+"); final int[] repeats = Arrays.stream(headerParts) .filter(str -> !str.isEmpty()) .mapToInt(str -> { try { return Integer.parseInt(str); } catch (final NumberFormatException ex) { throw new UserException.BadInput("bad format for an integer", ex); } }) .toArray(); final int maxRepeats = repeats.length; for (int i = 0; i < repeats.length; i++) { if (repeats[i] != i + 1) { throw new UserException.BadInput("the DRAGstr parameter file header line must contain integers starting at 1 " + Arrays.toString(repeats)); } } final Map tables = new HashMap<>(); String line = reader.readLine(); if (line == null) { throw new UserException.BadInput("end of table list before expected"); } String tableName = line.replaceAll(":$", ""); List tableLines = new ArrayList<>(); while ((line = reader.readLine()) != null) { if (line.charAt(line.length() - 1) == ':') { tables.put(tableName, linesToMatrix(tableLines, repeats.length)); tableName = line.replaceAll(":$", ""); tableLines.clear(); } else { tableLines.add(line); } } if (tableName.isEmpty()) { throw new UserException.BadInput("table with no name"); } tables.put(tableName, linesToMatrix(tableLines, repeats.length)); final double[][] gopMatrix = mandatoryMatrix(tables, GOP_TABLE_NAME); final double[][] gcpMatrix = mandatoryMatrix(tables, GCP_TABLE_NAME); final double[][] apiMatrix = mandatoryMatrix(tables, API_TABLE_NAME); final int maxPeriod = gopMatrix.length; return DragstrParams.of(maxPeriod, maxRepeats, gopMatrix, gcpMatrix, apiMatrix, name); } catch (final IOException ex) { throw new UserException.CouldNotReadInputFile(name, ex); } } private static void printTable(final PrintWriter printWriter, final StringBuilder lineBuilder, final String tableName, final int maxPeriod, final int maxRepeat, IntToDoubleBiFunction func) { printWriter.println(tableName + ":"); for (int i = 1; i <= maxPeriod; i++) { lineBuilder.setLength(0); lineBuilder.append(String.format("%5s", String.format("%.2f", func.apply(i, 1)))); for (int j = 2; j <= maxRepeat; j++) { lineBuilder.append(" "); lineBuilder.append(String.format("%5s", String.format("%.2f", func.apply(i, j)))); } printWriter.println(lineBuilder.toString()); } } /** * Dump the parameters in their text file form given the sink print writer. * @param printWriter the sink for the dump. * */ private static void printTables(final DragstrParams params, final PrintWriter printWriter) { final StringBuilder lineBuilder = new StringBuilder(LINE_BUILDER_BUFFER_SIZE); lineBuilder.append(String.format("%5s", "1")); for (int i = 2; i <= params.maximumRepeats(); i++) { lineBuilder.append(" "); lineBuilder.append(String.format("%5s", i)); } printWriter.println(lineBuilder.toString()); printTable(printWriter, lineBuilder, GOP_TABLE_NAME, params.maximumPeriod(), params.maximumRepeats(), params::gop); printTable(printWriter, lineBuilder, GCP_TABLE_NAME, params.maximumPeriod(), params.maximumRepeats(), params::gcp); printTable(printWriter, lineBuilder, API_TABLE_NAME, params.maximumPeriod(), params.maximumRepeats(), params::api); } private static double[][] mandatoryMatrix(final Map tableData, final String name) { final double[][] result = tableData.get(name); if (result == null) { throw new UserException.BadInput("missing matrix " + name); } else { return result; } } private static double[][] linesToMatrix(final List lines, final int expectedNumberOfColumns) { final double[][] result = new double[lines.size()][expectedNumberOfColumns]; for (int i = 0; i < lines.size(); i++) { final String line = lines.get(i); final String[] parts = line.split("\\s+"); if (parts.length < expectedNumberOfColumns) { throw new UserException.BadInput("line has the wrong number of columns"); } int k = 0; for (int j = 0; j < parts.length; j++) { if (!parts[j].isEmpty()) { if (k >= expectedNumberOfColumns) { throw new UserException.BadInput("line has the wrong number of columns"); } try { final double val = Double.parseDouble(parts[j]); if (Double.isNaN(val) || Double.isInfinite(val) || val < 0.0) { throw new NullPointerException(); } result[i][k++] = val; } catch (final NumberFormatException ex) { throw new UserException.BadInput( String.format("score is not a valid Phred value (%d,%d) == %s", i + 1, j + 1, parts[j])); } } } } return result; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy