All downloads are free. The search and download functionality uses the official Maven repository.

org.pharmgkb.parser.vcf.VcfWriter Maven / Gradle / Ivy

There is a newer version: 0.3.1
Show newest version
package org.pharmgkb.parser.vcf;

import org.apache.commons.io.IOUtils;
import org.pharmgkb.parser.vcf.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.*;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.*;

/**
 * Writes VCF-formatted text from {@link VcfMetadata} (the header), {@link VcfPosition VcfPositions}, and their
 * {@link VcfSample VcfSamples}.
 * For now, this class performs little validation of its own, relying on {@link VcfParser} instead: inconsistencies
 * between the data and the metadata are logged as warnings and the data is written anyway.
 * Instances are created with {@link Builder}.
 *
 * @author Douglas Myers-Turnbull
 * @see TransformingVcfLineParser TransformingVcfLineParser - a read-transform-write streamer that is publicly accessible
 */
public class VcfWriter implements Closeable {

  private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  /** Size, in characters, of the buffer placed in front of a destination file. */
  private static final int sf_fileBufferSize = 65536;

  /** Destination file, or null when writing to a caller-supplied writer; only used in log messages. */
  private final Path m_file;
  private final PrintWriter m_writer;
  /** Number of lines written so far. */
  private int m_lineNumber;

  private VcfWriter(@Nullable Path file, @Nonnull PrintWriter writer) {
    m_file = file;
    m_writer = writer;
  }

  /**
   * Writes the complete VCF header: the {@code ##fileformat} line, the structured metadata lines, the raw
   * {@code ##key=value} properties, and finally the {@code #CHROM} column header line.
   * The underlying writer is flushed afterwards.
   *
   * @param metadata the metadata to write
   */
  public void writeHeader(@Nonnull VcfMetadata metadata) {

    // file format
    printLine("##fileformat=" + metadata.getFileFormat());

    // metadata, in order from spec
    printLines("INFO", metadata.getInfo().values());
    printLines("FILTER", metadata.getFilters().values());
    printLines("FORMAT", metadata.getFormats().values());
    printLines("ALT", metadata.getAlts().values());
    printLines("contig", metadata.getContigs().values());
    printLines("SAMPLE", metadata.getSamples().values());
    printLines("PEDIGREE", metadata.getPedigrees());

    for (String key : metadata.getRawPropertyKeys()) {
      printPropertyLines(key, metadata.getRawValuesOfProperty(key));
    }

    // header line; the FORMAT and per-sample columns only exist when there is at least one sample
    StringBuilder sb = new StringBuilder("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
    if (metadata.getNumSamples() > 0) {
      sb.append("\tFORMAT");
    }
    for (int i = 0; i < metadata.getNumSamples(); i++) {
      sb.append("\t").append(metadata.getSampleName(i));
    }
    printLine(sb);

    m_writer.flush();
    sf_logger.info("Wrote {} lines of header{}", m_lineNumber, (m_file == null ? "" : " to " + m_file));
  }

  /**
   * Writes one data line for a position and flushes the underlying writer.
   * Missing ID, REF, ALT, QUAL, and INFO values are written as {@code .}; an empty FILTER list is written as
   * {@code PASS}. Mismatches between the data and {@code metadata} are logged as warnings, not rejected.
   *
   * @param metadata the metadata that the position and samples are checked against
   * @param position the position to write
   * @param samples the samples for this position, written in iteration order
   */
  public void writeLine(@Nonnull VcfMetadata metadata, @Nonnull VcfPosition position,
      @Nonnull List<VcfSample> samples) {

    StringBuilder sb = new StringBuilder();

    sb.append(position.getChromosome()).append("\t");
    sb.append(position.getPosition()).append("\t");
    addListOrElse(position.getIds(), ";", ".", sb);
    if (position.getRef().isEmpty()) {
      sf_logger.warn("No REF bases, but the column is required (on line {})", m_lineNumber);
    }
    // REF is a single value; wrapping it in a list would never be empty, so "." could never be written
    addStringOrElse(position.getRef(), ".", sb);
    addListOrElse(position.getAltBases(), ",", ".", sb);
    addStringOrElse(position.getQuality(), ".", sb);
    addListOrElse(position.getFilters(), ";", "PASS", sb);
    addInfoOrDot(metadata, position, sb);

    warnAboutUnknownFilters(metadata, position);

    // these columns can be skipped completely
    addFormatConditionally(position, sb);
    int sampleIndex = 0;
    for (VcfSample sample : samples) {
      addSampleConditionally(metadata, sampleIndex, position, sample, sb);
      sampleIndex++;
    }

    // every column appends its own trailing tab, so drop the one after the last column
    int last = sb.length() - 1;
    if (last >= 0 && sb.charAt(last) == '\t') {
      sb.setLength(last);
    }
    printLine(sb);
    m_writer.flush();
  }

  /**
   * Closes the underlying writer. {@link PrintWriter} never throws on I/O failure, so its error flag is checked
   * here and a failure is logged rather than silently ignored.
   */
  @Override
  public void close() {
    IOUtils.closeQuietly(m_writer);
    if (m_writer.checkError()) {
      sf_logger.error("An I/O error occurred while writing{}; the output may be incomplete",
          (m_file == null ? "" : " to " + m_file));
    }
  }

  /**
   * Logs a warning for every FILTER value of the position that has no FILTER metadata.
   */
  private void warnAboutUnknownFilters(@Nonnull VcfMetadata metadata, @Nonnull VcfPosition position) {
    position.getFilters().stream().filter(key -> !metadata.getFilters().containsKey(key)).forEach(key -> {
      if (key.equals(".")) {
        sf_logger.warn("Position {}:{} has FILTER {}; the absence of a filter should instead be marked with PASS (on line {})",
            position.getChromosome(), position.getPosition(), key, m_lineNumber);
      } else {
        sf_logger.warn("Position {}:{} has FILTER {}, but there is no FILTER metadata with that name (on line {})",
          position.getChromosome(), position.getPosition(), key, m_lineNumber);
      }
    });
  }

  /**
   * Appends the colon-delimited FORMAT column followed by a tab, or nothing at all if the position has no FORMAT.
   */
  private void addFormatConditionally(@Nonnull VcfPosition position, @Nonnull StringBuilder sb) {
    if (position.getFormat().isEmpty()) {
      return;
    }
    sb.append(String.join(":", position.getFormat())).append("\t");
  }

  /**
   * Appends one sample column followed by a tab, or nothing at all if both the sample and the position's FORMAT
   * are empty. Values are written in FORMAT order, separated by colons; a value the sample does not have is
   * written as the VCF missing value {@code .}. Unknown FORMAT keys, missing or extra sample properties, and
   * values that do not convert to the declared type are logged as warnings.
   *
   * @param sampleIndex index of the sample, used only in log messages
   */
  private void addSampleConditionally(@Nonnull VcfMetadata metadata, int sampleIndex,
      @Nonnull VcfPosition position, @Nonnull VcfSample sample, @Nonnull StringBuilder sb) {

    if (sample.getPropertyKeys().isEmpty() && position.getFormat().isEmpty()) {
      return;
    }

    // separators follow the FORMAT keys (not the sample's keys), so a sample with fewer or more
    // properties than FORMAT entries still yields a well-formed column
    Iterator<String> formatKeys = position.getFormat().iterator();
    while (formatKeys.hasNext()) {
      String key = formatKeys.next();

      FormatMetadata format = metadata.getFormats().get(key);
      if (format == null) {
        sf_logger.warn("Sample #{} for {}:{} contains FORMAT {}, but there is no FORMAT metadata with that name " +
                "(on line {})",
            sampleIndex, position.getChromosome(), position.getPosition(), key, m_lineNumber);
      }

      if (!sample.containsProperty(key)) {
        sf_logger.warn("Sample #{} is missing property {}" +
            " (on line {})", sampleIndex, key, m_lineNumber);
      }

      String value = sample.getProperty(key);

      // the type can only be checked when there is metadata, a value, and exactly one value is expected
      if (format != null && value != null && isSingleValued(format.getNumber())) {
        try {
          VcfUtils.convertProperty(format.getType(), value); // just test
        } catch (IllegalArgumentException e) {
          sf_logger.warn("Property {} for sample #{} is not of type {}" +
              " (on line {})", key, sampleIndex, format.getType(), m_lineNumber);
        }
      }

      sb.append(value == null ? "." : value);
      if (formatKeys.hasNext()) {
        sb.append(":");
      }
    }

    // now make sure the sample doesn't contain extra keys
    sample.getPropertyKeys().stream().filter(key -> !position.getFormat().contains(key)).forEach(key -> {
      sf_logger.warn("Sample #{} contains extra property {} " +
          "(on line {})", sampleIndex, key, m_lineNumber);
    });
    sb.append("\t");
  }

  /**
   * Appends the INFO column followed by a tab. Entries are separated by semicolons and written as
   * {@code key=v1,v2,...}, or as just {@code key} when the entry has no values or a single empty value.
   * A position without INFO keys is written as {@code .}.
   */
  private void addInfoOrDot(@Nonnull VcfMetadata metadata, @Nonnull VcfPosition position, @Nonnull StringBuilder sb) {

    Iterator<String> keys = position.getInfoKeys().iterator();
    if (!keys.hasNext()) {
      sb.append(".");
    }

    while (keys.hasNext()) {
      String key = keys.next();

      List<String> values = position.getInfo(key);
      if (values == null) {
        // a listed key without a value list is written like a flag instead of failing
        values = Collections.emptyList();
      }

      checkInfoValues(metadata, position, key, values);

      sb.append(key);
      boolean hasNoValue = values.isEmpty() || (values.size() == 1 && values.get(0).isEmpty());
      if (!hasNoValue) {
        sb.append("=").append(String.join(",", values));
      }
      if (keys.hasNext()) {
        sb.append(";");
      }
    }
    sb.append("\t");
  }

  /**
   * Logs a warning if the INFO key has no metadata, or if the metadata declares exactly one value and a value
   * does not convert to the declared type.
   */
  private void checkInfoValues(@Nonnull VcfMetadata metadata, @Nonnull VcfPosition position, @Nonnull String key,
      @Nonnull List<String> values) {

    InfoMetadata info = metadata.getInfo().get(key);
    if (info == null) {
      sf_logger.warn("Position {}:{} contains INFO {}, but there is no INFO metadata with that name (on line {})",
          position.getChromosome(), position.getPosition(), key, m_lineNumber);
      return;
    }

    // if the number is anything but 1, it might be a list of something else, represented as a string
    // in that case, we can't compare
    if (!isSingleValued(info.getNumber())) {
      return;
    }
    for (String value : values) {
      try {
        VcfUtils.convertProperty(info.getType(), value); // just test
      } catch (IllegalArgumentException e) {
        sf_logger.warn("Property {} is not of type {} (on line {})", key, info.getType(), m_lineNumber);
      }
    }
  }

  /**
   * Returns true only if {@code number} parses as the integer 1; null and non-integer text yield false.
   */
  private static boolean isSingleValued(@Nullable String number) {
    if (number == null) {
      return false;
    }
    try {
      return Integer.parseInt(number) == 1;
    } catch (NumberFormatException ignored) {
      // not a plain integer, so the number of values is not fixed at one
      return false;
    }
  }

  /**
   * Appends {@code object.toString()} followed by a tab, substituting {@code missingValue} when the object is
   * null or its string form is empty.
   */
  private void addStringOrElse(@Nullable Object object, @Nonnull String missingValue, @Nonnull StringBuilder sb) {
    String text = object == null ? "" : object.toString();
    sb.append(text.isEmpty() ? missingValue : text);
    sb.append("\t");
  }

  /**
   * Appends the elements of {@code list} separated by {@code delimiter} and followed by a tab, substituting
   * {@code missingValue} when the list is empty.
   */
  private void addListOrElse(@Nonnull List<?> list, @Nonnull String delimiter, @Nonnull String missingValue,
      @Nonnull StringBuilder sb) {
    if (list.isEmpty()) {
      sb.append(missingValue);
    } else {
      Iterator<?> elements = list.iterator();
      sb.append(elements.next());
      while (elements.hasNext()) {
        sb.append(delimiter).append(elements.next());
      }
    }
    sb.append("\t");
  }

  /**
   * Prints one {@code ##name=value} line per value.
   */
  private void printPropertyLines(@Nonnull String name, @Nonnull Collection<String> list) {
    for (String string : list) {
      printLine("##" + name + "=" + string);
    }
  }

  /**
   * Prints one {@code ##name=<...>} line per metadata entry.
   */
  private void printLines(@Nonnull String name, @Nonnull Collection<? extends BaseMetadata> list) {
    for (BaseMetadata metadata : list) {
      printLine(getAllProperties(name, metadata));
    }
  }

  /**
   * Builds a {@code ##name=<key=value,key=value,...>} line from the raw properties of {@code metadata}, in the
   * iteration order of its property map.
   */
  private String getAllProperties(@Nonnull String name, @Nonnull BaseMetadata metadata) {
    StringJoiner properties = new StringJoiner(",", "##" + name + "=<", ">");
    for (Map.Entry<?, ?> entry : metadata.getPropertiesRaw().entrySet()) {
      properties.add(entry.getKey() + "=" + entry.getValue());
    }
    return properties.toString();
  }

  /**
   * Prints a single line, counts it, and logs progress every 1000 lines.
   *
   * @throws IllegalArgumentException if the text contains a newline and would therefore span several lines
   */
  private void printLine(@Nonnull Object line) {
    String string = line.toString();
    if (string.contains("\n")) {
      throw new IllegalArgumentException("Something went wrong writing line #" + m_lineNumber + ": [[[" + string +
          "]]] contains more than one line");
    }
    m_writer.println(string);
    m_lineNumber++;
    if (m_lineNumber % 1000 == 0) {
      sf_logger.info("Wrote {} lines{}", m_lineNumber, (m_file == null ? "" : " to " + m_file));
    }
  }

  /**
   * Builds a {@link VcfWriter} that writes either to a file or to a caller-supplied writer.
   * If both are specified, the file takes precedence and the supplied writer is not used.
   */
  public static class Builder {

    private Path m_file;
    private PrintWriter m_writer;

    /**
     * Sets the file to write to; it is opened by {@link #build()}.
     */
    public Builder toFile(Path file) {
      m_file = file;
      return this;
    }

    /**
     * Sets the writer to write to.
     */
    public Builder toWriter(PrintWriter writer) {
      m_writer = writer;
      return this;
    }

    /**
     * Creates the writer. A destination file is opened as UTF-8 (rather than the platform default charset) and
     * any existing content is replaced.
     *
     * @throws IOException if the destination file cannot be opened for writing
     * @throws IllegalStateException if neither a file nor a writer was specified
     */
    public VcfWriter build() throws IOException {
      // resolved into a local so that building does not alter the builder's own state
      PrintWriter writer = m_writer;
      if (m_file != null) {
        writer = new PrintWriter(new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(m_file.toFile()), StandardCharsets.UTF_8),
            sf_fileBufferSize));
      }
      if (writer == null) {
        throw new IllegalStateException("Must specify either file or writer");
      }
      return new VcfWriter(m_file, writer);
    }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy