All downloads are free. The search and download functionality uses the official Maven repository.

org.pharmgkb.parser.vcf.model.VcfMetadata Maven / Gradle / Ivy

There is a newer version: 0.3.1
Show newest version
package org.pharmgkb.parser.vcf.model;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import org.pharmgkb.parser.vcf.VcfUtils;


/**
 * This class captures all of the VCF metadata from a VCF file.
 *
 * @author Mark Woon
 */
public class VcfMetadata {
  private String m_fileFormat;
  private Map m_alt;
  private Map m_info;
  private Map m_filter;
  private Map m_format;
  private List m_columns;
  private ListMultimap m_properties;
  private Map m_contig;
  private Map m_sample;
  private List m_pedigree;


  private VcfMetadata(@Nonnull String fileFormat, @Nullable Map alt,
      @Nullable Map info, @Nullable Map filter,
      @Nullable Map format, @Nullable Map contig,
      @Nullable Map sample, @Nullable List pedigree,
      @Nonnull List columns, @Nullable ListMultimap properties) {
    Preconditions.checkNotNull(fileFormat);
    Preconditions.checkNotNull(columns);
    m_fileFormat = fileFormat;
    m_alt        = alt==null?        new HashMap<>()            : alt;
    m_info       = info==null?       new HashMap<>()            : info;
    m_filter     = filter==null?     new HashMap<>()            : filter;
    m_format     = format==null?     new HashMap<>()            : format;
    m_contig     = contig==null?     new HashMap<>()            : contig;
    m_sample     = sample==null?     new HashMap<>()            : sample;
    m_pedigree   = pedigree==null?   new ArrayList<>()          : pedigree;
    m_properties = properties==null? ArrayListMultimap.create() : properties;
    m_columns    = columns;
  }


  public @Nonnull String getFileFormat() {
    return m_fileFormat;
  }

  public void setFileFormat(@Nonnull String fileFormat) {
    if (!VcfUtils.FILE_FORMAT_PATTERN.matcher(fileFormat).matches()) {
      throw new IllegalArgumentException("VCF format must look like ex: VCFv4.2; was " + fileFormat);
    }
    m_fileFormat = fileFormat;
  }

  public @Nonnull Map getAlts() {
    return m_alt;
  }

  /**
   * Gets the ALT metadata for the given ID.
   *
   * @param id the ID to lookup, will unwrap ID's enclosed in angle brackets (e.g. <CN1> will get converted to CN1)
   */
  @Nullable
  public IdDescriptionMetadata getAlt(@Nonnull String id) {
    IdDescriptionMetadata md = m_alt.get(id);
    if (md == null && id.startsWith("<") && id.endsWith(">")) {
      md = m_alt.get(id.substring(1, id.length() - 1));
    }
    return md;
  }


  public @Nonnull Map getInfo() {
    return m_info;
  }

  public @Nonnull Map getFilters() {
    return m_filter;
  }

  public @Nonnull Map getFormats() {
    return m_format;
  }

  public @Nonnull Map getContigs() {
    return m_contig;
  }

  public @Nonnull List getPedigrees() {
    return m_pedigree;
  }

  public @Nonnull Map getSamples() {
    return m_sample;
  }

  /**
   * @return The URLs from the field(s) in the assembly metadata line(s)
   */
  public @Nonnull List getAssemblies() {
    // spec says: ##assembly=url (without angle brackets)
    return m_properties.get("assembly");
  }

  /**
   * @return The URLs from the field(s) in the pedigreeDB metadata line(s), including angle brackets if any
   */
  public @Nonnull List getPedigreeDatabases() {
    // spec says: ##pedigreeDB= (with angle brackets)
    return m_properties.get("pedigreeDB");
  }

  /**
   * Adds {@code value} to the map of ALT metadata, using its {@link IdDescriptionMetadata#getId() ID} as the key.
   */
  public void addAlt(@Nonnull IdDescriptionMetadata value) {
    m_alt.put(value.getId(), value);
  }

  /**
   * Adds {@code value} to the map of INFO metadata, using its {@link InfoMetadata#getId() ID} as the key.
   */
  public void addInfo(@Nonnull InfoMetadata value) {
    m_info.put(value.getId(), value);
  }

  /**
   * Adds {@code value} to the map of FORMAT metadata, using its {@link FormatMetadata#getId() ID} as the key.
   */
  public void addFormat(@Nonnull FormatMetadata value) {
    m_format.put(value.getId(), value);
  }

  /**
   * Adds {@code value} to the map of CONTIG metadata, using its {@link ContigMetadata#getId() ID} as the key.
   */
  public void addContig(@Nonnull ContigMetadata value) {
    m_contig.put(value.getId(), value);
  }

  /**
   * Adds {@code value} to the map of FILTER metadata, using its {@link IdDescriptionMetadata#getId() ID} as the key.
   */
  public void addFilter(@Nonnull IdDescriptionMetadata value) {
    m_filter.put(value.getId(), value);
  }

  /**
   * Adds {@code value} to the list of assembly metadata.
   * @param value Should not be wrapped in angle brackets
   */
  public void addAssembly(@Nonnull String value) {
    m_properties.put("assembly", value);
  }

  /**
   * Adds {@code value} to the list of pedigreeDB.
   * @param value Must be wrapped in angle brackets
   * @throws IllegalArgumentException If {@code value} is not wrapped in angle brackets
   */
  public void addPedigreeDatabase(@Nonnull String value) {
    if (value.startsWith("<") && value.endsWith(">")) {
      m_properties.put("pedigreeDB", value);
    } else {
      throw new IllegalArgumentException("pedigreeDB string " + value + " should be enclosed in angle brackets according to spec");
    }
  }

  public void removeAlt(@Nonnull IdDescriptionMetadata value) {
    m_alt.remove(value.getId());
  }

  public void removeInfo(@Nonnull InfoMetadata value) {
    m_info.remove(value.getId());
  }

  public void removeFormat(@Nonnull FormatMetadata value) {
    m_format.remove(value.getId());
  }

  public void removeContig(@Nonnull ContigMetadata value) {
    m_contig.remove(value.getId());
  }

  public void removeFilter(@Nonnull IdDescriptionMetadata value) {
    m_filter.remove(value.getId());
  }

  public void removeAssembly(@Nonnull String value) {
    m_properties.remove("assembly", value);
  }

  /**
   * Adds {@code value} to the list of pedigreeDB.
   * @param value Must be wrapped in angle brackets
   * @throws IllegalArgumentException If {@code value} is not wrapped in angle brackets
   */
  public void removePedigreeDb(@Nonnull String value) {
    if (value.startsWith("<") && value.endsWith(">")) {
      m_properties.remove("pedigreeDB", value);
    } else { // be strict to avoid needing to delete both value and 
      throw new IllegalArgumentException("pedigreeDB string " + value + " should be enclosed in angle brackets according to spec");
    }
  }

  /**
   * Returns a map from every property key to each of its values.
   * Call {@link ListMultimap#asMap} to get a Map<String, Collection<String>>.
   * @return Contains every property except those contained in:
   * 
    *
  • {@link #getInfo}
  • *
  • {@link #getFilters}
  • *
  • {@link #getFormats}
  • *
  • {@link #getContigs}
  • *
  • {@link #getPedigrees}
  • *
  • {@link #getInfo}
  • *
  • {@link #getSamples}
  • *
* However, contains any in {@link #getAssemblies} and {@link #getPedigreeDatabases}. */ public @Nonnull ListMultimap getRawProperties() { return m_properties; } /** * Returns the value of a property, or null if the property is not set or has no value. * This method will return null for a reserved property of the form XX=<ID=value,ID=value,...>; * {@code assembly} and {@code pedigreeDB} are still included. */ public @Nonnull List getRawValuesOfProperty(@Nonnull String propertyKey) { return m_properties.get(propertyKey); } /** * Returns a list of the properties defined. * Reserved properties of the form XX=<ID=value,ID=value,...> are excluded, though {@code assembly} * and {@code pedigreeDB} are still included. * @return Contains every property except those contained in: *
    *
  • {@link #getInfo}
  • *
  • {@link #getFilters}
  • *
  • {@link #getFormats}
  • *
  • {@link #getContigs}
  • *
  • {@link #getPedigrees}
  • *
  • {@link #getInfo}
  • *
  • {@link #getSamples}
  • *
* However, contains any in {@link #getAssemblies} and {@link #getPedigreeDatabases}. */ public @Nonnull SortedSet getRawPropertyKeys() { return new TreeSet<>(m_properties.keySet()); } public int getColumnIndex(@Nonnull String column) { return m_columns.indexOf(column); } /** * Sample numbering starts at 0. */ public int getSampleIndex(@Nonnull String sampleId) { return m_columns.indexOf(sampleId) - 9; } /** * Gets the number of samples in the VCF file. */ public int getNumSamples() { if (m_columns.size() < 9) { return 0; // necessary because if we have no samples, we'll be missing FORMAT } return m_columns.size() - 9; } /** * Gets the sample name (column name). * * @param idx sample index, first sample is at index 0 * * @throws ArrayIndexOutOfBoundsException If the sample doesn't exist */ public @Nonnull String getSampleName(int idx) { return m_columns.get(9 + idx); } public static class Builder { private String m_fileFormat; private Map m_alt = new HashMap<>(); private Map m_info = new HashMap<>(); private Map m_filter = new HashMap<>(); private Map m_format = new HashMap<>(); private Map m_contig = new HashMap<>(); private Map m_sample = new HashMap<>(); private List m_pedigree = new ArrayList<>(); private List m_columns = new ArrayList<>(); private ListMultimap m_properties = ArrayListMultimap.create(); /** * Sets the VCF version string. 
* @param fileFormat Ex: "VCFv4.2" */ public Builder setFileFormat(@Nonnull String fileFormat) { m_fileFormat = fileFormat; if (!VcfUtils.FILE_FORMAT_PATTERN.matcher(fileFormat).matches()) { throw new IllegalStateException("Not a VCF file: fileformat is " + m_fileFormat); } return this; } public Builder addAlt(@Nonnull IdDescriptionMetadata md) { if (m_alt.containsKey(md.getId())) { throw new IllegalArgumentException("Duplicate ID " + md.getId() + " for ALT"); } m_alt.put(md.getId(), md); return this; } public Builder addInfo(@Nonnull InfoMetadata md) { if (m_info.containsKey(md.getId())) { throw new IllegalArgumentException("Duplicate ID " + md.getId() + " for INFO"); } m_info.put(md.getId(), md); return this; } public Builder addFilter(@Nonnull IdDescriptionMetadata md) { if (m_filter.containsKey(md.getId())) { throw new IllegalArgumentException("Duplicate ID " + md.getId() + " for FILTER"); } m_filter.put(md.getId(), md); return this; } public Builder addFormat(@Nonnull FormatMetadata md) { if (m_format.containsKey(md.getId())) { throw new IllegalArgumentException("Duplicate ID " + md.getId() + " for FORMAT"); } m_format.put(md.getId(), md); return this; } public Builder addContig(@Nonnull ContigMetadata md) { if (m_contig.containsKey(md.getId())) { throw new IllegalArgumentException("Duplicate ID " + md.getId() + " for CONTIG"); } m_contig.put(md.getId(), md); return this; } public Builder addSample(@Nonnull IdDescriptionMetadata md) { if (m_sample.containsKey(md.getId())) { throw new IllegalArgumentException("Duplicate ID " + md.getId() + " for SAMPLE"); } m_sample.put(md.getId(), md); return this; } public Builder addPedigree(@Nonnull BaseMetadata md) { m_pedigree.add(md); return this; } public Builder addRawProperty(@Nonnull String name, @Nonnull String value) { m_properties.put(name, value); return this; } public Builder setColumns(@Nonnull List cols) { m_columns = cols; return this; } @Nonnull public VcfMetadata build() { if (m_fileFormat == null) { throw 
new IllegalStateException("Not a VCF file: no ##fileformat line"); } return new VcfMetadata(m_fileFormat, m_alt, m_info, m_filter, m_format, m_contig, m_sample, m_pedigree, m_columns, m_properties); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy