All downloads are free. The search and download features use the official Maven repository.

org.pharmgkb.parser.vcf.model.VcfPosition Maven / Gradle / Ivy

There is a newer version: 0.3.1
Show newest version
package org.pharmgkb.parser.vcf.model;

import com.google.common.base.Joiner;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import org.pharmgkb.parser.vcf.VcfUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.lang.invoke.MethodHandles;
import java.math.BigDecimal;
import java.util.*;
import java.util.regex.Pattern;


/**
 * This class contains the basic data (the first 9 columns) for a VCF position line.
 * 
 * 0 - CHROM
 * 1 - POS
 * 2 - ID
 * 3 - REF
 * 4 - ALT
 * 5 - QUAL
 * 6 - FILTER
 * 7 - INFO
 * 8 - FORMAT
 * 
* * @author Mark Woon */ public class VcfPosition { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Joiner sf_commaJoiner = Joiner.on(","); private static final Pattern sf_whitespace = Pattern.compile(".*\\s.*"); private String m_chromosome; private long m_position; private List m_ids = new ArrayList<>(); private String m_refBases; private List m_altBases = new ArrayList<>(); private List m_alleles = new ArrayList<>(); private BigDecimal m_quality; private List m_filter = new ArrayList<>(); private ListMultimap m_info = ArrayListMultimap.create(); private List m_format = new ArrayList<>(); public VcfPosition(@Nonnull String chr, long pos, @Nullable List ids, @Nonnull String ref, @Nullable List altBases, @Nullable BigDecimal qual, @Nullable List filter, @Nullable ListMultimap info, @Nullable List format) { /* 1. Check the arguments, in order */ if (sf_whitespace.matcher(chr).matches() || chr.contains(":")) { throw new IllegalArgumentException("CHROM column \"" + chr + "\" contains whitespace or colons"); } // allow pos < 1 because that's reserved for telomers if (ids != null) { for (String id : ids) { if (sf_whitespace.matcher(id).matches() || id.contains(";")) { throw new IllegalArgumentException("ID \"" + id + "\" contains whitespace or semicolons"); } } } if (!VcfUtils.REF_BASE_PATTERN.matcher(ref).matches()) { throw new IllegalArgumentException("Invalid reference base '" + ref + "' (must match " + VcfUtils.REF_BASE_PATTERN +")"); } if (altBases != null) { for (String base : altBases) { if (!VcfUtils.ALT_BASE_PATTERN.matcher(base).matches()) { throw new IllegalArgumentException("Invalid alternate base '" + base + "' (must be [AaGgCcTtNn\\*]+ or <.+>)"); } } } if (filter != null) { for (String f : filter) { if (sf_whitespace.matcher(f).matches()) { throw new IllegalArgumentException("FILTER column entry \"" + f + "\" contains whitespace"); } if (f.equals("0")) { throw new 
IllegalArgumentException("FILTER column entry should not be 0"); } if (f.equals("PASS")) { if (filter.size() == 1) { // a user is likely to pass "PASS" instead of an empty list or null sf_logger.warn("FILTER is PASS, but should have been passed as null. Converting to null"); filter = null; break; // unnecessary, but gets rid of the warning } else { // but this is illegal per VCF spec throw new IllegalArgumentException("FILTER contains PASS along with other filters!"); } } } } if (info != null) { for (Map.Entry entry : info.entries()) { if (sf_whitespace.matcher(entry.getKey()).matches() || sf_whitespace.matcher(entry.getValue()).matches()) { throw new IllegalArgumentException("INFO column entry \"" + entry.getKey() + "=" + entry.getValue() + "\" contains whitespace"); } } } if (format != null) { for (String f : format) { if (!VcfUtils.FORMAT_PATTERN.matcher(f).matches() || f.contains(":")) { throw new IllegalArgumentException("FORMAT ID does not match VCF spec"); } } } /* 2. Set the fields, in order */ // not resolving ID string m_chromosome = chr; // required m_position = pos; // required if (ids != null) { m_ids = ids; } m_refBases = ref; // required m_alleles.add(m_refBases); if (altBases != null) { m_altBases = altBases; m_alleles.addAll(altBases); } m_quality = qual; // required if (filter != null) { m_filter = filter; } if (info != null) { m_info = info; } if (format != null) { m_format = format; } } public VcfPosition(@Nonnull String chromosome, long position, @Nonnull String refBases, @Nonnull BigDecimal quality) { m_chromosome = chromosome; m_position = position; m_refBases = refBases; m_quality = quality; } /** * Gets an identifier from the reference genome or an angle-bracketed ID String ("{@code }") pointing to a contig * in the assembly file. 
*/ public @Nonnull String getChromosome() { return m_chromosome; } public void setChromosome(@Nonnull String chromosome) { m_chromosome = chromosome; } public void setRef(@Nonnull String ref) { m_refBases = ref; } public long getPosition() { return m_position; } public void setPosition(long position) { m_position = position; } /** * Gets the list of unique identifiers for this position. */ public @Nonnull List getIds() { return m_ids; } /** * Gets the reference base(s) for this position. Each base must be an A, C, G, T, or N. */ public @Nonnull String getRef() { return m_refBases; } /** * Gets the alternate base(s) for this position. Each base must be an A, C, G, T, N or * unless it's an * angle-bracketed ID string ("{@code }"). *

* ID strings should reference a specific ALT metadata (obtainable via {@link VcfMetadata#getAlt(java.lang.String)}). *

*/ public @Nonnull List getAltBases() { return m_altBases; } /** * Gets the allele at the given index from a list of containing refBases + altBases. * * @throws IndexOutOfBoundsException if index is out of range */ public @Nonnull String getAllele(int index) { return m_alleles.get(index); } public @Nullable BigDecimal getQuality() { return m_quality; } public void setQuality(@Nullable BigDecimal quality) { m_quality = quality; } public boolean isPassingAllFilters() { return m_filter.isEmpty(); } /** * Returns a list of filters this position failed, if any. */ public @Nonnull List getFilters() { return m_filter; } /** * Gets all INFO fields for every key. */ public @Nonnull ListMultimap getInfo() { return m_info; } /** * Get INFO metadata with the specified ID. * * @return list of values or null if there is no INFO metadata for the specified id */ public @Nullable List getInfo(@Nonnull String id) { if (hasInfo(id)) { return m_info.get(id); } return null; } /** * Returns the value for the reserved property as the type specified by both {@link ReservedInfoProperty#getType()} * and {@link ReservedInfoProperty#isList()}. *

* Note that this method does NOT always return a list. *

* For example: *

{@code
   *   BigDecimal bq = vcfPosition.getInfoConverted(ReservedInfoProperty.BaseQuality);
   * }
   * 
* * @param The type specified by {@code ReservedInfoProperty.getType()} if {@code ReservedInfoProperty.isList()} * is false; * otherwise {@code List} where V is the type specified by {@code ReservedInfoProperty.getType()}. */ public @Nullable T getInfo(@Nonnull ReservedInfoProperty key) { if (!hasInfo(key.getId())) { return null; } List list = m_info.get(key.getId()); if (list.isEmpty()) { return null; } return VcfUtils.convertProperty(key, sf_commaJoiner.join(list)); } /** * Checks if there is INFO metadata with the specified ID. */ public boolean hasInfo(@Nonnull String id) { return m_info != null && m_info.containsKey(id); } /** * Checks if there is INFO metadata with the specified ID. */ public boolean hasInfo(@Nonnull ReservedInfoProperty key) { return hasInfo(key.getId()); } public @Nonnull List getFormat() { return m_format; } @Nonnull public Set getInfoKeys() { if (m_info == null) { return new HashSet<>(0); } return m_info.keySet(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy