pro.parseq.vcf.fields.Format Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vcf-explorer Show documentation
Show all versions of vcf-explorer Show documentation
Library for Variant Call Format (VCF) files manipulation
/*******************************************************************************
* Copyright 2016-2017 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*******************************************************************************/
package pro.parseq.vcf.fields;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import pro.parseq.vcf.fields.types.FormatFieldType;
import pro.parseq.vcf.types.Metadata;
import pro.parseq.vcf.utils.VcfGrammar;
/**
* Represents VCF FORMAT meta-information field
*
* @author Alexander Afanasyev [email protected]
*/
public class Format extends Metadata {
private String id;
private String number;
private FormatFieldType type;
private String description;
public Format(String line) {
super(line);
Matcher idMatcher = VcfGrammar.formatIdPattern.matcher(line);
Matcher numberMatcher = VcfGrammar.formatNumberPattern.matcher(line);
Matcher typeMatcher = VcfGrammar.formatTypePattern.matcher(line);
Matcher descriptionMatcher = VcfGrammar.formatDescriptionPattern.matcher(line);
if (idMatcher.find() && numberMatcher.find() && typeMatcher.find() && descriptionMatcher.find()) {
id = line.substring(idMatcher.start(), idMatcher.end());
number = line.substring(numberMatcher.start(), numberMatcher.end());
type = FormatFieldType.getEnum(line.substring(typeMatcher.start(), typeMatcher.end()));
description = line.substring(descriptionMatcher.start(), descriptionMatcher.end());
}
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getNumber() {
return number;
}
public void setNumber(String number) {
this.number = number;
}
public FormatFieldType getType() {
return type;
}
public void setType(FormatFieldType type) {
this.type = type;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
/**
* Checks whether specified genotype contains particular allele
*
* @param value Genotype format field value (e.g. 0/1 or 1|2)
* @param alleleNumber VCF data line allele number (0 for reference allele, 1,2.. for alternates)
* @return Whether allele is in the genotype or {@code null} of value is malformed
*/
public static final Boolean getGenotypeFieldValue(String value, int alleleNumber) {
if (value == null
|| !VcfGrammar.genotypeValuePattern.matcher(value).matches()) {
return null;
}
String[] alleleTokens = value.split(VcfGrammar.GENOTYPE_DELIMITER);
for (String alleleToken: alleleTokens) {
if (alleleNumber == Integer.parseInt(alleleToken)) {
return true;
}
}
return false;
}
/**
* Allows to extract field values from data lines
*
* @param value Data line FORMAT value from samples' details specified for the field
* @return {@link List} of found values (see VCFv4.2 specification for more details)
*/
public List extends Serializable> getValue(String value) {
if (value == null) {
return null;
}
String[] values = value.split(",");
if (VcfGrammar.exactCountNumberValuePattern.matcher(number).matches()) {
int numberValue = Integer.parseInt(number);
if (numberValue != values.length) {
// TODO: change this to throw proper exception
return null;
}
}
switch (type) {
case CHARACTER:
case STRING:
return Arrays.asList(values);
case FLOAT:
List doubleValues = new ArrayList<>();
for (int i = 0; i < values.length; ++i) {
try {
doubleValues.add(Double.parseDouble(values[i]));
} catch (NumberFormatException e) {
// TODO: change this to throw proper exception
return null;
}
}
return doubleValues;
case INTEGER:
List intValues = new ArrayList<>();
for (int i = 0; i < values.length; ++i) {
try {
intValues.add(Integer.parseInt(values[i]));
} catch (NumberFormatException e) {
// TODO: change this to throw proper exception
return null;
}
}
return intValues;
default:
return null;
}
}
@Override
protected Pattern getPattern() {
return VcfGrammar.formatPattern;
}
public static final boolean isFormat(String line) {
return VcfGrammar.formatPattern.matcher(line).matches();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy