All Downloads are FREE. Search and download functionalities are using the official Maven repository.

pro.parseq.vcf.fields.Format Maven / Gradle / Ivy

There is a newer version: 1.1.1-RELEASE
Show newest version
/*******************************************************************************
 *     Copyright 2016-2017 the original author or authors.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 *******************************************************************************/
package pro.parseq.vcf.fields;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import pro.parseq.vcf.fields.types.FormatFieldType;
import pro.parseq.vcf.types.Metadata;
import pro.parseq.vcf.utils.VcfGrammar;

/**
 * Represents VCF FORMAT meta-information field
 * 
 * @author Alexander Afanasyev [email protected]
 */
public class Format extends Metadata {

	/** Substring of the header line matched by {@code VcfGrammar.formatIdPattern}. */
	private String id;
	/** Substring of the header line matched by {@code VcfGrammar.formatNumberPattern}. */
	private String number;
	/** Field type resolved from the substring matched by {@code VcfGrammar.formatTypePattern}. */
	private FormatFieldType type;
	/** Substring of the header line matched by {@code VcfGrammar.formatDescriptionPattern}. */
	private String description;

	/**
	 * Creates a FORMAT meta-information field from its header line.
	 * <p>
	 * The line is first handed to the {@link Metadata} constructor. Afterwards the
	 * id, number, type and description are extracted with the corresponding
	 * {@link VcfGrammar} patterns. If any of the four patterns is not found in the
	 * line, none of the fields is assigned and all of them stay {@code null}.
	 * 
	 * @param line VCF meta-information line describing a FORMAT field
	 */
	public Format(String line) {

		super(line);

		Matcher idMatcher = VcfGrammar.formatIdPattern.matcher(line);
		Matcher numberMatcher = VcfGrammar.formatNumberPattern.matcher(line);
		Matcher typeMatcher = VcfGrammar.formatTypePattern.matcher(line);
		Matcher descriptionMatcher = VcfGrammar.formatDescriptionPattern.matcher(line);
		if (idMatcher.find() && numberMatcher.find() && typeMatcher.find() && descriptionMatcher.find()) {
			// Matcher.group() is exactly line.substring(start(), end()) of the last match
			id = idMatcher.group();
			number = numberMatcher.group();
			type = FormatFieldType.getEnum(typeMatcher.group());
			description = descriptionMatcher.group();
		}
	}

	/**
	 * @return FORMAT field id, or {@code null} if it has not been parsed or set
	 */
	public String getId() {
		return id;
	}

	/**
	 * @param id FORMAT field id
	 */
	public void setId(String id) {
		this.id = id;
	}

	/**
	 * @return FORMAT field number as it was written in the header line,
	 * or {@code null} if it has not been parsed or set
	 */
	public String getNumber() {
		return number;
	}

	/**
	 * @param number FORMAT field number in its textual form
	 */
	public void setNumber(String number) {
		this.number = number;
	}

	/**
	 * @return FORMAT field type, or {@code null} if it has not been parsed or set
	 */
	public FormatFieldType getType() {
		return type;
	}

	/**
	 * @param type FORMAT field type
	 */
	public void setType(FormatFieldType type) {
		this.type = type;
	}

	/**
	 * @return FORMAT field description, or {@code null} if it has not been parsed or set
	 */
	public String getDescription() {
		return description;
	}

	/**
	 * @param description FORMAT field description
	 */
	public void setDescription(String description) {
		this.description = description;
	}

	/**
	 * Checks whether specified genotype contains particular allele
	 * 
	 * @param value Genotype format field value (e.g. 0/1 or 1|2)
	 * @param alleleNumber VCF data line allele number (0 for reference allele, 1,2.. for alternates)
	 * @return Whether allele is in the genotype or {@code null} if the value is
	 * {@code null} or malformed (does not match {@code VcfGrammar.genotypeValuePattern})
	 */
	public static final Boolean getGenotypeFieldValue(String value, int alleleNumber) {

		if (value == null
				|| !VcfGrammar.genotypeValuePattern.matcher(value).matches()) {
			return null;
		}

		String[] alleleTokens = value.split(VcfGrammar.GENOTYPE_DELIMITER);
		for (String alleleToken: alleleTokens) {
			try {
				if (alleleNumber == Integer.parseInt(alleleToken)) {
					return true;
				}
			} catch (NumberFormatException ignored) {
				// A token that is not a valid int cannot be equal to alleleNumber, so it
				// is simply not a match. NOTE(review): this would cover the VCF
				// missing-allele marker "." if genotypeValuePattern admits it, as well
				// as numbers beyond the int range - confirm against VcfGrammar.
			}
		}

		return false;
	}

	/**
	 * Allows to extract field values from data lines
	 * <p>
	 * The value is split on commas. When the field number is an exact count
	 * (it matches {@code VcfGrammar.exactCountNumberValuePattern}), the amount of
	 * comma-separated tokens has to be equal to it. The tokens are then converted
	 * according to the field type: {@code CHARACTER} and {@code STRING} yield a
	 * fixed-size list of {@link String}, {@code FLOAT} a list of {@link Double}
	 * and {@code INTEGER} a list of {@link Integer}.
	 * <p>
	 * The return type is intentionally kept raw so that existing callers keep compiling.
	 * 
	 * @param value Data line FORMAT value from samples' details specified for the field
	 * @return {@link List} of found values (see VCFv4.2 specification for more details),
	 * or {@code null} if the value is {@code null}, the field number or type is not
	 * known, the amount of values does not fit the exact-count number, some value can
	 * not be parsed as the field type, or the type is not one of the listed above
	 */
	@SuppressWarnings("rawtypes")
	public List getValue(String value) {

		// number and type stay null when the header line could not be parsed
		// (or were reset through the setters); report that like any other failure
		if (value == null || number == null || type == null) {
			return null;
		}

		String[] values = value.split(",");
		if (!hasExpectedCount(values.length)) {
			// TODO: change this to throw proper exception
			return null;
		}

		switch (type) {
		case CHARACTER:
		case STRING:
			return Arrays.asList(values);
		case FLOAT:
			return parseDoubles(values);
		case INTEGER:
			return parseIntegers(values);
		default:
			return null;
		}
	}

	/**
	 * Tells whether the amount of values satisfies the field number. Only an
	 * exact-count number restricts the amount; any other number accepts everything.
	 */
	private boolean hasExpectedCount(int actualCount) {

		if (!VcfGrammar.exactCountNumberValuePattern.matcher(number).matches()) {
			return true;
		}

		try {
			return Integer.parseInt(number) == actualCount;
		} catch (NumberFormatException ignored) {
			// number matched the exact-count pattern but is not a valid int
			// (e.g. out of range), so it can not be equal to an array length
			return false;
		}
	}

	/**
	 * Parses every token as a double; returns {@code null} if any token is not parseable.
	 */
	private static List<Double> parseDoubles(String[] tokens) {

		List<Double> doubleValues = new ArrayList<>(tokens.length);
		for (String token: tokens) {
			try {
				doubleValues.add(Double.parseDouble(token));
			} catch (NumberFormatException e) {
				// TODO: change this to throw proper exception
				return null;
			}
		}

		return doubleValues;
	}

	/**
	 * Parses every token as an int; returns {@code null} if any token is not parseable.
	 */
	private static List<Integer> parseIntegers(String[] tokens) {

		List<Integer> intValues = new ArrayList<>(tokens.length);
		for (String token: tokens) {
			try {
				intValues.add(Integer.parseInt(token));
			} catch (NumberFormatException e) {
				// TODO: change this to throw proper exception
				return null;
			}
		}

		return intValues;
	}

	/**
	 * @return {@code VcfGrammar.formatPattern}, the pattern of a FORMAT meta-information line
	 */
	@Override
	protected Pattern getPattern() {
		return VcfGrammar.formatPattern;
	}

	/**
	 * Checks whether the line is a FORMAT meta-information line
	 * 
	 * @param line VCF file line
	 * @return {@code true} if the whole line matches {@code VcfGrammar.formatPattern},
	 * {@code false} otherwise (including a {@code null} line)
	 */
	public static final boolean isFormat(String line) {
		return line != null && VcfGrammar.formatPattern.matcher(line).matches();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy