All Downloads are FREE. Search and download functionalities are using the official Maven repository.

pro.parseq.vcf.utils.VcfParserImpl Maven / Gradle / Ivy

There is a newer version: 1.1.1-RELEASE
Show newest version
/*******************************************************************************
 *     Copyright 2016-2017 the original author or authors.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 *******************************************************************************/
package pro.parseq.vcf.utils;

import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import pro.parseq.vcf.exceptions.InvalidVcfFileException;
import pro.parseq.vcf.fields.Filter;
import pro.parseq.vcf.fields.Format;
import pro.parseq.vcf.fields.Information;
import pro.parseq.vcf.types.DataLine;
import pro.parseq.vcf.types.Header;
import pro.parseq.vcf.types.Metadata;
import pro.parseq.vcf.types.VcfFile;

/**
 * {@link VcfParser} implementation based on VCFv4.2 specification
 * 
 * @author Alexander Afanasyev [email protected]
 */
public class VcfParserImpl implements VcfParser {

	private static final Logger logger = LoggerFactory.getLogger(VcfParserImpl.class);

	/**
	 * Parses VCF content supplied by the reader into a {@link VcfFile}.
	 * 
	 * The first line must match {@code VcfGrammar.fileformatPattern}; if it is
	 * absent or does not match, parsing is aborted regardless of the fault
	 * tolerance mode. Subsequent lines are treated as meta-information until
	 * a header line is met, and every line after the header is treated as
	 * a data line. A {@code null} returned by the reader is treated as the end
	 * of input.
	 * 
	 * Malformed lines are always logged. In {@link FaultTolerance#FAIL_FAST}
	 * mode the first malformed line aborts parsing; in any other mode the line
	 * is still stored in the result and parsing goes on.
	 * 
	 * @param reader source of VCF lines
	 * @param mode how to react to malformed lines
	 * @return parsed VCF content
	 * @throws InvalidVcfFileException if the fileformat line is invalid, or
	 *         if a malformed line is met in {@link FaultTolerance#FAIL_FAST} mode
	 */
	@Override
	public VcfFile parse(VcfReader reader, FaultTolerance mode) throws InvalidVcfFileException {

		VcfFile vcfData = new VcfFile();

		String fileformat = reader.readLine();
		if (fileformat == null || !VcfGrammar.fileformatPattern
				.matcher(fileformat).matches()) {
			logger.error("Invalid fileformat field: {}", fileformat);
			throw new InvalidVcfFileException(1, "Invalid fileformat field");
		}
		vcfData.putOtherMetadata(new Metadata(fileformat));

		if (!readMetaInformation(reader, mode, vcfData)) {
			// Input ended inside the meta-information section; there is nothing
			// left for the data section to read, but the caller should know
			logger.warn("End of input reached before header line was found");
		}
		readDataLines(reader, mode, vcfData);

		logger.info("VCF has been parsed. Total lines: {}", vcfData.size());

		return vcfData;
	}

	/**
	 * Reads the meta-information section up to and including the header line.
	 * 
	 * @return {@code true} if the header line was found, {@code false} if the
	 *         input ended first
	 */
	private boolean readMetaInformation(VcfReader reader, FaultTolerance mode, VcfFile vcfData)
			throws InvalidVcfFileException {

		String line;
		while ((line = reader.readLine()) != null) {

			if (Format.isFormat(line)) {

				Format formatDef = new Format(line);
				if (!formatDef.isValid()) {
					reportMalformed("FORMAT field meta-information", formatDef, vcfData, mode);
				}
				vcfData.putFormat(formatDef);
			} else if (Filter.isFilter(line)) {

				Filter filterDef = new Filter(line);
				if (!filterDef.isValid()) {
					reportMalformed("FILTER field meta-information", filterDef, vcfData, mode);
				}
				vcfData.putFilter(filterDef);
			} else if (Information.isInformation(line)) {

				Information infoDef = new Information(line);
				if (!infoDef.isValid()) {
					reportMalformed("INFO field meta-information", infoDef, vcfData, mode);
				}
				vcfData.putInfo(infoDef);
			} else if (Header.isHeader(line)) {

				logger.info("Meta-information section is over, found header line {}: {}",
						vcfData.size() + 1, line);

				Header header = new Header(line);
				if (!header.isValid()) {
					reportMalformed("header", header, vcfData, mode);
				}

				List<String> sampleNames = header.getSampleNames();
				vcfData.setSampleNames(sampleNames);
				vcfData.addLine(header);
				logger.info("Input VCF contains {} samples", sampleNames.size());
				// Meta-information section end is reached
				return true;
			} else {

				Metadata metadata = new Metadata(line);
				if (!metadata.isValid()) {
					reportMalformed("meta-information", metadata, vcfData, mode);
				}
				vcfData.putOtherMetadata(metadata);
			}
		}

		return false;
	}

	/**
	 * Reads every remaining line as a data line and stores its variants.
	 */
	private void readDataLines(VcfReader reader, FaultTolerance mode, VcfFile vcfData)
			throws InvalidVcfFileException {

		String line;
		while ((line = reader.readLine()) != null) {

			DataLine dataLine = new DataLine(line, vcfData);
			if (!dataLine.isValid()) {
				reportMalformed("data", dataLine, vcfData, mode);
			}
			vcfData.addDataLineVariants(dataLine);
		}
	}

	/**
	 * Logs a malformed line and, in {@link FaultTolerance#FAIL_FAST} mode,
	 * aborts parsing. Must be called before the line is stored, because the
	 * reported line number is derived from the current size of the result.
	 * 
	 * @param kind human readable kind of the line, used in messages
	 * @param malformed parsed representation of the offending line
	 */
	private void reportMalformed(String kind, Object malformed, VcfFile vcfData,
			FaultTolerance mode) throws InvalidVcfFileException {

		logger.error("Malformed {} line {}: {}", kind, vcfData.size() + 1, malformed);
		if (mode == FaultTolerance.FAIL_FAST) {
			throw new InvalidVcfFileException(vcfData.size() + 1,
					String.format("Malformed %s line: %s", kind, malformed));
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy