pro.parseq.vcf.utils.VcfParserImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vcf-explorer Show documentation
Show all versions of vcf-explorer Show documentation
Library for Variant Call Format (VCF) files manipulation
/*******************************************************************************
* Copyright 2016-2017 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*******************************************************************************/
package pro.parseq.vcf.utils;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pro.parseq.vcf.exceptions.InvalidVcfFileException;
import pro.parseq.vcf.fields.Filter;
import pro.parseq.vcf.fields.Format;
import pro.parseq.vcf.fields.Information;
import pro.parseq.vcf.types.DataLine;
import pro.parseq.vcf.types.Header;
import pro.parseq.vcf.types.Metadata;
import pro.parseq.vcf.types.VcfFile;
/**
* {@link VcfParser} implementation based on VCFv4.2 specification
*
* @author Alexander Afanasyev [email protected]
*/
public class VcfParserImpl implements VcfParser {

    private static final Logger logger = LoggerFactory.getLogger(VcfParserImpl.class);

    /**
     * Parses VCF content line by line: the mandatory fileformat line first, then the
     * meta-information section up to and including the header line, then every remaining
     * line as a data line.
     *
     * <p>A missing or non-matching fileformat line always aborts parsing. Any other
     * malformed line is logged; it aborts parsing only when {@code mode} is
     * {@link FaultTolerance#FAIL_FAST}, otherwise the line is still stored in the result.
     *
     * @param reader source of VCF lines; {@code readLine()} returning {@code null} is
     *               treated as end of input
     * @param mode   fault tolerance mode controlling whether malformed lines abort parsing
     * @return the populated {@link VcfFile}
     * @throws InvalidVcfFileException if the fileformat line is missing or invalid, or if a
     *                                 malformed line is met in {@code FAIL_FAST} mode
     */
    @Override
    public VcfFile parse(VcfReader reader, FaultTolerance mode) throws InvalidVcfFileException {
        VcfFile vcfData = new VcfFile();

        String fileformat = reader.readLine();
        if (fileformat == null || !VcfGrammar.fileformatPattern.matcher(fileformat).matches()) {
            logger.error("Invalid fileformat field: {}", fileformat);
            throw new InvalidVcfFileException(1, "Invalid fileformat field");
        }
        vcfData.putOtherMetadata(new Metadata(fileformat));

        if (!readMetaInformation(reader, mode, vcfData)) {
            // Previously this case was completely silent; the result is still returned as before.
            logger.warn("End of input reached before a header line was found");
        }
        readDataLines(reader, mode, vcfData);

        logger.info("VCF has been parsed. Total lines: {}", vcfData.size());
        return vcfData;
    }

    /**
     * Reads the meta-information section, storing each line in {@code vcfData}, and stops
     * right after the header line has been consumed.
     *
     * @return {@code true} if a header line was found, {@code false} if the input ended first
     */
    private static boolean readMetaInformation(VcfReader reader, FaultTolerance mode,
            VcfFile vcfData) throws InvalidVcfFileException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (Format.isFormat(line)) {
                Format formatDef = new Format(line);
                if (!formatDef.isValid()) {
                    reportMalformed(mode, vcfData, "FORMAT field meta-information line", formatDef);
                }
                vcfData.putFormat(formatDef);
            } else if (Filter.isFilter(line)) {
                Filter filterDef = new Filter(line);
                if (!filterDef.isValid()) {
                    reportMalformed(mode, vcfData, "FILTER field meta-information line", filterDef);
                }
                vcfData.putFilter(filterDef);
            } else if (Information.isInformation(line)) {
                Information infoDef = new Information(line);
                if (!infoDef.isValid()) {
                    reportMalformed(mode, vcfData, "INFO field meta-information line", infoDef);
                }
                vcfData.putInfo(infoDef);
            } else if (Header.isHeader(line)) {
                logger.info("Meta-information section is over, found header line {}: {}",
                        vcfData.size() + 1, line);
                Header header = new Header(line);
                if (!header.isValid()) {
                    reportMalformed(mode, vcfData, "header line", header);
                }
                List<String> sampleNames = header.getSampleNames();
                vcfData.setSampleNames(sampleNames);
                vcfData.addLine(header);
                logger.info("Input VCF contains {} samples", sampleNames.size());
                // Meta-information section end is reached
                return true;
            } else {
                // Anything that is not FORMAT/FILTER/INFO/header is kept as generic metadata
                Metadata metadata = new Metadata(line);
                if (!metadata.isValid()) {
                    reportMalformed(mode, vcfData, "meta-information line", metadata);
                }
                vcfData.putOtherMetadata(metadata);
            }
        }
        return false;
    }

    /**
     * Reads every remaining line as a data line and adds its variants to {@code vcfData}.
     */
    private static void readDataLines(VcfReader reader, FaultTolerance mode, VcfFile vcfData)
            throws InvalidVcfFileException {
        String line;
        while ((line = reader.readLine()) != null) {
            DataLine dataLine = new DataLine(line, vcfData);
            if (!dataLine.isValid()) {
                reportMalformed(mode, vcfData, "data line", dataLine);
            }
            vcfData.addDataLineVariants(dataLine);
        }
    }

    /**
     * Logs a malformed line and, in {@code FAIL_FAST} mode, aborts parsing.
     *
     * <p>The reported position is {@code vcfData.size() + 1}, computed here so the value is
     * passed to the exception constructor exactly as the call sites used to pass it.
     *
     * @param lineKind   human-readable description of the line type, used in both messages
     * @param parsedLine the parsed line object; rendered via its string representation
     * @throws InvalidVcfFileException if {@code mode} is {@link FaultTolerance#FAIL_FAST}
     */
    private static void reportMalformed(FaultTolerance mode, VcfFile vcfData, String lineKind,
            Object parsedLine) throws InvalidVcfFileException {
        logger.error("Malformed {} {}: {}", lineKind, vcfData.size() + 1, parsedLine);
        if (mode == FaultTolerance.FAIL_FAST) {
            throw new InvalidVcfFileException(vcfData.size() + 1,
                    String.format("Malformed %s: %s", lineKind, parsedLine));
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy