All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.geneweaver.io.reader.FastVCFReader Maven / Gradle / Ivy

There is a newer version: 2.7.12
Show newest version
package org.geneweaver.io.reader;

import java.util.Arrays;
import java.util.List;

import org.apache.commons.beanutils.BeanMap;
import org.geneweaver.domain.Entity;
import org.geneweaver.domain.VariantCall;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A streaming VCF reader already exists in htsjdk:
 * <pre>{@code
 * <dependency>
 *     <groupId>com.github.samtools</groupId>
 *     <artifactId>htsjdk</artifactId>
 *     <version>2.24.1</version>
 * </dependency>
 * }</pre>
 *
 * However, it is quite a large dependency to add to a small package like this one.
 * More importantly, this simple reader is designed to do just what we need when building
 * the geneweaver graph. This means that the long lines of individual information are not parsed
 * or split, so the stream processing this file can go *fast*.
		
 * @author gerrim
 *
 */
public class FastVCFReader extends LineIteratorReader {
	
	/** Class-wide SLF4J logger; declared final so the shared reference cannot be reassigned. */
	private static final Logger logger = LoggerFactory.getLogger(FastVCFReader.class);

	/**
	 * Initialises this reader from the given request and then forces the
	 * delimiter to a single tab character.
	 * 
	 * @param request the request passed on to {@code super.setup(request)}
	 * @return this reader instance, so that calls can be chained
	 * @throws ReaderException declared by this method; presumably raised by
	 *         {@code setup} when the request cannot be used (not visible here)
	 */
	@SuppressWarnings("unchecked")
	@Override
	public FastVCFReader init(ReaderRequest request) throws ReaderException {
		super.setup(request);
		// Set after setup so the tab is what is in effect when init returns.
		setDelimiter("\t"); // Must be a tab only
		return this;
	}

	// Column names split from the last header line (minus its first character).
	// Stays null until the first call to create(String), which fills it in once
	// and reuses it for every later line.
	private List headerNames;

	/**
	 * Creates the.
	 *
	 * @param line the line
	 * @return the n
	 * @throws ReaderException the reader exception
	 */
	@SuppressWarnings("unchecked")
	@Override
	protected N create(String line) throws ReaderException {

		if (headerNames==null) {
			if (header==null || header.isEmpty()) {
				throw new ReaderException("VCF files must have a header!");
			}
			String headLine = header.get(header.size()-1);
			String[] names = headLine.substring(1).split(getDelimiter());
			
			// Something like: CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, HG00096, ... 
			headerNames = Arrays.asList(names);
		}
		VariantCall bean = new VariantCall();
		BeanMap d = new BeanMap(bean);
		
		// Splitting these long lines is slow and we do not need the 
		// individual values, therefore we do not split instead we
		// substring the line for each delimiter	
		String sline = line;
		for(int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy