org.geneweaver.io.reader.FastVCFReader Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of gweaver-stream-io Show documentation

The IO bundle for Geneweaver.

There is a newer version: 2.7.12

package org.geneweaver.io.reader;

import java.util.Arrays;
import java.util.List;

import org.apache.commons.beanutils.BeanMap;
import org.geneweaver.domain.Entity;
import org.geneweaver.domain.VariantCall;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A streaming VCF reader already exists in htsjdk
 * (Maven coordinates {@code com.github.samtools:htsjdk:2.24.1}).
 *
 * However, it is quite a large dependency to add to a small package like this one.
 * More importantly, this simple reader is designed to do just what we need when building
 * the geneweaver graph. This means that long lines of individual information are not parsed
 * or split, so the stream processing this file can go *fast*.
 *
 * @author gerrim
 *
 */
public class FastVCFReader extends LineIteratorReader {
	
	private static Logger logger = LoggerFactory.getLogger(FastVCFReader.class);

	/**
	 * Initialises this reader from the given request and forces the
	 * column delimiter to a single tab character.
	 * 
	 * @param request the request handed on unchanged to {@code super.setup}
	 * @return this reader instance
	 * @throws ReaderException declared by the signature; presumably raised by
	 *         {@code super.setup} when the request cannot be used - TODO confirm
	 */
	@SuppressWarnings("unchecked")
	@Override
	public FastVCFReader init(ReaderRequest request) throws ReaderException {
		// Let the parent class configure itself from the request first.
		super.setup(request);
		setDelimiter("\t"); // Must be a tab only
		return this;
	}

	/**
	 * Column names taken from the last header line, with its first character
	 * dropped and the remainder split on the delimiter. Stays {@code null}
	 * until {@code create} fills it in on its first call.
	 * NOTE(review): declared as a raw List; presumably a list of String, and
	 * the generic parameter may have been lost in this copy - confirm.
	 */
	private List headerNames;

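	/**
	 * Creates an object from a single line of the file. On the first call the
	 * column names are read from the last header line and cached.
	 *
	 * @param line the line to convert
	 * @return the object created from the line
	 * @throws ReaderException if no header is available, since VCF files must have one
	 */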
	@SuppressWarnings("unchecked")
	@Override
	protected N create(String line) throws ReaderException {

		if (headerNames==null) {
			if (header==null || header.isEmpty()) {
				throw new ReaderException("VCF files must have a header!");
			}
			String headLine = header.get(header.size()-1);
			String[] names = headLine.substring(1).split(getDelimiter());
			
			// Something like: CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, HG00096, ... 
			headerNames = Arrays.asList(names);
		}
		VariantCall bean = new VariantCall();
		BeanMap d = new BeanMap(bean);
		
		// Splitting these long lines is slow and we do not need the 
		// individual values, therefore we do not split instead we
		// substring the line for each delimiter	
		String sline = line;
		for(int i=0;i

    

    

    
            
    
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api