All downloads are free. Search and download functionality uses the official Maven repository.

org.geneweaver.io.reader.VariantReader Maven / Gradle / Ivy

There is a newer version: 2.7.12
Show newest version
/*-
 * 
 * Copyright 2018, 2020  The Jackson Laboratory Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * @author Matthew Gerring
 */
package org.geneweaver.io.reader;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Stream;

import org.apache.commons.beanutils.BeanMap;
import org.geneweaver.domain.Entity;
import org.geneweaver.domain.GeneticEntity;
import org.geneweaver.domain.Variant;
import org.geneweaver.domain.VariantEffect;
import org.geneweaver.io.connector.VariantConnector;

// TODO: Auto-generated Javadoc
/**
 * Class which reads a file using Scanner such that even
 * large files may be parsed without all being in memory.
 * 
 * @author Matthew Gerring
 * @param   A node entity, either a Gene or a Transcript related to a Gene.
 *
 */
class VariantReader extends LineIteratorReader{
	
	/** The Constant VARIANTS. */
	// TODO Are all invariant types a Variant or are some ignored?
	public static final Collection VARIANTS = Arrays.asList("snv", "deletion", "insertion", "indel", "substitution");


	/**
	 * Create the reader by setting its data
	 * 
	 * @param reader
	 * @throws ReaderException
	 */
	@SuppressWarnings("unchecked")
	@Override
	public VariantReader init(ReaderRequest request) throws ReaderException {
		super.setup(request);
		setDelimiter("\t+"); // Must be a tab only
		return this;
	}

	/**
	 * Creates the.
	 *
	 * @param line the line
	 * @return the n
	 * @throws ReaderException the reader exception
	 */
	@SuppressWarnings("unchecked")
	@Override
	protected N create(String line) throws ReaderException {
        
		String[] rec = line.split(getDelimiter());
		Variant bean = new Variant();
         
        try {
			BeanMap d = new BeanMap(bean);
			populate(d, rec);
			
	        Map attributes = parseAttributes(rec[8]);
	        d.put("id", attributes.get("ID"));
	        d.put("rsId", attributes.get("Dbxref").toString().split(":")[1]);
	        d.put("dbxRef", attributes.get("Dbxref"));
	        d.put("altAllele", attributes.get("Variant_seq"));
	        d.put("refAllele", attributes.get("Reference_seq"));
	        if (request.isIncludeAll() ) { // Variant effects are used for connections. If we are not 
	        						   // writing connections, then we do not need to parse them.
	        	d.put("variantEffect", createVariantEffects(bean, attributes.get("Variant_effect")));
	        }
	        
        } catch (IllegalArgumentException ne) {
        	throw new ReaderException("The line "+line+" of bean type "+bean.getClass().getSimpleName()+" cannot be parsed ", ne);
        }
        
        return (N)bean;
	}

	/**
	 * Creates the variant effects.
	 *
	 * @param bean the bean
	 * @param varEff the var eff
	 * @return the sets the
	 */
	private Set createVariantEffects(Variant bean, String varEff) {
		if (varEff == null) return null;
		if (varEff.isEmpty()) return null;

		Set effects = new HashSet<>();
		for (String line : varEff.split(",")) {
			String[] vals = line.split(" ");
			// @see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gvf.md
			// sequence_variant index feature_type feature_ID 
			VariantEffect effect = new VariantEffect();
			effect.setChr(bean.getChr());
			effect.setSequenceVariant(vals[0]);
			effect.setIndex(Integer.parseInt(vals[1]));
			effect.setFeatureType(vals[2]);
			effect.setFeatureId(vals[3]);
			effects.add(effect);
		}
		return effects;
	}

	/**
	 * Gets the assignment char.
	 *
	 * @return the assignment char
	 */
	protected String getAssignmentChar() {
		return "=";
	}

	@Override
	public  Function> getDefaultConnector() {
		return new VariantConnector<>();
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy