org.biojava.nbio.structure.io.FastaStructureParser Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation

The protein structure modules of BioJava.

There is a newer version: 7.1.3

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io;

import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReader;
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashMap;
import java.util.Map;


/**
 * Reads a protein sequence from a fasta file and attempts to match it to a
 * 3D structure. Any gaps ('-') in the fasta file are preserved as null atoms in
 * the output, allowing structural alignments to be read from fasta files.
 *
 * Structures are loaded from an AtomCache. For this to work, the accession
 * for each protein should be parsed from the fasta header line into a form
 * understood by {@link AtomCache#getStructure(String)}.
 *
 * <p>Lowercase letters are sometimes used to specify unaligned residues.
 * This information can be preserved by using a CasePreservingSequenceCreator,
 * which allows the case of residues to be accessed through the
 * {@link ProteinSequence#getUserCollection()} method.
 *
 * @author Spencer Bliven
 *
 */
public class FastaStructureParser {

	// inputs
	// Source of the fasta records; process() calls reader.process() on it.
	private FastaReader reader;
	// Structure source supplied by the caller (see the class Javadoc on AtomCache).
	private AtomCache cache;

	// cache processed data
	// All four arrays are null until process() has run. process() uses
	// "sequences == null" as its only-run-once guard and sizes the other
	// three arrays to sequences.length.
	private String[] accessions;
	// Values of the map returned by reader.process(), in that map's iteration order.
	private ProteinSequence[] sequences;
	private Structure[] structures;
	// One row per sequence; process() allocates the outer array only, rows are
	// filled later (the filling code is outside the visible part of this file).
	private ResidueNumber[][] residues;

	/**
	 * Creates a parser that reads fasta records from an input stream.
	 *
	 * The stream, header parser and sequence creator are wrapped in a new
	 * {@link FastaReader}, and construction is then delegated to
	 * {@link #FastaStructureParser(FastaReader, AtomCache)}.
	 *
	 * @param is stream containing the fasta data
	 * @param headerParser parser applied to the fasta header lines
	 * @param sequenceCreator factory used by the reader to build sequences
	 * @param cache cache stored for later use by this parser
	 */
	public FastaStructureParser(
			InputStream is,
			SequenceHeaderParserInterface headerParser,
			SequenceCreatorInterface sequenceCreator,
			AtomCache cache) {
		this(
				new FastaReader(
						is,
						headerParser,
						sequenceCreator),
				cache);
	}

	/**
	 * Creates a parser that reads fasta records from a file.
	 *
	 * The file, header parser and sequence creator are wrapped in a new
	 * {@link FastaReader}, and construction is then delegated to
	 * {@link #FastaStructureParser(FastaReader, AtomCache)}.
	 *
	 * @param file fasta file to read
	 * @param headerParser parser applied to the fasta header lines
	 * @param sequenceCreator factory used by the reader to build sequences
	 * @param cache cache stored for later use by this parser
	 * @throws FileNotFoundException declared because the underlying reader is
	 *         created from a file; presumably raised when the file cannot be
	 *         opened
	 */
	public FastaStructureParser(
			File file,
			SequenceHeaderParserInterface headerParser,
			SequenceCreatorInterface sequenceCreator,
			AtomCache cache) throws FileNotFoundException {
		this(
				new FastaReader(
						file,
						headerParser,
						sequenceCreator),
				cache);
	}

	/**
	 * Creates a parser from an already configured reader.
	 *
	 * This constructor only stores its arguments and resets the cached
	 * results; the reader is not consulted until {@link #process()} is called.
	 *
	 * @param reader reader that supplies the fasta sequences
	 * @param cache cache stored for later use by this parser
	 */
	public FastaStructureParser(FastaReader reader, AtomCache cache) {
		// Nothing has been processed yet: every cached result starts out null,
		// which is what process() checks to decide whether it still has work to do.
		this.residues = null;
		this.structures = null;
		this.sequences = null;
		this.accessions = null;

		this.cache = cache;
		this.reader = reader;
	}


	/**
	 * Parses the fasta file and loads it into memory.
	 *
	 * Information can be subsequently accessed through
	 * {@link #getSequences()},
	 * {@link #getStructures()},
	 * {@link #getResidues()}, and
	 * {@link #getAccessions()}.
	 *
	 * @throws IOException
	 * @throws StructureException
	 */
	public void process() throws IOException, StructureException {
		if(sequences == null) { // only process once, then return cached values
			Map sequenceMap = reader.process();

			sequences = sequenceMap.values().toArray(new ProteinSequence[0]);
			accessions = new String[sequences.length];
			structures = new Structure[sequences.length];
			residues = new ResidueNumber[sequences.length][];

			// Match each sequence  to a series of PDB Residue numbers
			for(int i=0;i

    

    

    
                
 
            
    
                
 
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api