All downloads are free. Search and download functionality uses the official Maven repository.

org.biojava.nbio.aaproperties.IPeptideProperties Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 2011.05.09 by kohchuanhock
 *
 */
package org.biojava.nbio.aaproperties;

import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;

import jakarta.xml.bind.JAXBException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Map;


/**
 * An interface to generate some basic physico-chemical properties of protein sequences.
 * <p>
 * The following properties could be generated:
 * <ul>
 * <li>Molecular weight</li>
 * <li>Absorbance</li>
 * <li>Extinction coefficient</li>
 * <li>Instability index</li>
 * <li>Aliphatic index</li>
 * <li>Average hydropathy value</li>
 * <li>Isoelectric point</li>
 * <li>Net charge at pH 7</li>
 * <li>Composition of specified amino acid</li>
 * <li>Composition of the 20 standard amino acid</li>
 * </ul>
 *
 * @author kohchuanhock
 * @version 2011.05.09
 * @see PeptideProperties
 */
public interface IPeptideProperties {

    // NOTE(review): several comments below originally pointed to external reference
    // documentation through hyperlinks ("here"). The link targets are not present in
    // this copy of the file and should be restored from the upstream source.

    /**
     * Returns the molecular weight of sequence. The sequence argument must be a protein
     * sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the sequence.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @return the total molecular weight of sequence + weight of water molecule
     * @see ProteinSequence
     */
    public double getMolecularWeight(ProteinSequence sequence);

    /**
     * Returns the molecular weight of sequence. The sequence argument must be a protein
     * sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the sequence.
     * Molecular weights are based on the input file, which must be XML using the defined
     * schema. Note that it assumes that the ElementMass.xml file can be found in the
     * default location.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCompositionFile
     *            xml file that details the composition of amino acids
     * @return the total molecular weight of sequence + weight of water molecule
     * @throws JAXBException
     *             thrown if unable to properly parse either the element mass file or
     *             aminoAcidCompositionFile
     * @throws FileNotFoundException
     *             thrown if either the element mass file or aminoAcidCompositionFile
     *             are not found
     */
    public double getMolecularWeight(ProteinSequence sequence, File aminoAcidCompositionFile)
            throws JAXBException, FileNotFoundException;

    /**
     * Returns the molecular weight of sequence. The sequence argument must be a protein
     * sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the sequence.
     * Molecular weights are based on the input files. These input files must be XML using
     * the defined schema.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @param elementMassFile
     *            xml file that details the mass of each elements and isotopes
     * @param aminoAcidCompositionFile
     *            xml file that details the composition of amino acids
     * @return the total molecular weight of sequence + weight of water molecule
     * @throws JAXBException
     *             thrown if unable to properly parse either elementMassFile or
     *             aminoAcidCompositionFile
     * @throws FileNotFoundException
     *             thrown if either elementMassFile or aminoAcidCompositionFile are not found
     */
    public double getMolecularWeight(ProteinSequence sequence, File elementMassFile,
            File aminoAcidCompositionFile) throws JAXBException, FileNotFoundException;

    /**
     * Returns the molecular weight of sequence. The sequence argument must be a protein
     * sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the sequence.
     * Molecular weights are based on the given AminoAcidCompositionTable, which is built
     * from XML input files using the defined schema.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCompositionTable
     *            an amino acid composition table obtained by calling
     *            {@link #obtainAminoAcidCompositionTable(File)} or
     *            {@link #obtainAminoAcidCompositionTable(File, File)}
     * @return the total molecular weight of sequence + weight of water molecule
     */
    public double getMolecularWeightBasedOnXML(ProteinSequence sequence,
            AminoAcidCompositionTable aminoAcidCompositionTable);

    /**
     * Initializes an amino acid composition table based on the input xml file, for use in
     * future calls to
     * {@link #getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable)}.
     * Note that ElementMass.xml is assumed to be available in the default location.
     *
     * @param aminoAcidCompositionFile
     *            xml file that details the composition of amino acids
     * @return the initialized amino acid composition table
     * @throws JAXBException
     *             thrown if unable to properly parse either the element mass file or
     *             aminoAcidCompositionFile
     * @throws FileNotFoundException
     *             thrown if either the element mass file or aminoAcidCompositionFile
     *             are not found
     */
    public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile)
            throws JAXBException, FileNotFoundException;

    /**
     * Initializes an amino acid composition table based on the input xml files, for use in
     * future calls to
     * {@link #getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable)}.
     *
     * @param elementMassFile
     *            xml file that details the mass of each elements and isotopes
     * @param aminoAcidCompositionFile
     *            xml file that details the composition of amino acids
     * @return the initialized amino acid composition table
     * @throws JAXBException
     *             thrown if unable to properly parse either elementMassFile or
     *             aminoAcidCompositionFile
     * @throws FileNotFoundException
     *             thrown if either elementMassFile or aminoAcidCompositionFile are not found
     */
    public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile,
            File aminoAcidCompositionFile) throws JAXBException, FileNotFoundException;

    /**
     * Returns the extinction coefficient of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     * The extinction coefficient indicates how much light a protein absorbs at
     * a certain wavelength. It is useful to have an estimation of this
     * coefficient for following a protein with a spectrophotometer when
     * purifying it.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @param assumeCysReduced
     *            true if Cys are assumed to be reduced and false if Cys are
     *            assumed to form cystines
     * @return the extinction coefficient of sequence
     * @see ProteinSequence
     */
    public double getExtinctionCoefficient(ProteinSequence sequence, boolean assumeCysReduced);

    /**
     * Returns the absorbance (optical density) of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @param assumeCysReduced
     *            true if Cys are assumed to be reduced and false if Cys are
     *            assumed to form cystines
     * @return the absorbance (optical density) of sequence
     * @see ProteinSequence
     */
    public double getAbsorbance(ProteinSequence sequence, boolean assumeCysReduced);

    /**
     * Returns the instability index of sequence. The sequence argument must be
     * a protein sequence consisting of only non-ambiguous characters.
     * The instability index provides an estimate of the stability of your
     * protein in a test tube.
     * A protein whose instability index is smaller than 40 is predicted as stable,
     * a value above 40 predicts that the protein may be unstable.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @return the instability index of sequence
     * @see ProteinSequence
     */
    public double getInstabilityIndex(ProteinSequence sequence);

    /**
     * Returns the aliphatic index of sequence. The sequence argument must be a
     * protein sequence consisting of only non-ambiguous characters.
     * The aliphatic index of a protein is defined as the relative volume
     * occupied by aliphatic side chains (alanine, valine, isoleucine, and
     * leucine). It may be regarded as a positive factor for the increase of
     * thermostability of globular proteins.
     * <p>
     * Note: the method name is spelled "Apliphatic"; it is kept unchanged so that
     * existing callers and implementations continue to compile.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @return the aliphatic index of sequence
     * @see ProteinSequence
     */
    public double getApliphaticIndex(ProteinSequence sequence);

    /**
     * Returns the average hydropathy value of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     * The average value for a sequence is calculated as the sum of hydropathy
     * values of all the amino acids, divided by the number of residues in the
     * sequence. Hydropathy values are based on (Kyte, J. and Doolittle, R.F.
     * (1982) A simple method for displaying the hydropathic character of a
     * protein. J. Mol. Biol. 157, 105-132).
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @return the average hydropathy value of sequence
     * @see ProteinSequence
     */
    public double getAvgHydropathy(ProteinSequence sequence);

    /**
     * Returns the isoelectric point of sequence. The sequence argument must be
     * a protein sequence consisting of only non-ambiguous characters.
     * The isoelectric point is the pH at which the protein carries no net
     * electrical charge.
     * <p>
     * pKa values used will be either
     * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539"
     * OR
     * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @param useExpasyValues
     *            whether to use Expasy values (Default) or Innovagen values
     * @return the isoelectric point of sequence
     * @see ProteinSequence
     */
    public double getIsoelectricPoint(ProteinSequence sequence, boolean useExpasyValues);

    /**
     * Returns the isoelectric point of sequence without an explicit choice of pKa values.
     * <p>
     * NOTE(review): the two-argument overload documents Expasy values as the default, so
     * this overload presumably uses them; confirm against the implementing class.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @return the isoelectric point of sequence
     * @see #getIsoelectricPoint(ProteinSequence, boolean)
     */
    public double getIsoelectricPoint(ProteinSequence sequence);

    /*
     * NOTE(review): in this copy of the file, the Javadoc that documented the net-charge
     * computation ("Returns the net charge of sequence at pH 7 ...") was never terminated
     * and ran on into the Javadoc of getAromaticity. The only surviving trace of what
     * followed it is the fragment "getAAComposition(ProteinSequence sequence);".
     * The interface-level documentation also lists net charge and amino acid composition
     * as supported properties, so the corresponding declarations appear to have been lost
     * and should be restored from the upstream source. They are deliberately not declared
     * here, to avoid guessing at their signatures.
     */

    /**
     * Calculates the aromaticity value of a protein according to Lobry, 1994.
     * It is simply the relative frequency of Phe+Trp+Tyr.
     *
     * @param sequence
     *            a protein sequence consisting of non-ambiguous characters only
     * @return the aromaticity of a protein sequence
     * @see ProteinSequence
     */
    public double getAromaticity(ProteinSequence sequence);
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy