All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.aaproperties.PeptidePropertiesImpl Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.aaproperties;

import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable;
import org.biojava.nbio.aaproperties.xml.ElementTable;
import org.biojava.nbio.aaproperties.xml.MyValidationEventHandler;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.JAXBException;
import jakarta.xml.bind.Unmarshaller;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;

/**
 * This class contains the actual implementation of IPeptideProperties and is wrapped around by PeptideProperties for ease of use.
 *
 * @author kohchuanhock
 * @version 2011.08.22
 * @since 3.0.2
 * @see IPeptideProperties
 * @see PeptideProperties
 */
public class PeptidePropertiesImpl implements IPeptideProperties{

	private final static Logger logger = LoggerFactory.getLogger(PeptidePropertiesImpl.class);

	/**
	 * @return the molecular weight of water
	 */
	private double getWaterMoleculeWeight(){
		final double hydrogenMW = 1.0079;
		final double hydroxideMW = 17.0073;
		//H	1.0079	OH	17.0073
		return hydrogenMW + hydroxideMW;
	}

	private char[] getSequence(String sequence, boolean ignoreCase){
		if(ignoreCase){
			return sequence.toUpperCase().toCharArray();
		}else{
			return sequence.toCharArray();
		}
	}

	@Override
	public double getMolecularWeight(ProteinSequence sequence) {
		double value = 0.0;
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		char[] seq = getSequence(sequence.toString(), true);//ignore case
		for(char aa:seq){
			AminoAcidCompound c = aaSet.getCompoundForString(String.valueOf(aa));
			if(Constraints.aa2MolecularWeight.containsKey(c)){
				value += Constraints.aa2MolecularWeight.get(c);
			}
		}
		if(value == 0)
			return value;
		else
			return value + getWaterMoleculeWeight();
	}

	@Override
	public double getMolecularWeight(ProteinSequence sequence, File aminoAcidCompositionFile) throws JAXBException, FileNotFoundException {
		File elementMassFile = new File("./src/main/resources/ElementMass.xml");
		if(!elementMassFile.exists()){
			throw new FileNotFoundException("Cannot locate ElementMass.xml. " +
					"Please use getMolecularWeight(ProteinSequence, File, File) to specify ElementMass.xml location.");
		}
		return getMolecularWeightBasedOnXML(sequence, obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile));
	}

	@Override
	public double getMolecularWeight(ProteinSequence sequence, File elementMassFile, File aminoAcidCompositionFile)
			throws JAXBException, FileNotFoundException{
		return getMolecularWeightBasedOnXML(sequence, obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile));
	}

	@Override
	public double getMolecularWeightBasedOnXML(ProteinSequence sequence, AminoAcidCompositionTable aminoAcidCompositionTable){
		double value = 0.0;
		char[] seq = sequence.toString().toCharArray();
		for(char aa:seq){
			Double weight = aminoAcidCompositionTable.getMolecularWeight(aa);
			if(weight != null){
				value += weight;
			}
		}
		if(value == 0.0)
			return value;
		else
			return value + getWaterMoleculeWeight();
	}

	@Override
	public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile)
		throws JAXBException, FileNotFoundException{
		File elementMassFile = new File("./src/main/resources/ElementMass.xml");
		if(!elementMassFile.exists()){
			throw new FileNotFoundException("Cannot locate ElementMass.xml. " +
					"Please use getMolecularWeight(ProteinSequence, File, File) to specify ElementMass.xml location.");
		}
		return obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile);
	}

	@Override
	public AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile)
		throws JAXBException, FileNotFoundException{
		//Parse elementMassFile
		ElementTable iTable = new ElementTable();
		// Get a JAXB Context for the object we created above
		JAXBContext jc = JAXBContext.newInstance(iTable.getClass());
		Unmarshaller u = jc.createUnmarshaller();
		u.setEventHandler(new MyValidationEventHandler());
		iTable = (ElementTable)u.unmarshal(new FileInputStream(elementMassFile));
		iTable.populateMaps();

		//Parse aminoAcidCompositionFile
		AminoAcidCompositionTable aTable = new AminoAcidCompositionTable();
		// Get a JAXB Context for the object we created above
		JAXBContext jc2 = JAXBContext.newInstance(aTable.getClass());
		Unmarshaller u2 = jc2.createUnmarshaller();
		u2.setEventHandler(new MyValidationEventHandler());
		aTable = (AminoAcidCompositionTable)u2.unmarshal(new FileInputStream(aminoAcidCompositionFile));
		aTable.computeMolecularWeight(iTable);
		return aTable;
	}

	@Override
	public double getExtinctionCoefficient(ProteinSequence sequence, boolean assumeCysReduced) {
		//Tyr => Y
		//Trp => W
		//Cys => C
		//E(Prot) = Numb(Tyr)*Ext(Tyr) + Numb(Trp)*Ext(Trp) + Numb(Cystine)*Ext(Cystine)
		//where (for proteins in water measured at 280 nm): Ext(Tyr) = 1490, Ext(Trp) = 5500, Ext(Cystine) = 125;
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		Map extinctAA2Count = this.getExtinctAACount(sequence);

		double eProt;
		if(!assumeCysReduced){
			eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) *
				Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) +
				extinctAA2Count.get(aaSet.getCompoundForString("W")) *
				Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W")) +
				extinctAA2Count.get(aaSet.getCompoundForString("C")) *
				Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("C"));
		}else
			eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) *
				Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) +
				extinctAA2Count.get(aaSet.getCompoundForString("W")) *
				Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W"));

		return eProt;
	}

	@Override
	public double getAbsorbance(ProteinSequence sequence, boolean assumeCysReduced){
		//Absorb(Prot) = E(Prot) / Molecular_weight
		double mw = this.getMolecularWeight(sequence);
		double eProt = this.getExtinctionCoefficient(sequence, assumeCysReduced);
		if (mw == 0.0) {
			logger.warn("Molecular weight is 0.0, can't divide by 0: setting absorbance to 0.0");
			return 0.0;
		}
		return eProt / mw;
	}

	private Map getExtinctAACount(ProteinSequence sequence){
		//Cys => C, Tyr => Y, Trp => W
		int numW = 0;
		int smallW = 0;
		double numC = 0;
		double smallC = 0;
		int numY = 0;
		int smallY = 0;
		for(char aa:sequence.getSequenceAsString().toCharArray()){
			switch(aa){
			case 'W': numW++; break;
			case 'w': smallW++; break;
			case 'C': numC += 0.5; break;
			case 'c': smallC += 0.5; break;
			case 'Y': numY++; break;
			case 'y': smallY++; break;
			}
		}
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		Map extinctAA2Count = new HashMap<>();
		//Ignore Case is always true
		extinctAA2Count.put(aaSet.getCompoundForString("W"), numW + smallW);
		extinctAA2Count.put(aaSet.getCompoundForString("C"), (int) (numC + smallC));
		extinctAA2Count.put(aaSet.getCompoundForString("Y"), numY + smallY);
		return extinctAA2Count;
	}

	@Override
	public double getInstabilityIndex(ProteinSequence sequence) {
		double sum = 0.0;
		String s = sequence.getSequenceAsString().toUpperCase();
		for(int i = 0; i < sequence.getLength() - 1; i++){
			String dipeptide = s.substring(i, i+2);
			if(Constraints.diAA2Instability.containsKey(dipeptide)){
				sum += Constraints.diAA2Instability.get(dipeptide);
			}
		}
		int denominator = s.length() - Utils.getNumberOfInvalidChar(s, null, true);

		if (denominator==0) {
			logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate instability index: setting instability index value to 0.0");
			return 0.0;
		}
		return sum * 10.0 / denominator;
	}

	@Override
	public double getApliphaticIndex(ProteinSequence sequence) {
//		Aliphatic index = X(Ala) + a * X(Val) + b * ( X(Ile) + X(Leu) )
//		where X(Ala), X(Val), X(Ile), and X(Leu) are mole percent (100 X mole fraction)
//		of alanine, valine, isoleucine, and leucine.
//		The coefficients a and b are the relative volume of valine side chain (a = 2.9)
//		and of Leu/Ile side chains (b = 3.9) to the side chain of alanine.
//		Ala => A, Val => V, Ile => I, Leu => L
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		Map aa2Composition = getAAComposition(sequence);
		final double a = 2.9;
		final double b = 3.9;
		double xAla = aa2Composition.get(aaSet.getCompoundForString("A"));
		double xVal = aa2Composition.get(aaSet.getCompoundForString("V"));
		double xIle = aa2Composition.get(aaSet.getCompoundForString("I"));
		double xLeu = aa2Composition.get(aaSet.getCompoundForString("L"));
		return (xAla + (a * xVal) + (b * (xIle + xLeu))) * 100;
	}

	@Override
	public double getAvgHydropathy(ProteinSequence sequence) {
		int validLength = 0;
		double total = 0.0;
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		char[] seq = this.getSequence(sequence.toString(), true);
		for(char aa:seq){
			AminoAcidCompound c = aaSet.getCompoundForString(String.valueOf(aa));
			if(Constraints.aa2Hydrophathicity.containsKey(c)){
				total += Constraints.aa2Hydrophathicity.get(c);
				validLength++;
			}
		}
		if (validLength==0) {
			logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate average hydropathy: setting average hydropathy to 0");
			return 0.0;
		}

		return total / validLength;
	}

	@Override
	public double getIsoelectricPoint(ProteinSequence sequence, boolean useExpasyValues) {
		if(useExpasyValues){
			return this.getIsoelectricPointExpasy(sequence.toString().toUpperCase());
		}else{
			return this.getIsoelectricPointInnovagen(sequence);
		}
	}

	private double getIsoelectricPointInnovagen(ProteinSequence sequence){
		double currentPH = 7.0;
		double changeSize = 7.0;
		String sequenceString = sequence.toString();
		char nTerminalChar = sequenceString.charAt(0);
		char cTerminalChar = sequenceString.charAt(sequenceString.length() - 1);

		Map chargedAA2Count = this.getChargedAACount(sequence);
		double margin;
		final double difference = 0.0001;

		while(true){
			margin = this.getNetChargeInnovagen(chargedAA2Count, currentPH, nTerminalChar, cTerminalChar);
			//Within allowed difference
			if(margin <= difference && margin >= -difference) break;
			changeSize /= 2.0;
			if(margin > 0){
				currentPH += changeSize;
			}else{
				currentPH -= changeSize;
			}
		}
		return currentPH;
	}

	/*
	 *  Pseudo code obtained from email correspondance with ExPASy Helpdesk, Gregoire Rossier
	 */
	//
	// Table of pk values :
	// Note: the current algorithm does not use the last two columns.
	// Each row corresponds to an amino acid starting with Ala. J, O and U are
	// inexistant, but here only in order to have the complete alphabet.
	//
	// Ct Nt Sm Sc Sn
	//
	private final double[][] cPk = {
			{3.55, 7.59, 0.0},  // A
			{3.55, 7.50, 0.0},  // B
			{3.55, 7.50, 9.00}, // C
//			{4.55, 7.50, 4.05}, // D
//			{4.75, 7.70, 4.45}, // E
			{3.55, 7.50, 4.05}, // D
			{3.55, 7.70, 4.45}, // E
			{3.55, 7.50, 0}, // F
			{3.55, 7.50, 0}, // G
			{3.55, 7.50, 5.98}, // H
			{3.55, 7.50, 0.0}, // I
			{0.0, 0.0, 0.0}, // J
			{3.55, 7.50, 10.00}, // K
			{3.55, 7.50, 0.0}, // L
			{3.55, 7.00, 0.0},// M
			{3.55, 7.50, 0.0},// N
			{0.00, 0.00, 0.0},// O
			{3.55, 8.36, 0.0},// P
			{3.55, 7.50, 0.0}, // Q
			{3.55, 7.50, 12.0},// R
			{3.55, 6.93, 0.0},// S
			{3.55, 6.82, 0.0}, // T
			{0.00, 0.00, 0.0}, // U
			{3.55, 7.44, 0.0},// V
			{3.55, 7.50, 0.0},// W
			{3.55, 7.50, 0.0},// X
			{3.55, 7.50, 10.00},// Y
			{3.55, 7.50, 0.0}}; // Z

	private final double PH_MIN = 0.0; /* minimum pH value */
	private final double PH_MAX = 14.0; /* maximum pH value */
	private final double MAXLOOP = 2000.0; /* maximum number of iterations */
	private final double EPSI = 0.0001; /* desired precision */

	private double exp10(double pka){
		return Math.pow(10, pka);
	}

	private double getIsoelectricPointExpasy(String sequence){
		//
		// Compute the amino-acid composition.
		//
		int[] comp = new int[26];
		for(int i = 0; i < sequence.length(); i++){
			int index = sequence.charAt(i) - 'A';
			if(index < 0 || index >= 26) continue;
			comp[index]++;
		}
		//
		// Look up N-terminal and C-terminal residue.
		//
		int nTermResidue = -1;
		int index = 0;
		while((nTermResidue < 0 || nTermResidue >= 26) && index < 25){
			nTermResidue = sequence.charAt(index++) - 'A';
		}

		int cTermResidue = -1;
		index = 1;
		while((cTermResidue < 0 || cTermResidue >= 26) && index < 25){
			cTermResidue = sequence.charAt(sequence.length() - index++) - 'A';
		}

		double phMin = PH_MIN;
		double phMax = PH_MAX;

		double phMid = 0.0;
		double charge = 1.0;
		for (int i = 0; i < MAXLOOP && (phMax - phMin) > EPSI; i++){
			phMid = phMin + (phMax - phMin) / 2.0;

			charge = getNetChargeExpasy(comp, nTermResidue, cTermResidue, phMid);

			if (charge > 0.0) phMin = phMid;
			else phMax = phMid;
		}
		return phMid;
	}

	@Override
	public double getIsoelectricPoint(ProteinSequence sequence){
		return getIsoelectricPoint(sequence, true);
	}

	@Override
	public double getNetCharge(ProteinSequence sequence) {
		return getNetCharge(sequence, true);
	}

	@Override
	public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues){
		return getNetCharge(sequence, true, 7.0);
	}

	@Override
	public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues, double pHPoint){
		if(useExpasyValues){
			return getNetChargeExpasy(sequence.toString().toUpperCase(), pHPoint);
		}else{
			return getNetChargeInnovagen(sequence, pHPoint);
		}
	}

	private double getNetChargeExpasy(String sequence, double pHPoint){
		//
		// Compute the amino-acid composition.
		//
		int[] comp = new int[26];
		for(int i = 0; i < sequence.length(); i++){
			int index = sequence.charAt(i) - 'A';
			if(index < 0 || index >= 26) continue;
			comp[index]++;
		}
		//
		// Look up N-terminal and C-terminal residue.
		//
		int nTermResidue = sequence.charAt(0) - 'A';
		int cTermResidue = sequence.charAt(sequence.length() - 1) - 'A';
		return getNetChargeExpasy(comp, nTermResidue, cTermResidue, pHPoint);
	}

	private double getNetChargeExpasy(int[] comp, int nTermResidue, int cTermResidue, double ph){
		double cter = 0.0;
		if(cTermResidue >= 0 && cTermResidue < 26) cter = exp10(-cPk[cTermResidue][0]) / (exp10(-cPk[cTermResidue][0]) + exp10(-ph));
		double nter = 0.0;
		if(nTermResidue >= 0 && nTermResidue < 26) nter = exp10(-ph) / (exp10(-cPk[nTermResidue][1]) + exp10(-ph));

		double carg = comp['R' - 'A'] * exp10(-ph) / (exp10(-cPk['R' - 'A'][2]) + exp10(-ph));
		double chis = comp['H' - 'A'] * exp10(-ph) / (exp10(-cPk['H' - 'A'][2]) + exp10(-ph));
		double clys = comp['K' - 'A'] * exp10(-ph) / (exp10(-cPk['K' - 'A'][2]) + exp10(-ph));

		double casp = comp['D' - 'A'] * exp10(-cPk['D' - 'A'][2]) / (exp10(-cPk['D' - 'A'][2]) + exp10(-ph));
		double cglu = comp['E' - 'A'] * exp10(-cPk['E' - 'A'][2]) / (exp10(-cPk['E' - 'A'][2]) + exp10(-ph));

		double ccys = comp['C' - 'A'] * exp10(-cPk['C' - 'A'][2]) / (exp10(-cPk['C' - 'A'][2]) + exp10(-ph));
		double ctyr = comp['Y' - 'A'] * exp10(-cPk['Y' - 'A'][2]) / (exp10(-cPk['Y' - 'A'][2]) + exp10(-ph));

		return (carg + clys + chis + nter) - (casp + cglu + ctyr + ccys + cter);
	}

	private double getNetChargeInnovagen(ProteinSequence sequence, double pHPoint) {
		Map chargedAA2Count = this.getChargedAACount(sequence);
		String sequenceString = sequence.getSequenceAsString();
		return getNetChargeInnovagen(chargedAA2Count, pHPoint, sequenceString.charAt(0), sequenceString.charAt(sequenceString.length() - 1));
	}

	private double getNetChargeInnovagen(Map chargedAA2Count, double ph, char nTerminalChar, char cTerminalChar){
		//Constraints.aa2PKa is aleady reinitialized in getChargedAACount hence no need to do it again

		//Lys => K, Arg => R, His => H
		//Asp => D, Glu => E, Cys => C, Tyr => Y
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();

		double nTerminalCharge = 0.0;
		AminoAcidCompound nTermCompound = aaSet.getCompoundForString(String.valueOf(nTerminalChar));
		if(Constraints.aa2NTerminalPka.containsKey(nTermCompound)){
			nTerminalCharge = this.getPosCharge(Constraints.aa2NTerminalPka.get(nTermCompound), ph);
		}

		double cTerminalCharge = 0.0;
		AminoAcidCompound cTermCompound = aaSet.getCompoundForString(String.valueOf(cTerminalChar));
		if(Constraints.aa2CTerminalPka.containsKey(cTermCompound)){
			cTerminalCharge = this.getNegCharge(Constraints.aa2CTerminalPka.get(cTermCompound), ph);
		}

		double kCharge = chargedAA2Count.get(aaSet.getCompoundForString("K")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("K")), ph);
		double rCharge = chargedAA2Count.get(aaSet.getCompoundForString("R")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("R")), ph);
		double hCharge = chargedAA2Count.get(aaSet.getCompoundForString("H")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("H")), ph);
		double dCharge = chargedAA2Count.get(aaSet.getCompoundForString("D")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("D")), ph);
		double eCharge = chargedAA2Count.get(aaSet.getCompoundForString("E")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("E")), ph);
		double cCharge = chargedAA2Count.get(aaSet.getCompoundForString("C")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("C")), ph);
		double yCharge = chargedAA2Count.get(aaSet.getCompoundForString("Y")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("Y")), ph);
//		if((kCharge + rCharge + hCharge) == 0.0 && (dCharge + eCharge + cCharge + yCharge) == 0.0){
//			return 0.0;
//		}
		return (nTerminalCharge + kCharge + rCharge + hCharge) - (dCharge + eCharge + cCharge + yCharge + cTerminalCharge);
	}

	private double getPosCharge(double pka, double ph){
		return Math.pow(10, pka) / (Math.pow(10, pka) + Math.pow(10, ph));
	}

	private double getNegCharge(double pka, double ph){
		return Math.pow(10, ph) / (Math.pow(10, pka) + Math.pow(10, ph));
	}

	private Map getChargedAACount(ProteinSequence sequence){
		//Lys => K, Arg => R, His => H
		//Asp => D, Glu => E, Cys => C, Tyr => Y
		int numK = 0;
		int numR = 0;
		int numH = 0;
		int numD = 0;
		int numE = 0;
		int numC = 0;
		int numY = 0;
		char[] seq = this.getSequence(sequence.getSequenceAsString(), true);
		for(char aa:seq){
			switch(aa){
			case 'K': numK++; break;
			case 'R': numR++; break;
			case 'H': numH++; break;
			case 'D': numD++; break;
			case 'E': numE++; break;
			case 'C': numC++; break;
			case 'Y': numY++; break;
			}
		}
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		Map chargedAA2Count = new HashMap<>();
		chargedAA2Count.put(aaSet.getCompoundForString("K"), numK);
		chargedAA2Count.put(aaSet.getCompoundForString("R"), numR);
		chargedAA2Count.put(aaSet.getCompoundForString("H"), numH);
		chargedAA2Count.put(aaSet.getCompoundForString("D"), numD);
		chargedAA2Count.put(aaSet.getCompoundForString("E"), numE);
		chargedAA2Count.put(aaSet.getCompoundForString("C"), numC);
		chargedAA2Count.put(aaSet.getCompoundForString("Y"), numY);
		return chargedAA2Count;
	}

	@Override
	public double getEnrichment(ProteinSequence sequence, AminoAcidCompound aminoAcidCode) {
		double counter = 0.0;
		char[] seq = this.getSequence(sequence.getSequenceAsString(), true);
		for(char aa:seq){
			if(aminoAcidCode.getShortName().equals(String.valueOf(aa))){
				counter++;
			}
		}
		return counter/sequence.getLength();
	}

	@Override
	public Map getAAComposition(ProteinSequence sequence) {
		int validLength = 0;
		Map aa2Composition = new HashMap<>();
		AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
		for(AminoAcidCompound aa:aaSet.getAllCompounds()){
			aa2Composition.put(aa, 0.0);
		}
		char[] seq = this.getSequence(sequence.toString(), true);
		for(char aa:seq){
			if(PeptideProperties.standardAASet.contains(aa)){
				AminoAcidCompound compound = aaSet.getCompoundForString(String.valueOf(aa));
				aa2Composition.put(compound, aa2Composition.get(compound) + 1.0);
				validLength++;
			}
		}
		if(validLength > 0){
			for(AminoAcidCompound aa:aaSet.getAllCompounds()){
				aa2Composition.put(aa, aa2Composition.get(aa) / validLength);
			}
		}else{
			for(AminoAcidCompound aa:aaSet.getAllCompounds()){
				aa2Composition.put(aa, 0.0);
			}
		}
		return aa2Composition;
	}


	@Override
	public double getAromaticity(ProteinSequence sequence) {
		int validLength = sequence.getSequenceAsString().length();

		if (validLength == 0) {
			logger.warn("Valid length of sequence is 0, can't divide by 0 to calculate aromaticity: setting aromaticity to 0");
			return 0.0;
		}

		//Phe - Phenylalanine
		int totalF = 0;
		//Tyr - Tyrosine
		int totalY = 0;
		//Trp - Tryptophan
		int totalW = 0;

		char[] seq = this.getSequence(sequence.toString(), true);
		for (char aa : seq) {
			char amino = Character.toUpperCase(aa);
			switch (amino) {
				case 'F':
					totalF++;
					break;
				case 'Y':
					totalY++;
					break;
				case 'W':
					totalW++;
					break;
			}
		}

		return (totalF + totalY + totalW) / (double) (validLength);
	}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy