All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.AminoAcidSet Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio;

import java.util.*;

import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.CompareUtil;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;
import com.hfg.util.StringUtil;



//------------------------------------------------------------------------------
/**
 Mapping of sequence characters to AminoAcids.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class AminoAcidSet implements Set, Cloneable { //########################################################################## // PUBLIC FIELDS //########################################################################## public static final AminoAcidSet STANDARD_LC = new AminoAcidSet(); public static final AminoAcidSet STANDARD_UC = new AminoAcidSet(); /** Contains both upper and lowercase standard mappings. */ public static final AminoAcidSet STANDARD = new AminoAcidSet(); /** Contains both upper and lowercase standard mappings plus B, J, and Z ambiguity codes. 
*/ public static final AminoAcidSet EXTENDED = new AminoAcidSet(); static { STANDARD_LC.setMapping('a', AminoAcid.ALANINE); STANDARD_LC.setMapping('c', AminoAcid.CYSTEINE); STANDARD_LC.setMapping('d', AminoAcid.ASPARTIC_ACID); STANDARD_LC.setMapping('e', AminoAcid.GLUTAMIC_ACID); STANDARD_LC.setMapping('f', AminoAcid.PHENYLALANINE); STANDARD_LC.setMapping('g', AminoAcid.GLYCINE); STANDARD_LC.setMapping('h', AminoAcid.HISTIDINE); STANDARD_LC.setMapping('i', AminoAcid.ISOLEUCINE); STANDARD_LC.setMapping('k', AminoAcid.LYSINE); STANDARD_LC.setMapping('l', AminoAcid.LEUCINE); STANDARD_LC.setMapping('m', AminoAcid.METHIONINE); STANDARD_LC.setMapping('n', AminoAcid.ASPARAGINE); STANDARD_LC.setMapping('p', AminoAcid.PROLINE); STANDARD_LC.setMapping('q', AminoAcid.GLUTAMINE); STANDARD_LC.setMapping('r', AminoAcid.ARGININE); STANDARD_LC.setMapping('s', AminoAcid.SERINE); STANDARD_LC.setMapping('t', AminoAcid.THREONIE); STANDARD_LC.setMapping('v', AminoAcid.VALINE); STANDARD_LC.setMapping('w', AminoAcid.TRYPTOPHAN); STANDARD_LC.setMapping('y', AminoAcid.TYROSINE); STANDARD_LC.setMapping('x', AminoAcid.UNDEFINED); STANDARD_LC.setMapping('*', AminoAcid.STOP); STANDARD_LC.setName("Standard (lower case)"); STANDARD_LC.lock(); STANDARD_UC.setMapping('A', AminoAcid.ALANINE); STANDARD_UC.setMapping('C', AminoAcid.CYSTEINE); STANDARD_UC.setMapping('D', AminoAcid.ASPARTIC_ACID); STANDARD_UC.setMapping('E', AminoAcid.GLUTAMIC_ACID); STANDARD_UC.setMapping('F', AminoAcid.PHENYLALANINE); STANDARD_UC.setMapping('G', AminoAcid.GLYCINE); STANDARD_UC.setMapping('H', AminoAcid.HISTIDINE); STANDARD_UC.setMapping('I', AminoAcid.ISOLEUCINE); STANDARD_UC.setMapping('K', AminoAcid.LYSINE); STANDARD_UC.setMapping('L', AminoAcid.LEUCINE); STANDARD_UC.setMapping('M', AminoAcid.METHIONINE); STANDARD_UC.setMapping('N', AminoAcid.ASPARAGINE); STANDARD_UC.setMapping('P', AminoAcid.PROLINE); STANDARD_UC.setMapping('Q', AminoAcid.GLUTAMINE); STANDARD_UC.setMapping('R', AminoAcid.ARGININE); 
STANDARD_UC.setMapping('S', AminoAcid.SERINE); STANDARD_UC.setMapping('T', AminoAcid.THREONIE); STANDARD_UC.setMapping('V', AminoAcid.VALINE); STANDARD_UC.setMapping('W', AminoAcid.TRYPTOPHAN); STANDARD_UC.setMapping('Y', AminoAcid.TYROSINE); STANDARD_UC.setMapping('X', AminoAcid.UNDEFINED); STANDARD_UC.setMapping('*', AminoAcid.STOP); STANDARD_UC.setName("Standard (upper case)"); STANDARD_UC.lock(); STANDARD.setMappings(STANDARD_LC); STANDARD.setMappings(STANDARD_UC); STANDARD.setName("Standard"); STANDARD.lock(); EXTENDED.setMappings(STANDARD); EXTENDED.setMapping('B', AminoAcid.ASP_ASN_AVG); EXTENDED.setMapping('b', AminoAcid.ASP_ASN_AVG); EXTENDED.setMapping('J', AminoAcid.ILE_LEU_AVG); EXTENDED.setMapping('j', AminoAcid.ILE_LEU_AVG); EXTENDED.setMapping('Z', AminoAcid.GLU_GLN_AVG); EXTENDED.setMapping('z', AminoAcid.GLU_GLN_AVG); EXTENDED.setName("Extended"); EXTENDED.lock(); } //########################################################################## // PRIVATE FIELDS //########################################################################## private String mName; private NTerminalGroup mNTerminalGroup = NTerminalGroup.UNMODIFIED_N_TERMINUS; private CTerminalGroup mCTerminalGroup = CTerminalGroup.UNMODIFIED_C_TERMINUS; private boolean mLocked; private Map mMap = new HashMap<>(); // Since the map values may have the same AA mapped to different characters, // we will cache the unique set of AA's for performance. 
private Collection mCachedSet; //########################################################################## // CONSTRUCTORS //########################################################################## //-------------------------------------------------------------------------- public AminoAcidSet() { } //-------------------------------------------------------------------------- public AminoAcidSet(AminoAcidSet inAASet) { setMappings(inAASet); setNTerminalGroup(inAASet.getNTerminalGroup()); setCTerminalGroup(inAASet.getCTerminalGroup()); } //-------------------------------------------------------------------------- public static AminoAcidSet instantiate(XMLNode inXMLNode) { if (!inXMLNode.getTagName().equals(HfgBioXML.AASET_TAG)) { throw new RuntimeException("Cannot construct an " + AminoAcidSet.class.getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!"); } AminoAcidSet aaSet = null; String name = inXMLNode.getAttributeValue(HfgBioXML.NAME_ATT); XMLNode aminoAcidsTag = inXMLNode.getOptionalSubtagByName(HfgBioXML.AMINO_ACIDS_TAG); if (aminoAcidsTag != null) { aaSet = new AminoAcidSet().setName(name); for (XMLNode subtag : aminoAcidsTag.getXMLNodeSubtags()) { aaSet.setMapping(subtag.getAttributeValue(HfgBioXML.MAPPING_ATT).charAt(0), new AminoAcid(subtag)); } aaSet.setNTerminalGroup(new NTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.NTERM_TAG))); aaSet.setCTerminalGroup(new CTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.CTERM_TAG))); } else { // Pre-defined amino acid sets can be specified with just a name. 
for (AminoAcidSet predefinedAASet : new AminoAcidSet[] { STANDARD, STANDARD_LC, STANDARD_UC }) { if (name.equals(predefinedAASet.getName())) { aaSet = predefinedAASet; break; } } } return aaSet; } //########################################################################## // PUBLIC METHODS //########################################################################## //-------------------------------------------------------------------------- public XMLNode toXMLNode() { XMLNode node = new XMLTag(HfgBioXML.AASET_TAG); if (StringUtil.isSet(getName())) { node.setAttribute(HfgBioXML.NAME_ATT, getName()); } if (! isPredefinedSet()) { XMLNode aminoAcidsTag = new XMLTag(HfgBioXML.AMINO_ACIDS_TAG); node.addSubtag(aminoAcidsTag); for (Map.Entry mapping : mMap.entrySet()) { XMLNode aaTag = mapping.getValue().toXMLNode(); aaTag.setAttribute(HfgBioXML.MAPPING_ATT, mapping.getKey() + ""); aminoAcidsTag.addSubtag(aaTag); } node.addSubtag(mNTerminalGroup.toXMLNode()); node.addSubtag(mCTerminalGroup.toXMLNode()); } return node; } //-------------------------------------------------------------------------- public AminoAcidSet clone() { AminoAcidSet newObj; try { newObj = (AminoAcidSet) super.clone(); } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } newObj.mMap = new HashMap<>(mMap); newObj.mLocked = false; return newObj; } //-------------------------------------------------------------------------- public boolean isLocked() { return mLocked; } //-------------------------------------------------------------------------- public void lock() { mLocked = true; } //-------------------------------------------------------------------------- public AminoAcidSet setName(String inValue) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); mName = inValue; return this; } //-------------------------------------------------------------------------- public String getName() { return mName; } 
//-------------------------------------------------------------------------- public AminoAcidSet setNTerminalGroup(NTerminalGroup inValue) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); mNTerminalGroup = inValue; return this; } //-------------------------------------------------------------------------- public NTerminalGroup getNTerminalGroup() { return mNTerminalGroup; } //-------------------------------------------------------------------------- public AminoAcidSet setCTerminalGroup(CTerminalGroup inValue) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); mCTerminalGroup = inValue; return this; } //-------------------------------------------------------------------------- public CTerminalGroup getCTerminalGroup() { return mCTerminalGroup; } //-------------------------------------------------------------------------- public boolean setMapping(char inChar, AminoAcid inAA) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); boolean result = mMap.containsKey(inChar); mMap.put(inChar, inAA); clearCachedValues(); return result; } //-------------------------------------------------------------------------- public void setMappings(AminoAcidSet inAASet) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); if (inAASet != null) { Iterator iter = inAASet.mapIterator(); while (iter.hasNext()) { Character letter = (Character) iter.next(); mMap.put(letter, inAASet.getAA(letter)); } clearCachedValues(); } } //-------------------------------------------------------------------------- public Collection getAminoAcids() { if (null == mCachedSet) { List aaList = new ArrayList<>(new HashSet<>(mMap.values())); Collections.sort(aaList, AminoAcid.AA_ORDINAL_COMPARATOR); mCachedSet = Collections.unmodifiableCollection(aaList);; } return mCachedSet; } 
//-------------------------------------------------------------------------- /** Returns an iteration of the Characters mapped to amino acids @return iteration of the Characters mapped to amino acids */ public Iterator mapIterator() { return mMap.keySet().iterator(); } //-------------------------------------------------------------------------- public Set getResidueChars() { return Collections.unmodifiableSet(mMap.keySet()); } //-------------------------------------------------------------------------- public Set getMapping(AminoAcid inAA) { Set residues = new HashSet<>(20); for (Character mappedResidue : mMap.keySet()) { if (mMap.get(mappedResidue).equals(inAA)) { residues.add(mappedResidue); } } return residues; } //-------------------------------------------------------------------------- public AminoAcid getAA(char inResidue) { return getAA(new Character(inResidue)); } //-------------------------------------------------------------------------- public AminoAcid getAA(Character inResidue) { return mMap.get(inResidue); } //-------------------------------------------------------------------------- public int size() { return mMap.size(); } //-------------------------------------------------------------------------- @Override public boolean isEmpty() { return mMap.isEmpty(); } //-------------------------------------------------------------------------- @Override public boolean contains(Object inObject) { return inObject instanceof AminoAcid ? mMap.values().contains(inObject) : false; } //-------------------------------------------------------------------------- @Override public Iterator iterator() { return getAminoAcids().iterator(); // Want to reduce values to the unique set before iterating. 
} //-------------------------------------------------------------------------- @Override public Object[] toArray() { return (Object[]) getAminoAcids().toArray(); } //-------------------------------------------------------------------------- @Override public T[] toArray(T[] inArray) { if (inArray.length < size()) { return (T[]) Arrays.copyOf(getAminoAcids().toArray(), size(), inArray.getClass()); } System.arraycopy(getAminoAcids().toArray(), 0, inArray, 0, size()); return inArray; } //-------------------------------------------------------------------------- @Override public boolean add(AminoAcid inAminoAcid) { clearCachedValues(); return setMapping(inAminoAcid.getOneLetterCode(), inAminoAcid); } //-------------------------------------------------------------------------- @Override public boolean remove(Object inObj) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); boolean result = false; if (inObj instanceof AminoAcid) { for (Character key : mMap.keySet()) { if (mMap.get(key).equals(inObj)) { mMap.remove(key); result = true; clearCachedValues(); } } } return result; } //-------------------------------------------------------------------------- @Override public boolean containsAll(Collection inCollection) { boolean result = true; for (Object obj : inCollection) { if (! contains(obj)) { result = false; break; } } return result; } //-------------------------------------------------------------------------- @Override public boolean addAll(Collection inCollection) { boolean result = false; for (AminoAcid obj : inCollection) { if (add(obj)) { result = true; } } return result; } //-------------------------------------------------------------------------- @Override public boolean retainAll(Collection inCollection) { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); boolean result = false; for (Character key : mMap.keySet()) { if (! 
inCollection.contains(mMap.get(key))) { mMap.remove(key); result = true; clearCachedValues(); } } return result; } //-------------------------------------------------------------------------- @Override public boolean removeAll(Collection inCollection) { boolean result = false; for (Object obj : inCollection) { if (remove(obj)) { result = true; } } return result; } //-------------------------------------------------------------------------- @Override public void clear() { if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!"); mMap.clear(); clearCachedValues(); } //-------------------------------------------------------------------------- @Override public boolean equals(Object inObj2) { boolean result = true; if (inObj2 instanceof AminoAcidSet) { AminoAcidSet aaSet2 = (AminoAcidSet) inObj2; if (aaSet2.size() != size()) { result = false; } else { for (Character residue : mMap.keySet()) { if (! getAA(residue).equals(aaSet2.getAA(residue))) { result = false; break; } } if (result && ! getNTerminalGroup().equals(aaSet2.getNTerminalGroup())) { result = false; } if (result && ! 
getAminoAcids().containsAll(aaSet2.getAminoAcids())) { result = false; } } } else { result = false; } return result; } //-------------------------------------------------------------------------- @Override public int hashCode() { int hashCode = 0; for (AminoAcid aa : getAminoAcids()) { hashCode += 31 * aa.hashCode(); } hashCode += 31 * getNTerminalGroup().hashCode(); hashCode += 31 * getCTerminalGroup().hashCode(); return hashCode; } //-------------------------------------------------------------------------- public AminoAcid remove(Character inChar) { clearCachedValues(); return mMap.remove(inChar); } //-------------------------------------------------------------------------- public AminoAcidSet subtract(AminoAcidSet inAminoAcidSet2) { AminoAcidSet subtractedSet = clone(); if (inAminoAcidSet2 != null) { for (Character aaChar : mMap.keySet()) { AminoAcid currentAA = mMap.get(aaChar); AminoAcid oldAA = inAminoAcidSet2.getAA(aaChar); if (currentAA != null && oldAA != null) { int comparison = CompareUtil.compare(currentAA.getElementalComposition(), oldAA.getElementalComposition()); if (0 == comparison) { if (StringUtil.isSet(currentAA.getChemicalFormula()) || StringUtil.isSet(oldAA.getChemicalFormula()) // Compare the names if neither has a composition || currentAA.name().equals(oldAA.name())) { subtractedSet.remove(aaChar); } } } } if (getNTerminalGroup() != null && inAminoAcidSet2.getNTerminalGroup() != null && getNTerminalGroup().equals(inAminoAcidSet2.getNTerminalGroup())) { subtractedSet.setNTerminalGroup(null); } if (getCTerminalGroup() != null && inAminoAcidSet2.getCTerminalGroup() != null && getCTerminalGroup().equals(inAminoAcidSet2.getCTerminalGroup())) { subtractedSet.setCTerminalGroup(null); } clearCachedValues(); } return subtractedSet; } //-------------------------------------------------------------------------- private void clearCachedValues() { mCachedSet = null; } //-------------------------------------------------------------------------- private 
boolean isPredefinedSet() { return (this == STANDARD || this == STANDARD_LC || this == STANDARD_UC); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy