com.hfg.bio.AminoAcidSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio;
import java.util.*;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.CompareUtil;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;
import com.hfg.util.StringUtil;
//------------------------------------------------------------------------------
/**
Mapping of sequence characters to AminoAcids.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class AminoAcidSet implements Set, Cloneable
{
//##########################################################################
// PUBLIC FIELDS
//##########################################################################
/**
Lower-case one-letter codes for the 20 standard amino acids, plus 'x' (undefined) and '*' (stop).
*/
public static final AminoAcidSet STANDARD_LC = new AminoAcidSet();
/**
Upper-case one-letter codes for the 20 standard amino acids, plus 'X' (undefined) and '*' (stop).
*/
public static final AminoAcidSet STANDARD_UC = new AminoAcidSet();
/**
Contains both upper and lowercase standard mappings.
*/
public static final AminoAcidSet STANDARD = new AminoAcidSet();
/**
Contains both upper and lowercase standard mappings plus B, J, and Z ambiguity codes.
*/
public static final AminoAcidSet EXTENDED = new AminoAcidSet();
// Populates the predefined sets. Each set is named and locked only after all of its
// mappings are in place, because setMapping(), setMappings() and setName() all throw
// once a set is locked.
static
{
// --- Lower-case standard set ---
STANDARD_LC.setMapping('a', AminoAcid.ALANINE);
STANDARD_LC.setMapping('c', AminoAcid.CYSTEINE);
STANDARD_LC.setMapping('d', AminoAcid.ASPARTIC_ACID);
STANDARD_LC.setMapping('e', AminoAcid.GLUTAMIC_ACID);
STANDARD_LC.setMapping('f', AminoAcid.PHENYLALANINE);
STANDARD_LC.setMapping('g', AminoAcid.GLYCINE);
STANDARD_LC.setMapping('h', AminoAcid.HISTIDINE);
STANDARD_LC.setMapping('i', AminoAcid.ISOLEUCINE);
STANDARD_LC.setMapping('k', AminoAcid.LYSINE);
STANDARD_LC.setMapping('l', AminoAcid.LEUCINE);
STANDARD_LC.setMapping('m', AminoAcid.METHIONINE);
STANDARD_LC.setMapping('n', AminoAcid.ASPARAGINE);
STANDARD_LC.setMapping('p', AminoAcid.PROLINE);
STANDARD_LC.setMapping('q', AminoAcid.GLUTAMINE);
STANDARD_LC.setMapping('r', AminoAcid.ARGININE);
STANDARD_LC.setMapping('s', AminoAcid.SERINE);
// NOTE(review): THREONIE (sic) is the identifier referenced here; presumably it matches
// the spelling of the constant declared in AminoAcid - verify before renaming.
STANDARD_LC.setMapping('t', AminoAcid.THREONIE);
STANDARD_LC.setMapping('v', AminoAcid.VALINE);
STANDARD_LC.setMapping('w', AminoAcid.TRYPTOPHAN);
STANDARD_LC.setMapping('y', AminoAcid.TYROSINE);
STANDARD_LC.setMapping('x', AminoAcid.UNDEFINED);
STANDARD_LC.setMapping('*', AminoAcid.STOP);
STANDARD_LC.setName("Standard (lower case)");
STANDARD_LC.lock();
// --- Upper-case standard set ---
STANDARD_UC.setMapping('A', AminoAcid.ALANINE);
STANDARD_UC.setMapping('C', AminoAcid.CYSTEINE);
STANDARD_UC.setMapping('D', AminoAcid.ASPARTIC_ACID);
STANDARD_UC.setMapping('E', AminoAcid.GLUTAMIC_ACID);
STANDARD_UC.setMapping('F', AminoAcid.PHENYLALANINE);
STANDARD_UC.setMapping('G', AminoAcid.GLYCINE);
STANDARD_UC.setMapping('H', AminoAcid.HISTIDINE);
STANDARD_UC.setMapping('I', AminoAcid.ISOLEUCINE);
STANDARD_UC.setMapping('K', AminoAcid.LYSINE);
STANDARD_UC.setMapping('L', AminoAcid.LEUCINE);
STANDARD_UC.setMapping('M', AminoAcid.METHIONINE);
STANDARD_UC.setMapping('N', AminoAcid.ASPARAGINE);
STANDARD_UC.setMapping('P', AminoAcid.PROLINE);
STANDARD_UC.setMapping('Q', AminoAcid.GLUTAMINE);
STANDARD_UC.setMapping('R', AminoAcid.ARGININE);
STANDARD_UC.setMapping('S', AminoAcid.SERINE);
STANDARD_UC.setMapping('T', AminoAcid.THREONIE);
STANDARD_UC.setMapping('V', AminoAcid.VALINE);
STANDARD_UC.setMapping('W', AminoAcid.TRYPTOPHAN);
STANDARD_UC.setMapping('Y', AminoAcid.TYROSINE);
STANDARD_UC.setMapping('X', AminoAcid.UNDEFINED);
STANDARD_UC.setMapping('*', AminoAcid.STOP);
STANDARD_UC.setName("Standard (upper case)");
STANDARD_UC.lock();
// --- Combined set: union of the two sets above ('*' is shared by both) ---
STANDARD.setMappings(STANDARD_LC);
STANDARD.setMappings(STANDARD_UC);
STANDARD.setName("Standard");
STANDARD.lock();
// --- Extended set: STANDARD plus the B, J and Z ambiguity codes in both cases ---
EXTENDED.setMappings(STANDARD);
EXTENDED.setMapping('B', AminoAcid.ASP_ASN_AVG);
EXTENDED.setMapping('b', AminoAcid.ASP_ASN_AVG);
EXTENDED.setMapping('J', AminoAcid.ILE_LEU_AVG);
EXTENDED.setMapping('j', AminoAcid.ILE_LEU_AVG);
EXTENDED.setMapping('Z', AminoAcid.GLU_GLN_AVG);
EXTENDED.setMapping('z', AminoAcid.GLU_GLN_AVG);
EXTENDED.setName("Extended");
EXTENDED.lock();
}
//##########################################################################
// PRIVATE FIELDS
//##########################################################################
// Display name; also used in lock-violation messages and written as the XML name attribute.
private String mName;
// Terminal groups default to the unmodified termini; subtract() may set them to null.
private NTerminalGroup mNTerminalGroup = NTerminalGroup.UNMODIFIED_N_TERMINUS;
private CTerminalGroup mCTerminalGroup = CTerminalGroup.UNMODIFIED_C_TERMINUS;
// Once true, the mutators of this set throw UnmodifyableObjectException.
private boolean mLocked;
// Residue character -> amino acid. Explicit type arguments are required: the typed
// for-each loops over keySet()/entrySet() elsewhere in this class do not compile
// against a raw Map.
private Map<Character, AminoAcid> mMap = new HashMap<>();
// Since the map values may have the same AA mapped to different characters,
// we will cache the unique set of AA's for performance.
private Collection<AminoAcid> mCachedSet;
//##########################################################################
// CONSTRUCTORS
//##########################################################################
//--------------------------------------------------------------------------
/**
Creates an empty, unnamed, unlocked set whose terminal groups are the unmodified
N- and C-terminus defaults supplied by the field initializers.
*/
public AminoAcidSet()
{
   super();
}
//--------------------------------------------------------------------------
/**
Copy constructor. Copies the character mappings and both terminal groups of the
specified set. The name and the locked state are not copied.
@param inAASet the set to copy; a null value yields an empty set with default terminal groups
*/
public AminoAcidSet(AminoAcidSet inAASet)
{
   // setMappings() already tolerates null; guard the terminal-group reads the same way
   // so that a null argument does not fail half way through with a NullPointerException.
   if (inAASet != null)
   {
      setMappings(inAASet);
      setNTerminalGroup(inAASet.getNTerminalGroup());
      setCTerminalGroup(inAASet.getCTerminalGroup());
   }
}
//--------------------------------------------------------------------------
/**
Builds an AminoAcidSet from its XML representation.
<p>
If the tag has an amino-acids subtag, a new set is created from the listed mappings and
the required N- and C-terminal subtags. Otherwise the name attribute is matched against
the predefined sets STANDARD, STANDARD_LC and STANDARD_UC and the shared (locked)
instance is returned.
</p>
@param inXMLNode the amino acid set tag
@return the corresponding set, or null if the tag has no amino-acids subtag and its name
        (possibly absent) matches no predefined set
@throws RuntimeException if the tag is not an amino acid set tag
*/
public static AminoAcidSet instantiate(XMLNode inXMLNode)
{
   if (!inXMLNode.getTagName().equals(HfgBioXML.AASET_TAG))
   {
      throw new RuntimeException("Cannot construct an " + AminoAcidSet.class.getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!");
   }

   String name = inXMLNode.getAttributeValue(HfgBioXML.NAME_ATT);
   XMLNode aminoAcidsTag = inXMLNode.getOptionalSubtagByName(HfgBioXML.AMINO_ACIDS_TAG);

   if (null == aminoAcidsTag)
   {
      // Pre-defined amino acid sets can be specified with just a name.
      // NOTE(review): EXTENDED is not consulted here, matching isPredefinedSet().
      for (AminoAcidSet predefinedAASet : new AminoAcidSet[] { STANDARD, STANDARD_LC, STANDARD_UC })
      {
         // Compare from the predefined side so that a missing name attribute
         // yields "no match" instead of a NullPointerException.
         if (predefinedAASet.getName().equals(name))
         {
            return predefinedAASet;
         }
      }

      return null;
   }

   AminoAcidSet aaSet = new AminoAcidSet().setName(name);
   for (XMLNode subtag : aminoAcidsTag.getXMLNodeSubtags())
   {
      // The first character of the mapping attribute is the residue character.
      aaSet.setMapping(subtag.getAttributeValue(HfgBioXML.MAPPING_ATT).charAt(0), new AminoAcid(subtag));
   }

   aaSet.setNTerminalGroup(new NTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.NTERM_TAG)));
   aaSet.setCTerminalGroup(new CTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.CTERM_TAG)));

   return aaSet;
}
//##########################################################################
// PUBLIC METHODS
//##########################################################################
//--------------------------------------------------------------------------
/**
Serializes this set to an XML tag.
<p>
The name is written as an attribute when set. The predefined sets (STANDARD,
STANDARD_LC, STANDARD_UC) are written by name only; any other set also gets one
subtag per character mapping plus its terminal groups.
</p>
@return the XML representation of this set
*/
public XMLNode toXMLNode()
{
   XMLNode node = new XMLTag(HfgBioXML.AASET_TAG);
   if (StringUtil.isSet(getName()))
   {
      node.setAttribute(HfgBioXML.NAME_ATT, getName());
   }

   if (! isPredefinedSet())
   {
      XMLNode aminoAcidsTag = new XMLTag(HfgBioXML.AMINO_ACIDS_TAG);
      node.addSubtag(aminoAcidsTag);

      // The entry must be typed; a raw Map.Entry only exposes Object values.
      for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
      {
         XMLNode aaTag = mapping.getValue().toXMLNode();
         aaTag.setAttribute(HfgBioXML.MAPPING_ATT, String.valueOf(mapping.getKey()));
         aminoAcidsTag.addSubtag(aaTag);
      }

      // subtract() can leave either terminal group null; skip it rather than fail.
      // NOTE(review): instantiate() treats both terminal subtags as required, so a set
      // written without them cannot be read back as-is.
      if (mNTerminalGroup != null)
      {
         node.addSubtag(mNTerminalGroup.toXMLNode());
      }

      if (mCTerminalGroup != null)
      {
         node.addSubtag(mCTerminalGroup.toXMLNode());
      }
   }

   return node;
}
//--------------------------------------------------------------------------
/**
Returns an unlocked copy of this set. The copy gets its own mapping table, so changes
to the copy do not affect this set; the mapped AminoAcid objects themselves are shared.
@return an unlocked copy of this set
*/
public AminoAcidSet clone()
{
   AminoAcidSet copy;
   try
   {
      copy = (AminoAcidSet) super.clone();
   }
   catch (CloneNotSupportedException e)
   {
      throw new RuntimeException(e);
   }

   // A clone is always modifiable, even when made from a locked set.
   copy.mLocked = false;
   copy.mMap = new HashMap<>(mMap);

   return copy;
}
//--------------------------------------------------------------------------
/**
@return true if lock() has been called on this set
*/
public boolean isLocked()
{
   return this.mLocked;
}
//--------------------------------------------------------------------------
/**
Locks this set. This class offers no method to unlock; clone() returns an unlocked copy.
*/
public void lock()
{
   this.mLocked = true;
}
//--------------------------------------------------------------------------
/**
Sets the name of this set.
@param inValue the new name
@return this set, to allow call chaining
@throws UnmodifyableObjectException if this set is locked
*/
public AminoAcidSet setName(String inValue)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   this.mName = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
@return the name of this set, or null if none has been assigned
*/
public String getName()
{
   return this.mName;
}
//--------------------------------------------------------------------------
/**
Sets the N-terminal group of this set.
@param inValue the new N-terminal group
@return this set, to allow call chaining
@throws UnmodifyableObjectException if this set is locked
*/
public AminoAcidSet setNTerminalGroup(NTerminalGroup inValue)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   this.mNTerminalGroup = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
@return the N-terminal group of this set
*/
public NTerminalGroup getNTerminalGroup()
{
   return this.mNTerminalGroup;
}
//--------------------------------------------------------------------------
/**
Sets the C-terminal group of this set.
@param inValue the new C-terminal group
@return this set, to allow call chaining
@throws UnmodifyableObjectException if this set is locked
*/
public AminoAcidSet setCTerminalGroup(CTerminalGroup inValue)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   this.mCTerminalGroup = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
@return the C-terminal group of this set
*/
public CTerminalGroup getCTerminalGroup()
{
   return this.mCTerminalGroup;
}
//--------------------------------------------------------------------------
/**
Maps a residue character to an amino acid, replacing any existing mapping for that
character, and invalidates the cached unique amino acid collection.
@param inChar the residue character
@param inAA the amino acid the character should map to
@return true if the character was already mapped before this call
@throws UnmodifyableObjectException if this set is locked
*/
public boolean setMapping(char inChar, AminoAcid inAA)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   final boolean wasAlreadyMapped = mMap.containsKey(inChar);

   mMap.put(inChar, inAA);
   clearCachedValues();

   return wasAlreadyMapped;
}
//--------------------------------------------------------------------------
/**
Copies every character mapping of the specified set into this set, overwriting
mappings for characters that are already present. Terminal groups are not copied.
@param inAASet the set whose mappings should be copied; null is ignored
@throws UnmodifyableObjectException if this set is locked
*/
public void setMappings(AminoAcidSet inAASet)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   if (null == inAASet)
   {
      return;
   }

   for (Iterator iter = inAASet.mapIterator(); iter.hasNext(); )
   {
      Character residueChar = (Character) iter.next();
      mMap.put(residueChar, inAASet.getAA(residueChar));
   }

   clearCachedValues();
}
//--------------------------------------------------------------------------
/**
Returns the distinct amino acids of this set, sorted with AminoAcid.AA_ORDINAL_COMPARATOR.
The result is computed lazily and cached until the mappings change.
@return an unmodifiable collection of the unique amino acids mapped in this set
*/
public Collection<AminoAcid> getAminoAcids()
{
   if (null == mCachedSet)
   {
      // Pass the values through a HashSet to drop amino acids mapped to several characters.
      List<AminoAcid> aaList = new ArrayList<>(new HashSet<>(mMap.values()));
      Collections.sort(aaList, AminoAcid.AA_ORDINAL_COMPARATOR);
      mCachedSet = Collections.unmodifiableCollection(aaList);
   }

   return mCachedSet;
}
//--------------------------------------------------------------------------
/**
Returns an iteration of the Characters mapped to amino acids.
The iterator is read-only: handing out the live key-set iterator would let callers
call remove() on a locked set and would leave the cached amino acid collection stale.
@return read-only iteration of the Characters mapped to amino acids
*/
public Iterator<Character> mapIterator()
{
   return Collections.unmodifiableSet(mMap.keySet()).iterator();
}
//--------------------------------------------------------------------------
/**
Returns the residue characters that are mapped in this set.
@return an unmodifiable view of the mapped characters
*/
public Set<Character> getResidueChars()
{
   return Collections.unmodifiableSet(mMap.keySet());
}
//--------------------------------------------------------------------------
/**
Returns all residue characters that map to the specified amino acid.
@param inAA the amino acid to look up
@return a new, modifiable set of the characters mapped to the amino acid; empty if none
*/
public Set<Character> getMapping(AminoAcid inAA)
{
   Set<Character> residues = new HashSet<>(20);

   // Walk the entries rather than keySet() + get() to avoid a second lookup per key.
   for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
   {
      AminoAcid mappedAA = mapping.getValue();
      // setMapping() does not reject null values, so guard before calling equals().
      if (mappedAA != null
          && mappedAA.equals(inAA))
      {
         residues.add(mapping.getKey());
      }
   }

   return residues;
}
//--------------------------------------------------------------------------
/**
Returns the amino acid mapped to the specified residue character.
@param inResidue the residue character
@return the mapped amino acid, or null if the character is not mapped
*/
public AminoAcid getAA(char inResidue)
{
   // Character.valueOf() replaces the deprecated Character constructor.
   return getAA(Character.valueOf(inResidue));
}
//--------------------------------------------------------------------------
/**
Returns the amino acid mapped to the specified residue character.
@param inResidue the residue character
@return the mapped amino acid, or null if the character is not mapped
*/
public AminoAcid getAA(Character inResidue)
{
   final AminoAcid mappedAA = mMap.get(inResidue);
   return mappedAA;
}
//--------------------------------------------------------------------------
/**
Returns the number of character mappings in this set. This can exceed the number of
distinct amino acids returned by getAminoAcids() and iterator(), because one amino
acid may be mapped to several characters.
@return the number of character mappings
*/
public int size()
{
   final int mappingCount = mMap.size();
   return mappingCount;
}
//--------------------------------------------------------------------------
/**
@return true if this set contains no character mappings
*/
@Override
public boolean isEmpty()
{
   final boolean hasNoMappings = mMap.isEmpty();
   return hasNoMappings;
}
//--------------------------------------------------------------------------
/**
Tests whether the specified amino acid is mapped to at least one character.
@param inObject the object to look for
@return true if the object is an AminoAcid that is a mapped value of this set
*/
@Override
public boolean contains(Object inObject)
{
   if (! (inObject instanceof AminoAcid))
   {
      return false;
   }

   return mMap.values().contains(inObject);
}
//--------------------------------------------------------------------------
/**
Iterates over the distinct amino acids of this set in the order of getAminoAcids().
The iterator is backed by an unmodifiable collection.
@return an iterator over the unique amino acids
*/
@Override
public Iterator<AminoAcid> iterator()
{
   return getAminoAcids().iterator(); // Want to reduce values to the unique set before iterating.
}
//--------------------------------------------------------------------------
/**
@return a new array holding the distinct amino acids of this set, in the order of getAminoAcids()
*/
@Override
public Object[] toArray()
{
   final Object[] uniqueAAs = getAminoAcids().toArray();
   return uniqueAAs;
}
//--------------------------------------------------------------------------
/**
Copies the distinct amino acids of this set into an array of the runtime type of the
argument, following the Collection.toArray(T[]) contract.
<p>
The work is delegated to the unique amino acid collection. Sizing the copy with size()
is wrong because size() counts character mappings: with more mappings than distinct
amino acids, the copy either ran past the end of the source array or was padded with nulls.
</p>
@param inArray the array to fill if it is large enough; otherwise a new array of the same runtime type is allocated
@return the array holding the distinct amino acids
*/
@Override
public <T> T[] toArray(T[] inArray)
{
   Collection<AminoAcid> uniqueAAs = getAminoAcids();
   return uniqueAAs.toArray(inArray);
}
//--------------------------------------------------------------------------
/**
Adds the amino acid, mapped to its own one-letter code.
<p>
setMapping() reports whether the character was already mapped, which is the opposite
of what Set.add() must report, so the result is computed here instead.
</p>
@param inAminoAcid the amino acid to add
@return true if this set changed, i.e. the one-letter code was not already mapped to an equal amino acid
@throws UnmodifyableObjectException if this set is locked
*/
@Override
public boolean add(AminoAcid inAminoAcid)
{
   char oneLetterCode = inAminoAcid.getOneLetterCode();
   AminoAcid previousAA = getAA(oneLetterCode);

   // setMapping() performs the lock check and invalidates the cache.
   setMapping(oneLetterCode, inAminoAcid);

   return ! inAminoAcid.equals(previousAA);
}
//--------------------------------------------------------------------------
/**
Removes every character mapping whose value equals the specified amino acid.
@param inObj the amino acid to remove; objects that are not AminoAcids are ignored
@return true if at least one mapping was removed
@throws UnmodifyableObjectException if this set is locked
*/
@Override
public boolean remove(Object inObj)
{
   if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");

   boolean result = false;
   if (inObj instanceof AminoAcid)
   {
      // Remove through the iterator. Calling mMap.remove() inside a loop over keySet()
      // throws ConcurrentModificationException as soon as the loop continues, which
      // always happens when the amino acid is mapped to more than one character.
      Iterator<AminoAcid> valueIter = mMap.values().iterator();
      while (valueIter.hasNext())
      {
         AminoAcid mappedAA = valueIter.next();
         if (mappedAA != null
             && mappedAA.equals(inObj))
         {
            valueIter.remove();
            result = true;
         }
      }

      if (result)
      {
         clearCachedValues();
      }
   }

   return result;
}
//--------------------------------------------------------------------------
@Override
public boolean containsAll(Collection> inCollection)
{
boolean result = true;
for (Object obj : inCollection)
{
if (! contains(obj))
{
result = false;
break;
}
}
return result;
}
//--------------------------------------------------------------------------
@Override
public boolean addAll(Collection extends AminoAcid> inCollection)
{
boolean result = false;
for (AminoAcid obj : inCollection)
{
if (add(obj))
{
result = true;
}
}
return result;
}
//--------------------------------------------------------------------------
@Override
public boolean retainAll(Collection> inCollection)
{
if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
boolean result = false;
for (Character key : mMap.keySet())
{
if (! inCollection.contains(mMap.get(key)))
{
mMap.remove(key);
result = true;
clearCachedValues();
}
}
return result;
}
//--------------------------------------------------------------------------
@Override
public boolean removeAll(Collection> inCollection)
{
boolean result = false;
for (Object obj : inCollection)
{
if (remove(obj))
{
result = true;
}
}
return result;
}
//--------------------------------------------------------------------------
/**
Removes all character mappings and invalidates the cached amino acid collection.
The name and the terminal groups are left unchanged.
@throws UnmodifyableObjectException if this set is locked
*/
@Override
public void clear()
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   mMap.clear();
   clearCachedValues();
}
//--------------------------------------------------------------------------
/**
Two AminoAcidSets are equal when they have the same character mappings and the same
N- and C-terminal groups. The name and the locked state are not compared.
<p>
The C-terminal group is part of the comparison because hashCode() includes it;
leaving it out lets equal sets have different hash codes. Null terminal groups
(as produced by subtract()) and null-mapped values are compared null-safely.
</p>
@param inObj2 the object to compare to
@return true if the object is an equivalent AminoAcidSet
*/
@Override
public boolean equals(Object inObj2)
{
   if (this == inObj2)
   {
      return true;
   }

   if (! (inObj2 instanceof AminoAcidSet))
   {
      return false;
   }

   AminoAcidSet aaSet2 = (AminoAcidSet) inObj2;

   // Map.equals() checks for the same size and an equal value for every key. That
   // covers the size, per-residue and unique-amino-acid checks in a single call.
   return mMap.equals(aaSet2.mMap)
          && Objects.equals(getNTerminalGroup(), aaSet2.getNTerminalGroup())
          && Objects.equals(getCTerminalGroup(), aaSet2.getCTerminalGroup());
}
//--------------------------------------------------------------------------
/**
Computes a hash code from the distinct amino acids and both terminal groups.
A null terminal group (as produced by subtract()) contributes 0 instead of
raising a NullPointerException.
@return the hash code of this set
*/
@Override
public int hashCode()
{
   int hashCode = 0;
   for (AminoAcid aa : getAminoAcids())
   {
      hashCode += 31 * Objects.hashCode(aa);
   }

   hashCode += 31 * Objects.hashCode(getNTerminalGroup());
   hashCode += 31 * Objects.hashCode(getCTerminalGroup());

   return hashCode;
}
//--------------------------------------------------------------------------
/**
Removes the mapping for the specified residue character.
@param inChar the residue character whose mapping should be removed
@return the amino acid that was mapped to the character, or null if there was none
@throws UnmodifyableObjectException if this set is locked
*/
public AminoAcid remove(Character inChar)
{
   // Every other mutator honours the lock; without this check a locked set
   // (e.g. STANDARD) could be altered through this overload.
   if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");

   clearCachedValues();
   return mMap.remove(inChar);
}
//--------------------------------------------------------------------------
/**
Returns an unlocked copy of this set without the parts it shares with the specified set.
<p>
A character mapping is dropped when both sets map the character, the elemental
compositions of the two amino acids compare as equal, and either at least one of them
has a chemical formula set or their names are equal. A terminal group is set to null
in the result when it is non-null in both sets and equal. This set is not modified.
</p>
@param inAminoAcidSet2 the set to subtract; null yields a plain clone of this set
@return a new, unlocked set holding the differences
*/
public AminoAcidSet subtract(AminoAcidSet inAminoAcidSet2)
{
   AminoAcidSet subtractedSet = clone();
   if (null == inAminoAcidSet2)
   {
      return subtractedSet;
   }

   // Iterate this set's own map; removals go to the clone's separate map, so no
   // concurrent modification occurs.
   for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
   {
      Character aaChar = mapping.getKey();
      AminoAcid currentAA = mapping.getValue();
      AminoAcid oldAA = inAminoAcidSet2.getAA(aaChar);

      if (currentAA != null
          && oldAA != null
          && 0 == CompareUtil.compare(currentAA.getElementalComposition(), oldAA.getElementalComposition()))
      {
         if (StringUtil.isSet(currentAA.getChemicalFormula())
             || StringUtil.isSet(oldAA.getChemicalFormula())
             // Compare the names if neither has a composition
             || currentAA.name().equals(oldAA.name()))
         {
            subtractedSet.remove(aaChar);
         }
      }
   }

   if (getNTerminalGroup() != null
       && inAminoAcidSet2.getNTerminalGroup() != null
       && getNTerminalGroup().equals(inAminoAcidSet2.getNTerminalGroup()))
   {
      subtractedSet.setNTerminalGroup(null);
   }

   if (getCTerminalGroup() != null
       && inAminoAcidSet2.getCTerminalGroup() != null
       && getCTerminalGroup().equals(inAminoAcidSet2.getCTerminalGroup()))
   {
      subtractedSet.setCTerminalGroup(null);
   }

   // Invalidate the cache of the set that was actually modified. This set is untouched,
   // so its own cache stays valid.
   subtractedSet.clearCachedValues();

   return subtractedSet;
}
//--------------------------------------------------------------------------
/**
Discards the cached collection of unique amino acids so that getAminoAcids()
rebuilds it on its next call.
*/
private void clearCachedValues()
{
   this.mCachedSet = null;
}
//--------------------------------------------------------------------------
/**
Tests by identity whether this object is one of the shared STANDARD, STANDARD_LC or
STANDARD_UC instances. EXTENDED is not part of the check.
@return true if this is one of those three predefined instances
*/
private boolean isPredefinedSet()
{
   if (this == STANDARD)
   {
      return true;
   }

   if (this == STANDARD_LC)
   {
      return true;
   }

   return this == STANDARD_UC;
}
}