// Source listing of org.biojava.nbio.structure.Chain, from the biojava-structure artifact (Maven / Gradle / Ivy).
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on 25.04.2004
* @author Andreas Prlic
*
*/
package org.biojava.nbio.structure;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.structure.chem.ChemComp;
import java.io.Serializable;
import java.util.List;
/**
*
* Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file.
* A chain consists of a list of {@link Group} objects. A Group can either be
* an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}.
*
*
*
* The BioJava API provides access to both the ATOM and SEQRES records in a PDB file.
* During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them.
* The SEQRES sequence can be accessed via {@link #getSeqResGroups()} and the
* ATOM groups via {@link #getAtomGroups()}. Groups that have been observed
* (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D()
*
*
* @author Andreas Prlic
* @version %I% %G%
* @since 1.4
*/
public interface Chain extends Serializable {
/**
* Returns an identical copy of this Chain.
* @return an identical copy of this Chain
*/
Object clone();
/**
* Add a group to the list of ATOM record group of this chain.
* To add SEQRES records a more complex alignment between ATOM and SEQRES residues
* is required, please see SeqRes2AtomAligner for more details on that.
* @param group a Group object
*/
void addGroup(Group group);
/**
* Get the 'private' asymId (internal chain IDs in mmCif) for this chain.
*
* @return the asymId
* @see #setId(String)
* @see #getName()
*/
String getId() ;
/**
* Set the 'private' asymId (internal chain IDs in mmCif) for this chain.
*
* @param asymId the internal chain Id
*/
void setId(String asymId) ;
/**
* Set the 'public' authId (chain ID in PDB file)
*
* @param authId the 'public' authId (chain ID in PDB file)
* @see #getId()
*/
void setName(String authId);
/**
* Get the 'public' authId (chain ID in PDB file)
*
* @return the authId for this chain.
* @see #getId()
*/
String getName();
/**
* Return the Group at given position,
* from within Groups with observed density in the chain, i.e.
* those with coordinates in ATOM and HETATMS (including waters) records.
* @param position an int
* @return a Group object
* @see #getAtomLength()
* @see #getAtomGroups()
* @see #getSeqResGroup(int)
*/
Group getAtomGroup (int position);
/**
* Return the Group at given position,
* from within groups in the SEQRES records of the chain, i.e.
* the aminoacids/nucleotides in the construct.
* @param position an int
* @return a Group object
* @see #getSeqResLength()
* @see #getSeqResGroups()
* @see #getAtomGroup(int)
*/
Group getSeqResGroup (int position);
/**
* Return all Groups with observed density in the chain, i.e.
* those with coordinates in ATOM and HETATMS (including waters) records.
*
* @return a List object representing the Groups of this Chain.
* @see #setAtomGroups(List)
* @see #getAtomLength()
* @see #getSeqResGroups()
*/
List getAtomGroups();
/**
* Set all Groups with observed density in the chain, i.e.
* those with coordinates in ATOM and HETATMs (including waters) records.
* @param groups a List object representing the Groups of this Chain.
* @see #getAtomGroups()
*/
void setAtomGroups(List groups);
/**
* Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID},
* {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
* Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then
* it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}.
* @param type GroupType
* @return a List object
* @see #setAtomGroups(List)
*/
List getAtomGroups (GroupType type);
/**
* Get a group by its PDB residue numbering. If the PDB residue number is not known,
* throws a StructureException.
*
* @param resNum the PDB residue number of the group
* @return the matching group
* @throws StructureException
*/
Group getGroupByPDB(ResidueNumber resNum) throws StructureException;
/**
* Get all groups that are located between two PDB residue numbers.
*
* @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
* @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
* @return Groups in between. or throws a StructureException if either start or end can not be found,
* @throws StructureException
*/
Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException;
/**
* Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB
* this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range
* of groups as specified by the DBREF records - these frequently are rather inaccurate.
*
*
* @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
* @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
* @param ignoreMissing ignore missing groups in this range.
* @return Groups in between. or throws a StructureException if either start or end can not be found,
* @throws StructureException
*
*/
Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException;
/**
* Returns the number of Groups with observed density in the chain, i.e.
* those with coordinates in ATOM and HETATMs (including waters) records
*
* @return the length
* @see #getAtomGroup(int)
* @see #getAtomGroups()
* @see #getSeqResLength())
*/
int getAtomLength();
/**
* Returns the number of groups in the SEQRES records of the chain, i.e.
* the number of aminoacids/nucleotides in the construct
*
* @return the length
* @see #getSeqResGroup(int)
* @see #getSeqResGroups()
* @see #getAtomLength()
*/
int getSeqResLength();
/**
* Sets the Entity information
* @param entityInfo the EntityInfo
* @see #getEntityInfo()
*/
void setEntityInfo(EntityInfo entityInfo);
/**
* Returns the EntityInfo for this chain.
*
* @return the EntityInfo object
* @see #setEntityInfo(EntityInfo)
*/
EntityInfo getEntityInfo();
@Override
String toString();
/**
* Converts the SEQRES groups of a Chain to a Biojava Sequence object.
*
* @return the SEQRES groups of the Chain as a Sequence object.
*/
Sequence> getBJSequence() ;
/**
* Returns the sequence of amino acids as it has been provided in the ATOM records.
* @return amino acid sequence as string
* @see #getSeqResSequence()
*/
String getAtomSequence();
/**
* Returns the PDB SEQRES sequence as a one-letter sequence string.
* Non-standard residues are represented by an "X".
* @return one-letter PDB SEQRES sequence as string
* @see #getAtomSequence()
*/
String getSeqResSequence();
/**
* Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID},
* {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
* @param type a GroupType
* @return an List object
* @see #setSeqResGroups(List)
*/
List getSeqResGroups (GroupType type);
/**
* Returns a list of all groups in SEQRES records of the chain, i.e.
* the aminoacids/nucleotides in the construct.
* @return a List of all Group objects of this chain
* @see #setSeqResGroups(List)
* @see #getSeqResLength()
* @see #getAtomGroups()
*/
List getSeqResGroups ();
/**
* Sets the list of SeqResGroups for this chain.
*
* @param seqResGroups a List of Group objects that from the SEQRES groups of this chain.
* @see #getSeqResGroups()
*/
void setSeqResGroups(List seqResGroups);
/**
* Sets the back-reference to its parent Structure.
*
* @param parent
*/
void setStructure(Structure parent) ;
/**
* Returns the parent Structure of this chain.
*
* @return the parent Structure object
* @see #setStructure(Structure)
*/
Structure getStructure() ;
/**
* Convert this Chain to a String in PDB format
* @return
*/
String toPDB();
/**
* Convert this Chain to a String in mmCIF format
* @return
*/
String toMMCIF();
/**
* Sets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category
*
* @param seqMisMatches
*/
void setSeqMisMatches(List seqMisMatches);
/**
* Gets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category
*
* @return a list of sequence mismatches (or null if none found)
*/
List getSeqMisMatches();
/**
* Returns the EntityType of this chain. Equivalent to getEntityInfo().getType()
* @return
* @see EntityType
*/
EntityType getEntityType();
/**
* Tests if a chain is consisting of water molecules only
*
* @return true if there are only solvent molecules in this chain.
*/
public boolean isWaterOnly();
/**
* Returns true if the given chain is composed of non-polymeric (including water) groups only.
*
* @return true if only non-polymeric groups in this chain.
*/
public boolean isPureNonPolymer();
/**
* Get the predominant {@link GroupType} for a given Chain, following these
* rules: if the ratio of number of residues of a certain
* {@link GroupType} to total non-water residues is above the threshold
* {@value org.biojava.nbio.structure.StructureTools#RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is
* returned if there is no {@link GroupType} that is above the
* threshold then the {@link GroupType} with most members is chosen, logging
* it
*
* See also {@link ChemComp#getPolymerType()} and
* {@link ChemComp#getResidueType()} which follow the PDB chemical component
* dictionary and provide a much more accurate description of groups and
* their linking.
*
*
* @return the predominant group type
*/
GroupType getPredominantGroupType();
/**
* Tell whether given chain is a protein chain
*
* @return true if protein, false if nucleotide or ligand
* @see #getPredominantGroupType()
*/
boolean isProtein();
/**
* Tell whether given chain is DNA or RNA
*
* @return true if nucleic acid, false if protein or ligand
* @see #getPredominantGroupType()
*/
boolean isNucleicAcid();
}