org.biojava.nbio.structure.contact.StructureInterface Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.contact;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.asa.AsaCalculator;
import org.biojava.nbio.structure.asa.GroupAsa;
import org.biojava.nbio.structure.chem.ChemComp;
import org.biojava.nbio.structure.chem.PolymerType;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.cif.AbstractCifFileSupplier;
import org.biojava.nbio.structure.xtal.CrystalTransform;
import org.rcsb.cif.CifBuilder;
import org.rcsb.cif.CifIO;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.schema.StandardSchemata;
import org.rcsb.cif.schema.mm.MmCifBlockBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.Serializable;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* An interface between 2 molecules (2 sets of atoms).
*
* @author duarte_j
*
*/
public class StructureInterface implements Serializable, Comparable {
private static final long serialVersionUID = 1L;
private static final Logger logger = LoggerFactory.getLogger(StructureInterface.class);
/**
* Interfaces with larger inverse self contact overlap score will be considered isologous
*/
private static final double SELF_SCORE_FOR_ISOLOGOUS = 0.3;
private int id;
private double totalArea;
private AtomContactSet contacts;
private GroupContactSet groupContacts;
private Pair molecules;
/**
* The identifier for each of the atom arrays (usually a chain identifier, i.e. a single capital letter)
* Serves to identify the molecules within the Asymmetric Unit of the crystal
*/
private Pair moleculeIds;
/**
* The transformations (crystal operators) applied to each molecule (if applicable)
*/
private Pair transforms;
private Map groupAsas1;
private Map groupAsas2;
private StructureInterfaceCluster cluster;
/**
* Constructs a StructureInterface
* @param firstMolecule the atoms of the first molecule
* @param secondMolecule the atoms of the second molecule
* @param firstMoleculeId an identifier that identifies the first molecule within the Asymmetric Unit
* @param secondMoleculeId an identifier that identifies the second molecule within the Asymmetric Unit
* @param contacts the contacts between the 2 molecules
* @param firstTransf the transformation (crystal operator) applied to first molecule
* @param secondTransf the transformation (crystal operator) applied to second molecule
*/
public StructureInterface(
Atom[] firstMolecule, Atom[] secondMolecule,
String firstMoleculeId, String secondMoleculeId,
AtomContactSet contacts,
CrystalTransform firstTransf, CrystalTransform secondTransf) {
this.molecules = new Pair<>(firstMolecule, secondMolecule);
this.moleculeIds = new Pair<>(firstMoleculeId,secondMoleculeId);
this.contacts = contacts;
this.transforms = new Pair<>(firstTransf, secondTransf);
this.groupAsas1 = new TreeMap<>();
this.groupAsas2 = new TreeMap<>();
}
/**
* Constructs an empty StructureInterface
*/
public StructureInterface() {
this.groupAsas1 = new TreeMap<>();
this.groupAsas2 = new TreeMap<>();
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
/**
* Returns a pair of identifiers for each of the 2 member molecules that
* identify them uniquely in the crystal:
* <molecule id (asym unit id)>+<operator id>+<crystal translation>
* @return
*/
public Pair getCrystalIds() {
return new Pair<>(
moleculeIds.getFirst()+transforms.getFirst().getTransformId()+transforms.getFirst().getCrystalTranslation(),
moleculeIds.getSecond()+transforms.getSecond().getTransformId()+transforms.getSecond().getCrystalTranslation());
}
/**
* Returns the total area buried upon formation of this interface,
* defined as: 1/2[ (ASA1u-ASA1c) + (ASA2u-ASA2u) ] , with:
* ASAxu = ASA of first/second unbound chain
* ASAxc = ASA of first/second complexed chain
* In the area calculation HETATOM groups not part of the main protein/nucleotide chain
* are not included.
* @return
*/
public double getTotalArea() {
return totalArea;
}
public void setTotalArea(double totalArea) {
this.totalArea = totalArea;
}
public AtomContactSet getContacts() {
return contacts;
}
public void setContacts(AtomContactSet contacts) {
this.contacts = contacts;
}
public Pair getMolecules() {
return molecules;
}
public void setMolecules(Pair molecules) {
this.molecules = molecules;
}
/**
* Return the pair of identifiers identifying each of the 2 molecules of this interface
* in the asymmetry unit (usually the chain identifier if this interface is between 2 chains)
* @return
*/
public Pair getMoleculeIds() {
return moleculeIds;
}
public void setMoleculeIds(Pair moleculeIds) {
this.moleculeIds = moleculeIds;
}
/**
* Return the 2 crystal transform operations performed on each of the
* molecules of this interface.
* @return
*/
public Pair getTransforms() {
return transforms;
}
public void setTransforms(Pair transforms) {
this.transforms = transforms;
}
/**
* Set ASA annotations by passing the uncomplexed ASA values of the 2 partners.
* This will calculate complexed ASA and set the ASA values in the member variables.
* @param asas1 ASA values for atoms of partner 1
* @param asas2 ASA values for atoms of partner 2
* @param nSpherePoints the number of sphere points to be used for complexed ASA calculation
* @param nThreads the number of threads to be used for complexed ASA calculation
* @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used in ASA calculation
*/
void setAsas(double[] asas1, double[] asas2, int nSpherePoints, int nThreads, int cofactorSizeToUse) {
Atom[] atoms = getAtomsForAsa(cofactorSizeToUse);
AsaCalculator asaCalc = new AsaCalculator(atoms,
AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads);
double[] complexAsas = asaCalc.calculateAsas();
if (complexAsas.length!=asas1.length+asas2.length)
throw new IllegalArgumentException("The size of ASAs of complex doesn't match that of ASAs 1 + ASAs 2");
groupAsas1 = new TreeMap<>();
groupAsas2 = new TreeMap<>();
this.totalArea = 0;
for (int i=0;i atoms = new ArrayList<>();
for (Atom a:m){
if (a.getElement()==Element.H) continue;
Group g = a.getGroup();
if (g.getType().equals(GroupType.HETATM) &&
!isInChain(g) &&
getSizeNoH(g) getFirstGroupAsas() {
return groupAsas1;
}
/**
* Gets the GroupAsa for the corresponding residue number of first chain
* @param resNum
* @return
*/
public GroupAsa getFirstGroupAsa(ResidueNumber resNum) {
return groupAsas1.get(resNum);
}
public void setFirstGroupAsa(GroupAsa groupAsa) {
groupAsas1.put(groupAsa.getGroup().getResidueNumber(), groupAsa);
}
public void setFirstGroupAsas(Map firstGroupAsas) {
this.groupAsas1 = firstGroupAsas;
}
public void setSecondGroupAsas(Map secondGroupAsas) {
this.groupAsas2 = secondGroupAsas;
}
/**
* Gets a map of ResidueNumbers to GroupAsas for all groups of second chain.
* @return
*/
public Map getSecondGroupAsas() {
return groupAsas2;
}
public void setSecondGroupAsa(GroupAsa groupAsa) {
groupAsas2.put(groupAsa.getGroup().getResidueNumber(), groupAsa);
}
/**
* Gets the GroupAsa for the corresponding residue number of second chain
* @param resNum
* @return
*/
public GroupAsa getSecondGroupAsa(ResidueNumber resNum) {
return groupAsas2.get(resNum);
}
/**
* Returns the residues belonging to the interface core, defined as those residues at
* the interface (BSA>0) and for which the BSA/ASA ratio is above the given bsaToAsaCutoff
* @param bsaToAsaCutoff
* @param minAsaForSurface the minimum ASA to consider a residue on the surface
* @return
*/
public Pair> getCoreResidues(double bsaToAsaCutoff, double minAsaForSurface) {
List core1 = new ArrayList<>();
List core2 = new ArrayList<>();
for (GroupAsa groupAsa:groupAsas1.values()) {
if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) {
if (groupAsa.getBsaToAsaRatio()minAsaForSurface && groupAsa.getBsa()>0) {
if (groupAsa.getBsaToAsaRatio()(core1, core2);
}
/**
* Returns the residues belonging to the interface rim, defined as those residues at
* the interface (BSA>0) and for which the BSA/ASA ratio is below the given bsaToAsaCutoff
* @param bsaToAsaCutoff
* @param minAsaForSurface the minimum ASA to consider a residue on the surface
* @return
*/
public Pair> getRimResidues(double bsaToAsaCutoff, double minAsaForSurface) {
List rim1 = new ArrayList<>();
List rim2 = new ArrayList<>();
for (GroupAsa groupAsa:groupAsas1.values()) {
if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) {
if (groupAsa.getBsaToAsaRatio()minAsaForSurface && groupAsa.getBsa()>0) {
if (groupAsa.getBsaToAsaRatio()(rim1, rim2);
}
/**
* Returns the residues belonging to the interface, i.e. the residues
* at the surface with BSA>0
* @param minAsaForSurface the minimum ASA to consider a residue on the surface
* @return
*/
public Pair> getInterfacingResidues(double minAsaForSurface) {
List interf1 = new ArrayList<>();
List interf2 = new ArrayList<>();
for (GroupAsa groupAsa:groupAsas1.values()) {
if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) {
interf1.add(groupAsa.getGroup());
}
}
for (GroupAsa groupAsa:groupAsas2.values()) {
if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) {
interf2.add(groupAsa.getGroup());
}
}
return new Pair<>(interf1, interf2);
}
/**
* Returns the residues belonging to the surface
* @param minAsaForSurface the minimum ASA to consider a residue on the surface
* @return
*/
public Pair> getSurfaceResidues(double minAsaForSurface) {
List surf1 = new ArrayList<>();
List surf2 = new ArrayList<>();
for (GroupAsa groupAsa:groupAsas1.values()) {
if (groupAsa.getAsaU()>minAsaForSurface) {
surf1.add(groupAsa.getGroup());
}
}
for (GroupAsa groupAsa:groupAsas2.values()) {
if (groupAsa.getAsaU()>minAsaForSurface) {
surf2.add(groupAsa.getGroup());
}
}
return new Pair<>(surf1, surf2);
}
public StructureInterfaceCluster getCluster() {
return cluster;
}
public void setCluster(StructureInterfaceCluster cluster) {
this.cluster = cluster;
}
/**
* Calculates the Jaccard contact set score (intersection over union) between this StructureInterface and
* the given one. The calculation assumes that both interfaces come from the same structure. The output
* will not necessarily make sense if the two interfaces come from different structures.
* The two sides of the given StructureInterface need to match this StructureInterface
* in the sense that they must come from the same Entity, i.e.
* their residue numbers need to align with 100% identity, except for unobserved
* density residues. The SEQRES indices obtained through {@link EntityInfo#getAlignedResIndex(Group, Chain)} are
* used to match residues, thus if no SEQRES is present or if {@link FileParsingParameters#setAlignSeqRes(boolean)}
* is not used, this calculation is not guaranteed to work properly.
* @param other the interface to be compared to this one
* @param invert if false the comparison will be done first-to-first and second-to-second,
* if true the match will be first-to-second and second-to-first
* @return the contact overlap score, range [0.0,1.0]
*/
public double getContactOverlapScore(StructureInterface other, boolean invert) {
Pair thisChains = getParentChains();
Pair otherChains = other.getParentChains();
if (thisChains.getFirst().getEntityInfo() == null || thisChains.getSecond().getEntityInfo() == null ||
otherChains.getFirst().getEntityInfo() == null || otherChains.getSecond().getEntityInfo() == null ) {
// this happens in cases like 2uub
logger.warn("Found chains with null compounds while comparing interfaces {} and {}. Contact overlap score for them will be 0.",
this.getId(), other.getId());
return 0;
}
Pair thisCompounds = new Pair<>(thisChains.getFirst().getEntityInfo(), thisChains.getSecond().getEntityInfo());
Pair otherCompounds = new Pair<>(otherChains.getFirst().getEntityInfo(), otherChains.getSecond().getEntityInfo());
if (checkMolIdMatch(thisCompounds,otherCompounds)) {
int common = 0;
GroupContactSet thisContacts = getGroupContacts();
GroupContactSet otherContacts = other.getGroupContacts();
for (GroupContact thisContact:thisContacts) {
ResidueIdentifier first;
ResidueIdentifier second;
if (!invert) {
first = new ResidueIdentifier(thisContact.getPair().getFirst());
second = new ResidueIdentifier(thisContact.getPair().getSecond());
} else {
first = new ResidueIdentifier(thisContact.getPair().getSecond());
second = new ResidueIdentifier(thisContact.getPair().getFirst());
}
if (otherContacts.hasContact(first,second)) {
common++;
}
}
return (2.0*common)/(thisContacts.size()+otherContacts.size());
} else {
logger.debug("Chain pairs {},{} and {},{} belong to different compound pairs, contact overlap score will be 0 ",
thisChains.getFirst().getId(),thisChains.getSecond().getId(),
otherChains.getFirst().getId(),otherChains.getSecond().getId());
return 0.0;
}
}
/**
* This method check if two compounds have same MolIds or not.
* @param thisCompounds
* @param otherCompounds
* @return
*/
private boolean checkMolIdMatch(Pair thisCompounds, Pair otherCompounds){
boolean firstMatch = thisCompounds.getFirst().getMolId() == otherCompounds.getFirst().getMolId() && thisCompounds.getSecond().getMolId() == otherCompounds.getSecond().getMolId();
boolean secondMatch = thisCompounds.getFirst().getMolId() == otherCompounds.getSecond().getMolId() && thisCompounds.getSecond().getMolId() == otherCompounds.getFirst().getMolId();
return firstMatch || secondMatch;
}
public GroupContactSet getGroupContacts() {
if (groupContacts==null) {
this.groupContacts = new GroupContactSet(contacts);
}
return this.groupContacts;
}
/**
* Tell whether the interface is isologous, i.e. it is formed
* by the same patches of same entity on both sides.
*
* @return true if isologous, false if heterologous
*/
public boolean isIsologous() {
double scoreInverse = this.getContactOverlapScore(this, true);
logger.debug("Interface {} contact overlap score with itself inverted: {}",
getId(), scoreInverse);
return (scoreInverse>SELF_SCORE_FOR_ISOLOGOUS);
}
/**
* Finds the parent chains by looking up the references of first atom of each side of this interface
* @return
*/
public Pair getParentChains() {
Atom[] firstMol = this.molecules.getFirst();
Atom[] secondMol = this.molecules.getSecond();
if (firstMol.length==0 || secondMol.length==0) {
logger.warn("No atoms found in first or second molecule, can't get parent Chains");
return null;
}
return new Pair<>(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain());
}
/**
* Finds the parent entities by looking up the references of first atom of each side of this interface
* @return
*/
public Pair getParentCompounds() {
Pair chains = getParentChains();
if (chains == null) {
logger.warn("Could not find parents chains, compounds will be null");
return null;
}
return new Pair<>(chains.getFirst().getEntityInfo(), chains.getSecond().getEntityInfo());
}
private Structure getParentStructure() {
Atom[] firstMol = this.molecules.getFirst();
if (firstMol.length==0) {
logger.warn("No atoms found in first molecule, can't get parent Structure");
return null;
}
return firstMol[0].getGroup().getChain().getStructure();
}
/**
* Return a String representing the 2 molecules of this interface in PDB format.
* If the molecule ids (i.e. chain ids) are the same for both molecules, then the second
* one will be replaced by the next letter in alphabet (or A for Z)
* @return the PDB-formatted string
*/
public String toPDB() {
String molecId1 = getMoleculeIds().getFirst();
String molecId2 = getMoleculeIds().getSecond();
if (molecId2.equals(molecId1)) {
// if both chains are named equally we want to still named them differently in the output pdb file
// so that molecular viewers can handle properly the 2 chains as separate entities
char letter = molecId1.charAt(0);
if (letter!='Z' && letter!='z') {
molecId2 = Character.toString((char)(letter+1)); // i.e. next letter in alphabet
} else {
molecId2 = Character.toString((char)(letter-25)); //i.e. 'A' or 'a'
}
}
StringBuilder sb = new StringBuilder();
for (Atom atom:this.molecules.getFirst()) {
sb.append(FileConvert.toPDB(atom, molecId1));
}
sb.append("TER");
sb.append(System.getProperty("line.separator"));
for (Atom atom:this.molecules.getSecond()) {
sb.append(FileConvert.toPDB(atom,molecId2));
}
sb.append("TER");
sb.append(System.getProperty("line.separator"));
sb.append("END");
sb.append(System.getProperty("line.separator"));
return sb.toString();
}
/**
* Return a String representing the 2 molecules of this interface in mmCIF format.
* If the molecule ids (i.e. chain ids) are the same for both molecules, then the second
* one will be written as chainId_operatorId (with operatorId taken from {@link #getTransforms()}
* @return the mmCIF-formatted string
*/
public String toMMCIF() {
String molecId1 = getMoleculeIds().getFirst();
String molecId2 = getMoleculeIds().getSecond();
if (isSymRelated()) {
// if both chains are named equally we want to still named them differently in the output mmcif file
// so that molecular viewers can handle properly the 2 chains as separate entities
molecId2 = molecId2 + "_" + getTransforms().getSecond().getTransformId();
}
MmCifBlockBuilder mmCifBlockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
.enterBlock("BioJava_interface_" + getId());
// we reassign atom ids if sym related (otherwise atom ids would be duplicated and some molecular viewers can't cope with that)
int atomId = 1;
List wrappedAtoms = new ArrayList<>();
for (Atom atom : this.molecules.getFirst()) {
if (isSymRelated()) {
wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId1, molecId1, atom, atomId));
} else {
wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId1, molecId1, atom, atom.getPDBserial()));
}
atomId++;
}
for (Atom atom : this.molecules.getSecond()) {
if (isSymRelated()) {
wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId2, molecId2, atom, atomId));
} else {
wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId2, molecId2, atom, atom.getPDBserial()));
}
atomId++;
}
Category atomSite = wrappedAtoms.stream().collect(AbstractCifFileSupplier.toAtomSite());
mmCifBlockBuilder.addCategory(atomSite);
try {
return new String(CifIO.writeText(mmCifBlockBuilder.leaveBlock().leaveFile()));
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
@Override
public int compareTo(StructureInterface o) {
// this will sort descending on interface areas
return (Double.compare(o.totalArea,this.totalArea));
}
@Override
public String toString() {
return String.format("StructureInterface %d (%s, %.0f A, <%s; %s>)", id, moleculeIds,totalArea,transforms.getFirst().toXYZString(),transforms.getSecond().toXYZString());
}
}