All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.contact.StructureInterface Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.contact;

import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.asa.AsaCalculator;
import org.biojava.nbio.structure.asa.GroupAsa;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.xtal.CrystalTransform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;


/**
 * An interface between 2 molecules (2 sets of atoms).
 *
 * @author duarte_j
 *
 */
public class StructureInterface implements Serializable, Comparable {

	private static final long serialVersionUID = 1L;

	private static final Logger logger = LoggerFactory.getLogger(StructureInterface.class);

	/**
	 * Interfaces with larger inverse self contact overlap score will be considered isologous
	 */
	private static final double SELF_SCORE_FOR_ISOLOGOUS = 0.3;

	private int id;
	private double totalArea;
	private AtomContactSet contacts;
	private GroupContactSet groupContacts;

	private Pair molecules;

	/**
	 * The identifier for each of the atom arrays (usually a chain identifier, i.e. a single capital letter)
	 * Serves to identify the molecules within the Asymmetric Unit of the crystal
	 */
	private Pair moleculeIds;

	/**
	 * The transformations (crystal operators) applied to each molecule (if applicable)
	 */
	private Pair transforms;

	private Map groupAsas1;
	private Map groupAsas2;

	private StructureInterfaceCluster cluster;

	/**
	 * Constructs a StructureInterface
	 * @param firstMolecule the atoms of the first molecule
	 * @param secondMolecule the atoms of the second molecule
	 * @param firstMoleculeId an identifier that identifies the first molecule within the Asymmetric Unit
	 * @param secondMoleculeId an identifier that identifies the second molecule within the Asymmetric Unit
	 * @param contacts the contacts between the 2 molecules
	 * @param firstTransf the transformation (crystal operator) applied to first molecule
	 * @param secondTransf the transformation (crystal operator) applied to second molecule
	 */
	public StructureInterface(
			Atom[] firstMolecule, Atom[] secondMolecule,
			String firstMoleculeId, String secondMoleculeId,
			AtomContactSet contacts,
			CrystalTransform firstTransf, CrystalTransform secondTransf) {

		this.molecules = new Pair(firstMolecule, secondMolecule);
		this.moleculeIds = new Pair(firstMoleculeId,secondMoleculeId);
		this.contacts = contacts;
		this.transforms = new Pair(firstTransf, secondTransf);
	}

	/**
	 * Constructs an empty StructureInterface
	 */
	public StructureInterface() {
		this.groupAsas1 = new TreeMap();
		this.groupAsas2 = new TreeMap();
	}

	public int getId() {
		return id;
	}

	public void setId(int id) {
		this.id = id;
	}

	/**
	 * Returns a pair of identifiers for each of the 2 member molecules that
	 * identify them uniquely in the crystal:
	 *   <molecule id (asym unit id)>+<operator id>+<crystal translation>
	 * @return
	 */
	public Pair getCrystalIds() {
		return new Pair(
			moleculeIds.getFirst()+transforms.getFirst().getTransformId()+transforms.getFirst().getCrystalTranslation(),
			moleculeIds.getSecond()+transforms.getSecond().getTransformId()+transforms.getSecond().getCrystalTranslation());
	}

	/**
	 * Returns the total area buried upon formation of this interface,
	 * defined as: 1/2[ (ASA1u-ASA1c) + (ASA2u-ASA2u) ] , with:
	 *  

ASAxu = ASA of first/second unbound chain

*

ASAxc = ASA of first/second complexed chain

* In the area calculation HETATOM groups not part of the main protein/nucleotide chain * are not included. * @return */ public double getTotalArea() { return totalArea; } public void setTotalArea(double totalArea) { this.totalArea = totalArea; } public AtomContactSet getContacts() { return contacts; } public void setContacts(AtomContactSet contacts) { this.contacts = contacts; } public Pair getMolecules() { return molecules; } public void setMolecules(Pair molecules) { this.molecules = molecules; } /** * Return the pair of identifiers identifying each of the 2 molecules of this interface * in the asymmetry unit (usually the chain identifier if this interface is between 2 chains) * @return */ public Pair getMoleculeIds() { return moleculeIds; } public void setMoleculeIds(Pair moleculeIds) { this.moleculeIds = moleculeIds; } /** * Return the 2 crystal transform operations performed on each of the * molecules of this interface. * @return */ public Pair getTransforms() { return transforms; } public void setTransforms(Pair transforms) { this.transforms = transforms; } protected void setAsas(double[] asas1, double[] asas2, int nSpherePoints, int nThreads, int cofactorSizeToUse) { Atom[] atoms = getAtomsForAsa(cofactorSizeToUse); AsaCalculator asaCalc = new AsaCalculator(atoms, AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads); double[] complexAsas = asaCalc.calculateAsas(); if (complexAsas.length!=asas1.length+asas2.length) throw new IllegalArgumentException("The size of ASAs of complex doesn't match that of ASAs 1 + ASAs 2"); groupAsas1 = new TreeMap(); groupAsas2 = new TreeMap(); this.totalArea = 0; for (int i=0;i atoms = new ArrayList(); for (Atom a:m){ if (a.getElement()==Element.H) continue; Group g = a.getGroup(); if (g.getType().equals(GroupType.HETATM) && !isInChain(g) && getSizeNoH(g) getFirstGroupAsas() { return groupAsas1; } /** * Gets the GroupAsa for the corresponding residue number of first chain * @param resNum * @return */ public GroupAsa getFirstGroupAsa(ResidueNumber resNum) { return groupAsas1.get(resNum); } public void setFirstGroupAsa(GroupAsa groupAsa) { groupAsas1.put(groupAsa.getGroup().getResidueNumber(), groupAsa); } /** * Gets a map of ResidueNumbers to GroupAsas for all groups of second chain. * @return */ public Map getSecondGroupAsas() { return groupAsas2; } public void setSecondGroupAsa(GroupAsa groupAsa) { groupAsas2.put(groupAsa.getGroup().getResidueNumber(), groupAsa); } /** * Gets the GroupAsa for the corresponding residue number of second chain * @param resNum * @return */ public GroupAsa getSecondGroupAsa(ResidueNumber resNum) { return groupAsas2.get(resNum); } /** * Returns the residues belonging to the interface core, defined as those residues at * the interface (BSA>0) and for which the BSA/ASA ratio is above the given bsaToAsaCutoff * @param bsaToAsaCutoff * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getCoreResidues(double bsaToAsaCutoff, double minAsaForSurface) { List core1 = new ArrayList(); List core2 = new ArrayList(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()>(core1, core2); } /** * Returns the residues belonging to the interface rim, defined as those residues at * the interface (BSA>0) and for which the BSA/ASA ratio is below the given bsaToAsaCutoff * @param bsaToAsaCutoff * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getRimResidues(double bsaToAsaCutoff, double minAsaForSurface) { List rim1 = new ArrayList(); List rim2 = new ArrayList(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()>(rim1, rim2); } /** * Returns the residues belonging to the interface, i.e. the residues * at the surface with BSA>0 * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getInterfacingResidues(double minAsaForSurface) { List interf1 = new ArrayList(); List interf2 = new ArrayList(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { interf1.add(groupAsa.getGroup()); } } for (GroupAsa groupAsa:groupAsas2.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { interf2.add(groupAsa.getGroup()); } } return new Pair>(interf1, interf2); } /** * Returns the residues belonging to the surface * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getSurfaceResidues(double minAsaForSurface) { List surf1 = new ArrayList(); List surf2 = new ArrayList(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface) { surf1.add(groupAsa.getGroup()); } } for (GroupAsa groupAsa:groupAsas2.values()) { if (groupAsa.getAsaU()>minAsaForSurface) { surf2.add(groupAsa.getGroup()); } } return new Pair>(surf1, surf2); } public StructureInterfaceCluster getCluster() { return cluster; } public void setCluster(StructureInterfaceCluster cluster) { this.cluster = cluster; } /** * Calculates the contact overlap score between this StructureInterface and * the given one. * The two sides of the given StructureInterface need to match this StructureInterface * in the sense that they must come from the same Compound (Entity), i.e. * their residue numbers need to align with 100% identity, except for unobserved * density residues. The SEQRES indices obtained through {@link EntityInfo#getAlignedResIndex(Group, Chain)} are * used to match residues, thus if no SEQRES is present or if {@link FileParsingParameters#setAlignSeqRes(boolean)} * is not used, this calculation is not guaranteed to work properly. * @param other * @param invert if false the comparison will be done first-to-first and second-to-second, * if true the match will be first-to-second and second-to-first * @return the contact overlap score, range [0.0,1.0] */ public double getContactOverlapScore(StructureInterface other, boolean invert) { Structure thisStruct = getParentStructure(); Structure otherStruct = other.getParentStructure(); if (thisStruct!=otherStruct) { // in the current implementation, comparison between different structure doesn't make much sense // and won't even work since the compounds of both will never match. We warn because it // really is not what this is intended for at the moment logger.warn("Comparing interfaces from different structures, contact overlap score will be 0"); return 0; } Pair thisChains = getParentChains(); Pair otherChains = other.getParentChains(); if (thisChains.getFirst().getEntityInfo() == null || thisChains.getSecond().getEntityInfo() == null || otherChains.getFirst().getEntityInfo() == null || otherChains.getSecond().getEntityInfo() == null ) { // this happens in cases like 2uub logger.warn("Found chains with null compounds while comparing interfaces {} and {}. Contact overlap score for them will be 0.", this.getId(), other.getId()); return 0; } Pair thisCompounds = new Pair(thisChains.getFirst().getEntityInfo(), thisChains.getSecond().getEntityInfo()); Pair otherCompounds = new Pair(otherChains.getFirst().getEntityInfo(), otherChains.getSecond().getEntityInfo()); if ( ( (thisCompounds.getFirst() == otherCompounds.getFirst()) && (thisCompounds.getSecond() == otherCompounds.getSecond()) ) || ( (thisCompounds.getFirst() == otherCompounds.getSecond()) && (thisCompounds.getSecond() == otherCompounds.getFirst()) ) ) { int common = 0; GroupContactSet thisContacts = getGroupContacts(); GroupContactSet otherContacts = other.getGroupContacts(); for (GroupContact thisContact:thisContacts) { ResidueIdentifier first = null; ResidueIdentifier second = null; if (!invert) { first = new ResidueIdentifier(thisContact.getPair().getFirst()); second = new ResidueIdentifier(thisContact.getPair().getSecond()); } else { first = new ResidueIdentifier(thisContact.getPair().getSecond()); second = new ResidueIdentifier(thisContact.getPair().getFirst()); } if (otherContacts.hasContact(first,second)) { common++; } } return (2.0*common)/(thisContacts.size()+otherContacts.size()); } else { logger.debug("Chain pairs {},{} and {},{} belong to different compound pairs, contact overlap score will be 0 ", thisChains.getFirst().getChainID(),thisChains.getSecond().getChainID(), otherChains.getFirst().getChainID(),otherChains.getSecond().getChainID()); return 0.0; } } public GroupContactSet getGroupContacts() { if (groupContacts==null) { this.groupContacts = new GroupContactSet(contacts); } return this.groupContacts; } /** * Tell whether the interface is isologous, i.e. it is formed * by the same patches of same Compound on both sides. * * @return true if isologous, false if heterologous */ public boolean isIsologous() { double scoreInverse = this.getContactOverlapScore(this, true); logger.debug("Interface {} contact overlap score with itself inverted: {}", getId(), scoreInverse); return (scoreInverse>SELF_SCORE_FOR_ISOLOGOUS); } /** * Finds the parent chains by looking up the references of first atom of each side of this interface * @return */ public Pair getParentChains() { Atom[] firstMol = this.molecules.getFirst(); Atom[] secondMol = this.molecules.getSecond(); if (firstMol.length==0 || secondMol.length==0) { logger.warn("No atoms found in first or second molecule, can't get parent Chains"); return null; } return new Pair(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain()); } /** * Finds the parent compounds by looking up the references of first atom of each side of this interface * @return */ public Pair getParentCompounds() { Pair chains = getParentChains(); if (chains == null) { logger.warn("Could not find parents chains, compounds will be null"); return null; } return new Pair(chains.getFirst().getEntityInfo(), chains.getSecond().getEntityInfo()); } private Structure getParentStructure() { Atom[] firstMol = this.molecules.getFirst(); if (firstMol.length==0) { logger.warn("No atoms found in first molecule, can't get parent Structure"); return null; } return firstMol[0].getGroup().getChain().getStructure(); } /** * Return a String representing the 2 molecules of this interface in PDB format. * If the molecule ids (i.e. chain ids) are the same for both molecules, then the second * one will be replaced by the next letter in alphabet (or A for Z) * @return */ public String toPDB() { String molecId1 = getMoleculeIds().getFirst(); String molecId2 = getMoleculeIds().getSecond(); if (molecId2.equals(molecId1)) { // if both chains are named equally we want to still named them differently in the output pdb file // so that molecular viewers can handle properly the 2 chains as separate entities char letter = molecId1.charAt(0); if (letter!='Z' && letter!='z') { molecId2 = Character.toString((char)(letter+1)); // i.e. next letter in alphabet } else { molecId2 = Character.toString((char)(letter-25)); //i.e. 'A' or 'a' } } StringBuilder sb = new StringBuilder(); for (Atom atom:this.molecules.getFirst()) { sb.append(FileConvert.toPDB(atom, molecId1)); } sb.append("TER"); sb.append(System.getProperty("line.separator")); for (Atom atom:this.molecules.getSecond()) { sb.append(FileConvert.toPDB(atom,molecId2)); } sb.append("TER"); sb.append(System.getProperty("line.separator")); sb.append("END"); sb.append(System.getProperty("line.separator")); return sb.toString(); } /** * Return a String representing the 2 molecules of this interface in mmCIF format. * If the molecule ids (i.e. chain ids) are the same for both molecules, then the second * one will be written as chainId_operatorId (with operatorId taken from {@link #getTransforms()} * @return */ public String toMMCIF() { StringBuilder sb = new StringBuilder(); String molecId1 = getMoleculeIds().getFirst(); String molecId2 = getMoleculeIds().getSecond(); if (isSymRelated()) { // if both chains are named equally we want to still named them differently in the output mmcif file // so that molecular viewers can handle properly the 2 chains as separate entities molecId2 = molecId2 + "_" +getTransforms().getSecond().getTransformId(); } sb.append(SimpleMMcifParser.MMCIF_TOP_HEADER+"BioJava_interface_"+getId()+System.getProperty("line.separator")); sb.append(FileConvert.getAtomSiteHeader()); // we reassign atom ids if sym related (otherwise atom ids would be duplicated and some molecular viewers can't cope with that) int atomId = 1; List atomSites = new ArrayList(); for (Atom atom:this.molecules.getFirst()) { if (isSymRelated()) { atomSites.add(MMCIFFileTools.convertAtomToAtomSite(atom, 1, molecId1, molecId1, atomId)); } else { atomSites.add(MMCIFFileTools.convertAtomToAtomSite(atom, 1, molecId1, molecId1)); } atomId++; } for (Atom atom:this.molecules.getSecond()) { if (isSymRelated()) { atomSites.add(MMCIFFileTools.convertAtomToAtomSite(atom, 1, molecId2, molecId2, atomId)); } else { atomSites.add(MMCIFFileTools.convertAtomToAtomSite(atom, 1, molecId2, molecId2)); } atomId++; } sb.append(MMCIFFileTools.toMMCIF(atomSites)); return sb.toString(); } @Override public int compareTo(StructureInterface o) { // this will sort descending on interface areas return (Double.compare(o.totalArea,this.totalArea)); } @Override public String toString() { return String.format("StructureInterface %d (%s, %.0f A, <%s; %s>)", id, moleculeIds,totalArea,transforms.getFirst().toXYZString(),transforms.getSecond().toXYZString()); } }