All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.contact.StructureInterface Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.contact;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.asa.AsaCalculator;
import org.biojava.nbio.structure.asa.GroupAsa;
import org.biojava.nbio.structure.chem.ChemComp;
import org.biojava.nbio.structure.chem.PolymerType;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.cif.AbstractCifFileSupplier;
import org.biojava.nbio.structure.xtal.CrystalTransform;
import org.rcsb.cif.CifBuilder;
import org.rcsb.cif.CifIO;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.schema.StandardSchemata;
import org.rcsb.cif.schema.mm.MmCifBlockBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.Serializable;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;


/**
 * An interface between 2 molecules (2 sets of atoms).
 *
 * @author duarte_j
 *
 */
public class StructureInterface implements Serializable, Comparable {

	private static final long serialVersionUID = 1L;

	private static final Logger logger = LoggerFactory.getLogger(StructureInterface.class);

	/**
	 * Interfaces with larger inverse self contact overlap score will be considered isologous
	 */
	private static final double SELF_SCORE_FOR_ISOLOGOUS = 0.3;

	private int id;
	private double totalArea;
	private AtomContactSet contacts;
	private GroupContactSet groupContacts;

	private Pair molecules;

	/**
	 * The identifier for each of the atom arrays (usually a chain identifier, i.e. a single capital letter)
	 * Serves to identify the molecules within the Asymmetric Unit of the crystal
	 */
	private Pair moleculeIds;

	/**
	 * The transformations (crystal operators) applied to each molecule (if applicable)
	 */
	private Pair transforms;

	private Map groupAsas1;
	private Map groupAsas2;

	private StructureInterfaceCluster cluster;

	/**
	 * Constructs a StructureInterface
	 * @param firstMolecule the atoms of the first molecule
	 * @param secondMolecule the atoms of the second molecule
	 * @param firstMoleculeId an identifier that identifies the first molecule within the Asymmetric Unit
	 * @param secondMoleculeId an identifier that identifies the second molecule within the Asymmetric Unit
	 * @param contacts the contacts between the 2 molecules
	 * @param firstTransf the transformation (crystal operator) applied to first molecule
	 * @param secondTransf the transformation (crystal operator) applied to second molecule
	 */
	public StructureInterface(
			Atom[] firstMolecule, Atom[] secondMolecule,
			String firstMoleculeId, String secondMoleculeId,
			AtomContactSet contacts,
			CrystalTransform firstTransf, CrystalTransform secondTransf) {

		this.molecules = new Pair<>(firstMolecule, secondMolecule);
		this.moleculeIds = new Pair<>(firstMoleculeId,secondMoleculeId);
		this.contacts = contacts;
		this.transforms = new Pair<>(firstTransf, secondTransf);

		this.groupAsas1 = new TreeMap<>();
		this.groupAsas2 = new TreeMap<>();
	}

	/**
	 * Constructs an empty StructureInterface
	 */
	public StructureInterface() {
		this.groupAsas1 = new TreeMap<>();
		this.groupAsas2 = new TreeMap<>();
	}

	public int getId() {
		return id;
	}

	public void setId(int id) {
		this.id = id;
	}

	/**
	 * Returns a pair of identifiers for each of the 2 member molecules that
	 * identify them uniquely in the crystal:
	 *   <molecule id (asym unit id)>+<operator id>+<crystal translation>
	 * @return
	 */
	public Pair getCrystalIds() {
		return new Pair<>(
			moleculeIds.getFirst()+transforms.getFirst().getTransformId()+transforms.getFirst().getCrystalTranslation(),
			moleculeIds.getSecond()+transforms.getSecond().getTransformId()+transforms.getSecond().getCrystalTranslation());
	}

	/**
	 * Returns the total area buried upon formation of this interface,
	 * defined as: 1/2[ (ASA1u-ASA1c) + (ASA2u-ASA2u) ] , with:
	 *  

ASAxu = ASA of first/second unbound chain

*

ASAxc = ASA of first/second complexed chain

* In the area calculation HETATOM groups not part of the main protein/nucleotide chain * are not included. * @return */ public double getTotalArea() { return totalArea; } public void setTotalArea(double totalArea) { this.totalArea = totalArea; } public AtomContactSet getContacts() { return contacts; } public void setContacts(AtomContactSet contacts) { this.contacts = contacts; } public Pair getMolecules() { return molecules; } public void setMolecules(Pair molecules) { this.molecules = molecules; } /** * Return the pair of identifiers identifying each of the 2 molecules of this interface * in the asymmetry unit (usually the chain identifier if this interface is between 2 chains) * @return */ public Pair getMoleculeIds() { return moleculeIds; } public void setMoleculeIds(Pair moleculeIds) { this.moleculeIds = moleculeIds; } /** * Return the 2 crystal transform operations performed on each of the * molecules of this interface. * @return */ public Pair getTransforms() { return transforms; } public void setTransforms(Pair transforms) { this.transforms = transforms; } /** * Set ASA annotations by passing the uncomplexed ASA values of the 2 partners. * This will calculate complexed ASA and set the ASA values in the member variables. * @param asas1 ASA values for atoms of partner 1 * @param asas2 ASA values for atoms of partner 2 * @param nSpherePoints the number of sphere points to be used for complexed ASA calculation * @param nThreads the number of threads to be used for complexed ASA calculation * @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used in ASA calculation */ void setAsas(double[] asas1, double[] asas2, int nSpherePoints, int nThreads, int cofactorSizeToUse) { Atom[] atoms = getAtomsForAsa(cofactorSizeToUse); AsaCalculator asaCalc = new AsaCalculator(atoms, AsaCalculator.DEFAULT_PROBE_SIZE, nSpherePoints, nThreads); double[] complexAsas = asaCalc.calculateAsas(); if (complexAsas.length!=asas1.length+asas2.length) throw new IllegalArgumentException("The size of ASAs of complex doesn't match that of ASAs 1 + ASAs 2"); groupAsas1 = new TreeMap<>(); groupAsas2 = new TreeMap<>(); this.totalArea = 0; for (int i=0;i atoms = new ArrayList<>(); for (Atom a:m){ if (a.getElement()==Element.H) continue; Group g = a.getGroup(); if (g.getType().equals(GroupType.HETATM) && !isInChain(g) && getSizeNoH(g) getFirstGroupAsas() { return groupAsas1; } /** * Gets the GroupAsa for the corresponding residue number of first chain * @param resNum * @return */ public GroupAsa getFirstGroupAsa(ResidueNumber resNum) { return groupAsas1.get(resNum); } public void setFirstGroupAsa(GroupAsa groupAsa) { groupAsas1.put(groupAsa.getGroup().getResidueNumber(), groupAsa); } public void setFirstGroupAsas(Map firstGroupAsas) { this.groupAsas1 = firstGroupAsas; } public void setSecondGroupAsas(Map secondGroupAsas) { this.groupAsas2 = secondGroupAsas; } /** * Gets a map of ResidueNumbers to GroupAsas for all groups of second chain. * @return */ public Map getSecondGroupAsas() { return groupAsas2; } public void setSecondGroupAsa(GroupAsa groupAsa) { groupAsas2.put(groupAsa.getGroup().getResidueNumber(), groupAsa); } /** * Gets the GroupAsa for the corresponding residue number of second chain * @param resNum * @return */ public GroupAsa getSecondGroupAsa(ResidueNumber resNum) { return groupAsas2.get(resNum); } /** * Returns the residues belonging to the interface core, defined as those residues at * the interface (BSA>0) and for which the BSA/ASA ratio is above the given bsaToAsaCutoff * @param bsaToAsaCutoff * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getCoreResidues(double bsaToAsaCutoff, double minAsaForSurface) { List core1 = new ArrayList<>(); List core2 = new ArrayList<>(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()(core1, core2); } /** * Returns the residues belonging to the interface rim, defined as those residues at * the interface (BSA>0) and for which the BSA/ASA ratio is below the given bsaToAsaCutoff * @param bsaToAsaCutoff * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getRimResidues(double bsaToAsaCutoff, double minAsaForSurface) { List rim1 = new ArrayList<>(); List rim2 = new ArrayList<>(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()minAsaForSurface && groupAsa.getBsa()>0) { if (groupAsa.getBsaToAsaRatio()(rim1, rim2); } /** * Returns the residues belonging to the interface, i.e. the residues * at the surface with BSA>0 * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getInterfacingResidues(double minAsaForSurface) { List interf1 = new ArrayList<>(); List interf2 = new ArrayList<>(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { interf1.add(groupAsa.getGroup()); } } for (GroupAsa groupAsa:groupAsas2.values()) { if (groupAsa.getAsaU()>minAsaForSurface && groupAsa.getBsa()>0) { interf2.add(groupAsa.getGroup()); } } return new Pair<>(interf1, interf2); } /** * Returns the residues belonging to the surface * @param minAsaForSurface the minimum ASA to consider a residue on the surface * @return */ public Pair> getSurfaceResidues(double minAsaForSurface) { List surf1 = new ArrayList<>(); List surf2 = new ArrayList<>(); for (GroupAsa groupAsa:groupAsas1.values()) { if (groupAsa.getAsaU()>minAsaForSurface) { surf1.add(groupAsa.getGroup()); } } for (GroupAsa groupAsa:groupAsas2.values()) { if (groupAsa.getAsaU()>minAsaForSurface) { surf2.add(groupAsa.getGroup()); } } return new Pair<>(surf1, surf2); } public StructureInterfaceCluster getCluster() { return cluster; } public void setCluster(StructureInterfaceCluster cluster) { this.cluster = cluster; } /** * Calculates the Jaccard contact set score (intersection over union) between this StructureInterface and * the given one. The calculation assumes that both interfaces come from the same structure. The output * will not necessarily make sense if the two interfaces come from different structures. * The two sides of the given StructureInterface need to match this StructureInterface * in the sense that they must come from the same Entity, i.e. * their residue numbers need to align with 100% identity, except for unobserved * density residues. The SEQRES indices obtained through {@link EntityInfo#getAlignedResIndex(Group, Chain)} are * used to match residues, thus if no SEQRES is present or if {@link FileParsingParameters#setAlignSeqRes(boolean)} * is not used, this calculation is not guaranteed to work properly. * @param other the interface to be compared to this one * @param invert if false the comparison will be done first-to-first and second-to-second, * if true the match will be first-to-second and second-to-first * @return the contact overlap score, range [0.0,1.0] */ public double getContactOverlapScore(StructureInterface other, boolean invert) { Pair thisChains = getParentChains(); Pair otherChains = other.getParentChains(); if (thisChains.getFirst().getEntityInfo() == null || thisChains.getSecond().getEntityInfo() == null || otherChains.getFirst().getEntityInfo() == null || otherChains.getSecond().getEntityInfo() == null ) { // this happens in cases like 2uub logger.warn("Found chains with null compounds while comparing interfaces {} and {}. Contact overlap score for them will be 0.", this.getId(), other.getId()); return 0; } Pair thisCompounds = new Pair<>(thisChains.getFirst().getEntityInfo(), thisChains.getSecond().getEntityInfo()); Pair otherCompounds = new Pair<>(otherChains.getFirst().getEntityInfo(), otherChains.getSecond().getEntityInfo()); if (checkMolIdMatch(thisCompounds,otherCompounds)) { int common = 0; GroupContactSet thisContacts = getGroupContacts(); GroupContactSet otherContacts = other.getGroupContacts(); for (GroupContact thisContact:thisContacts) { ResidueIdentifier first; ResidueIdentifier second; if (!invert) { first = new ResidueIdentifier(thisContact.getPair().getFirst()); second = new ResidueIdentifier(thisContact.getPair().getSecond()); } else { first = new ResidueIdentifier(thisContact.getPair().getSecond()); second = new ResidueIdentifier(thisContact.getPair().getFirst()); } if (otherContacts.hasContact(first,second)) { common++; } } return (2.0*common)/(thisContacts.size()+otherContacts.size()); } else { logger.debug("Chain pairs {},{} and {},{} belong to different compound pairs, contact overlap score will be 0 ", thisChains.getFirst().getId(),thisChains.getSecond().getId(), otherChains.getFirst().getId(),otherChains.getSecond().getId()); return 0.0; } } /** * This method check if two compounds have same MolIds or not. * @param thisCompounds * @param otherCompounds * @return */ private boolean checkMolIdMatch(Pair thisCompounds, Pair otherCompounds){ boolean firstMatch = thisCompounds.getFirst().getMolId() == otherCompounds.getFirst().getMolId() && thisCompounds.getSecond().getMolId() == otherCompounds.getSecond().getMolId(); boolean secondMatch = thisCompounds.getFirst().getMolId() == otherCompounds.getSecond().getMolId() && thisCompounds.getSecond().getMolId() == otherCompounds.getFirst().getMolId(); return firstMatch || secondMatch; } public GroupContactSet getGroupContacts() { if (groupContacts==null) { this.groupContacts = new GroupContactSet(contacts); } return this.groupContacts; } /** * Tell whether the interface is isologous, i.e. it is formed * by the same patches of same entity on both sides. * * @return true if isologous, false if heterologous */ public boolean isIsologous() { double scoreInverse = this.getContactOverlapScore(this, true); logger.debug("Interface {} contact overlap score with itself inverted: {}", getId(), scoreInverse); return (scoreInverse>SELF_SCORE_FOR_ISOLOGOUS); } /** * Finds the parent chains by looking up the references of first atom of each side of this interface * @return */ public Pair getParentChains() { Atom[] firstMol = this.molecules.getFirst(); Atom[] secondMol = this.molecules.getSecond(); if (firstMol.length==0 || secondMol.length==0) { logger.warn("No atoms found in first or second molecule, can't get parent Chains"); return null; } return new Pair<>(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain()); } /** * Finds the parent entities by looking up the references of first atom of each side of this interface * @return */ public Pair getParentCompounds() { Pair chains = getParentChains(); if (chains == null) { logger.warn("Could not find parents chains, compounds will be null"); return null; } return new Pair<>(chains.getFirst().getEntityInfo(), chains.getSecond().getEntityInfo()); } private Structure getParentStructure() { Atom[] firstMol = this.molecules.getFirst(); if (firstMol.length==0) { logger.warn("No atoms found in first molecule, can't get parent Structure"); return null; } return firstMol[0].getGroup().getChain().getStructure(); } /** * Return a String representing the 2 molecules of this interface in PDB format. * If the molecule ids (i.e. chain ids) are the same for both molecules, then the second * one will be replaced by the next letter in alphabet (or A for Z) * @return the PDB-formatted string */ public String toPDB() { String molecId1 = getMoleculeIds().getFirst(); String molecId2 = getMoleculeIds().getSecond(); if (molecId2.equals(molecId1)) { // if both chains are named equally we want to still named them differently in the output pdb file // so that molecular viewers can handle properly the 2 chains as separate entities char letter = molecId1.charAt(0); if (letter!='Z' && letter!='z') { molecId2 = Character.toString((char)(letter+1)); // i.e. next letter in alphabet } else { molecId2 = Character.toString((char)(letter-25)); //i.e. 'A' or 'a' } } StringBuilder sb = new StringBuilder(); for (Atom atom:this.molecules.getFirst()) { sb.append(FileConvert.toPDB(atom, molecId1)); } sb.append("TER"); sb.append(System.getProperty("line.separator")); for (Atom atom:this.molecules.getSecond()) { sb.append(FileConvert.toPDB(atom,molecId2)); } sb.append("TER"); sb.append(System.getProperty("line.separator")); sb.append("END"); sb.append(System.getProperty("line.separator")); return sb.toString(); } /** * Return a String representing the 2 molecules of this interface in mmCIF format. * If the molecule ids (i.e. chain ids) are the same for both molecules, then the second * one will be written as chainId_operatorId (with operatorId taken from {@link #getTransforms()} * @return the mmCIF-formatted string */ public String toMMCIF() { String molecId1 = getMoleculeIds().getFirst(); String molecId2 = getMoleculeIds().getSecond(); if (isSymRelated()) { // if both chains are named equally we want to still named them differently in the output mmcif file // so that molecular viewers can handle properly the 2 chains as separate entities molecId2 = molecId2 + "_" + getTransforms().getSecond().getTransformId(); } MmCifBlockBuilder mmCifBlockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF) .enterBlock("BioJava_interface_" + getId()); // we reassign atom ids if sym related (otherwise atom ids would be duplicated and some molecular viewers can't cope with that) int atomId = 1; List wrappedAtoms = new ArrayList<>(); for (Atom atom : this.molecules.getFirst()) { if (isSymRelated()) { wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId1, molecId1, atom, atomId)); } else { wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId1, molecId1, atom, atom.getPDBserial())); } atomId++; } for (Atom atom : this.molecules.getSecond()) { if (isSymRelated()) { wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId2, molecId2, atom, atomId)); } else { wrappedAtoms.add(new AbstractCifFileSupplier.WrappedAtom(1, molecId2, molecId2, atom, atom.getPDBserial())); } atomId++; } Category atomSite = wrappedAtoms.stream().collect(AbstractCifFileSupplier.toAtomSite()); mmCifBlockBuilder.addCategory(atomSite); try { return new String(CifIO.writeText(mmCifBlockBuilder.leaveBlock().leaveFile())); } catch (IOException e) { throw new UncheckedIOException(e); } } @Override public int compareTo(StructureInterface o) { // this will sort descending on interface areas return (Double.compare(o.totalArea,this.totalArea)); } @Override public String toString() { return String.format("StructureInterface %d (%s, %.0f A, <%s; %s>)", id, moleculeIds,totalArea,transforms.getFirst().toXYZString(),transforms.getSecond().toXYZString()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy