org.biojava.nbio.structure.xtal.CrystalBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.xtal;
import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.contact.AtomContactSet;
import org.biojava.nbio.structure.contact.StructureInterface;
import org.biojava.nbio.structure.contact.StructureInterfaceList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.vecmath.Matrix4d;
import javax.vecmath.Point3i;
import javax.vecmath.Vector3d;
import java.util.*;
/**
* A class containing methods to find interfaces in a given crystallographic Structure by
* reconstructing the crystal lattice through application of symmetry operators
*
* @author Jose Duarte
*
*/
public class CrystalBuilder {
public static final String NCS_CHAINID_SUFFIX_CHAR = "n";
// Default number of cell neighbors to try in interface search (in 3 directions of space).
// In the search, only bounding box overlaps are tried, thus there's not so much overhead in adding
// more cells. We actually tested it and using numCells from 1 to 10 didn't change runtimes at all.
// Examples with interfaces in distant neighbor cells:
// 2nd neighbors: 3hz3, 1wqj, 2de3, 1jcd
// 3rd neighbors: 3bd3, 1men, 2gkp, 1wui
// 5th neighbors: 2ahf, 2h2z
// 6th neighbors: 1was (in fact interfaces appear only at 5th neighbors for it)
// Maybe this could be avoided by previously translating the given molecule to the first cell,
// BUT! some bona fide cases exist, e.g. 2d3e: it is properly placed at the origin but the molecule
// is enormously long in comparison with the dimensions of the unit cell, some interfaces come at the 7th neighbor.
// After a scan of the whole PDB (Oct 2013) using numCells=50, the highest one was 4jgc with
// interfaces up to the 11th neighbor. Other high ones (9th neighbors) are 4jbm and 4k3t.
// We set the default value to 20 to be on the safe side. Runtime does not seem to be affected at all - JD 2020-01-12
// Some good examples in this posting in CCP4: https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=CCP4BB;45b2755d.2001
// in any case the 5m3h example in the posting seems to have contacts only up to the 11th neighbor.
public static final int DEF_NUM_CELLS = 20;
/**
* Default maximum distance between two chains to be considered an interface.
* @see #getUniqueInterfaces(double)
*/
public static final double DEFAULT_INTERFACE_DISTANCE_CUTOFF = 5.5;
public static final Matrix4d IDENTITY = new Matrix4d(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
/**
* Whether to consider HETATOMs in contact calculations
*/
private static final boolean INCLUDE_HETATOMS = true;
private Structure structure;
private PDBCrystallographicInfo crystallographicInfo;
private int numPolyChainsAu;
private int numOperatorsSg;
private Map chainNcsOps = null;
private Map chainOrigNames = null;
private static final Logger logger = LoggerFactory.getLogger(CrystalBuilder.class);
private int numCells;
private ArrayList visitedCrystalTransforms;
private Map> visitedNcsChainPairs = null;
private boolean searchBeyondAU;
private Matrix4d[] ops;
/**
* Special constructor for NCS-aware CrystalBuilder.
* The output list of interfaces will be pre-clustered by NCS-equivalence.
* Run {@link CrystalBuilder#expandNcsOps(Structure, Map, Map)} first to extend the AU
* and get the equivalence information.
* @param structure
* NCS-extended structure
* @param chainOrigNames
* chain names mapped to the original chain names (pre-NCS extension)
* @param chainNcsOps
* chain names mapped to the ncs operators that was used to generate them
* @since 5.0.0
*/
public CrystalBuilder(Structure structure, Map chainOrigNames, Map chainNcsOps) {
this(structure);
this.chainOrigNames = chainOrigNames;
this.chainNcsOps = chainNcsOps;
}
public CrystalBuilder(Structure structure) {
this.structure = structure;
this.crystallographicInfo = structure.getCrystallographicInfo();
this.numPolyChainsAu = structure.getPolyChains().size();
this.searchBeyondAU = false;
if (structure.isCrystallographic()) {
this.searchBeyondAU = true;
// we need to check space group not null for the cases where the entry is crystallographic but
// the space group is not a standard one recognized by biojava, e.g. 1mnk (SG: 'I 21')
if (this.crystallographicInfo.isNonStandardSg()) {
logger.warn("Space group is non-standard, will only calculate asymmetric unit interfaces.");
this.searchBeyondAU = false;
}
// just in case we still check for space group null (a user pdb file could potentially be crystallographic and have no space group)
if (this.crystallographicInfo.getSpaceGroup() == null) {
logger.warn("Space group is null, will only calculate asymmetric unit interfaces.");
this.searchBeyondAU = false;
}
// we need to check crystal cell not null for the rare cases where the entry is crystallographic but
// the crystal cell is not given, e.g. 2i68, 2xkm, 4bpq
if (this.crystallographicInfo.getCrystalCell() == null) {
logger.warn("Could not find a crystal cell definition, will only calculate asymmetric unit interfaces.");
this.searchBeyondAU = false;
}
// check for cases like 4hhb that are in a non-standard coordinate frame convention, see https://github.com/eppic-team/owl/issues/4
if (this.crystallographicInfo.isNonStandardCoordFrameConvention()) {
logger.warn("Non-standard coordinate frame convention, will only calculate asymmetric unit interfaces.");
this.searchBeyondAU = false;
}
}
if (this.searchBeyondAU) {
// explore the crystal
this.numOperatorsSg = this.crystallographicInfo.getSpaceGroup().getMultiplicity();
this.ops = this.crystallographicInfo.getTransformationsOrthonormal();
} else {
// look for contacts within structure as given
this.numOperatorsSg = 1;
this.ops = new Matrix4d[1];
this.ops[0] = new Matrix4d(IDENTITY);
}
this.numCells = DEF_NUM_CELLS;
}
/**
* @return true if this CrystalBuilder is NCS-aware.
* @since 5.0.0
*/
public boolean hasNcsOps() {
return chainNcsOps != null;
}
/**
* Set the number of neighboring crystal cells that will be used in the search for contacts
* @param numCells
*/
public void setNumCells(int numCells) {
this.numCells = numCells;
}
private void initialiseVisited() {
visitedCrystalTransforms = new ArrayList<>();
if(this.hasNcsOps()) {
visitedNcsChainPairs = new HashMap<>();
}
}
/**
* Returns the list of unique interfaces that the given Structure has upon
* generation of all crystal symmetry mates. An interface is defined as any pair of chains
* that contact, i.e. for which there is at least a pair of atoms (one from each chain) within
* the default cutoff distance.
* @return
* @see #DEFAULT_INTERFACE_DISTANCE_CUTOFF
*/
public StructureInterfaceList getUniqueInterfaces() {
return getUniqueInterfaces(DEFAULT_INTERFACE_DISTANCE_CUTOFF);
}
/**
* Returns the list of unique interfaces that the given Structure has upon
* generation of all crystal symmetry mates. An interface is defined as any pair of chains
* that contact, i.e. for which there is at least a pair of atoms (one from each chain) within
* the given cutoff distance.
* @param cutoff the distance cutoff for 2 chains to be considered in contact
* @return
*/
public StructureInterfaceList getUniqueInterfaces(double cutoff) {
StructureInterfaceList set = new StructureInterfaceList();
// certain structures in the PDB are not macromolecules (contain no polymeric chains at all), e.g. 1ao2
// with the current mmCIF parsing, those will be empty since purely non-polymeric chains are removed
// see commit e9562781f23da0ebf3547146a307d7edd5741090
if (numPolyChainsAu==0) {
logger.warn("No chains present in the structure! No interfaces will be calculated");
return set;
}
// pass the chainOrigNames map in NCS case so that StructureInterfaceList can deal with original to NCS chain names conversion
if (chainOrigNames!=null) {
set.setChainOrigNamesMap(chainOrigNames);
}
// initialising the visited ArrayList for keeping track of symmetry redundancy
initialiseVisited();
// the isCrystallographic() condition covers 3 cases:
// a) entries with expMethod X-RAY/other diffraction and defined crystalCell (most usual case)
// b) entries with expMethod null but defined crystalCell (e.g. PDB file with CRYST1 record but no expMethod annotation)
// c) entries with expMethod not X-RAY (e.g. NMR) and defined crystalCell (NMR entries do have a dummy CRYST1 record "1 1 1 90 90 90 P1")
// d) isCrystallographic will be false if the structure is crystallographic but the space group was not recognized
calcInterfacesCrystal(set, cutoff);
return set;
}
/**
* Calculate interfaces between original asymmetric unit and neighboring
* whole unit cells, including the original full unit cell i.e. i=0,j=0,k=0
* @param set
* @param cutoff
*/
private void calcInterfacesCrystal(StructureInterfaceList set, double cutoff) {
// initialising debugging vars
long start = -1;
long end = -1;
int trialCount = 0;
int skippedRedundant = 0;
int skippedAUsNoOverlap = 0;
int skippedChainsNoOverlap = 0;
int skippedSelfEquivalent = 0;
// The bounding boxes of all AUs of the unit cell
UnitCellBoundingBox bbGrid = new UnitCellBoundingBox(numOperatorsSg, numPolyChainsAu);;
// we calculate all the bounds of each of the asym units, those will then be reused and translated
bbGrid.setBbs(structure, ops, INCLUDE_HETATOMS);
// if not crystallographic there's no search to do in other cells, only chains within "AU" will be checked
if (!searchBeyondAU) numCells = 0;
boolean verbose = logger.isDebugEnabled();
if (verbose) {
trialCount = 0;
start= System.currentTimeMillis();
int neighbors = (2*numCells+1)*(2*numCells+1)*(2*numCells+1)-1;
int auTrials = (numPolyChainsAu*(numPolyChainsAu-1))/2;
int trials = numPolyChainsAu*numOperatorsSg*numPolyChainsAu*neighbors;
logger.debug("Chain clash trials within original AU: {}", auTrials);
logger.debug(
"Chain clash trials between the original AU and the neighbouring "+neighbors+
" whole unit cells ("+numCells+" neighbours)" +
"(2x"+numPolyChainsAu+"chains x "+numOperatorsSg+"AUs x "+neighbors+"cells) : "+trials);
logger.debug("Total trials: {}", (auTrials+trials));
}
List polyChains = structure.getPolyChains();
for (int a=-numCells;a<=numCells;a++) {
for (int b=-numCells;b<=numCells;b++) {
for (int c=-numCells;c<=numCells;c++) {
Point3i trans = new Point3i(a,b,c);
Vector3d transOrth = new Vector3d(a,b,c);
if (a!=0 || b!=0 || c!=0) {
// we avoid doing the transformation for 0,0,0 (in case it's not crystallographic)
this.crystallographicInfo.getCrystalCell().transfToOrthonormal(transOrth);
}
UnitCellBoundingBox bbGridTrans = bbGrid.getTranslatedBbs(transOrth);
for (int n=0;ni
// we set a flag and do that within the loop below
selfEquivalent = true;
}
StringBuilder builder = null;
if (verbose) builder = new StringBuilder(String.valueOf(tt)).append(" ");
// Now that we know that boxes overlap and operator is not redundant, we have to go to the details
int contactsFound = 0;
for (int j=0;ji)) {
// in case of self equivalency of the operator we can safely skip half of the matrix
skippedSelfEquivalent++;
continue;
}
// special case of original AU, we don't compare a chain to itself
if (n==0 && a==0 && b==0 && c==0 && i==j) continue;
// before calculating the AtomContactSet we check for overlap, then we save putting atoms into the grid
if (!bbGrid.getChainBoundingBox(0,i).overlaps(bbGridTrans.getChainBoundingBox(n,j),cutoff)) {
skippedChainsNoOverlap++;
if (verbose) {
builder.append(".");
}
continue;
}
trialCount++;
// finally we've gone through all short-cuts and the 2 chains seem to be close enough:
// we do the calculation of contacts
Chain chaini = polyChains.get(i);
Chain chainj = polyChains.get(j);
if (n!=0 || a!=0 || b!=0 || c!=0) {
Matrix4d mJCryst = new Matrix4d(ops[n]);
translate(mJCryst, transOrth);
chainj = (Chain)chainj.clone();
Calc.transform(chainj,mJCryst);
}
StructureInterface interf = calcContacts(chaini, chainj, cutoff, tt, builder);
if (interf == null) {
continue;
}
contactsFound++;
if(this.hasNcsOps()) {
StructureInterface interfNcsRef = findNcsRef(interf);
set.addNcsEquivalent(interf,interfNcsRef);
} else {
set.add(interf);
}
}
}
if( verbose ) {
if (a==0 && b==0 && c==0 && n==0)
builder.append(" "+contactsFound+"("+(numPolyChainsAu*(numPolyChainsAu-1))/2+")");
else if (selfEquivalent)
builder.append(" "+contactsFound+"("+(numPolyChainsAu*(numPolyChainsAu+1))/2+")");
else
builder.append(" "+contactsFound+"("+numPolyChainsAu*numPolyChainsAu+")");
logger.debug(builder.toString());
}
}
}
}
}
end = System.currentTimeMillis();
logger.debug("\n{} chain-chain clash trials done. Time {}{}s", trialCount, (end-start), 1000);
logger.debug(" skipped (not overlapping AUs) : {}", skippedAUsNoOverlap);
logger.debug(" skipped (not overlapping chains) : {}", skippedChainsNoOverlap);
logger.debug(" skipped (sym redundant op pairs) : {}", skippedRedundant);
logger.debug(" skipped (sym redundant self op) : {}", skippedSelfEquivalent);
logger.debug("Found {} interfaces.", set.size());
}
/**
* Checks whether given interface is NCS-redundant, i.e., an identical interface between NCS copies of
* these molecules has already been seen, and returns this (reference) interface.
*
* @param interf
* StructureInterface
* @return already seen interface that is NCS-equivalent to interf,
* null if such interface is not found.
*/
private StructureInterface findNcsRef(StructureInterface interf) {
if (!this.hasNcsOps()) {
return null;
}
String chainIName = interf.getMoleculeIds().getFirst();
String iOrigName = chainOrigNames.get(chainIName);
String chainJName = interf.getMoleculeIds().getSecond();
String jOrigName = chainOrigNames.get(chainJName);
Matrix4d mJCryst;
if(this.searchBeyondAU) {
mJCryst = interf.getTransforms().getSecond().getMatTransform();
mJCryst = crystallographicInfo.getCrystalCell().transfToOrthonormal(mJCryst);
} else {
mJCryst = IDENTITY;
}
// Let X1,...Xn be the original coords, before NCS transforms (M1...Mk)
// current chain i: M_i * X_i
// current chain j: Cn * M_j * X_j
// transformation to bring chain j near X_i: M_i^(-1) * Cn * M_j
// transformation to bring chain i near X_j: (Cn * M_j)^(-1) * M_i = (M_i^(-1) * Cn * M_j)^(-1)
Matrix4d mChainIInv = new Matrix4d(chainNcsOps.get(chainIName));
mChainIInv.invert();
Matrix4d mJNcs = new Matrix4d(chainNcsOps.get(chainJName));
Matrix4d j2iNcsOrigin = new Matrix4d(mChainIInv);
j2iNcsOrigin.mul(mJCryst);
//overall transformation to bring current chainj from its NCS origin to i's
j2iNcsOrigin.mul(mJNcs);
//overall transformation to bring current chaini from its NCS origin to j's
Matrix4d i2jNcsOrigin = new Matrix4d(j2iNcsOrigin);
i2jNcsOrigin.invert();
String matchChainIdsIJ = iOrigName + jOrigName;
String matchChainIdsJI = jOrigName + iOrigName;
// same original chain names
Optional matchDirect =
visitedNcsChainPairs.computeIfAbsent(matchChainIdsIJ, k-> new HashMap<>()).entrySet().stream().
map(r->r.getKey()).
filter(r->r.epsilonEquals(j2iNcsOrigin,0.01)).
findFirst();
Matrix4d matchMatrix = matchDirect.orElse(null);
String matchChainIds = matchChainIdsIJ;
if(matchMatrix == null) {
// reversed original chain names with inverted transform
Optional matchInverse =
visitedNcsChainPairs.computeIfAbsent(matchChainIdsJI, k-> new HashMap<>()).entrySet().stream().
map(r->r.getKey()).
filter(r->r.epsilonEquals(i2jNcsOrigin,0.01)).
findFirst();
matchMatrix = matchInverse.orElse(null);
matchChainIds = matchChainIdsJI;
}
StructureInterface matchInterface = null;
if (matchMatrix == null) {
visitedNcsChainPairs.get(matchChainIdsIJ).put(j2iNcsOrigin,interf);
} else {
matchInterface = visitedNcsChainPairs.get(matchChainIds).get(matchMatrix);
}
return matchInterface;
}
private StructureInterface calcContacts(Chain chaini, Chain chainj, double cutoff, CrystalTransform tt, StringBuilder builder) {
// note that we don't consider hydrogens when calculating contacts
AtomContactSet graph = StructureTools.getAtomsInContact(chaini, chainj, cutoff, INCLUDE_HETATOMS);
if (graph.size()>0) {
if (builder != null) builder.append("x");
CrystalTransform transf = new CrystalTransform(this.crystallographicInfo.getSpaceGroup());
StructureInterface interf = new StructureInterface(
StructureTools.getAllAtomArray(chaini), StructureTools.getAllAtomArray(chainj),
chaini.getName(), chainj.getName(),
graph,
transf, tt);
return interf;
} else {
if (builder != null) builder.append("o");
return null;
}
}
private void addVisitedTransform(CrystalTransform tt) {
visitedCrystalTransforms.add(tt);
}
/**
* Checks whether given transformId/translation is symmetry redundant
* Two transformations are symmetry redundant if their matrices (4d) multiplication gives the identity, i.e.
* if one is the inverse of the other.
* @param tt
* @return
*/
private boolean isRedundantTransform(CrystalTransform tt) {
Iterator it = visitedCrystalTransforms.iterator();
while (it.hasNext()) {
CrystalTransform v = it.next();
if (tt.isEquivalent(v)) {
logger.debug("Skipping redundant transformation: "+tt+", equivalent to "+v);
// there's only 1 possible equivalent partner for each visited matrix
// (since the equivalent is its inverse matrix and the inverse matrix is unique)
// thus once the partner has been seen, we don't need to check it ever again
it.remove();
return true;
}
}
return false;
}
public void translate(Matrix4d m, Vector3d translation) {
m.m03 = m.m03+translation.x;
m.m13 = m.m13+translation.y;
m.m23 = m.m23+translation.z;
}
/**
* Apply the NCS operators in the given Structure adding new chains as needed.
* All chains are (re)assigned ids of the form: original_chain_id+ncs_operator_index+{@value #NCS_CHAINID_SUFFIX_CHAR}.
* @param structure
* the structure to expand
* @param chainOrigNames
* new chain names mapped to the original chain names
* @param chainNcsOps
* new chain names mapped to the ncs operators that was used to generate them
* @since 5.0.0
*/
public static void expandNcsOps(Structure structure, Map chainOrigNames, Map chainNcsOps) {
PDBCrystallographicInfo xtalInfo = structure.getCrystallographicInfo();
if (xtalInfo ==null) return;
if (xtalInfo.getNcsOperators()==null || xtalInfo.getNcsOperators().length==0)
return;
List chainsToAdd = new ArrayList<>();
Matrix4d identity = new Matrix4d();
identity.setIdentity();
Matrix4d[] ncsOps = xtalInfo.getNcsOperators();
for (Chain c:structure.getChains()) {
String cOrigId = c.getId();
String cOrigName = c.getName();
for (int iOperator = 0; iOperator < ncsOps.length; iOperator++) {
Matrix4d m = ncsOps[iOperator];
Chain clonedChain = (Chain)c.clone();
String newChainId = cOrigId+(iOperator+1)+NCS_CHAINID_SUFFIX_CHAR;
String newChainName = cOrigName+(iOperator+1)+NCS_CHAINID_SUFFIX_CHAR;
clonedChain.setId(newChainId);
clonedChain.setName(newChainName);
setChainIdsInResidueNumbers(clonedChain, newChainName);
Calc.transform(clonedChain, m);
chainsToAdd.add(clonedChain);
c.getEntityInfo().addChain(clonedChain);
chainOrigNames.put(newChainName,cOrigName);
chainNcsOps.put(newChainName,m);
}
chainNcsOps.put(cOrigName,identity);
chainOrigNames.put(cOrigName,cOrigName);
}
chainsToAdd.forEach(structure::addChain);
}
/**
* Auxiliary method to reset chain ids of residue numbers in a chain.
* Used when cloning chains and resetting their ids: one needs to take care of
* resetting the ids within residue numbers too.
* @param c
* @param newChainName
*/
private static void setChainIdsInResidueNumbers(Chain c, String newChainName) {
for (Group g:c.getAtomGroups()) {
g.setResidueNumber(newChainName, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode());
}
for (Group g:c.getSeqResGroups()) {
if (g.getResidueNumber()==null) continue;
g.setResidueNumber(newChainName, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode());
}
}
}