All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.xtal.CrystalBuilder Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.xtal;


import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.contact.AtomContactSet;
import org.biojava.nbio.structure.contact.StructureInterface;
import org.biojava.nbio.structure.contact.StructureInterfaceList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.vecmath.Matrix4d;
import javax.vecmath.Point3i;
import javax.vecmath.Vector3d;
import java.util.ArrayList;
import java.util.Iterator;



/**
 * A class containing methods to find interfaces in a given crystallographic Structure by
 * reconstructing the crystal lattice through application of symmetry operators
 *
 * @author duarte_j
 *
 */
public class CrystalBuilder {

	// Default number of cell neighbors to try in interface search (in 3 directions of space).
	// In the search, only bounding box overlaps are tried, thus there's not so much overhead in adding
	// more cells. We actually tested it and using numCells from 1 to 10 didn't change runtimes at all.
	// Examples with interfaces in distant neighbor cells:
	//   2nd neighbors: 3hz3, 1wqj, 2de3, 1jcd
	//   3rd neighbors: 3bd3, 1men, 2gkp, 1wui
	//   5th neighbors: 2ahf, 2h2z
	//   6th neighbors: 1was (in fact interfaces appear only at 5th neighbors for it)
	// Maybe this could be avoided by previously translating the given molecule to the first cell,
	// BUT! some bona fide cases exist, e.g. 2d3e: it is properly placed at the origin but the molecule
	// is enormously long in comparison with the dimensions of the unit cell, some interfaces come at the 7th neighbor.
	// After a scan of the whole PDB (Oct 2013) using numCells=50, the highest one was 4jgc with
	// interfaces up to the 11th neighbor. Other high ones (9th neighbors) are 4jbm and 4k3t.
	// We set the default value to 12 based on that (having not seen any difference in runtime)
	public static final int DEF_NUM_CELLS = 12;

	/**
	 * Default maximum distance between two chains to be considered an interface.
	 * @see #getUniqueInterfaces(double)
	 */
	public static final double DEFAULT_INTERFACE_DISTANCE_CUTOFF = 5.5;

	public static final Matrix4d IDENTITY = new Matrix4d(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);


	/**
	 * Whether to consider HETATOMs in contact calculations
	 */
	private static final boolean INCLUDE_HETATOMS = true;

	private Structure structure;
	private PDBCrystallographicInfo crystallographicInfo;
	private int numChainsAu;
	private int numOperatorsSg;

	private static final Logger logger = LoggerFactory.getLogger(CrystalBuilder.class);

	private int numCells;

	private ArrayList visited;

	private boolean isCrystallographic;



	public CrystalBuilder(Structure structure) {
		this.structure = structure;

		this.crystallographicInfo = structure.getCrystallographicInfo();

		this.numChainsAu = structure.getChains().size();
		this.numOperatorsSg = 1;


		if (structure.isCrystallographic()) {

			this.isCrystallographic = true;
			// we need to check space group not null for the cases where the entry is crystallographic but
			// the space group is not a standard one recognized by biojava, e.g. 1mnk (SG: 'I 21')
			if (this.crystallographicInfo.getSpaceGroup()==null) {
				logger.warn("Could not find a space group, will only calculate asymmetric unit interfaces.");
				this.isCrystallographic = false;
			} else {
				this.numOperatorsSg = this.crystallographicInfo.getSpaceGroup().getMultiplicity();
			}
			// we need to check crystal cell not null for the rare cases where the entry is crystallographic but
			// the crystal cell is not given, e.g. 2i68, 2xkm, 4bpq
			if (this.crystallographicInfo.getCrystalCell()==null) {
				logger.warn("Could not find a crystal cell definition, will only calculate asymmetric unit interfaces.");
				this.isCrystallographic = false;
			}

		} else {
			this.isCrystallographic = false;
		}

		this.numCells = DEF_NUM_CELLS;

	}

	/**
	 * Set the number of neighboring crystal cells that will be used in the search for contacts
	 * @param numCells
	 */
	public void setNumCells(int numCells) {
		this.numCells = numCells;
	}

	private void initialiseVisited() {
		visited = new ArrayList();
	}



	/**
	 * Returns the list of unique interfaces that the given Structure has upon
	 * generation of all crystal symmetry mates. An interface is defined as any pair of chains
	 * that contact, i.e. for which there is at least a pair of atoms (one from each chain) within
	 * the default cutoff distance.
	 * @return
	 * @see #DEFAULT_INTERFACE_DISTANCE_CUTOFF
	 */
	public StructureInterfaceList getUniqueInterfaces() {
		return getUniqueInterfaces(DEFAULT_INTERFACE_DISTANCE_CUTOFF);
	}

	/**
	 * Returns the list of unique interfaces that the given Structure has upon
	 * generation of all crystal symmetry mates. An interface is defined as any pair of chains
	 * that contact, i.e. for which there is at least a pair of atoms (one from each chain) within
	 * the given cutoff distance.
	 * @param cutoff the distance cutoff for 2 chains to be considered in contact
	 * @return
	 */
	public StructureInterfaceList getUniqueInterfaces(double cutoff) {


		StructureInterfaceList set = new StructureInterfaceList();

		// certain structures in the PDB are not macromolecules (contain no polymeric chains at all), e.g. 1ao2
		// with the current mmCIF parsing, those will be empty since purely non-polymeric chains are removed
		// see commit e9562781f23da0ebf3547146a307d7edd5741090
		if (structure.getChains().size()==0) {
			logger.warn("No chains present in the structure! No interfaces will be calculated");
			return set;
		}



		// initialising the visited ArrayList for keeping track of symmetry redundancy
		initialiseVisited();



		// the isCrystallographic() condition covers 3 cases:
		// a) entries with expMethod X-RAY/other diffraction and defined crystalCell (most usual case)
		// b) entries with expMethod null but defined crystalCell (e.g. PDB file with CRYST1 record but no expMethod annotation)
		// c) entries with expMethod not X-RAY (e.g. NMR) and defined crystalCell (NMR entries do have a dummy CRYST1 record "1 1 1 90 90 90 P1")
		// d) isCrystallographic will be false if the structure is crystallographic but the space group was not recognized


		calcInterfacesCrystal(set, cutoff);


		return set;
	}

	/**
	 * Calculate interfaces between original asymmetric unit and neighboring
	 * whole unit cells, including the original full unit cell i.e. i=0,j=0,k=0
	 * @param set
	 * @param cutoff
	 */
	private void calcInterfacesCrystal(StructureInterfaceList set, double cutoff) {


		// initialising debugging vars
		long start = -1;
		long end = -1;
		int trialCount = 0;
		int skippedRedundant = 0;
		int skippedAUsNoOverlap = 0;
		int skippedChainsNoOverlap = 0;
		int skippedSelfEquivalent = 0;


		Matrix4d[] ops = null;
		if (isCrystallographic) {
			ops = structure.getCrystallographicInfo().getTransformationsOrthonormal();
		} else {
			ops = new Matrix4d[1];
			ops[0] = new Matrix4d(IDENTITY);
		}

		// The bounding boxes of all AUs of the unit cell
		UnitCellBoundingBox bbGrid = new UnitCellBoundingBox(numOperatorsSg, numChainsAu);;
		// we calculate all the bounds of each of the asym units, those will then be reused and translated
		bbGrid.setBbs(structure, ops, INCLUDE_HETATOMS);


		// if not crystallographic there's no search to do in other cells, only chains within "AU" will be checked
		if (!isCrystallographic) numCells = 0;

		boolean verbose = logger.isDebugEnabled();

		if (verbose) {
			trialCount = 0;
			start= System.currentTimeMillis();
			int neighbors = (2*numCells+1)*(2*numCells+1)*(2*numCells+1)-1;
			int auTrials = (numChainsAu*(numChainsAu-1))/2;
			int trials = numChainsAu*numOperatorsSg*numChainsAu*neighbors;
			logger.debug("Chain clash trials within original AU: "+auTrials);
			logger.debug(
					"Chain clash trials between the original AU and the neighbouring "+neighbors+
					" whole unit cells ("+numCells+" neighbours)" +
					"(2x"+numChainsAu+"chains x "+numOperatorsSg+"AUs x "+neighbors+"cells) : "+trials);
			logger.debug("Total trials: "+(auTrials+trials));
		}


		for (int a=-numCells;a<=numCells;a++) {
			for (int b=-numCells;b<=numCells;b++) {
				for (int c=-numCells;c<=numCells;c++) {

					Point3i trans = new Point3i(a,b,c);
					Vector3d transOrth = new Vector3d(a,b,c);
					if (a!=0 || b!=0 || c!=0)
						// we avoid doing the transformation for 0,0,0 (in case it's not crystallographic)
						this.crystallographicInfo.getCrystalCell().transfToOrthonormal(transOrth);
					UnitCellBoundingBox bbGridTrans = bbGrid.getTranslatedBbs(transOrth);

					for (int n=0;ni
							// we set a flag and do that within the loop below
							selfEquivalent = true;
						}

						StringBuilder builder = null;
						if (verbose) builder = new StringBuilder(tt+" ");

						// Now that we know that boxes overlap and operator is not redundant, we have to go to the details
						int contactsFound = 0;

						for (int j=0;ji)) {
									// in case of self equivalency of the operator we can safely skip half of the matrix
									skippedSelfEquivalent++;
									continue;
								}
								// special case of original AU, we don't compare a chain to itself
								if (n==0 && a==0 && b==0 && c==0 && i==j) continue;

								// before calculating the AtomContactSet we check for overlap, then we save putting atoms into the grid
								if (!bbGrid.getChainBoundingBox(0,i).overlaps(bbGridTrans.getChainBoundingBox(n,j),cutoff)) {
									skippedChainsNoOverlap++;
									if (verbose) {
										builder.append(".");
									}
									continue;
								}
								trialCount++;

								// finally we've gone through all short-cuts and the 2 chains seem to be close enough:
								// we do the calculation of contacts
								Chain chainj = null;
								Chain chaini = structure.getChain(i);

								if (n==0 && a==0 && b==0 && c==0) {
									chainj = structure.getChain(j);
								} else {
									chainj = (Chain)structure.getChain(j).clone();
									Matrix4d m = new Matrix4d(ops[n]);
									translate(m, transOrth);
									Calc.transform(chainj,m);
								}

								StructureInterface interf = calcContacts(chaini, chainj, cutoff, tt, builder);

								if (interf!=null) {
									contactsFound++;
									set.add(interf);
								}
							}
						}

						if( verbose ) {
							if (a==0 && b==0 && c==0 && n==0)
								builder.append(" "+contactsFound+"("+(numChainsAu*(numChainsAu-1))/2+")");
							else if (selfEquivalent)
								builder.append(" "+contactsFound+"("+(numChainsAu*(numChainsAu+1))/2+")");
							else
								builder.append(" "+contactsFound+"("+numChainsAu*numChainsAu+")");

							logger.debug(builder.toString());
						}
					}
				}
			}
		}

		end = System.currentTimeMillis();
		logger.debug("\n"+trialCount+" chain-chain clash trials done. Time "+(end-start)/1000+"s");
		logger.debug("  skipped (not overlapping AUs)       : "+skippedAUsNoOverlap);
		logger.debug("  skipped (not overlapping chains)    : "+skippedChainsNoOverlap);
		logger.debug("  skipped (sym redundant op pairs)    : "+skippedRedundant);
		logger.debug("  skipped (sym redundant self op)     : "+skippedSelfEquivalent);

		logger.debug("Found "+set.size()+" interfaces.");
	}

	private StructureInterface calcContacts(Chain chaini, Chain chainj, double cutoff, CrystalTransform tt, StringBuilder builder) {
		// note that we don't consider hydrogens when calculating contacts
		AtomContactSet graph = StructureTools.getAtomsInContact(chaini, chainj, cutoff, INCLUDE_HETATOMS);

		if (graph.size()>0) {
			if (builder != null) builder.append("x");

			CrystalTransform transf = new CrystalTransform(this.crystallographicInfo.getSpaceGroup());
			StructureInterface interf = new StructureInterface(
					StructureTools.getAllAtomArray(chaini), StructureTools.getAllAtomArray(chainj),
					chaini.getChainID(), chainj.getChainID(),
					graph,
					transf, tt);

			return interf;

		} else {
			if (builder != null) builder.append("o");
			return null;
		}
	}

	private void addVisited(CrystalTransform tt) {
		visited.add(tt);
	}

	/**
	 * Checks whether given transformId/translation is symmetry redundant
	 * Two transformations are symmetry redundant if their matrices (4d) multiplication gives the identity, i.e.
	 * if one is the inverse of the other.
	 * @param tt
	 * @return
	 */
	private boolean isRedundant(CrystalTransform tt) {

		Iterator it = visited.iterator();
		while (it.hasNext()) {
			CrystalTransform v = it.next();

			if (tt.isEquivalent(v)) {

				logger.debug("Skipping redundant transformation: "+tt+", equivalent to "+v);

				// there's only 1 possible equivalent partner for each visited matrix
				// (since the equivalent is its inverse matrix and the inverse matrix is unique)
				// thus once the partner has been seen, we don't need to check it ever again
				it.remove();

				return true;
			}
		}

		return false;
	}

	public void translate(Matrix4d m, Vector3d translation) {
		m.m03 = m.m03+translation.x;
		m.m13 = m.m13+translation.y;
		m.m23 = m.m23+translation.z;
	}

//	/**
//	 * If NCS operators are given in MTRIX records, a bigger AU has to be constructed based on those.
//	 * Later they have to be removed with {@link #removeExtraChains()}
//	 */
//	private void constructFullStructure() {
//
//		if (this.crystallographicInfo.getNcsOperators()==null ||
//			this.crystallographicInfo.getNcsOperators().length==0) {
//			// normal case: nothing to do
//			return;
//		}
//
//		// first we store the original chains in a new list to be able to restore the structure to its original state afterwards
//		origChains = new ArrayList();
//		for (Chain chain:structure.getChains()) {
//			origChains.add(chain);
//		}
//
//		// if we are here, it means that the NCS operators exist and we have to complete the given AU by applying them
//		Matrix4d[] ncsOps = this.crystallographicInfo.getNcsOperators();
//
//		if (verbose)
//			System.out.println(ncsOps.length+" NCS operators found, generating new AU...");
//
//
//		for (int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy