All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.asa.AsaCalculator Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.asa;

import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.contact.Contact;
import org.biojava.nbio.structure.contact.Grid;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.vecmath.Point3d;
import javax.vecmath.Vector3d;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;


/**
 * Class to calculate Accessible Surface Areas based on
 * the rolling ball algorithm by Shrake and Rupley.
 * 

* The code is adapted from a python implementation at http://boscoh.com/protein/asapy * (now source is available at https://github.com/boscoh/asa). * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog. *

* A few optimizations come from Eisenhaber et al, J Comp Chemistry 1994 * (https://onlinelibrary.wiley.com/doi/epdf/10.1002/jcc.540160303) *

* See * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms. * Lysozyme and Insulin." JMB (1973) 79:351-371. * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of * Static Accessibility" JMB (1971) 55:379-400 * * @author Jose Duarte */ public class AsaCalculator { private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class); /** * The default value for number of sphere points to sample. * See this paper for a nice study on the effect of this parameter: https://f1000research.com/articles/5-189/v1 */ public static final int DEFAULT_N_SPHERE_POINTS = 1000; public static final double DEFAULT_PROBE_SIZE = 1.4; public static final int DEFAULT_NTHREADS = 1; private static final boolean DEFAULT_USE_SPATIAL_HASHING = true; // Chothia's amino acid atoms vdw radii public static final double TRIGONAL_CARBON_VDW = 1.76; public static final double TETRAHEDRAL_CARBON_VDW = 1.87; public static final double TRIGONAL_NITROGEN_VDW = 1.65; public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50; public static final double SULFUR_VDW = 1.85; public static final double OXIGEN_VDW = 1.40; // Chothia's nucleotide atoms vdw radii public static final double NUC_CARBON_VDW = 1.80; public static final double NUC_NITROGEN_VDW = 1.60; public static final double PHOSPHOROUS_VDW = 1.90; private class AsaCalcWorker implements Runnable { private final int i; private final double[] asas; private AsaCalcWorker(int i, double[] asas) { this.i = i; this.asas = asas; } @Override public void run() { asas[i] = calcSingleAsa(i); } } static class IndexAndDistance { final int index; final double dist; IndexAndDistance(int index, double dist) { this.index = index; this.dist = dist; } } private final Point3d[] atomCoords; private final Atom[] atoms; private final double[] radii; private final double probe; private final int nThreads; private Vector3d[] spherePoints; private double cons; private IndexAndDistance[][] neighborIndices; private boolean useSpatialHashingForNeighbors; /** * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()} * or {@link #getGroupAsas()} to calculate the ASAs * Only non-Hydrogen atoms are considered in the calculation. * @param structure the structure, all non-H atoms will be used * @param probe the probe size * @param nSpherePoints the number of points to be used in generating the spherical * dot-density, the more points the more accurate (and slower) calculation * @param nThreads the number of parallel threads to use for the calculation * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to * NACCESS' -h option */ public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) { this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms); this.atomCoords = Calc.atomsToPoints(atoms); this.probe = probe; this.nThreads = nThreads; this.useSpatialHashingForNeighbors = DEFAULT_USE_SPATIAL_HASHING; // initialising the radii by looking them up through AtomRadii radii = new double[atomCoords.length]; for (int i=0;i asas = new TreeMap<>(); double[] asasPerAtom = calculateAsas(); for (int i=0;i thisNbIndices = new ArrayList<>(initialCapacity); for (int i = 0; i < atomCoords.length; i++) { if (i == k) continue; double dist = atomCoords[i].distance(atomCoords[k]); if (dist < radius + radii[i]) { thisNbIndices.add(new IndexAndDistance(i, dist)); } } IndexAndDistance[] indicesArray = thisNbIndices.toArray(new IndexAndDistance[0]); nbsIndices[k] = indicesArray; } return nbsIndices; } /** * Returns the 2-dimensional array with neighbor indices for every atom, * using spatial hashing to avoid all to all distance calculation. * @return 2-dimensional array of size: n_atoms x n_neighbors_per_atom */ IndexAndDistance[][] findNeighborIndicesSpatialHashing() { // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30 int initialCapacity = 60; List contactList = calcContacts(); Map> indices = new HashMap<>(atomCoords.length); for (Contact contact : contactList) { // note contacts are stored 1-way only, with j>i int i = contact.getI(); int j = contact.getJ(); List iIndices; List jIndices; if (!indices.containsKey(i)) { iIndices = new ArrayList<>(initialCapacity); indices.put(i, iIndices); } else { iIndices = indices.get(i); } if (!indices.containsKey(j)) { jIndices = new ArrayList<>(initialCapacity); indices.put(j, jIndices); } else { jIndices = indices.get(j); } double radius = radii[i] + probe + probe; double dist = contact.getDistance(); if (dist < radius + radii[j]) { iIndices.add(new IndexAndDistance(j, dist)); jIndices.add(new IndexAndDistance(i, dist)); } } // convert map to array for fast access IndexAndDistance[][] nbsIndices = new IndexAndDistance[atomCoords.length][]; for (Map.Entry> entry : indices.entrySet()) { List list = entry.getValue(); IndexAndDistance[] indexAndDistances = list.toArray(new IndexAndDistance[0]); nbsIndices[entry.getKey()] = indexAndDistances; } // important: some atoms might have no neighbors at all: we need to initialise to empty arrays for (int i=0; i calcContacts() { if (atomCoords.length == 0) return new ArrayList<>(); double maxRadius = 0; OptionalDouble optionalDouble = Arrays.stream(radii).max(); if (optionalDouble.isPresent()) maxRadius = optionalDouble.getAsDouble(); double cutoff = maxRadius + maxRadius + probe + probe; logger.debug("Max radius is {}, cutoff is {}", maxRadius, cutoff); Grid grid = new Grid(cutoff); grid.addCoords(atomCoords); return grid.getIndicesContacts(); } private double calcSingleAsa(int i) { Point3d atom_i = atomCoords[i]; int n_neighbor = neighborIndices[i].length; IndexAndDistance[] neighbor_indices = neighborIndices[i]; // Sorting by closest to farthest away neighbors achieves faster runtimes when checking for occluded // sphere sample points below. This follows the ideas exposed in // Eisenhaber et al, J Comp Chemistry 1994 (https://onlinelibrary.wiley.com/doi/epdf/10.1002/jcc.540160303) // This is essential for performance. In my tests this brings down the number of occlusion checks in loop below to // an average of n_sphere_points/10 per atom i, producing ~ x4 performance gain overall Arrays.sort(neighbor_indices, Comparator.comparingDouble(o -> o.dist)); double radius_i = probe + radii[i]; int n_accessible_point = 0; // purely for debugging int[] numDistsCalced = null; if (logger.isDebugEnabled()) numDistsCalced = new int[n_neighbor]; // now we precalculate anything depending only on i,j in equation 3 in Eisenhaber 1994 double[] sqRadii = new double[n_neighbor]; Vector3d[] aj_minus_ais = new Vector3d[n_neighbor]; for (int nbArrayInd =0; nbArrayInd sqRadii[nbArrayInd]) { is_accessible = false; break; } } if (is_accessible) { n_accessible_point++; } } // purely for debugging if (numDistsCalced!=null) { int sum = 0; for (int numDistCalcedForJ : numDistsCalced) sum += numDistCalcedForJ; logger.debug("Number of sample points distances calculated for neighbors of i={} : average {}, all {}", i, (double) sum / (double) n_neighbor, numDistsCalced); } return cons*n_accessible_point*radius_i*radius_i; } /** * Gets the radius for given amino acid and atom * @param amino * @param atom * @return */ private static double getRadiusForAmino(AminoAcid amino, Atom atom) { if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); // some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); String atomCode = atom.getName(); char aa = amino.getAminoType(); // here we use the values that Chothia gives in his paper (as NACCESS does) if (atom.getElement()==Element.O) { return OXIGEN_VDW; } else if (atom.getElement()==Element.S) { return SULFUR_VDW; } else if (atom.getElement()==Element.N) { if ("NZ".equals(atomCode)) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen return TRIGONAL_NITROGEN_VDW; // trigonal Nitrogen } else if (atom.getElement()==Element.C) { // it must be a carbon if ("C".equals(atomCode) || "CE1".equals(atomCode) || "CE2".equals(atomCode) || "CE3".equals(atomCode) || "CH2".equals(atomCode) || "CZ".equals(atomCode) || "CZ2".equals(atomCode) || "CZ3".equals(atomCode)) { return TRIGONAL_CARBON_VDW; // trigonal Carbon } else if ("CA".equals(atomCode) || "CB".equals(atomCode) || "CE".equals(atomCode) || "CG1".equals(atomCode) || "CG2".equals(atomCode)) { return TETRAHEDRAL_CARBON_VDW; // tetrahedral Carbon } // the rest of the cases (CD, CD1, CD2, CG) depend on amino acid else { switch (aa) { case 'F': case 'W': case 'Y': case 'H': case 'D': case 'N': return TRIGONAL_CARBON_VDW; case 'P': case 'K': case 'R': case 'M': case 'I': case 'L': return TETRAHEDRAL_CARBON_VDW; case 'Q': case 'E': if ("CD".equals(atomCode)) return TRIGONAL_CARBON_VDW; else if ("CG".equals(atomCode)) return TETRAHEDRAL_CARBON_VDW; default: logger.info("Unexpected carbon atom {} for aminoacid {}, assigning its standard vdw radius", atomCode, aa); return Element.C.getVDWRadius(); } } // not any of the expected atoms } else { // non standard aas, (e.g. MSE, LLP) will always have this problem, logger.debug("Unexpected atom {} for aminoacid {} ({}), assigning its standard vdw radius", atomCode, aa, amino.getPDBName()); return atom.getElement().getVDWRadius(); } } /** * Gets the radius for given nucleotide atom * @param atom * @return */ private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) { if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius(); if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius(); if (atom.getElement()==Element.C) return NUC_CARBON_VDW; if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW; if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW; if (atom.getElement()==Element.O) return OXIGEN_VDW; logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius"); return atom.getElement().getVDWRadius(); } /** * Gets the van der Waals radius of the given atom following the values defined by * Chothia (1976) J.Mol.Biol.105,1-14 * NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates" * slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used * in a structure that contains Hydrogens! * * If atom is neither part of a nucleotide nor of a standard aminoacid, * the default vdw radius for the element is returned. If atom is of * unknown type (element) the vdw radius of {@link Element().N} is returned * * @param atom * @return */ public static double getRadius(Atom atom) { if (atom.getElement()==null) { logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+ ", assigning the default vdw radius (Nitrogen vdw radius)."); return Element.N.getVDWRadius(); } Group res = atom.getGroup(); if (res==null) { logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+ atom.getPDBserial()+", assigning its default vdw radius"); return atom.getElement().getVDWRadius(); } GroupType type = res.getType(); if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom); if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom); return atom.getElement().getVDWRadius(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy