org.biojava.nbio.structure.asa.AsaCalculator Maven / Gradle / Ivy
Show all versions of biojava-structure Show documentation
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.asa;
import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.contact.Contact;
import org.biojava.nbio.structure.contact.Grid;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.vecmath.Point3d;
import javax.vecmath.Vector3d;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* Class to calculate Accessible Surface Areas based on
* the rolling ball algorithm by Shrake and Rupley.
*
* The code is adapted from a python implementation at http://boscoh.com/protein/asapy
* (now source is available at https://github.com/boscoh/asa).
* Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog.
*
* A few optimizations come from Eisenhaber et al, J Comp Chemistry 1994
* (https://onlinelibrary.wiley.com/doi/epdf/10.1002/jcc.540160303)
*
* See
* Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms.
* Lysozyme and Insulin." JMB (1973) 79:351-371.
* Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of
* Static Accessibility" JMB (1971) 55:379-400
*
* @author Jose Duarte
*/
public class AsaCalculator {
private static final Logger logger = LoggerFactory.getLogger(AsaCalculator.class);
/**
* The default value for number of sphere points to sample.
* See this paper for a nice study on the effect of this parameter: https://f1000research.com/articles/5-189/v1
*/
public static final int DEFAULT_N_SPHERE_POINTS = 1000;
public static final double DEFAULT_PROBE_SIZE = 1.4;
public static final int DEFAULT_NTHREADS = 1;
private static final boolean DEFAULT_USE_SPATIAL_HASHING = true;
// Chothia's amino acid atoms vdw radii
public static final double TRIGONAL_CARBON_VDW = 1.76;
public static final double TETRAHEDRAL_CARBON_VDW = 1.87;
public static final double TRIGONAL_NITROGEN_VDW = 1.65;
public static final double TETRAHEDRAL_NITROGEN_VDW = 1.50;
public static final double SULFUR_VDW = 1.85;
public static final double OXIGEN_VDW = 1.40;
// Chothia's nucleotide atoms vdw radii
public static final double NUC_CARBON_VDW = 1.80;
public static final double NUC_NITROGEN_VDW = 1.60;
public static final double PHOSPHOROUS_VDW = 1.90;
private class AsaCalcWorker implements Runnable {
private final int i;
private final double[] asas;
private AsaCalcWorker(int i, double[] asas) {
this.i = i;
this.asas = asas;
}
@Override
public void run() {
asas[i] = calcSingleAsa(i);
}
}
static class IndexAndDistance {
final int index;
final double dist;
IndexAndDistance(int index, double dist) {
this.index = index;
this.dist = dist;
}
}
private final Point3d[] atomCoords;
private final Atom[] atoms;
private final double[] radii;
private final double probe;
private final int nThreads;
private Vector3d[] spherePoints;
private double cons;
private IndexAndDistance[][] neighborIndices;
private boolean useSpatialHashingForNeighbors;
/**
* Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()}
* or {@link #getGroupAsas()} to calculate the ASAs
* Only non-Hydrogen atoms are considered in the calculation.
* @param structure the structure, all non-H atoms will be used
* @param probe the probe size
* @param nSpherePoints the number of points to be used in generating the spherical
* dot-density, the more points the more accurate (and slower) calculation
* @param nThreads the number of parallel threads to use for the calculation
* @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to
* NACCESS' -h option
*/
public AsaCalculator(Structure structure, double probe, int nSpherePoints, int nThreads, boolean hetAtoms) {
this.atoms = StructureTools.getAllNonHAtomArray(structure, hetAtoms);
this.atomCoords = Calc.atomsToPoints(atoms);
this.probe = probe;
this.nThreads = nThreads;
this.useSpatialHashingForNeighbors = DEFAULT_USE_SPATIAL_HASHING;
// initialising the radii by looking them up through AtomRadii
radii = new double[atomCoords.length];
for (int i=0;i asas = new TreeMap<>();
double[] asasPerAtom = calculateAsas();
for (int i=0;i thisNbIndices = new ArrayList<>(initialCapacity);
for (int i = 0; i < atomCoords.length; i++) {
if (i == k) continue;
double dist = atomCoords[i].distance(atomCoords[k]);
if (dist < radius + radii[i]) {
thisNbIndices.add(new IndexAndDistance(i, dist));
}
}
IndexAndDistance[] indicesArray = thisNbIndices.toArray(new IndexAndDistance[0]);
nbsIndices[k] = indicesArray;
}
return nbsIndices;
}
/**
* Returns the 2-dimensional array with neighbor indices for every atom,
* using spatial hashing to avoid all to all distance calculation.
* @return 2-dimensional array of size: n_atoms x n_neighbors_per_atom
*/
IndexAndDistance[][] findNeighborIndicesSpatialHashing() {
// looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
int initialCapacity = 60;
List contactList = calcContacts();
Map> indices = new HashMap<>(atomCoords.length);
for (Contact contact : contactList) {
// note contacts are stored 1-way only, with j>i
int i = contact.getI();
int j = contact.getJ();
List iIndices;
List jIndices;
if (!indices.containsKey(i)) {
iIndices = new ArrayList<>(initialCapacity);
indices.put(i, iIndices);
} else {
iIndices = indices.get(i);
}
if (!indices.containsKey(j)) {
jIndices = new ArrayList<>(initialCapacity);
indices.put(j, jIndices);
} else {
jIndices = indices.get(j);
}
double radius = radii[i] + probe + probe;
double dist = contact.getDistance();
if (dist < radius + radii[j]) {
iIndices.add(new IndexAndDistance(j, dist));
jIndices.add(new IndexAndDistance(i, dist));
}
}
// convert map to array for fast access
IndexAndDistance[][] nbsIndices = new IndexAndDistance[atomCoords.length][];
for (Map.Entry> entry : indices.entrySet()) {
List list = entry.getValue();
IndexAndDistance[] indexAndDistances = list.toArray(new IndexAndDistance[0]);
nbsIndices[entry.getKey()] = indexAndDistances;
}
// important: some atoms might have no neighbors at all: we need to initialise to empty arrays
for (int i=0; i calcContacts() {
if (atomCoords.length == 0)
return new ArrayList<>();
double maxRadius = 0;
OptionalDouble optionalDouble = Arrays.stream(radii).max();
if (optionalDouble.isPresent())
maxRadius = optionalDouble.getAsDouble();
double cutoff = maxRadius + maxRadius + probe + probe;
logger.debug("Max radius is {}, cutoff is {}", maxRadius, cutoff);
Grid grid = new Grid(cutoff);
grid.addCoords(atomCoords);
return grid.getIndicesContacts();
}
private double calcSingleAsa(int i) {
Point3d atom_i = atomCoords[i];
int n_neighbor = neighborIndices[i].length;
IndexAndDistance[] neighbor_indices = neighborIndices[i];
// Sorting by closest to farthest away neighbors achieves faster runtimes when checking for occluded
// sphere sample points below. This follows the ideas exposed in
// Eisenhaber et al, J Comp Chemistry 1994 (https://onlinelibrary.wiley.com/doi/epdf/10.1002/jcc.540160303)
// This is essential for performance. In my tests this brings down the number of occlusion checks in loop below to
// an average of n_sphere_points/10 per atom i, producing ~ x4 performance gain overall
Arrays.sort(neighbor_indices, Comparator.comparingDouble(o -> o.dist));
double radius_i = probe + radii[i];
int n_accessible_point = 0;
// purely for debugging
int[] numDistsCalced = null;
if (logger.isDebugEnabled()) numDistsCalced = new int[n_neighbor];
// now we precalculate anything depending only on i,j in equation 3 in Eisenhaber 1994
double[] sqRadii = new double[n_neighbor];
Vector3d[] aj_minus_ais = new Vector3d[n_neighbor];
for (int nbArrayInd =0; nbArrayInd sqRadii[nbArrayInd]) {
is_accessible = false;
break;
}
}
if (is_accessible) {
n_accessible_point++;
}
}
// purely for debugging
if (numDistsCalced!=null) {
int sum = 0;
for (int numDistCalcedForJ : numDistsCalced) sum += numDistCalcedForJ;
logger.debug("Number of sample points distances calculated for neighbors of i={} : average {}, all {}", i, (double) sum / (double) n_neighbor, numDistsCalced);
}
return cons*n_accessible_point*radius_i*radius_i;
}
/**
* Gets the radius for given amino acid and atom
* @param amino
* @param atom
* @return
*/
private static double getRadiusForAmino(AminoAcid amino, Atom atom) {
if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius();
// some unusual entries (e.g. 1tes) contain Deuterium atoms in standard aminoacids
if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius();
String atomCode = atom.getName();
char aa = amino.getAminoType();
// here we use the values that Chothia gives in his paper (as NACCESS does)
if (atom.getElement()==Element.O) {
return OXIGEN_VDW;
}
else if (atom.getElement()==Element.S) {
return SULFUR_VDW;
}
else if (atom.getElement()==Element.N) {
if ("NZ".equals(atomCode)) return TETRAHEDRAL_NITROGEN_VDW; // tetrahedral Nitrogen
return TRIGONAL_NITROGEN_VDW; // trigonal Nitrogen
}
else if (atom.getElement()==Element.C) { // it must be a carbon
if ("C".equals(atomCode) ||
"CE1".equals(atomCode) || "CE2".equals(atomCode) || "CE3".equals(atomCode) ||
"CH2".equals(atomCode) ||
"CZ".equals(atomCode) || "CZ2".equals(atomCode) || "CZ3".equals(atomCode)) {
return TRIGONAL_CARBON_VDW; // trigonal Carbon
}
else if ("CA".equals(atomCode) || "CB".equals(atomCode) ||
"CE".equals(atomCode) ||
"CG1".equals(atomCode) || "CG2".equals(atomCode)) {
return TETRAHEDRAL_CARBON_VDW; // tetrahedral Carbon
}
// the rest of the cases (CD, CD1, CD2, CG) depend on amino acid
else {
switch (aa) {
case 'F':
case 'W':
case 'Y':
case 'H':
case 'D':
case 'N':
return TRIGONAL_CARBON_VDW;
case 'P':
case 'K':
case 'R':
case 'M':
case 'I':
case 'L':
return TETRAHEDRAL_CARBON_VDW;
case 'Q':
case 'E':
if ("CD".equals(atomCode)) return TRIGONAL_CARBON_VDW;
else if ("CG".equals(atomCode)) return TETRAHEDRAL_CARBON_VDW;
default:
logger.info("Unexpected carbon atom {} for aminoacid {}, assigning its standard vdw radius", atomCode, aa);
return Element.C.getVDWRadius();
}
}
// not any of the expected atoms
} else {
// non standard aas, (e.g. MSE, LLP) will always have this problem,
logger.debug("Unexpected atom {} for aminoacid {} ({}), assigning its standard vdw radius", atomCode, aa, amino.getPDBName());
return atom.getElement().getVDWRadius();
}
}
/**
* Gets the radius for given nucleotide atom
* @param atom
* @return
*/
private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) {
if (atom.getElement().equals(Element.H)) return Element.H.getVDWRadius();
if (atom.getElement().equals(Element.D)) return Element.D.getVDWRadius();
if (atom.getElement()==Element.C) return NUC_CARBON_VDW;
if (atom.getElement()==Element.N) return NUC_NITROGEN_VDW;
if (atom.getElement()==Element.P) return PHOSPHOROUS_VDW;
if (atom.getElement()==Element.O) return OXIGEN_VDW;
logger.info("Unexpected atom "+atom.getName()+" for nucleotide "+nuc.getPDBName()+", assigning its standard vdw radius");
return atom.getElement().getVDWRadius();
}
/**
* Gets the van der Waals radius of the given atom following the values defined by
* Chothia (1976) J.Mol.Biol.105,1-14
* NOTE: the vdw values defined by the paper assume no Hydrogens and thus "inflates"
* slightly the heavy atoms to account for Hydrogens. Thus this method cannot be used
* in a structure that contains Hydrogens!
*
* If atom is neither part of a nucleotide nor of a standard aminoacid,
* the default vdw radius for the element is returned. If atom is of
* unknown type (element) the vdw radius of {@link Element().N} is returned
*
* @param atom
* @return
*/
public static double getRadius(Atom atom) {
if (atom.getElement()==null) {
logger.warn("Unrecognised atom "+atom.getName()+" with serial "+atom.getPDBserial()+
", assigning the default vdw radius (Nitrogen vdw radius).");
return Element.N.getVDWRadius();
}
Group res = atom.getGroup();
if (res==null) {
logger.warn("Unknown parent residue for atom "+atom.getName()+" with serial "+
atom.getPDBserial()+", assigning its default vdw radius");
return atom.getElement().getVDWRadius();
}
GroupType type = res.getType();
if (type == GroupType.AMINOACID) return getRadiusForAmino(((AminoAcid)res), atom);
if (type == GroupType.NUCLEOTIDE) return getRadiusForNucl((NucleotideImpl)res,atom);
return atom.getElement().getVDWRadius();
}
}