org.biojava.nbio.structure.align.quaternary.QsAlign Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
There is a newer version: 7.1.3
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.align.quaternary;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Calc;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.multiple.Block;
import org.biojava.nbio.structure.align.multiple.BlockImpl;
import org.biojava.nbio.structure.align.multiple.BlockSet;
import org.biojava.nbio.structure.align.multiple.BlockSetImpl;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl;
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl;
import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer;
import org.biojava.nbio.structure.cluster.Subunit;
import org.biojava.nbio.structure.cluster.SubunitCluster;
import org.biojava.nbio.structure.cluster.SubunitClusterer;
import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
import org.biojava.nbio.structure.cluster.SubunitExtractor;
import org.biojava.nbio.structure.contact.Pair;
import org.biojava.nbio.structure.geometry.SuperPositions;
import org.biojava.nbio.structure.geometry.UnitQuaternions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Quaternary Structure Alignment (QS-Align). The algorithm takes as input two
 * protein structures at the quaternary structure level (multiple chains or
 * subunits) and calculates the equivalent subunit matching and a residue-based
 * alignment, together with usual alignment quality scores.
 *
 * @author Aleix Lafita
 * @since 5.0.0
 *
 */
public class QsAlign {

	private static final Logger logger = LoggerFactory.getLogger(QsAlign.class);

	public static QsAlignResult align(Structure s1, Structure s2,
			SubunitClustererParameters cParams, QsAlignParameters aParams)
			throws StructureException {
		return align(
				SubunitExtractor.extractSubunits(s1,
						cParams.getAbsoluteMinimumSequenceLength(),
						cParams.getMinimumSequenceLengthFraction(),
						cParams.getMinimumSequenceLength()),
				SubunitExtractor.extractSubunits(s2,
						cParams.getAbsoluteMinimumSequenceLength(),
						cParams.getMinimumSequenceLengthFraction(),
						cParams.getMinimumSequenceLength()), cParams, aParams);
	}

	public static QsAlignResult align(List s1, List s2,
			SubunitClustererParameters cParams, QsAlignParameters aParams)
			throws StructureException {

		QsAlignResult result = new QsAlignResult(s1, s2);

		// SETP 1: cluster each group of subunits O(N^2*L^2) - intra

		List c1 = SubunitClusterer.cluster(s1, cParams).getClusters();
		List c2 = SubunitClusterer.cluster(s2, cParams).getClusters();

		// STEP 2: match each subunit cluster between groups O(N^2*L^2) - inter
		Map clusterMap = new HashMap<>();
		for (int i = 0; i < c1.size(); i++) {
			for (int j = 0; j < c2.size(); j++) {

				if (clusterMap.containsKey(i))
					break;
				if (clusterMap.containsValue(j))
					continue;

				// Use structural alignment to match the subunit clusters
				if (c1.get(i).mergeStructure(c2.get(j),cParams)) {
					clusterMap.put(i, j);
				}
			}
		}

		logger.info("Cluster Map: " + clusterMap.toString());
		result.setClusters(c1);

		// STEP 3: Align the assemblies for each cluster match O(N^2*L)
		for (int globalKey : clusterMap.keySet()) {

			// Obtain the clusters
			SubunitCluster clust1 = c1.get(globalKey);
			SubunitCluster clust2 = c2.get(clusterMap.get(globalKey));

			// Take a cluster match as reference
			int index1 = 0;
			int index2 = clust1.size() - clust2.size();
			Map subunitMap = new HashMap<>();
			subunitMap.put(index1, index2);

			// Map cluster id to their subunit matching
			Map> clustSubunitMap = new HashMap<>();
			clustSubunitMap.put(globalKey, subunitMap);

			// Change order of key set so that globalKey is first
			List keySet = new ArrayList<>(clusterMap.keySet());
			keySet.remove((Integer) globalKey);
			keySet.add(0, globalKey);

			for (int key : clusterMap.keySet()) {

				// Recover subunitMap if it is the reference, new one otherwise
				if (key == globalKey)
					subunitMap = clustSubunitMap.get(key);
				else
					subunitMap = new HashMap<>();

				// Obtain the clusters of each subunit group
				clust1 = c1.get(key);
				clust2 = c2.get(clusterMap.get(key));

				// Get the initial subunit indices of each group
				index1 = 0;
				index2 = clust1.size() - clust2.size();

				for (int i = 0; i < index2; i++) {
					for (int j = index2; j < clust1.size(); j++) {

						if (subunitMap.containsKey(i))
							break;
						if (subunitMap.containsValue(j))
							continue;

						// Obtain cumulative transformation matrix
						Matrix4d transform = getTransformForClusterSubunitMap(
								c1, clustSubunitMap);

						// Obtain Atom arrays of the subunit pair to match
						Atom[] atoms1 = clust1.getAlignedAtomsSubunit(i);
						Atom[] atoms2 = clust1.getAlignedAtomsSubunit(j);

						// Obtain centroids and transform the second
						Atom centr1 = Calc.getCentroid(atoms1);
						Atom centr2 = Calc.getCentroid(atoms2);
						Calc.transform(centr2, transform);

						// 1- Check that the distance fulfills maximum
						double dCentroid = Calc.getDistance(centr1, centr2);
						if (dCentroid > aParams.getdCutoff()) {
							logger.debug(String.format("Subunit matching %d "
									+ "vs %d of cluster %d could not be "
									+ "matched, because centroid distance is "
									+ "%.2f", index1, index2, key, dCentroid));
							continue;
						}

						// Transform coordinates of second
						Atom[] atoms2c = StructureTools.cloneAtomArray(atoms2);
						Calc.transform(atoms2c, transform);

						// 2- Check the orientation metric condition
						double qOrient = UnitQuaternions.orientationAngle(
								Calc.atomsToPoints(atoms1),
								Calc.atomsToPoints(atoms2c), false);
						qOrient = Math.min(Math.abs(2*Math.PI - qOrient), qOrient);
						if (qOrient > aParams.getMaxOrientationAngle()) {
							logger.debug(String.format("Subunit matching %d "
									+ "vs %d of cluster %d could not be "
									+ "matched, because orientation metric is "
									+ "%.2f", i, j, key, qOrient));
							continue;
						}

						// 3- Check the RMSD condition
						double rmsd = Calc.rmsd(atoms1, atoms2c);
						if (rmsd > aParams.getMaxRmsd()) {
							logger.debug(String.format("Subunit matching %d "
									+ "vs %d of cluster %d could not be "
									+ "matched, because RMSD is %.2f", i,
									j, key, rmsd));
							continue;
						}

						logger.info(String.format("Subunit matching %d vs %d"
								+ " of cluster %d with centroid distance %.2f"
								+ ", orientation metric %.2f and RMSD %.2f",
								i, j, key, dCentroid, qOrient, rmsd));

						subunitMap.put(i, j);
					}
				}

				clustSubunitMap.put(key, subunitMap);
			}

			logger.info("Cluster Subunit Map: " + clustSubunitMap.toString());

			// Unfold the nested map into subunit map and alignment
			subunitMap = new HashMap<>();
			List alignRes1 = new ArrayList<>();
			List alignRes2 = new ArrayList<>();
			List atomArray1 = new ArrayList<>();
			List atomArray2 = new ArrayList<>();

			for (int key : clustSubunitMap.keySet()) {

				// Obtain the cluster and the alignment in it
				SubunitCluster cluster = c1.get(key);
				List> clusterEqrs = cluster
						.getMultipleAlignment().getBlock(0).getAlignRes();

				for (Entry pair : clustSubunitMap.get(key)
						.entrySet()) {

					int i = pair.getKey();
					int j = pair.getValue();

					// Obtain the indices of the original Subunit Lists
					int orig1 = s1.indexOf(cluster.getSubunits().get(i));
					int orig2 = s2.indexOf(cluster.getSubunits().get(j));

					// Append rescaled aligned residue indices
					for (Integer eqr : clusterEqrs.get(i))
						alignRes1.add(eqr + atomArray1.size());
					for (Integer eqr : clusterEqrs.get(j))
						alignRes2.add(eqr + atomArray2.size());

					// Apend atoms to the arrays
					atomArray1.addAll(Arrays.asList(s1.get(orig1)
							.getRepresentativeAtoms()));
					atomArray2.addAll(Arrays.asList(s2.get(orig2)
							.getRepresentativeAtoms()));

					subunitMap.put(orig1, orig2);
				}
			}

			// Evaluate the goodness of the match with an alignment object
			MultipleAlignment msa = new MultipleAlignmentImpl();
			msa.setEnsemble(new MultipleAlignmentEnsembleImpl());
			msa.getEnsemble().setAtomArrays(
					Arrays.asList(new Atom[][] {
							atomArray1.toArray(new Atom[atomArray1.size()]),
							atomArray2.toArray(new Atom[atomArray2.size()]) }));

			// Fill in the alignment information
			BlockSet bs = new BlockSetImpl(msa);
			Block b = new BlockImpl(bs);
			List> alignRes = new ArrayList<>(2);
			alignRes.add(alignRes1);
			alignRes.add(alignRes2);
			b.setAlignRes(alignRes);

			// Fill in the transformation matrices
			new ReferenceSuperimposer().superimpose(msa);

			// Calculate some scores
			MultipleAlignmentScorer.calculateScores(msa);

			// If it is the best match found so far store it
			if (subunitMap.size() > result.getSubunitMap().size()) {
				result.setSubunitMap(subunitMap);
				result.setAlignment(msa);
				logger.info("Better result found: " + result.toString());
			} else if (subunitMap.size() == result.getSubunitMap().size()) {
				if (result.getAlignment() == null) {
					result.setSubunitMap(subunitMap);
					result.setAlignment(msa);
				} else if (msa.getScore(MultipleAlignmentScorer.RMSD) < result.getRmsd()) {
					result.setSubunitMap(subunitMap);
					result.setAlignment(msa);
					logger.info("Better result found: " + result.toString());
				}
			}

		}

		return result;
	}

	/**
	 * Returns a pair of Atom arrays corresponding to the alignment of subunit
	 * matchings, in order of appearance. Superposition of the two Atom sets
	 * gives the transformation of the complex.
	 * 
	 * Utility method to cumulative calculate the alignment Atoms.
	 *
	 * @param clusters
	 *            List of SubunitClusters
	 * @param clusterSubunitMap
	 *            map from cluster id to subunit matching
	 * @return pair of atom arrays to be superposed
	 */
	private static Pair getAlignedAtomsForClusterSubunitMap(
			List clusters,
			Map> clusterSubunitMap) {

		List atomArray1 = new ArrayList<>();
		List atomArray2 = new ArrayList<>();

		// For each cluster of subunits
		for (int key : clusterSubunitMap.keySet()) {

			// Obtain the cluster and the alignment in it
			SubunitCluster cluster = clusters.get(key);

			// For each subunit matching in the cluster
			for (Entry pair : clusterSubunitMap.get(key)
					.entrySet()) {

				int i = pair.getKey();
				int j = pair.getValue();

				// Apend atoms to the arrays
				atomArray1.addAll(Arrays.asList(cluster
						.getAlignedAtomsSubunit(i)));
				atomArray2.addAll(Arrays.asList(cluster
						.getAlignedAtomsSubunit(j)));
			}

		}
		return new Pair<>(
				atomArray1.toArray(new Atom[0]),
				atomArray2.toArray(new Atom[0]));
	}

	/**
	 * Returns the transformation matrix corresponding to the alignment of
	 * subunit matchings.
	 * 
	 * Utility method to cumulative calculate the alignment transformation.
	 *
	 * @param clusters
	 *            List of SubunitClusters
	 * @param clusterSubunitMap
	 *            map from cluster id to subunit matching
	 * @return transformation matrix
	 */
	private static Matrix4d getTransformForClusterSubunitMap(
			List clusters,
			Map> clusterSubunitMap) {

		Pair pair = getAlignedAtomsForClusterSubunitMap(clusters,
				clusterSubunitMap);

		return SuperPositions.superpose(Calc.atomsToPoints(pair.getFirst()),
				Calc.atomsToPoints(pair.getSecond()));

	}
}