All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.symmetry.internal.SymmOptimizer Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.symmetry.internal;

import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.multiple.Block;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer;
import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentTools;
import org.biojava.nbio.structure.jama.Matrix;
import org.biojava.nbio.structure.symmetry.utils.SymmetryTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Optimizes a symmetry alignment by a Monte Carlo score optimization of the
 * repeat multiple alignment. The superposition of the repeats is not free
 * (felxible), because it is constrained on the symmetry axes found in the
 * structure. This is the main difference to the {@link MultipleMC} algorithm in
 * biojava. Another major difference is that the free Pool is shared for all
 * repeats, so that no residue can appear to more than one repeat at a time.
 * 

* This algorithm does not use a unfiform distribution for selecting moves, * farther residues have more probability to be shrinked or gapped. This * modification of the algorithm improves convergence and running time. *

* Use call method to parallelize optimizations, or use optimize method instead. * Because gaps are allowed in the repeats, a {@link MultipleAlignment} format * is returned. * * @author Aleix Lafita * @since 4.1.1 * */ public class SymmOptimizer { private static final Logger logger = LoggerFactory .getLogger(SymmOptimizer.class); private Random rnd; // Optimization parameters private int Rmin = 2; // min aligned repeats per column private int Lmin; // min repeat length private int maxIter; // max iterations private double C = 20; // probability of accept bad moves constant // Score function parameters private static final double Gopen = 20.0; // Penalty for opening gap private static final double Gextend = 10.0; // Penalty for extending gaps private double dCutoff; // Alignment Information private MultipleAlignment msa; private SymmetryAxes axes; private Atom[] atoms; private int order; private int length; // total alignment columns (block size) private int repeatCore; // core length (without gaps) // Aligned Residues and Score private List> block; // residues aligned private List freePool; // residues not aligned private double mcScore; // alignment score to optimize // Variables that store the history of the optimization - slower if on private static final boolean history = false; private static final int saveStep = 100; private static final String pathToHistory = "results/symm-opt/"; private List timeHistory; private List lengthHistory; private List rmsdHistory; private List tmScoreHistory; private List mcScoreHistory; /** * Constructor with a seed MultipleAligment storing a refined symmetry * alignment of the repeats. To perform the optimization use the call or * optimize methods after instantiation. * * @param symmResult * CeSymmResult with all the information * @throws RefinerFailedException * @throws StructureException */ public SymmOptimizer(CeSymmResult symmResult) { this.axes = symmResult.getAxes(); this.rnd = new Random(symmResult.getParams().getRndSeed()); this.Lmin = symmResult.getParams().getMinCoreLength(); this.dCutoff = symmResult.getParams().getDistanceCutoff(); MultipleAlignmentEnsemble e = symmResult.getMultipleAlignment() .getEnsemble().clone(); this.msa = e.getMultipleAlignment(0); this.atoms = msa.getAtomArrays().get(0); this.order = msa.size(); this.repeatCore = msa.getCoreLength(); // 50% of the structures aligned (minimum) or all (no gaps) if (symmResult.getParams().isGaps()) Rmin = Math.max(order / 2, 2); else Rmin = order; maxIter = symmResult.getParams().getOptimizationSteps(); if (maxIter < 1) maxIter = 100 * atoms.length; } private void initialize() throws StructureException, RefinerFailedException { if (order == 1) throw new RefinerFailedException( "Non-symmetric seed alignment: order = 1"); if (repeatCore < 1) throw new RefinerFailedException( "Seed alignment too short: repeat core length < 1"); // Initialize the history variables timeHistory = new ArrayList(); lengthHistory = new ArrayList(); rmsdHistory = new ArrayList(); mcScoreHistory = new ArrayList(); tmScoreHistory = new ArrayList(); C = 20 * order; // Initialize alignment variables block = msa.getBlock(0).getAlignRes(); freePool = new ArrayList(); length = block.get(0).size(); // Store the residues aligned in the block List aligned = new ArrayList(); for (int su = 0; su < order; su++) aligned.addAll(block.get(su)); // Add any residue not aligned to the free pool for (int i = 0; i < atoms.length; i++) { if (!aligned.contains(i)) freePool.add(i); } checkGaps(); // Set the MC score of the initial state (seed alignment) updateMultipleAlignment(); mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, dCutoff); } /** * Optimization method based in a Monte-Carlo approach. Starting from the * refined alignment uses 4 types of moves: *

*

  • 1- Shift Row: if there are enough freePool residues available. *
  • 2- Expand Block: add another alignment column. *
  • 3- Shrink Block: move a block column to the freePool. *
  • 4- Insert gap: insert a gap in a position of the alignment. * * @throws StructureException * @throws RefinerFailedException * if the alignment is not symmetric or too short. */ public MultipleAlignment optimize() throws StructureException, RefinerFailedException { initialize(); // Save the optimal alignment List> optBlock = new ArrayList>(); List optFreePool = new ArrayList(); optFreePool.addAll(freePool); for (int k = 0; k < order; k++) { List b = new ArrayList(); b.addAll(block.get(k)); optBlock.add(b); } double optScore = mcScore; int conv = 0; // Number of steps without an alignment improvement int i = 1; int stepsToConverge = Math.max(maxIter / 50, 1000); long initialTime = System.nanoTime()/1000000; while (i < maxIter && conv < stepsToConverge) { // Save the state of the system List> lastBlock = new ArrayList>(); List lastFreePool = new ArrayList(); lastFreePool.addAll(freePool); for (int k = 0; k < order; k++) { List b = new ArrayList(); b.addAll(block.get(k)); lastBlock.add(b); } double lastScore = mcScore; int lastRepeatCore = repeatCore; boolean moved = false; while (!moved) { // Randomly select one of the steps to modify the alignment. // Because of biased moves, the probabilities are not the same double move = rnd.nextDouble(); if (move < 0.4) { moved = shiftRow(); logger.debug("did shift"); } else if (move < 0.7) { moved = expandBlock(); logger.debug("did expand"); } else if (move < 0.85) { moved = shrinkBlock(); logger.debug("did shrink"); } else { moved = insertGap(); logger.debug("did insert gap"); } } // Get the properties of the new alignment updateMultipleAlignment(); mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend, dCutoff); // Calculate change in the optimization Score double AS = mcScore - lastScore; double prob = 1.0; if (AS < 0) { // Probability of accepting bad move prob = probabilityFunction(AS, i, maxIter); double p = rnd.nextDouble(); // Reject the move if (p > prob) { block = lastBlock; freePool = lastFreePool; length = block.get(0).size(); repeatCore = lastRepeatCore; mcScore = lastScore; conv++; // no change in score if rejected } else conv = 0; // if accepted } else conv = 0; // if positive change logger.debug(i + ": --prob: " + prob + ", --score: " + AS + ", --conv: " + conv); // Store as the optimal alignment if better if (mcScore > optScore) { optBlock = new ArrayList>(); optFreePool = new ArrayList(); optFreePool.addAll(freePool); for (int k = 0; k < order; k++) { List b = new ArrayList(); b.addAll(block.get(k)); optBlock.add(b); } optScore = mcScore; } if (history) { if (i % saveStep == 1) { // Get the correct superposition again updateMultipleAlignment(); timeHistory.add(System.nanoTime()/1000000 - initialTime); lengthHistory.add(length); rmsdHistory.add(msa.getScore(MultipleAlignmentScorer.RMSD)); tmScoreHistory.add(msa .getScore(MultipleAlignmentScorer.AVGTM_SCORE)); mcScoreHistory.add(mcScore); } } i++; } // Use the optimal alignment of the trajectory block = optBlock; freePool = optFreePool; mcScore = optScore; // Superimpose and calculate final scores updateMultipleAlignment(); msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore); // Save the history to the results folder of the symmetry project if (history) { try { saveHistory(pathToHistory); } catch (Exception e) { logger.warn("Could not save history file: " + e.getMessage()); } } return msa; } /** * This method translates the internal data structures to a * MultipleAlignment of the repeats in order to use the methods to score * MultipleAlignments. * * @throws StructureException * @throws RefinerFailedException */ private void updateMultipleAlignment() throws StructureException, RefinerFailedException { msa.clear(); // Override the alignment with the new information Block b = msa.getBlock(0); b.setAlignRes(block); repeatCore = b.getCoreLength(); if (repeatCore < 1) throw new RefinerFailedException( "Optimization converged to length 0"); SymmetryTools.updateSymmetryTransformation(axes, msa, atoms); } /** * Method that loops through all the alignment columns and checks that there * are no more gaps than the maximum allowed: Rmin. *

    * There must be at least Rmin residues different than null in every * alignment column. In case there is a column with more gaps than allowed * it will be shrinked (moved to freePool). * * @return true if any columns has been shrinked and false otherwise */ private boolean checkGaps() { List shrinkColumns = new ArrayList(); // Loop for each column for (int res = 0; res < length; res++) { int gapCount = 0; // Loop for each repeat and count the gaps for (int su = 0; su < order; su++) { if (block.get(su).get(res) == null) gapCount++; } if ((order - gapCount) < Rmin) { // Add the column to the shrink list shrinkColumns.add(res); } } // Shrink the columns that have more gaps than allowed for (int col = shrinkColumns.size() - 1; col >= 0; col--) { for (int su = 0; su < order; su++) { Integer residue = block.get(su).get(shrinkColumns.get(col)); block.get(su).remove((int) shrinkColumns.get(col)); if (residue != null) freePool.add(residue); Collections.sort(freePool); } length--; } if (shrinkColumns.size() != 0) return true; else return false; } /** * Insert a gap in one of the repeats into selected position (by higher * distances) in the alignment. Calculates the average residue distance to * make the choice. A gap is a null in the block. * * @throws StructureException * @throws RefinerFailedException */ private boolean insertGap() throws StructureException, RefinerFailedException { // Let gaps only if the repeat is larger than the minimum length if (repeatCore <= Lmin) return false; // Select residue by maximum distance updateMultipleAlignment(); Matrix residueDistances = MultipleAlignmentTools .getAverageResidueDistances(msa); double maxDist = Double.MIN_VALUE; int su = 0; int res = 0; for (int col = 0; col < length; col++) { for (int s = 0; s < order; s++) { if (residueDistances.get(s, col) != -1) { if (residueDistances.get(s, col) > maxDist) { // geometric distribution if (rnd.nextDouble() > 0.5) { su = s; res = col; maxDist = residueDistances.get(s, col); } } } } } // Insert the gap at the position Integer residueL = block.get(su).get(res); if (residueL != null) { freePool.add(residueL); Collections.sort(freePool); } else return false; // If there was a gap already in the position. block.get(su).set(res, null); checkGaps(); return true; } /** * Move all the block residues of one repeat one position to the left or * right and move the corresponding boundary residues from the freePool to * the block, and viceversa. *

    * The boundaries are determined by any irregularity (either a gap or a * discontinuity in the alignment. */ private boolean shiftRow() { int su = rnd.nextInt(order); // Select the repeat int rl = rnd.nextInt(2); // Select between moving right (0) or left (1) int res = rnd.nextInt(length); // Residue as a pivot // When the pivot residue is null try to add a residue from the freePool if (block.get(su).get(res) == null) { int right = res; int left = res; // Find the boundary to the right abd left while (block.get(su).get(right) == null && right < length - 1) { right++; } while (block.get(su).get(left) == null && left > 0) { left--; } // If they both are null the whole block is null if (block.get(su).get(left) == null && block.get(su).get(right) == null) { return false; } else if (block.get(su).get(left) == null) { // Choose the sequentially previous residue of the known one Integer residue = block.get(su).get(right) - 1; if (freePool.contains(residue)) { block.get(su).set(res, residue); freePool.remove(residue); } else return false; } else if (block.get(su).get(right) == null) { // Choose the sequentially next residue of the known one Integer residue = block.get(su).get(left) + 1; if (freePool.contains(residue)) { block.get(su).set(res, residue); freePool.remove(residue); } else return false; } else { // If boundaries are consecutive swap null and position (R or L) if (block.get(su).get(right) == block.get(su).get(left) + 1) { switch (rl) { case 0: // to the right block.get(su).set(right - 1, block.get(su).get(right)); block.get(su).set(right, null); break; case 1: // to the left block.get(su).set(left + 1, block.get(su).get(left)); block.get(su).set(left, null); break; } } else { // Choose randomly a residue in between left and right to // add Integer residue = rnd.nextInt(block.get(su).get(right) - block.get(su).get(left) - 1) + block.get(su).get(left) + 1; if (freePool.contains(residue)) { block.get(su).set(res, residue); freePool.remove(residue); } } } return true; } // When the residue is different than null switch (rl) { case 0: // Move to the right int leftBoundary = res - 1; int leftPrevRes = res; while (true) { if (leftBoundary < 0) break; else { if (block.get(su).get(leftBoundary) == null) { break; // gap } else if (block.get(su).get(leftPrevRes) > block.get(su) .get(leftBoundary) + 1) { break; // discontinuity } } leftPrevRes = leftBoundary; leftBoundary--; } leftBoundary++; int rightBoundary = res + 1; int rightPrevRes = res; while (true) { if (rightBoundary == length) break; else { if (block.get(su).get(rightBoundary) == null) { break; // gap } else if (block.get(su).get(rightPrevRes) + 1 < block.get( su).get(rightBoundary)) { break; // discontinuity } } rightPrevRes = rightBoundary; rightBoundary++; } rightBoundary--; // Residues at the boundary Integer residueR0 = block.get(su).get(rightBoundary); Integer residueL0 = block.get(su).get(leftBoundary); // Remove residue at the right of the block and add to the freePool block.get(su).remove(rightBoundary); if (residueR0 != null) { freePool.add(residueR0); Collections.sort(freePool); } // Add the residue at the left of the block residueL0 -= 1; // cannot be null, throw exception if it is if (freePool.contains(residueL0)) { block.get(su).add(leftBoundary, residueL0); freePool.remove(residueL0); } else { block.get(su).add(leftBoundary, null); } break; case 1: // Move to the left int leftBoundary1 = res - 1; int leftPrevRes1 = res; while (true) { if (leftBoundary1 < 0) break; else { if (block.get(su).get(leftBoundary1) == null) { break; // gap } else if (block.get(su).get(leftPrevRes1) > block.get(su) .get(leftBoundary1) + 1) { break; // discontinuity } } leftPrevRes1 = leftBoundary1; leftBoundary1--; } leftBoundary1++; int rightBoundary1 = res + 1; int rightPrevRes1 = res; while (true) { if (rightBoundary1 == length) break; else { if (block.get(su).get(rightBoundary1) == null) { break; // gap } else if (block.get(su).get(rightPrevRes1) + 1 < block .get(su).get(rightBoundary1)) { break; // discontinuity } } rightPrevRes1 = rightBoundary1; rightBoundary1++; } rightBoundary1--; // Residues at the boundary Integer residueR1 = block.get(su).get(rightBoundary1); Integer residueL1 = block.get(su).get(leftBoundary1); // Add the residue at the right of the block residueR1 += 1; // cannot be null if (freePool.contains(residueR1)) { if (rightBoundary1 == length - 1) block.get(su).add(residueR1); else block.get(su).add(rightBoundary1 + 1, residueR1); freePool.remove(residueR1); } else { block.get(su).add(rightBoundary1 + 1, null); } // Remove the residue at the left of the block block.get(su).remove(leftBoundary1); freePool.add(residueL1); Collections.sort(freePool); break; } checkGaps(); return true; } /** * It extends the alignment one position to the right or to the left of a * randomly selected position by moving the consecutive residues of each * repeat (if present) from the freePool to the block. *

    * If there are not enough residues in the freePool it introduces gaps. */ private boolean expandBlock() { boolean moved = false; int rl = rnd.nextInt(2); // Select between right (0) or left (1) int res = rnd.nextInt(length); // Residue as a pivot switch (rl) { case 0: int rightBoundary = res; int[] previousPos = new int[order]; for (int su = 0; su < order; su++) previousPos[su] = -1; // Search a position to the right that has at minimum Rmin while (length - 1 > rightBoundary) { int noncontinuous = 0; for (int su = 0; su < order; su++) { if (block.get(su).get(rightBoundary) == null) { continue; } else if (previousPos[su] == -1) { previousPos[su] = block.get(su).get(rightBoundary); } else if (block.get(su).get(rightBoundary) > previousPos[su] + 1) { noncontinuous++; } } if (noncontinuous < Rmin) rightBoundary++; else break; } if (rightBoundary > 0) rightBoundary--; // Expand the block with the residues at the repeat boundaries for (int su = 0; su < order; su++) { Integer residueR = block.get(su).get(rightBoundary); if (residueR == null) { if (rightBoundary == length - 1) block.get(su).add(null); else block.get(su).add(rightBoundary + 1, null); } else if (freePool.contains(residueR + 1)) { Integer residueAdd = residueR + 1; if (rightBoundary == length - 1) { block.get(su).add(residueAdd); } else block.get(su).add(rightBoundary + 1, residueAdd); freePool.remove(residueAdd); } else { if (rightBoundary == length - 1) block.get(su).add(null); else block.get(su).add(rightBoundary + 1, null); } } length++; moved = true; break; case 1: int leftBoundary = res; int[] nextPos = new int[order]; for (int su = 0; su < order; su++) nextPos[su] = -1; // Search a position to the right that has at minimum Rmin while (leftBoundary > 0) { int noncontinuous = 0; for (int su = 0; su < order; su++) { if (block.get(su).get(leftBoundary) == null) { continue; } else if (nextPos[su] == -1) { nextPos[su] = block.get(su).get(leftBoundary); } else if (block.get(su).get(leftBoundary) < nextPos[su] - 1) { noncontinuous++; } } if (noncontinuous < Rmin) leftBoundary--; else break; } // Expand the block with the residues at the repeat boundaries for (int su = 0; su < order; su++) { Integer residueL = block.get(su).get(leftBoundary); if (residueL == null) { block.get(su).add(leftBoundary, null); } else if (freePool.contains(residueL - 1)) { Integer residueAdd = residueL - 1; block.get(su).add(leftBoundary, residueAdd); freePool.remove(residueAdd); } else { block.get(su).add(leftBoundary, null); } } length++; moved = true; break; } if (moved) return !checkGaps(); return moved; } /** * Deletes an alignment column at a randomly selected position. * * @throws StructureException * @throws RefinerFailedException */ private boolean shrinkBlock() throws StructureException, RefinerFailedException { // Let shrink moves only if the repeat is larger enough if (repeatCore <= Lmin) return false; // Select column by maximum distance updateMultipleAlignment(); Matrix residueDistances = MultipleAlignmentTools .getAverageResidueDistances(msa); double maxDist = Double.MIN_VALUE; double[] colDistances = new double[length]; int res = 0; for (int col = 0; col < length; col++) { int normalize = 0; for (int s = 0; s < order; s++) { if (residueDistances.get(s, col) != -1) { colDistances[col] += residueDistances.get(s, col); normalize++; } } colDistances[col] /= normalize; if (colDistances[col] > maxDist) { // geometric distribution if (rnd.nextDouble() > 0.5) { maxDist = colDistances[col]; res = col; } } } for (int su = 0; su < order; su++) { Integer residue = block.get(su).get(res); block.get(su).remove(res); if (residue != null) freePool.add(residue); Collections.sort(freePool); } length--; checkGaps(); return true; } /** * Calculates the probability of accepting a bad move given the iteration * step and the score change. *

    * Function: p=(C-AS)/(C*sqrt(step)) Added a normalization factor so that * the probability approaches 0 when the maxIter is reached. */ private double probabilityFunction(double AS, int m, int maxIter) { double prob = (C + AS) / (C * Math.sqrt(m)); double norm = (1 - (m * 1.0) / maxIter); // Normalization factor return Math.min(Math.max(prob * norm, 0.0), 1.0); } /** * Save the evolution of the optimization process as a csv file. */ private void saveHistory(String folder) throws IOException { String name = msa.getStructureIdentifier(0).getIdentifier(); FileWriter writer = new FileWriter(folder + name + "-symm_opt.csv"); writer.append("Step,Time,RepeatLength,RMSD,TMscore,MCscore\n"); for (int i = 0; i < lengthHistory.size(); i++) { writer.append(i * saveStep + ","); writer.append(timeHistory.get(i) + ","); writer.append(lengthHistory.get(i) + ","); writer.append(rmsdHistory.get(i) + ","); writer.append(tmScoreHistory.get(i) + ","); writer.append(mcScoreHistory.get(i) + "\n"); } writer.flush(); writer.close(); } }





  • © 2015 - 2025 Weber Informatics LLC | Privacy Policy