All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.align.ce.CeCPMain Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Mar 9, 2010
 * Author: Spencer Bliven
 *
 */

package org.biojava.nbio.structure.align.ce;


import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
import org.biojava.nbio.structure.align.util.ConfigurationException;
import org.biojava.nbio.structure.jama.Matrix;

import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding
 * circular permutations.
 * 

* A circular permutation consists of a single cleavage point and rearrangement * between two structures, for example: *

 * ABCDEFG
 * DEFGABC
 * 
* @author Spencer Bliven. * */ public class CeCPMain extends CeMain { private static boolean debug = false; public static final String algorithmName = "jCE Circular Permutation"; /** * version history: * 1.5 - Added more parameters to the command line, including -maxOptRMSD * 1.4 - Added DuplicationHint parameter & default to duplicating the shorter chain * 1.3 - Short CPs are now discarded * 1.2 - now supports check AlignmentTools.isSequentialAlignment. XML protocol * 1.1 - skipped, (trying to avoid confusion with jfatcat in all vs. all comparisons) * 1.0 - initial release */ public static final String version = "1.5"; public CeCPMain(){ super(); params = new CECPParameters(); } @Override public String getAlgorithmName() { return CeCPMain.algorithmName; } @Override public String getVersion() { return CeCPMain.version; } public static void main(String[] args) throws ConfigurationException { CeCPUserArgumentProcessor processor = new CeCPUserArgumentProcessor(); //Responsible for creating a CeCPMain instance processor.process(args); } /** * Aligns ca1 and ca2 using a heuristic to check for CPs. *

* Aligns ca1 against a doubled ca2, then cleans up the alignment. * @param ca1 * @param ca2 * @param param * @return the alignment, possibly containing a CP. * @throws StructureException */ @Override public AFPChain align(Atom[] ca1, Atom[] ca2, Object param) throws StructureException{ if ( ! (param instanceof CECPParameters)) throw new IllegalArgumentException("CE algorithm needs an object of call CeParameters as argument."); CECPParameters cpparams = (CECPParameters) param; this.params = cpparams; boolean duplicateRight; switch( cpparams.getDuplicationHint() ) { case LEFT: duplicateRight = false; break; case RIGHT: duplicateRight = true; break; case SHORTER: duplicateRight = ca1.length >= ca2.length; break; default: duplicateRight = true; } if( duplicateRight ) { return alignRight(ca1, ca2, cpparams); } else { if(debug) { System.out.println("Swapping alignment order."); } AFPChain afpChain = this.alignRight(ca2, ca1, cpparams); return invertAlignment(afpChain); } } /** * Aligns the structures, duplicating ca2 regardless of * {@link CECPParameters.getDuplicationHint() param.getDuplicationHint}. * @param ca1 * @param ca2 * @param cpparams * @return * @throws StructureException */ private AFPChain alignRight(Atom[] ca1, Atom[] ca2, CECPParameters cpparams) throws StructureException { long startTime = System.currentTimeMillis(); Atom[] ca2m = StructureTools.duplicateCA2(ca2); if(debug) { System.out.format("Duplicating ca2 took %s ms\n",System.currentTimeMillis()-startTime); startTime = System.currentTimeMillis(); } // Do alignment AFPChain afpChain = super.align(ca1, ca2m,params); // since the process of creating ca2m strips the name info away, set it explicitely try { afpChain.setName2(ca2[0].getGroup().getChain().getStructure().getName()); } catch( Exception e) {} if(debug) { System.out.format("Running %dx2*%d alignment took %s ms\n",ca1.length,ca2.length,System.currentTimeMillis()-startTime); startTime = System.currentTimeMillis(); } afpChain = postProcessAlignment(afpChain, ca1, ca2m, calculator, cpparams); if(debug) { System.out.format("Finding CP point took %s ms\n",System.currentTimeMillis()-startTime); startTime = System.currentTimeMillis(); } return afpChain; } /** Circular permutation specific code to be run after the standard CE alignment * * @param afpChain The finished alignment * @param ca1 CA atoms of the first protein * @param ca2m A duplicated copy of the second protein * @param calculator The CECalculator used to create afpChain * @throws StructureException */ public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator ) throws StructureException{ return postProcessAlignment(afpChain, ca1, ca2m, calculator, null); } /** Circular permutation specific code to be run after the standard CE alignment * * @param afpChain The finished alignment * @param ca1 CA atoms of the first protein * @param ca2m A duplicated copy of the second protein * @param calculator The CECalculator used to create afpChain * @param param Parameters * @throws StructureException */ public static AFPChain postProcessAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2m,CECalculator calculator, CECPParameters param ) throws StructureException{ // remove bottom half of the matrix Matrix doubledMatrix = afpChain.getDistanceMatrix(); // the matrix can be null if the alignment is too short. if ( doubledMatrix != null ) { assert(doubledMatrix.getRowDimension() == ca1.length); assert(doubledMatrix.getColumnDimension() == ca2m.length); Matrix singleMatrix = doubledMatrix.getMatrix(0, ca1.length-1, 0, (ca2m.length/2)-1); assert(singleMatrix.getRowDimension() == ca1.length); assert(singleMatrix.getColumnDimension() == (ca2m.length/2)); afpChain.setDistanceMatrix(singleMatrix); } // Check for circular permutations int alignLen = afpChain.getOptLength(); if ( alignLen > 0) { afpChain = filterDuplicateAFPs(afpChain,calculator,ca1,ca2m,param); } return afpChain; } /** * Swaps the order of structures in an AFPChain * @param a * @return */ public AFPChain invertAlignment(AFPChain a) { String name1 = a.getName1(); String name2 = a.getName2(); a.setName1(name2); a.setName2(name1); int len1 = a.getCa1Length(); a.setCa1Length( a.getCa2Length() ); a.setCa2Length( len1 ); int beg1 = a.getAlnbeg1(); a.setAlnbeg1(a.getAlnbeg2()); a.setAlnbeg2(beg1); char[] alnseq1 = a.getAlnseq1(); a.setAlnseq1(a.getAlnseq2()); a.setAlnseq2(alnseq1); Matrix distab1 = a.getDisTable1(); a.setDisTable1(a.getDisTable2()); a.setDisTable2(distab1); int[] focusRes1 = a.getFocusRes1(); a.setFocusRes1(a.getFocusRes2()); a.setFocusRes2(focusRes1); //What are aftIndex and befIndex used for? How are they indexed? //a.getAfpAftIndex() String[][][] pdbAln = a.getPdbAln(); if( pdbAln != null) { for(int block = 0; block < a.getBlockNum(); block++) { String[] paln1 = pdbAln[block][0]; pdbAln[block][0] = pdbAln[block][1]; pdbAln[block][1] = paln1; } } int[][][] optAln = a.getOptAln(); if( optAln != null ) { for(int block = 0; block < a.getBlockNum(); block++) { int[] aln1 = optAln[block][0]; optAln[block][0] = optAln[block][1]; optAln[block][1] = aln1; } } a.setOptAln(optAln); // triggers invalidate() Matrix distmat = a.getDistanceMatrix(); if(distmat != null) a.setDistanceMatrix(distmat.transpose()); // invert the rotation matrices Matrix[] blockRotMat = a.getBlockRotationMatrix(); Atom[] shiftVec = a.getBlockShiftVector(); if( blockRotMat != null) { for(int block = 0; block < a.getBlockNum(); block++) { if(blockRotMat[block] != null) { // if y=x*A+b, then x=y*inv(A)-b*inv(A) blockRotMat[block] = blockRotMat[block].inverse(); Calc.rotate(shiftVec[block],blockRotMat[block]); shiftVec[block] = Calc.invert(shiftVec[block]); } } } return a; } /** * Takes as input an AFPChain where ca2 has been artificially duplicated. * This raises the possibility that some residues of ca2 will appear in * multiple AFPs. This method filters out duplicates and makes sure that * all AFPs are numbered relative to the original ca2. * *

The current version chooses a CP site such that the length of the * alignment is maximized. * *

This method does not update scores to reflect the filtered alignment. * It does update the RMSD and superposition. * * @param afpChain The alignment between ca1 and ca2-ca2. Blindly assumes * that ca2 has been duplicated. * @return A new AFPChain consisting of ca1 to ca2, with each residue in * at most 1 AFP. * @throws StructureException */ public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc, Atom[] ca1, Atom[] ca2duplicated) throws StructureException { return filterDuplicateAFPs(afpChain, ceCalc, ca1, ca2duplicated, null); } public static AFPChain filterDuplicateAFPs(AFPChain afpChain, CECalculator ceCalc, Atom[] ca1, Atom[] ca2duplicated, CECPParameters params) throws StructureException { AFPChain newAFPChain = new AFPChain(afpChain); if(params == null) params = new CECPParameters(); final int minCPlength = params.getMinCPLength(); int ca2len = afpChain.getCa2Length()/2; newAFPChain.setCa2Length(ca2len); // Fix optimal alignment int[][][] optAln = afpChain.getOptAln(); int[] optLen = afpChain.getOptLen(); int alignLen = afpChain.getOptLength(); if (alignLen < 1) return newAFPChain; assert(afpChain.getBlockNum() == 1); // Assume that CE returns just one block // Determine the region where ca2 and ca2' overlap // The bounds of the alignment wrt ca2-ca2' int nStart = optAln[0][1][0]; //alignment N-terminal int cEnd = optAln[0][1][alignLen-1]; // alignment C-terminal // overlap is between nStart and cEnd int firstRes = nStart; // start res number after trimming int lastRes = nStart+ca2len; // last res number after trimming if(nStart >= ca2len || cEnd < ca2len) { // no circular permutation firstRes=nStart; lastRes=cEnd; } else { // Rule: maximize the length of the alignment int overlapLength = cEnd+1 - nStart - ca2len; if(overlapLength <= 0) { // no overlap! CPRange minCP = calculateMinCP(optAln[0][1], alignLen, ca2len, minCPlength); firstRes=nStart; lastRes=cEnd; // Remove short blocks if(firstRes > minCP.n) { firstRes = ca2len; if(debug) { System.out.format("Discarding n-terminal block as too " + "short (%d residues, needs %d)\n", minCP.mid, minCPlength); } } if(lastRes < minCP.c) { lastRes = ca2len-1; if(debug) { System.out.format("Discarding c-terminal block as too " + "short (%d residues, needs %d)\n", optLen[0] - minCP.mid, minCPlength); } } } else { // overlap! CutPoint cp = calculateCutPoint(optAln[0][1], nStart, cEnd, overlapLength, alignLen, minCPlength, ca2len, firstRes); // Adjust alignment length for trimming //alignLen -= cp.numResiduesCut; //TODO inaccurate firstRes = cp.firstRes; lastRes = cp.lastRes; //TODO Now have CP site, and could do a NxM alignment for further optimization. // For now, does not appear to be worth the 50% increase in time //TODO Bug: scores need to be recalculated } } // Fix numbering: // First, split up the atoms into left and right blocks List< ResiduePair > left = new ArrayList(); // residues from left of duplication List< ResiduePair > right = new ArrayList(); // residues from right of duplication for(int i=0;i= firstRes && optAln[0][1][i] <= lastRes ) { // not trimmed if(optAln[0][1][i] < ca2len) { // in first half of ca2 left.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i])); } else { right.add(new ResiduePair(optAln[0][0][i],optAln[0][1][i]-ca2len)); } } } //assert(left.size()+right.size() == alignLen); alignLen = 0; // Now we don't care about left/right, so just call them "blocks" List> blocks = new ArrayList>(2); if( !left.isEmpty() ) { blocks.add(left); alignLen += left.size(); } if( !right.isEmpty()) { blocks.add(right); alignLen += right.size(); } left=null; right = null; // Put the blocks back together into arrays for the AFPChain int[][][] newAlign = new int[blocks.size()][][]; int[] blockLengths = new int[blocks.size()]; for(int blockNum = 0; blockNum < blocks.size(); blockNum++) { //Alignment List block = blocks.get(blockNum); newAlign[blockNum] = new int[2][block.size()]; for(int i=0;i block:blocks ) { for(ResiduePair pair:block) { atoms1[pos] = ca1[pair.a]; // Clone residue to allow modification Atom atom2 = ca2duplicated[pair.b]; Group g = (Group) atom2.getGroup().clone(); atoms2[pos] = g.getAtom( atom2.getName() ); pos++; } } assert(pos == alignLen); // Sets the rotation matrix in ceCalc to the proper value double rmsd = -1; double tmScore = 0.; double[] blockRMSDs = new double[blocks.size()]; Matrix[] blockRotationMatrices = new Matrix[blocks.size()]; Atom[] blockShifts = new Atom[blocks.size()]; if(alignLen>0) { // superimpose SVDSuperimposer svd = new SVDSuperimposer(atoms1, atoms2); Matrix matrix = svd.getRotation(); Atom shift = svd.getTranslation(); for( Atom a : atoms2 ) { Calc.rotate(a.getGroup(), matrix); Calc.shift(a, shift); } //and get overall rmsd rmsd = SVDSuperimposer.getRMS(atoms1, atoms2); tmScore = SVDSuperimposer.getTMScore(atoms1, atoms2, ca1.length, ca2len); // set all block rotations to the overall rotation // It's not well documented if this is the expected behavior, but // it seems to work. blockRotationMatrices[0] = matrix; blockShifts[0] = shift; blockRMSDs[0] = -1; for(int i=1;i=0;i--) { // i starts at the c-term and increases to the left if(block[alignPos] == cEnd - overlapLength+1 + i) { // matches the aligned pair // the c-term contains the -ith overlapping residue cTermResCount[i] = cTermResCount[i+1]+1; alignPos--; } else { cTermResCount[i] = cTermResCount[i+1]; } } return cTermResCount; } private static int[] countNtermResidues(int[] block, int nStart, int overlapLength) { int[] nTermResCount = new int[overlapLength+1]; // increases monotonically nTermResCount[0] = 0; int alignPos = 0; // index of the next aligned pair for(int i=1;i<=overlapLength;i++) { if(block[alignPos] == nStart + i-1 ) { // matches the aligned pair // the n-term contains the ith overlapping residue nTermResCount[i] = nTermResCount[i-1]+1; alignPos++; } else { nTermResCount[i] = nTermResCount[i-1]; } } return nTermResCount; } /** * A light class to store an alignment between two residues. * @author Spencer Bliven * @see #filterDuplicateAFPs() */ private static class ResiduePair { public int a; public int b; public ResiduePair(int a, int b) { this.a=a; this.b=b; } @Override public String toString() { return a+":"+b; } } /** * Tiny wrapper for the disallowed regions of an alignment. * @see CeCPMain#calculateMinCP(int[], int, int, int) * @author Spencer Bliven * */ protected static class CPRange { /** * last allowed n-term */ public int n; /** * midpoint of the alignment */ public int mid; /** * first allowed c-term */ public int c; } /** * Finds the alignment index of the residues minCPlength before and after * the duplication. * * @param block The permuted block being considered, generally optAln[0][1] * @param blockLen The length of the block (in case extra memory was allocated in block) * @param ca2len The length, in residues, of the protein specified by block * @param minCPlength The minimum number of residues allowed for a CP * @return a CPRange with the following components: *

n
Index into block of the residue such that * minCPlength residues remain to the end of ca2len, * or -1 if no residue fits that criterium.
*
mid
Index of the first residue higher than ca2len.
*
c
Index of minCPlength-th residue after ca2len, * or ca2len*2 if no residue fits that criterium.
*
*/ protected static CPRange calculateMinCP(int[] block, int blockLen, int ca2len, int minCPlength) { CPRange range = new CPRange(); // Find the cut point within the alignment. // Either returns the index i of the alignment such that block[i] == ca2len, // or else returns -i-1 where block[i] is the first element > ca2len. int middle = Arrays.binarySearch(block, ca2len); if(middle < 0) { middle = -middle -1; } // Middle is now the first res after the duplication range.mid = middle; int minCPntermIndex = middle-minCPlength; if(minCPntermIndex >= 0) { range.n = block[minCPntermIndex]; } else { range.n = -1; } int minCPctermIndex = middle+minCPlength-1; if(minCPctermIndex < blockLen) { range.c = block[minCPctermIndex]; } else { range.c = ca2len*2; } // Stub: // Best-case: assume all residues in the termini are aligned //range.n = ca2len - minCPlength; //range.c = ca2len + minCPlength-1; return range; } private static class CutPoint { public int numResiduesCut; public int firstRes; public int lastRes; } private static CutPoint calculateCutPoint(int[] block,int nStart, int cEnd, int overlapLength, int alignLen, int minCPlength, int ca2len, int firstRes) { // We require at least minCPlength residues in a block. //TODO calculate these explicitely based on the alignment // The last valid n-term CPRange minCP = calculateMinCP(block, alignLen, ca2len, minCPlength); int minCPnterm = minCP.n; // The first valid c-term int minCPcterm = minCP.c; // # res at or to the left of i within the overlap int[] nTermResCount = countNtermResidues(block, nStart, overlapLength); // Determine the position with the largest sum of lengths int[] cTermResCount = countCtermResidues(block, alignLen, cEnd, overlapLength); // Alignment length for a cut at the left of the overlap int maxResCount=-1; for(int i=0;i<=overlapLength;i++) { // i is the position of the CP within the overlap region // Calculate number of residues which remain after the CP int nRemain,cRemain; if(nStart+i <= minCPnterm) { nRemain = nTermResCount[overlapLength]-nTermResCount[i]; } else { nRemain = 0; } if(cEnd-overlapLength+i >= minCPcterm) { cRemain = cTermResCount[0] - cTermResCount[i]; } else { cRemain = 0; } // Look for the cut point which cuts off the minimum number of res if(nRemain + cRemain > maxResCount ) { // '>' biases towards keeping the n-term maxResCount = nRemain + cRemain; firstRes = nStart+ i; } } // Calculate the number of residues cut within the overlap int numResiduesCut = nTermResCount[overlapLength]+cTermResCount[0]-maxResCount; // Remove short blocks if(firstRes > minCPnterm) { // update number of residues cut for those outside the overlap numResiduesCut += 0; //TODO firstRes = ca2len; } int lastRes = firstRes+ca2len-1; if(lastRes < minCPcterm) { // update number of residues cut for those outside the overlap numResiduesCut += 0; //TODO lastRes = ca2len-1; } CutPoint cp = new CutPoint(); cp.firstRes=firstRes; cp.numResiduesCut = numResiduesCut; cp.lastRes = lastRes; if(debug) { System.out.format("Found a CP at residue %d. Trimming %d aligned residues from %d-%d of block 0 and %d-%d of block 1.\n", firstRes,cp.numResiduesCut,nStart,firstRes-1,firstRes, cEnd-ca2len); } return cp; } // try showing a GUI // requires additional dependencies biojava-structure-gui and JmolApplet // TODO dmyersturnbull: This should probably be in structure-gui @SuppressWarnings("unused") private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException { Atom[] ca1clone = StructureTools.cloneAtomArray(ca1); Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); if (! GuiWrapper.isGuiModuleInstalled()) { System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!"); // display alignment in console System.out.println(afpChain.toCE(ca1clone, ca2clone)); } else { Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone); GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy