org.biojava.nbio.structure.align.ce.OptimalCECPMain Maven / Gradle / Ivy
Show all versions of biojava-structure Show documentation
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on Mar 9, 2010
* Author: Spencer Bliven
*
*/
package org.biojava.nbio.structure.align.ce;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.util.AFPChainScorer;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.jama.Matrix;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;
/**
* A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding
* circular permutations.
*
* A circular permutation consists of a single cleavage point and rearrangement
* between two structures, for example:
*
* ABCDEFG
* DEFGABC
*
* @author Spencer Bliven.
*
*/
public class OptimalCECPMain extends CeMain {
private static final boolean debug = true;
public static final String algorithmName = "jCE Optimal Circular Permutation";
/**
* version history:
* 1.0 - Initial version
*/
public static final String version = "1.0";
protected OptimalCECPParameters params;
/**
*
*/
public OptimalCECPMain() {
super();
params = new OptimalCECPParameters();
}
@Override
public String getAlgorithmName() {
return OptimalCECPMain.algorithmName;
}
@Override
public String getVersion() {
return OptimalCECPMain.version;
}
/**
* @return an {@link OptimalCECPParameters} object
*/
@Override
public ConfigStrucAligParams getParameters() {
return params;
}
/**
* @param params Should be an {@link OptimalCECPParameters} object specifying alignment options
*/
@Override
public void setParameters(ConfigStrucAligParams params){
if (! (params instanceof OptimalCECPParameters )){
throw new IllegalArgumentException("provided parameter object is not of type CeParameter");
}
this.params = (OptimalCECPParameters) params;
}
/**
* Circularly permutes arr in place.
*
* Similar to {@link Collections#rotate(List, int)} but with reversed
* direction. Perhaps it would be more efficient to use the Collections version?
* @param
* @param arr The array to be permuted
* @param cp The number of residues to shift leftward, or equivalently, the index of
* the first element after the permutation point.
*/
private static void permuteArray(T[] arr, int cp) {
// Allow negative cp points for convenience.
if(cp == 0) {
return;
}
if(cp < 0) {
cp = arr.length+cp;
}
if(cp < 0 || cp >= arr.length) {
throw new ArrayIndexOutOfBoundsException(
"Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" );
}
List temp = new ArrayList<>(cp);
// shift residues left
for(int i=0;iSimilar to {@link Collections#rotate(List, int)} but with reversed
* direction. Perhaps it would be more efficient to use the Collections version?
* @param
* @param arr The array to be permuted
* @param cp The number of residues to shift leftward, or equivalently, the index of
* the first element after the permutation point.
*
private static void permuteArray(int[] arr, int cp) {
// Allow negative cp points for convenience.
if(cp == 0) {
return;
}
if(cp < 0) {
cp = arr.length+cp;
}
if(cp < 0 || cp >= arr.length) {
throw new ArrayIndexOutOfBoundsException(
"Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" );
}
List temp = new ArrayList(cp);
// shift residues left
for(int i=0;icp residues.
* WARNING: Modifies ca2 during the permutation. Be sure
* to make a copy before calling this method.
*
* @param ca1
* @param ca2
* @param param
* @param cp
* @return
* @throws StructureException
*/
public AFPChain alignPermuted(Atom[] ca1, Atom[] ca2, Object param, int cp) throws StructureException {
// initial permutation
permuteArray(ca2,cp);
// perform alignment
AFPChain afpChain = super.align(ca1, ca2, param);
// un-permute alignment
permuteAFPChain(afpChain, -cp);
if(afpChain.getName2() != null) {
afpChain.setName2(afpChain.getName2()+" CP="+cp);
}
// Specify the permuted
return afpChain;
}
/**
* Permute the second protein of afpChain by the specified number of residues.
* @param afpChain Input alignment
* @param cp Amount leftwards (or rightward, if negative) to shift the
* @return A new alignment equivalent to afpChain after the permutations
*/
private static void permuteAFPChain(AFPChain afpChain, int cp) {
int ca2len = afpChain.getCa2Length();
//fix up cp to be positive
if(cp == 0) {
return;
}
if(cp < 0) {
cp = ca2len+cp;
}
if(cp < 0 || cp >= ca2len) {
throw new ArrayIndexOutOfBoundsException(
"Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" );
}
// Fix up optAln
permuteOptAln(afpChain,cp);
if(afpChain.getBlockNum() > 1)
afpChain.setSequentialAlignment(false);
// fix up matrices
// ca1 corresponds to row indices, while ca2 corresponds to column indices.
afpChain.setDistanceMatrix(permuteMatrix(afpChain.getDistanceMatrix(),0,-cp));
// this is square, so permute both
afpChain.setDisTable2(permuteMatrix(afpChain.getDisTable2(),-cp,-cp));
//TODO fix up other AFP parameters?
}
/**
* Permutes mat by moving the rows of the matrix upwards by cp
* rows.
* @param mat The original matrix
* @param cpRows Number of rows upward to move entries
* @param cpCols Number of columns leftward to move entries
* @return The permuted matrix
*/
private static Matrix permuteMatrix(Matrix mat, int cpRows, int cpCols) {
//fix up cp to be positive
if(cpRows == 0 && cpCols == 0) {
return mat.copy();
}
if(cpRows < 0) {
cpRows = mat.getRowDimension()+cpRows;
}
if(cpRows < 0 || cpRows >= mat.getRowDimension()) {
throw new ArrayIndexOutOfBoundsException( String.format(
"Can't permute rows by %d: only %d rows.",
cpRows, mat.getRowDimension() )
);
}
if(cpCols < 0) {
cpCols = mat.getColumnDimension()+cpCols;
}
if(cpCols < 0 || cpCols >= mat.getColumnDimension()) {
throw new ArrayIndexOutOfBoundsException( String.format(
"Can't permute cols by %d: only %d rows.",
cpCols, mat.getColumnDimension() )
);
}
int[] rows = new int[mat.getRowDimension()];
for(int i=0;i(i-cp)%len
*
* @param afpChain
* @param cp Amount leftwards (or rightward, if negative) to shift the
*/
private static void permuteOptAln(AFPChain afpChain, int cp)
{
int ca2len = afpChain.getCa2Length();
if( ca2len <= 0) {
throw new IllegalArgumentException("No Ca2Length specified in "+afpChain);
}
// Allow negative cp points for convenience.
if(cp == 0) {
return;
}
if(cp <= -ca2len || cp >= ca2len) {
// could just take cp%ca2len, but probably its a bug if abs(cp)>=ca2len
throw new ArrayIndexOutOfBoundsException( String.format(
"Permutation point %d must be between %d and %d for %s",
cp, 1-ca2len,ca2len-1, afpChain.getName2() ) );
}
if(cp < 0) {
cp = cp + ca2len;
}
// the unprocessed alignment
int[][][] optAln = afpChain.getOptAln();
int[] optLen = afpChain.getOptLen();
// the processed alignment
List>> blocks = new ArrayList<>(afpChain.getBlockNum()*2);
//Update residue indices
// newi = (oldi-cp) % N
for(int block = 0; block < afpChain.getBlockNum(); block++) {
if(optLen[block]<1)
continue;
// set up storage for the current block
List> currBlock = new ArrayList<>(2);
currBlock.add( new ArrayList());
currBlock.add( new ArrayList());
blocks.add(currBlock);
// pos = 0 case
currBlock.get(0).add( optAln[block][0][0] );
currBlock.get(1).add( (optAln[block][1][0]+cp ) % ca2len);
for(int pos = 1; pos < optLen[block]; pos++) {
//check if we need to start a new block
//this happens when the new alignment crosses the protein terminus
if( optAln[block][1][pos-1]+cp= ca2len) {
currBlock = new ArrayList<>(2);
currBlock.add( new ArrayList());
currBlock.add( new ArrayList());
blocks.add(currBlock);
}
currBlock.get(0).add( optAln[block][0][pos] );
currBlock.get(1).add( (optAln[block][1][pos]+cp ) % ca2len);
}
}
// save permuted blocks to afpChain
assignOptAln(afpChain,blocks);
}
/**
* Sometimes it's convenient to store an alignment using java collections,
* where blocks.get(blockNum).get(0).get(pos) specifies the aligned
* residue at position pos of block blockNum of the first
* protein.
*
* This method takes such a collection and stores it into the afpChain's
* {@link AFPChain#setOptAln(int[][][]) optAln}, setting the associated
* length variables as well.
*
* @param afpChain
* @param blocks
*/
private static void assignOptAln(AFPChain afpChain, List>> blocks)
{
int[][][] optAln = new int[blocks.size()][][];
int[] optLen = new int[blocks.size()];
int optLength = 0;
int numBlocks = blocks.size();
for(int block = 0; block < numBlocks; block++) {
// block should be 2xN rectangular
assert(blocks.get(block).size() == 2);
assert( blocks.get(block).get(0).size() == blocks.get(block).get(1).size());
optLen[block] = blocks.get(block).get(0).size();
optLength+=optLen[block];
optAln[block] = new int[][] {
new int[optLen[block]],
new int[optLen[block]]
};
for(int pos = 0; pos < optLen[block]; pos++) {
optAln[block][0][pos] = blocks.get(block).get(0).get(pos);
optAln[block][1][pos] = blocks.get(block).get(1).get(pos);
}
}
afpChain.setBlockNum(numBlocks);
afpChain.setOptAln(optAln);
afpChain.setOptLen(optLen);
afpChain.setOptLength(optLength);
// TODO I don't know what these do. Should they be set?
//afpChain.setBlockSize(blockSize);
//afpChain.setBlockResList(blockResList);
//afpChain.setChainLen(chainLen);
}
/**
* Finds the optimal alignment between two proteins allowing for a circular
* permutation (CP).
*
* The precise algorithm is controlled by the
* {@link OptimalCECPParameters parameters}. If the parameter
* {@link OptimalCECPParameters#isTryAllCPs() tryAllCPs} is true, all possible
* CP sites are tried and the optimal site is returned. Otherwise, the
* {@link OptimalCECPParameters#getCPPoint() cpPoint} parameter is used to
* determine the CP point, greatly reducing the computation required.
*
* @param ca1 CA atoms of the first protein
* @param ca2 CA atoms of the second protein
* @param param {@link CeParameters} object
* @return The best-scoring alignment
* @throws StructureException
*
* @see #alignOptimal(Atom[], Atom[], Object, AFPChain[])
*/
@Override
public AFPChain align(Atom[] ca1, Atom[] ca2, Object param)
throws StructureException
{
if(params.isTryAllCPs()) {
return alignOptimal(ca1,ca2,param,null);
} else {
int cpPoint = params.getCPPoint();
return alignPermuted(ca1, ca2, param, cpPoint);
}
}
/**
* Finds the optimal alignment between two proteins allowing for a circular
* permutation (CP).
*
* This algorithm performs a CE alignment for each possible CP site. This is
* quite slow. Use {@link #alignHeuristic(Atom[], Atom[], Object)} for a
* faster algorithm.
*
* @param ca1 CA atoms of the first protein
* @param ca2 CA atoms of the second protein
* @param param {@link CeParameters} object
* @param alignments If not null, should be an empty array of the same length as
* ca2. This will be filled with the alignments from permuting ca2 by
* 0 to n-1 residues.
* @return The best-scoring alignment
* @throws StructureException
*/
public AFPChain alignOptimal(Atom[] ca1, Atom[] ca2, Object param, AFPChain[] alignments)
throws StructureException
{
long startTime = System.currentTimeMillis();
if(alignments != null && alignments.length != ca2.length) {
throw new IllegalArgumentException("scores param should have same length as ca2");
}
AFPChain unaligned = super.align(ca1, ca2, param);
AFPChain bestAlignment = unaligned;
if(debug) {
// print progress bar header
System.out.print("|");
for(int cp=1;cprequires additional dependencies biojava-structure-gui and JmolApplet
*
* @param afpChain
* @param ca1
* @param ca2
* @throws ClassNotFoundException
* @throws NoSuchMethodException
* @throws InvocationTargetException
* @throws IllegalAccessException
* @throws StructureException
*/
private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException {
Atom[] ca1clone = StructureTools.cloneAtomArray(ca1);
Atom[] ca2clone = StructureTools.cloneAtomArray(ca2);
if (! GuiWrapper.isGuiModuleInstalled()) {
System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!");
// display alignment in console
System.out.println(afpChain.toCE(ca1clone, ca2clone));
} else {
Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone);
GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol);
}
}
}