org.biojava.nbio.structure.symmetry.internal.SequenceFunctionRefiner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.symmetry.internal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeSet;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.util.AlignmentTools;
import org.biojava.nbio.structure.symmetry.utils.SymmetryTools;
/**
* Creates a refined alignment with the CE-Symm alternative self-alignment.
* Needs the order of symmetry and assumes that the last repeat aligns
* with the first, being thus a CLOSE symmetry.
*
* @author Spencer Bliven
* @author Aleix Lafita
* @since 4.2.0
*
*/
public class SequenceFunctionRefiner implements SymmetryRefiner {
@Override
public MultipleAlignment refine(AFPChain selfAlignment, Atom[] atoms,
int order) throws RefinerFailedException, StructureException {
if (order < 2) throw new RefinerFailedException(
"Symmetry not found in the structure: order < 2.");
AFPChain afp = refineSymmetry(selfAlignment, atoms, atoms, order);
return SymmetryTools.fromAFP(afp, atoms);
}
/**
* Refines a CE-Symm alignment so that it is perfectly symmetric.
*
* The resulting alignment will have a one-to-one correspondance between
* aligned residues of each symmetric part.
*
* @param afpChain Input alignment from CE-Symm
* @param k Symmetry order. This can be guessed by {@link CeSymm#getSymmetryOrder(AFPChain)}
* @return The refined alignment
* @throws StructureException
* @throws RefinerFailedException
*/
public static AFPChain refineSymmetry(AFPChain afpChain, Atom[] ca1, Atom[] ca2, int k)
throws StructureException, RefinerFailedException {
// Transform alignment to a Map
Map alignment = AlignmentTools.alignmentAsMap(afpChain);
// Refine the alignment Map
Map refined = refineSymmetry(alignment, k);
if (refined.size() < 1)
throw new RefinerFailedException("Refiner returned empty alignment");
//Substitute and partition the alignment
try {
AFPChain refinedAFP = AlignmentTools.replaceOptAln(afpChain, ca1, ca2, refined);
refinedAFP = partitionAFPchain(refinedAFP, ca1, ca2, k);
if (refinedAFP.getOptLength() < 1)
throw new IndexOutOfBoundsException("Refined alignment is empty.");
return refinedAFP;
} catch (IndexOutOfBoundsException e){
// This Exception is thrown when the refined alignment is not consistent
throw new RefinerFailedException("Refiner failure: non-consistent result", e);
}
}
/**
* Refines a CE-Symm alignment so that it is perfectly symmetric.
*
* The resulting alignment will have a one-to-one correspondance between
* aligned residues of each symmetric part.
* @param alignment The input alignment, as a map. This will be modified.
* @param k Symmetry order. This can be guessed by {@link CeSymm#getSymmetryOrder(AFPChain)}
* @return A modified map with the refined alignment
* @throws StructureException
*/
public static Map refineSymmetry(Map alignment,int k) throws StructureException {
// Store scores
Map scores = null;
scores = initializeScores(alignment,scores, k);
// Store eligible residues
// Eligible if:
// 1. score(x)>0
// 2. f^K-1(x) is defined
// 3. score(f^K-1(x))>0
TreeSet forwardLoops = new TreeSet<>();
TreeSet backwardLoops = new TreeSet<>();
List eligible = null;
eligible = initializeEligible(alignment,scores,eligible,k,forwardLoops,backwardLoops);
/* For future heap implementation
Comparator scoreComparator = new Comparator() {
@Override public int compare(Integer o1, Integer o2) {
if(scores.containsKey(o1)) {
if(scores.containsKey(o2)) {
// If both have defined scores, compare the scores
return scores.get(o1).compareTo(scores.get(o2));
} else {
// o2 has infinite score, so o1 < o2
return -1;
}
} else {
//o1 has infinite score
if(scores.containsKey(o2)) {
// o1 > o2
return 1;
} else {
//both undefined
return 0;
}
}
}
};
PriorityQueue heap = new PriorityQueue(alignment.size(), scoreComparator);
*/
//int step = 0;
while (!eligible.isEmpty()) {
//System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment));
// Find eligible residue with lowest scores
Integer bestRes = null;
double bestResScore = Double.POSITIVE_INFINITY;
for(Integer res : eligible) {
Double score = scores.get(res);
if (score != null && score < bestResScore) {
bestResScore = score;
bestRes = res;
}
}
// Find f^k-1(bestRes)
Integer resK1 = bestRes;
for (int i = 0; i < k - 1; i++) {
assert (resK1 != null);
resK1 = alignment.get(resK1);
// Update scores
scores.put(resK1, 0.0);
}
scores.put(bestRes, 0.0);
// Modify alignment
alignment.put(resK1, bestRes);
scores = initializeScores(alignment, scores, k);
Map virginScores = initializeScores(alignment, null, k);
if (scores.size() != virginScores.size()) {
System.out.println("Size missmatch");
} else {
for (Integer key : scores.keySet()) {
if (!virginScores.containsKey(key) || !scores.get(key).equals(virginScores.get(key))) {
System.out.format("Mismatch %d -> %f/%f%n", key, scores.get(key), virginScores.get(key));
}
}
}
// Update eligible
// TODO only update residues which could become ineligible
eligible = initializeEligible(alignment, scores, eligible, k, forwardLoops, backwardLoops);
// System.out.format("Modifying %d -> %d. %d now eligible.%n", resK1,bestRes,eligible.size());
}
//System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment));
// Remove remaining edges
Iterator alignmentIt = alignment.keySet().iterator();
while (alignmentIt.hasNext()) {
Integer res = alignmentIt.next();
Double score = scores.get(res);
if (score == null || score > 0.0) {
alignmentIt.remove();
}
}
//System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment));
return alignment;
}
/**
* Helper method to initialize eligible residues.
*
* Eligible if:
* 1. score(x)>0
* 2. f^K-1(x) is defined
* 3. score(f^K-1(x))>0
* 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) )
* @param alignment The alignment with respect to which to calculate eligibility
* @param scores An up-to-date map from residues to their scores
* @param eligible Starting list of eligible residues. If null will be generated.
* @param k
* @param backwardLoops
* @param forwardLoops
* @return eligible after modification
*/
private static List initializeEligible(Map alignment,
Map scores, List eligible, int k, NavigableSet forwardLoops, NavigableSet backwardLoops) {
// Eligible if:
// 1. score(x)>0
// 2. f^K-1(x) is defined
// 3. score(f^K-1(x))>0
// 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) )
// 5. Not in a loop of length less than k
// Assume all residues are eligible to start
if(eligible == null) {
eligible = new LinkedList<>(alignment.keySet());
}
// Precalculate f^K-1(x)
// Map alignK1 = AlignmentTools.applyAlignment(alignment, k-1);
Map alignK1 = applyAlignmentAndCheckCycles(alignment, k - 1, eligible);
// Remove ineligible residues
Iterator eligibleIt = eligible.iterator();
while(eligibleIt.hasNext()) {
Integer res = eligibleIt.next();
// 2. f^K-1(x) is defined
if(!alignK1.containsKey(res)) {
eligibleIt.remove();
continue;
}
Integer k1 = alignK1.get(res);
if(k1 == null) {
eligibleIt.remove();
continue;
}
// 1. score(x)>0
Double score = scores.get(res);
if(score == null || score <= 0.0) {
eligibleIt.remove();
// res is in a loop. Add it to the proper set
if(res <= alignment.get(res)) {
//forward
forwardLoops.add(res);
} else {
//backward
backwardLoops.add(res);
}
continue;
}
// 3. score(f^K-1(x))>0
Double scoreK1 = scores.get(k1);
if(scoreK1 == null || scoreK1 <= 0.0) {
eligibleIt.remove();
continue;
}
}
// Now that loops are up-to-date, check for loop crossings
eligibleIt = eligible.iterator();
while(eligibleIt.hasNext()) {
Integer res = eligibleIt.next();
//4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) )
//Test equivalent: All loop edges should be properly ordered wrt edge f^k-1(x) -> x
Integer src = alignK1.get(res);
if( src < res ) {
//forward
// get interval [a,b) containing res
Integer a = forwardLoops.floor(src);
Integer b = forwardLoops.higher(src);
// Ineligible unless f(a) < res < f(b)
if(a != null && alignment.get(a) > res ) {
eligibleIt.remove();
continue;
}
if(b != null && alignment.get(b) < res ) {
eligibleIt.remove();
continue;
}
}
}
return eligible;
}
/**
* Like {@link AlignmentTools#applyAlignment(Map, int)}, returns a map of k applications of alignmentMap. However,
* it also sets loops of size less than k as ineligible.
*
* @param alignmentMap
* f(x)
* @param k
* @param eligible
* Eligible residues. Residues from small cycles are removed.
* @return f^k(x)
*/
private static Map applyAlignmentAndCheckCycles(Map alignmentMap, int k, List eligible) {
// Convert to lists to establish a fixed order (avoid concurrent modification)
List preimage = new ArrayList<>(alignmentMap.keySet()); // currently unmodified
List image = new ArrayList<>(preimage);
for (int n = 1; n <= k; n++) {
// apply alignment
for (int i = 0; i < image.size(); i++) {
final Integer pre = image.get(i);
final Integer post = (pre == null ? null : alignmentMap.get(pre));
image.set(i, post);
// Make cycles ineligible
if (post != null && post.equals(preimage.get(i))) {
eligible.remove(preimage.get(i)); // Could be O(n) with List impl
}
}
}
Map imageMap = new HashMap<>(alignmentMap.size());
// now populate with actual values
for (int i = 0; i < preimage.size(); i++) {
Integer pre = preimage.get(i);
Integer postK = image.get(i);
imageMap.put(pre, postK);
}
return imageMap;
}
/**
* Calculates all scores for an alignment
* @param alignment
* @param scores A mapping from residues to scores, which will be updated or
* created if null
* @return scores
*/
private static Map initializeScores(Map alignment,
Map scores, int k) {
if(scores == null) {
scores = new HashMap<>(alignment.size());
} else {
scores.clear();
}
Map alignK = AlignmentTools.applyAlignment(alignment, k);
// calculate input range
int maxPre = Integer.MIN_VALUE;
int minPre = Integer.MAX_VALUE;
for(Integer pre : alignment.keySet()) {
if(pre>maxPre) maxPre = pre;
if(pre 0)
error += (double)(pre-minPre)/(1+maxPre-minPre);
return error;
}
/**
* Partitions an afpChain alignment into order blocks of aligned residues.
*
* @param afpChain
* @param ca1
* @param ca2
* @param order
* @return
* @throws StructureException
*/
private static AFPChain partitionAFPchain(AFPChain afpChain,
Atom[] ca1, Atom[] ca2, int order) throws StructureException{
int[][][] newAlgn = new int[order][][];
int repeatLen = afpChain.getOptLength()/order;
//Extract all the residues considered in the first chain of the alignment
List alignedRes = new ArrayList<>();
for (int su=0; su