All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.symmetry.internal.SequenceFunctionRefiner Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.symmetry.internal;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeSet;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.util.AlignmentTools;
import org.biojava.nbio.structure.symmetry.utils.SymmetryTools;

/**
 * Creates a refined alignment with the CE-Symm alternative self-alignment.
 * Needs the order of symmetry and assumes that the last repeat aligns
 * with the first, being thus a CLOSE symmetry.
 *
 * @author Spencer Bliven
 * @author Aleix Lafita
 * @since 4.2.0
 *
 */
public class SequenceFunctionRefiner implements SymmetryRefiner {

	@Override
	public MultipleAlignment refine(AFPChain selfAlignment, Atom[] atoms,
			int order) throws RefinerFailedException, StructureException {

		if (order < 2)	throw new RefinerFailedException(
				"Symmetry not found in the structure: order < 2.");

		AFPChain afp = refineSymmetry(selfAlignment, atoms, atoms, order);
		return SymmetryTools.fromAFP(afp, atoms);
	}

	/**
	 * Refines a CE-Symm alignment so that it is perfectly symmetric.
	 * 

* The resulting alignment will have a one-to-one correspondance between * aligned residues of each symmetric part. * * @param afpChain Input alignment from CE-Symm * @param k Symmetry order. This can be guessed by {@link AlignmentTools#getSymmetryOrder(Map, Map, int, float)} * @return The refined alignment * @throws StructureException * @throws RefinerFailedException */ public static AFPChain refineSymmetry(AFPChain afpChain, Atom[] ca1, Atom[] ca2, int k) throws StructureException, RefinerFailedException { // Transform alignment to a Map Map alignment = AlignmentTools.alignmentAsMap(afpChain); // Refine the alignment Map Map refined = refineSymmetry(alignment, k); if (refined.size() < 1) throw new RefinerFailedException("Refiner returned empty alignment"); //Substitute and partition the alignment try { AFPChain refinedAFP = AlignmentTools.replaceOptAln(afpChain, ca1, ca2, refined); refinedAFP = partitionAFPchain(refinedAFP, ca1, ca2, k); if (refinedAFP.getOptLength() < 1) throw new IndexOutOfBoundsException("Refined alignment is empty."); return refinedAFP; } catch (IndexOutOfBoundsException e){ // This Exception is thrown when the refined alignment is not consistent throw new RefinerFailedException("Refiner failure: non-consistent result", e); } } /** * Refines a CE-Symm alignment so that it is perfectly symmetric. *

* The resulting alignment will have a one-to-one correspondance between * aligned residues of each symmetric part. * @param alignment The input alignment, as a map. This will be modified. * @param k Symmetry order. This can be guessed by {@link AlignmentTools#getSymmetryOrder(Map, Map, int, float)} * @return A modified map with the refined alignment * @throws StructureException */ public static Map refineSymmetry(Map alignment,int k) throws StructureException { // Store scores Map scores = null; scores = initializeScores(alignment,scores, k); // Store eligible residues // Eligible if: // 1. score(x)>0 // 2. f^K-1(x) is defined // 3. score(f^K-1(x))>0 TreeSet forwardLoops = new TreeSet<>(); TreeSet backwardLoops = new TreeSet<>(); List eligible = null; eligible = initializeEligible(alignment,scores,eligible,k,forwardLoops,backwardLoops); /* For future heap implementation Comparator scoreComparator = new Comparator() { @Override public int compare(Integer o1, Integer o2) { if(scores.containsKey(o1)) { if(scores.containsKey(o2)) { // If both have defined scores, compare the scores return scores.get(o1).compareTo(scores.get(o2)); } else { // o2 has infinite score, so o1 < o2 return -1; } } else { //o1 has infinite score if(scores.containsKey(o2)) { // o1 > o2 return 1; } else { //both undefined return 0; } } } }; PriorityQueue heap = new PriorityQueue(alignment.size(), scoreComparator); */ //int step = 0; while (!eligible.isEmpty()) { //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment)); // Find eligible residue with lowest scores Integer bestRes = null; double bestResScore = Double.POSITIVE_INFINITY; for(Integer res : eligible) { Double score = scores.get(res); if (score != null && score < bestResScore) { bestResScore = score; bestRes = res; } } // Find f^k-1(bestRes) Integer resK1 = bestRes; for (int i = 0; i < k - 1; i++) { assert (resK1 != null); resK1 = alignment.get(resK1); // Update scores scores.put(resK1, 0.0); } scores.put(bestRes, 0.0); // Modify alignment alignment.put(resK1, bestRes); scores = initializeScores(alignment, scores, k); Map virginScores = initializeScores(alignment, null, k); if (scores.size() != virginScores.size()) { System.out.println("Size missmatch"); } else { for (Integer key : scores.keySet()) { if (!virginScores.containsKey(key) || !scores.get(key).equals(virginScores.get(key))) { System.out.format("Mismatch %d -> %f/%f%n", key, scores.get(key), virginScores.get(key)); } } } // Update eligible // TODO only update residues which could become ineligible eligible = initializeEligible(alignment, scores, eligible, k, forwardLoops, backwardLoops); // System.out.format("Modifying %d -> %d. %d now eligible.%n", resK1,bestRes,eligible.size()); } //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment)); // Remove remaining edges Iterator alignmentIt = alignment.keySet().iterator(); while (alignmentIt.hasNext()) { Integer res = alignmentIt.next(); Double score = scores.get(res); if (score == null || score > 0.0) { alignmentIt.remove(); } } //System.out.format("Step %d: %s%n", ++step, AlignmentTools.toConciseAlignmentString(alignment)); return alignment; } /** * Helper method to initialize eligible residues. * * Eligible if: * 1. score(x)>0 * 2. f^K-1(x) is defined * 3. score(f^K-1(x))>0 * 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) ) * @param alignment The alignment with respect to which to calculate eligibility * @param scores An up-to-date map from residues to their scores * @param eligible Starting list of eligible residues. If null will be generated. * @param k * @param backwardLoops * @param forwardLoops * @return eligible after modification */ private static List initializeEligible(Map alignment, Map scores, List eligible, int k, NavigableSet forwardLoops, NavigableSet backwardLoops) { // Eligible if: // 1. score(x)>0 // 2. f^K-1(x) is defined // 3. score(f^K-1(x))>0 // 4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) ) // 5. Not in a loop of length less than k // Assume all residues are eligible to start if(eligible == null) { eligible = new LinkedList<>(alignment.keySet()); } // Precalculate f^K-1(x) // Map alignK1 = AlignmentTools.applyAlignment(alignment, k-1); Map alignK1 = applyAlignmentAndCheckCycles(alignment, k - 1, eligible); // Remove ineligible residues Iterator eligibleIt = eligible.iterator(); while(eligibleIt.hasNext()) { Integer res = eligibleIt.next(); // 2. f^K-1(x) is defined if(!alignK1.containsKey(res)) { eligibleIt.remove(); continue; } Integer k1 = alignK1.get(res); if(k1 == null) { eligibleIt.remove(); continue; } // 1. score(x)>0 Double score = scores.get(res); if(score == null || score <= 0.0) { eligibleIt.remove(); // res is in a loop. Add it to the proper set if(res <= alignment.get(res)) { //forward forwardLoops.add(res); } else { //backward backwardLoops.add(res); } continue; } // 3. score(f^K-1(x))>0 Double scoreK1 = scores.get(k1); if(scoreK1 == null || scoreK1 <= 0.0) { eligibleIt.remove(); continue; } } // Now that loops are up-to-date, check for loop crossings eligibleIt = eligible.iterator(); while(eligibleIt.hasNext()) { Integer res = eligibleIt.next(); //4. For all y, score(y)==0 implies sign(f^K-1(x)-y) == sign(x-f(y) ) //Test equivalent: All loop edges should be properly ordered wrt edge f^k-1(x) -> x Integer src = alignK1.get(res); if( src < res ) { //forward // get interval [a,b) containing res Integer a = forwardLoops.floor(src); Integer b = forwardLoops.higher(src); // Ineligible unless f(a) < res < f(b) if(a != null && alignment.get(a) > res ) { eligibleIt.remove(); continue; } if(b != null && alignment.get(b) < res ) { eligibleIt.remove(); continue; } } } return eligible; } /** * Like {@link AlignmentTools#applyAlignment(Map, int)}, returns a map of k applications of alignmentMap. However, * it also sets loops of size less than k as ineligible. * * @param alignmentMap * f(x) * @param k * @param eligible * Eligible residues. Residues from small cycles are removed. * @return f^k(x) */ private static Map applyAlignmentAndCheckCycles(Map alignmentMap, int k, List eligible) { // Convert to lists to establish a fixed order (avoid concurrent modification) List preimage = new ArrayList<>(alignmentMap.keySet()); // currently unmodified List image = new ArrayList<>(preimage); for (int n = 1; n <= k; n++) { // apply alignment for (int i = 0; i < image.size(); i++) { final Integer pre = image.get(i); final Integer post = (pre == null ? null : alignmentMap.get(pre)); image.set(i, post); // Make cycles ineligible if (post != null && post.equals(preimage.get(i))) { eligible.remove(preimage.get(i)); // Could be O(n) with List impl } } } Map imageMap = new HashMap<>(alignmentMap.size()); // now populate with actual values for (int i = 0; i < preimage.size(); i++) { Integer pre = preimage.get(i); Integer postK = image.get(i); imageMap.put(pre, postK); } return imageMap; } /** * Calculates all scores for an alignment * @param alignment * @param scores A mapping from residues to scores, which will be updated or * created if null * @return scores */ private static Map initializeScores(Map alignment, Map scores, int k) { if(scores == null) { scores = new HashMap<>(alignment.size()); } else { scores.clear(); } Map alignK = AlignmentTools.applyAlignment(alignment, k); // calculate input range int maxPre = Integer.MIN_VALUE; int minPre = Integer.MAX_VALUE; for(Integer pre : alignment.keySet()) { if(pre>maxPre) maxPre = pre; if(pre 0) error += (double)(pre-minPre)/(1+maxPre-minPre); return error; } /** * Partitions an afpChain alignment into order blocks of aligned residues. * * @param afpChain * @param ca1 * @param ca2 * @param order * @return * @throws StructureException */ private static AFPChain partitionAFPchain(AFPChain afpChain, Atom[] ca1, Atom[] ca2, int order) throws StructureException{ int[][][] newAlgn = new int[order][][]; int repeatLen = afpChain.getOptLength()/order; //Extract all the residues considered in the first chain of the alignment List alignedRes = new ArrayList<>(); for (int su=0; su





© 2015 - 2025 Weber Informatics LLC | Privacy Policy