All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.tcs.alignment.AbstractAffineAlignmentAlgorithm Maven / Gradle / Ivy

/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.SkipComparator;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.ArrayList;
import java.util.EnumMap;

/**
 * This is an implementation of the affine alignment algorithm scheme
 * combining actually two approaches: First the algorithm of Gotoh et al.
 * for gap extensions (meaning, that we allow to skip longer regions in the
 * middle of an alignment at low cost) and second the traditional local
 * alignment in the sense that we allow skips at the beginning and end of an
 * alignment. The task here is to identify those regions of the two input
 * sequence that match the best and skip the rest of the sequences at low cost.
 *
 * A special thanks goes to Prof. Robert Giegerich, Dr. Stefan Janssen and
 * Benedikt Löwes at Bielefeld University for teaching me Advanced Dynamic
 * Programming. Otherwise this implementation here would not have been possible.
 *
 * The abstract algorithm that inspires this implementation is described in
 * Dr. Georg Sauthoffs "Bellman's Gap" GAP-Language in the file
 * bellmans_gap/affine_alignment.gap.
 *
 * @author Benjamin Paassen - [email protected]
 * @param  the result class of this alignment algorithm.
 */
public abstract class AbstractAffineAlignmentAlgorithm implements SkipAlignmentAlgorithm {

	private final Class resultClass;
	private final AlignmentSpecification alignmentSpecification;
	private double weightThreshold = 0;
	private int minMiddleSkips = 4;

	public AbstractAffineAlignmentAlgorithm(Class resultClass,
			AlignmentSpecification alignmentSpecification) {
		this.resultClass = resultClass;
		this.alignmentSpecification = alignmentSpecification;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public AlignmentSpecification getSpecification() {
		return alignmentSpecification;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public Class getResultClass() {
		return resultClass;
	}

	/**
	 * Set a weight threshold (between 0 and 1) that determines which keywords
	 * should be ignored during calculation because their weight is negligible.
	 *
	 * The default value is 0.
	 *
	 * @param weightThreshold a weight threshold (between 0 and 1)
	 */
	public void setWeightThreshold(double weightThreshold) {
		if (weightThreshold < 0 || weightThreshold > 1) {
			throw new RuntimeException("A weight threshold has to be between 0 and 1!");
		}
		this.weightThreshold = weightThreshold;
	}

	/**
	 *
	 * @return The current weight threshold (0 per default).
	 */
	public double getWeightThreshold() {
		return weightThreshold;
	}

	/**
	 * The minimum number of skips that have to be done in the middle
	 * of an alignment. Otherwise only deletions and insertions are allowed.
	 * This is 4 per default.
	 *
	 * @param minMiddleSkips the minimum number of skips that have to be done in
	 * the middle of an alignment.
	 */
	public void setMinMiddleSkips(int minMiddleSkips) {
		this.minMiddleSkips = minMiddleSkips;
	}

	/**
	 * The minimum number of skips that have to be done in the middle
	 * of an alignment. Otherwise only deletions and insertions are allowed.
	 * This is 4 per default.
	 *
	 * @return the minimum number of skips that have to be done in the middle
	 * of an alignment.
	 */
	public int getMinMiddleSkips() {
		return minMiddleSkips;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public R calculateAlignment(Sequence a, Sequence b) {

		//check validity
		if (a.getNodeSpecification() != alignmentSpecification.getNodeSpecification()
				&& !a.getNodeSpecification().equals(alignmentSpecification.getNodeSpecification())) {
			throw new RuntimeException(
					"The first input sequence has an unexpected node specification!");
		}
		if (a.getNodeSpecification() != b.getNodeSpecification()
				&& !a.getNodeSpecification().equals(b.getNodeSpecification())) {
			throw new RuntimeException(
					"The node specifications of both input sequences to not match!");
		}
		//check validity of comparators.
		for (int k = 0; k < alignmentSpecification.size(); k++) {
			if (!(alignmentSpecification.getComparator(k) instanceof SkipComparator)) {
				throw new RuntimeException("The comparator for keyword "
						+ alignmentSpecification.getKeyword(k) + " does not support skips!");
			}
		}

		//identify the subset of comparators that have an above threshold weighting.
		final ArrayList relevantIndices = new ArrayList<>();
		for (int k = 0; k < alignmentSpecification.size(); k++) {
			if (alignmentSpecification.getWeighting()[k] > weightThreshold) {
				relevantIndices.add(k);
			}
		}

		//pre-calculation caching.
		final SkipComparator[] comparators = new SkipComparator[relevantIndices.size()];
		final double[] weights = new double[relevantIndices.size()];
		final int[] originalIndices = new int[relevantIndices.size()];
		for (int k = 0; k < comparators.length; k++) {
			comparators[k] = (SkipComparator) alignmentSpecification.getComparator(relevantIndices.get(k));
			weights[k] = alignmentSpecification.getWeighting()[relevantIndices.get(k)];
			originalIndices[k] = alignmentSpecification.getOriginalIndex(relevantIndices.get(k));
		}
		final int M = a.getNodes().size();
		final int N = b.getNodes().size();

		/*
		 * Initialize the dynamic programming tables for each recurrence.
		 */
		final EnumMap dp_tables = new EnumMap<>(Recurrence.class);
		for (final Recurrence rec : Recurrence.values()) {
			dp_tables.put(rec, new double[M + 1][N + 1]);
		}

		/*
		 * As we have more recurrences than operations it is probably better
		 * to pre-calculate the local operation costs.
		 */
		Node left;
		Node right;
		final double[][] compareMatrix = new double[M][N];
		if (N > 0) {
			for (int i = 0; i < M; i++) {
				for (int j = 0; j < N; j++) {
					left = a.getNodes().get(i);
					right = b.getNodes().get(j);
					for (int k = 0; k < comparators.length; k++) {
						compareMatrix[i][j] += weights[k] * comparators[k].compare(
								left.getValue(originalIndices[k]),
								right.getValue(originalIndices[k]));
					}
				}
			}
		}
		final double[] delMatrix = new double[M];
		final double[] insMatrix = new double[N];
		final double[] skipDelMatrix = new double[M];
		final double[] skipInsMatrix = new double[N];

		for (int i = 0; i < M; i++) {
			left = a.getNodes().get(i);
			for (int k = 0; k < comparators.length; k++) {
				delMatrix[i] += weights[k] * comparators[k].delete(
						left.getValue(originalIndices[k]));
				skipDelMatrix[i] += weights[k] * comparators[k].skipDelete(
						left.getValue(originalIndices[k]));
			}
		}

		for (int j = 0; j < N; j++) {
			right = b.getNodes().get(j);
			for (int k = 0; k < comparators.length; k++) {
				insMatrix[j] += weights[k] * comparators[k].insert(
						right.getValue(originalIndices[k]));
				skipInsMatrix[j] += weights[k] * comparators[k].skipInsert(
						right.getValue(originalIndices[k]));
			}
		}

		/*
		 * I'll quote the corresponding line of the Bellmans Gap grammar
		 * here to make clear, what I'm doing.
		 */
		/*
		 * Calculate the SKIPINS_END and SKIPDEL_END recurrence first.
		 *
		 * SKIPINS_END = skip_ins(, SKIPINS_END) |
		 * nil() #h;
		 * SKIPDEL_END = skip_del(, SKIPDEL_END) | SKIPINS_END
		 * #h;
		 *
		 * This leads to:
		 * SKIPINS_END(M,N) = 0
		 * SKIPINS_END(M,j) = SKIPINS_END(M,j+1) + skip_ins(j) where j < N
		 * SKIPDEL_END(M,j) = SKIPINS_END(M,j)
		 * SKIPDEL_END(i,j) = SKIPINS_END(i+1,j) + skip_del(i) where i < M
		 */
		for (int j = N - 1; j >= 0; j--) {
			dp_tables.get(Recurrence.SKIPINS_END)[M][j]
					= dp_tables.get(Recurrence.SKIPINS_END)[M][j + 1]
					+ skipInsMatrix[j];
			dp_tables.get(Recurrence.SKIPDEL_END)[M][j]
					= dp_tables.get(Recurrence.SKIPINS_END)[M][j];
		}
		for (int i = M - 1; i >= 0; i--) {
			for (int j = N; j >= 0; j--) {
				dp_tables.get(Recurrence.SKIPDEL_END)[i][j]
						= dp_tables.get(Recurrence.SKIPDEL_END)[i + 1][j]
						+ skipDelMatrix[i];
			}
		}
		/*
		 * Now we can initialize the last row and column of the ALI
		 * recurrence, which is defined as:
		 *
		 * ALI = del(, DEL) | ins(, INS) |
		 * rep(, ALI) |
		 * SKIPDEL_MIDDLE | SKIPINS_MIDDLE | SKIPDEL_END #h;
		 *
		 * The DEL, INS, SKIPDEL_MIDDLE and SKIPINS_MIDDLE recurrences
		 * depend on ALI themselves, thus the only choice at the end is
		 * SKIPDEL_END.
		 *
		 * ALI = SKIPDEL_END;
		 *
		 */
		/*
		 * Calculate this part of the ALI matrix:
		 *
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * xxxxxxxxxx-
		 */
		for (int i = M - 1; i >= 0; i--) {
			dp_tables.get(Recurrence.ALI)[i][N]
					= dp_tables.get(Recurrence.SKIPDEL_END)[i][N];
		}
		/*
		 * Calculate this part of the ALI matrix:
		 *
		 * ----------x
		 * ----------x
		 * ----------x
		 * ----------x
		 * ----------x
		 * ----------x
		 */
		for (int j = N; j >= 0; j--) {
			dp_tables.get(Recurrence.ALI)[M][j]
					= dp_tables.get(Recurrence.SKIPDEL_END)[M][j];
		}

		/*
		 * We now calculate the part of ALI where we can not yet use the
		 * middle skips because we have not yet reached the necessary
		 * minimum number of letters to skip (and we also need one replacement
		 * at the end). That marks this part of the ALI matrix:
		 *
		 * -----------
		 * ------xxxx-
		 * ------xxxx-
		 * ------xxxx-
		 * ------xxxx-
		 * -----------
		 */
		final int leftPreMiddleSkipsLimit = Math.max(-1, M - minMiddleSkips - 1);
		final int rightPreMiddleSkipsLimit = Math.max(-1, N - minMiddleSkips - 1);

		/*
		 * At position [M-1][N-1] of ALI, we can either use
		 * SKIPDEL_END or replacements. We can also initialize that position
		 * for the DEL and INS recurrence, because it is very similar.
		 *
		 * ALI = rep(, ALI) | SKIPDEL_END #h;
		 * DEL = rep(, ALI);
		 * INS = rep(, ALI);
		 *
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * ---------x-
		 * -----------
		 */
		if (M > 0 && N > 0) {
			final double repLocal = dp_tables.get(Recurrence.ALI)[M][N]
					+ compareMatrix[M - 1][N - 1];
			dp_tables.get(Recurrence.ALI)[M - 1][N - 1] = choice(
					dp_tables.get(Recurrence.SKIPDEL_END)[M - 1][N - 1],
					repLocal);
			dp_tables.get(Recurrence.DEL)[M - 1][N - 1] = repLocal;
			dp_tables.get(Recurrence.INS)[M - 1][N - 1] = repLocal;
		}
		/*
		 * In the second-last row we can use deletions, but no insertions.
		 * We will also calculate the DEL and INS recurrence here, which are
		 * defined as:
		 *
		 * DEL = del(, DEL) | ins(, INS) |
		 * rep(, ALI) #h;
		 * INS = ins(, INS) | rep(, ALI) #h;
		 *
		 * Relevant for this part are:
		 *
		 * ALI = del(, DEL) | rep(, ALI) | SKIPDEL_END
		 * #h;
		 * DEL = del(, DEL) | rep(, ALI) #h;
		 * INS = rep(, ALI) #h;
		 *
		 * Calculate this part of the ALI matrix:
		 *
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * -----xxxx--
		 * -----------
		 */
		if (N > 0) {
			for (int i = M - 2; i >= 0; i--) {
				final double repLocal
						= dp_tables.get(Recurrence.ALI)[i + 1][N]
						+ compareMatrix[i][N - 1];
				final double delLocal
						= dp_tables.get(Recurrence.DEL)[i + 1][N - 1]
						+ delMatrix[i];
				//calculate ALI
				if (i > leftPreMiddleSkipsLimit) {
					dp_tables.get(Recurrence.ALI)[i][N - 1] = choice(
							repLocal,
							delLocal,
							dp_tables.get(Recurrence.SKIPDEL_END)[i][N - 1]);
				}
				//calculate DEL
				dp_tables.get(Recurrence.DEL)[i][N - 1] = choice(
						repLocal, delLocal);
				//calculate INS
				dp_tables.get(Recurrence.INS)[i][N - 1] = repLocal;
			}
		}
		/*
		 * In the second last column we can use insertions, but no deletions.
		 *
		 * Relevant for this part is:
		 *
		 * ALI = ins(, INS) | rep(, ALI) |
		 * SKIPDEL_END #h;
		 *
		 * DEL = ins(, INS) | rep(, ALI) #h;
		 *
		 * INS = ins(, INS) | rep(, ALI) #h;
		 *
		 * Calculate this part of the ALI matrix:
		 *
		 * -----------
		 * ---------x-
		 * ---------x-
		 * ---------x-
		 * -----------
		 * -----------
		 */
		if (M > 0) {
			for (int j = N - 2; j >= 0; j--) {
				final double repLocal
						= dp_tables.get(Recurrence.ALI)[M][j + 1]
						+ compareMatrix[M - 1][j];
				final double insLocal
						= dp_tables.get(Recurrence.INS)[M - 1][j + 1]
						+ insMatrix[j];
				//calculate ALI
				if (j > rightPreMiddleSkipsLimit) {
					dp_tables.get(Recurrence.ALI)[M - 1][j] = choice(
							repLocal,
							insLocal,
							dp_tables.get(Recurrence.SKIPDEL_END)[M - 1][j]);
				}
				//calculate DEL
				dp_tables.get(Recurrence.DEL)[M - 1][j] = choice(
						repLocal, insLocal);
				//calculate INS
				dp_tables.get(Recurrence.INS)[M - 1][j] = choice(
						repLocal, insLocal);
			}
		}
		/*
		 * In the next part of the matrix, we can calculate insertions as well
		 * as deletions, but no skips yet. So the relevant recurrences are:
		 *
		 * ALI = del(, DEL) | ins(, INS) |
		 * rep(, ALI) | SKIPDEL_END #h;
		 *
		 * DEL = del(, DEL) | ins(, INS) |
		 * rep(, ALI) #h;
		 *
		 * INS = ins(, INS) | rep(, ALI) #h;
		 */
		/*
		 * Calculate this part of the ALI matrix:
		 *
		 * -----------
		 * ------xxx--
		 * ------xxx--
		 * ------xxx--
		 * -----------
		 * -----------
		 */
		if (N > 1) {
			for (int i = M - 2; i > leftPreMiddleSkipsLimit; i--) {
				for (int j = N - 2; j > rightPreMiddleSkipsLimit; j--) {
					final double[] choices = new double[4];
					choices[0] = dp_tables.get(Recurrence.DEL)[i + 1][j]
							+ delMatrix[i];
					choices[1] = dp_tables.get(Recurrence.INS)[i][j + 1]
							+ insMatrix[j];
					choices[2] = dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
							+ compareMatrix[i][j];
					choices[3] = dp_tables.get(Recurrence.SKIPDEL_END)[i][j];
					dp_tables.get(Recurrence.ALI)[i][j] = choice(choices);
					dp_tables.get(Recurrence.DEL)[i][j] = choice(
							choices[0], choices[1], choices[2]);
					dp_tables.get(Recurrence.INS)[i][j] = choice(
							choices[1], choices[2]);
				}
			}
		}

		/*
		 * We do the same for the part of the ALI matrix where we only can
		 * use deletions and skip-deletions, but no insertions. So the
		 * relevant recurrences are:
		 *
		 * ALI = del(, DEL) | rep(, ALI) |
		 * SKIPDEL_MIDDLE | SKIPDEL_END #h;
		 *
		 * DEL = del(, DEL) | ins(, INS) |
		 * rep(, ALI) #h;
		 *
		 * SKIPDEL_MIDDLE = skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * SKIPDEL_MIDDLE_LOOP)))) #h;
		 *
		 * SKIPDEL_MIDDLE_LOOP = skip_del(, SKIPDEL_MIDDLE_LOOP) |
		 * rep(,ALI) #h;
		 *
		 * So we also need to calculate the SKIPDEL_MIDDLE_LOOP recurrence,
		 * which we call SKIPDEL_MIDDLE here, because we do not need to tabulate
		 * the SKIPDEL_MIDDLE reccurence. This can be circumvented by
		 * converting the grammar to:
		 *
		 * ALI = del(, DEL) | rep(, ALI) |
		 * skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * SKIPDEL_MIDDLE_LOOP)))) | SKIPDEL_END #h;
		 *
		 * So the SKIPDEL_MIDDLE recurrence is just for better readability in
		 * the gap-L-code and not needed per se.
		 *
		 * calculate this part of the ALI matrix:
		 *
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * xxxxxx-----
		 * -----------
		 *
		 */
		if (M > minMiddleSkips + 1 && N > 0) {
			//initialize second-last column of SKIPDEL_MIDDLE
			for (int j = N - 1; j >= 0; j--) {
				dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[M - 1][j]
						= dp_tables.get(Recurrence.ALI)[M][j + 1]
						+ compareMatrix[M - 1][j];
			}
			//initialize second-last row of SKIPDEL_MIDDLE up to the desired point.
			for (int i = M - 2; i > leftPreMiddleSkipsLimit; i--) {
				final double repLocal = dp_tables.get(Recurrence.ALI)[i + 1][N]
						+ compareMatrix[i][N - 1];
				dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i][N - 1] = choice(
						dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][N - 1]
						+ skipDelMatrix[i], repLocal);
			}
			for (int i = leftPreMiddleSkipsLimit; i >= 0; i--) {
				//calculate SKIPDEL_MIDDLE
				final double repLocal = dp_tables.get(Recurrence.ALI)[i + 1][N]
						+ compareMatrix[i][N - 1];
				if (i >= minMiddleSkips) {
					dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i][N - 1] = choice(
							dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][N - 1]
							+ skipDelMatrix[i], repLocal);
				}
				//calculate ALI
				double skipDelMiddle = dp_tables.get(
						Recurrence.SKIPDEL_MIDDLE)[i + minMiddleSkips][N - 1];
				for (int i2 = 0; i2 < minMiddleSkips; i2++) {
					skipDelMiddle += skipDelMatrix[i + i2];
				}
				dp_tables.get(Recurrence.ALI)[i][N - 1] = choice(
						skipDelMiddle,
						repLocal,
						dp_tables.get(Recurrence.DEL)[i + 1][N - 1]
						+ delMatrix[i]
				);
			}
		}

		/*
		 * Now we calculate this part of the ALI matrix:
		 *
		 * ---------x-
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 *
		 * where it is possible to insert and skip insert, but not to use
		 * deletions. The relevant recurrences are.
		 *
		 * ALI = ins(, INS) | rep(, ALI) |
		 * SKIPINS_MIDDLE | SKIPDEL_END #h;
		 *
		 * INS = ins(, INS) | rep(, ALI) #h;
		 *
		 * SKIPINS_MIDDLE = skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * SKIPINS_MIDDLE_LOOP)))) #h;
		 *
		 * SKIPINS_MIDDLE_LOOP = skip_ins(, SKIPINS_MIDDLE_LOOP) |
		 * rep(,ALI) #h;
		 *
		 * So we also need to calculate the SKIPINS_MIDDLE_LOOP recurrence,
		 * which we call SKIPINS_MIDDLE here, because we do not need to tabulate
		 * the SKIPINS_MIDDLE reccurence. This can be circumvented by
		 * converting the grammar to:
		 *
		 * ALI = ins(, INS) | rep(, ALI) |
		 * skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * SKIPINS_MIDDLE_LOOP)))) | SKIPDEL_END #h;
		 *
		 * So the SKIPINS_MIDDLE recurrence is just for better readability in
		 * the gap-L-code and not needed per se.
		 */
		if (M > 0 && N > minMiddleSkips + 1) {
			//initialize second-last row of SKIPINS_MIDDLE
			for (int i = M - 1; i >= 0; i--) {
				dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][N - 1]
						= dp_tables.get(Recurrence.ALI)[i + 1][N]
						+ compareMatrix[i][N - 1];
			}
			//initialize second last column up to the desired point.
			for (int j = N - 2; j > rightPreMiddleSkipsLimit; j--) {
				final double repLocal = dp_tables.get(Recurrence.ALI)[M][j + 1]
						+ compareMatrix[M - 1][j];
				dp_tables.get(Recurrence.SKIPINS_MIDDLE)[M - 1][j] = choice(
						dp_tables.get(Recurrence.SKIPINS_MIDDLE)[M - 1][j + 1]
						+ skipInsMatrix[j], repLocal);
			}
			for (int j = rightPreMiddleSkipsLimit; j >= 0; j--) {
				//calculate SKIPINS_MIDDLE
				final double repLocal = dp_tables.get(Recurrence.ALI)[M][j + 1]
						+ compareMatrix[M - 1][j];
				if (j >= minMiddleSkips) {
					dp_tables.get(Recurrence.SKIPINS_MIDDLE)[M - 1][j] = choice(
							dp_tables.get(Recurrence.SKIPINS_MIDDLE)[M - 1][j + 1]
							+ skipInsMatrix[j], repLocal);
				}
				//calculate ALI
				double skipInsMiddle = dp_tables.get(
						Recurrence.SKIPINS_MIDDLE)[M - 1][j + minMiddleSkips];
				for (int j2 = 0; j2 < minMiddleSkips; j2++) {
					skipInsMiddle += skipInsMatrix[j + j2];
				}
				dp_tables.get(Recurrence.ALI)[M - 1][j] = choice(
						skipInsMiddle,
						repLocal,
						dp_tables.get(Recurrence.INS)[M - 1][j + 1]
						+ insMatrix[j]
				);
			}
		}

		/*
		 * Calculate SKIPINS_MIDDLE and SKIPDEL_MIDDLE for this section
		 * of the matrix as well:
		 *
		 * -----------
		 * ------xxx--
		 * ------xxx--
		 * ------xxx--
		 * -----------
		 * -----------
		 *
		 */
		if (N > 1) {
			for (int i = M - 2; i > leftPreMiddleSkipsLimit; i--) {
				for (int j = N - 2; j > rightPreMiddleSkipsLimit; j--) {
					final double repLocal
							= dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
							+ compareMatrix[i][j];
					dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i][j] = choice(
							dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][j]
							+ skipDelMatrix[i], repLocal);
					dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j] = choice(
							dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + 1]
							+ skipInsMatrix[j], repLocal);
				}
			}
		}

		/*
		 * Now we calculate that part of the ALI matrix where we can use
		 * deletions, skip deletions and insertions, but no skip insertions.
		 *
		 * So the relevant part of the grammar ist:
		 *
		 * ALI = del(, DEL) | ins(, INS) |
		 * rep(, ALI) | SKIPDEL_MIDDLE | SKIPDEL_END #h;
		 *
		 * DEL = del(, DEL) | ins(, INS) |
		 * rep(, ALI) #h;
		 *
		 * INS = ins(, INS) | rep(, ALI) #h;
		 *
		 * SKIPDEL_MIDDLE = skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * SKIPDEL_MIDDLE_LOOP)))) #h;
		 *
		 * SKIPDEL_MIDDLE_LOOP = skip_del(, SKIPDEL_MIDDLE_LOOP) |
		 * rep(,ALI) #h;
		 *
		 * Calculate this part of the matrix:
		 *
		 * -----------
		 * xxxxxx-----
		 * xxxxxx-----
		 * xxxxxx-----
		 * -----------
		 * -----------
		 */
		if (M > minMiddleSkips + 1) {
			for (int j = N - 2; j > rightPreMiddleSkipsLimit; j--) {
				for (int i = leftPreMiddleSkipsLimit; i >= 0; i--) {
					//calculate choices
					final double[] choices = new double[5];
					choices[0] = dp_tables.get(Recurrence.DEL)[i + 1][j] + delMatrix[i];
					choices[1] = dp_tables.get(Recurrence.INS)[i][j + 1] + insMatrix[j];
					choices[2] = dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
							+ compareMatrix[i][j];
					choices[3] = dp_tables.get(Recurrence.SKIPDEL_END)[i][j];
					//calculate SKIPDEL_MIDDLE
					double skipDelMiddle = dp_tables.get(
							Recurrence.SKIPDEL_MIDDLE)[i + minMiddleSkips][j];
					for (int i2 = 0; i2 < minMiddleSkips; i2++) {
						skipDelMiddle += skipDelMatrix[i + i2];
					}
					choices[4] = skipDelMiddle;
					dp_tables.get(Recurrence.ALI)[i][j] = choice(choices);
					dp_tables.get(Recurrence.DEL)[i][j] = choice(
							choices[0], choices[1], choices[2]);
					dp_tables.get(Recurrence.INS)[i][j] = choice(
							choices[1], choices[2]);
					//also calculate the SKIPDEL_MIDDLE and SKIPINS_MIDDLE
					//recurrence.
					if (i >= minMiddleSkips) {
						dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i][j] = choice(
								choices[2],
								dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][j]
								+ skipDelMatrix[i]
						);
					}
					if (j >= minMiddleSkips) {
						dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j] = choice(
								choices[2],
								dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + 1]
								+ skipInsMatrix[j]
						);
					}
				}
			}
		}

		/*
		 * Now we calculate the part of the ALI matrix, where we can use
		 * insertions, skip insertions and deletions, but no skip deletions.
		 * So the relevant part of the grammar ist:
		 *
		 * ALI = del(, DEL) | ins(, INS) |
		 * rep(, ALI) |
		 * SKIPDEL_MIDDLE | SKIPINS_MIDDLE | SKIPDEL_END #h;
		 *
		 * DEL = del(, DEL) | ins(, INS) |
		 * rep(, ALI) #h;
		 *
		 * INS = ins(, INS) | rep(, ALI) #h;
		 *
		 * SKIPINS_MIDDLE = skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * SKIPINS_MIDDLE_LOOP)))) #h;
		 *
		 * SKIPINS_MIDDLE_LOOP = skip_ins(, SKIPINS_MIDDLE_LOOP) |
		 * rep(,ALI) #h;
		 *
		 * Calculate this part of the matrix:
		 *
		 * ------xxx--
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 */
		if (N > minMiddleSkips + 1) {
			for (int i = M - 2; i > leftPreMiddleSkipsLimit; i--) {
				for (int j = rightPreMiddleSkipsLimit; j >= 0; j--) {
					//calculate choices
					final double[] choices = new double[5];
					choices[0] = dp_tables.get(Recurrence.DEL)[i + 1][j] + delMatrix[i];
					choices[1] = dp_tables.get(Recurrence.INS)[i][j + 1] + insMatrix[j];
					choices[2] = dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
							+ compareMatrix[i][j];
					choices[3] = dp_tables.get(Recurrence.SKIPDEL_END)[i][j];
					//calculate SKIPINS_MIDDLE
					double skipInsMiddle = dp_tables.get(
							Recurrence.SKIPINS_MIDDLE)[i][j + minMiddleSkips];
					for (int j2 = 0; j2 < minMiddleSkips; j2++) {
						skipInsMiddle += skipInsMatrix[j + j2];
					}
					choices[4] = skipInsMiddle;
					dp_tables.get(Recurrence.ALI)[i][j] = choice(choices);
					dp_tables.get(Recurrence.DEL)[i][j] = choice(
							choices[0], choices[1], choices[2]);
					dp_tables.get(Recurrence.INS)[i][j] = choice(
							choices[1], choices[2]);
					//also calculate the SKIPDEL_MIDDLE and SKIPINS_MIDDLE
					//recurrence.
					if (i >= minMiddleSkips) {
						dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i][j] = choice(
								choices[2],
								dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][j]
								+ skipDelMatrix[i]
						);
					}
					if (j >= minMiddleSkips) {
						dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j] = choice(
								choices[2],
								dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + 1]
								+ skipInsMatrix[j]
						);
					}
				}
			}
		}

		/*
		 * Finally we calculate the rest of the ALI matrix, where we can use
		 * the full reccurence:
		 *
		 * ALI = del(, DEL) | ins(, INS) |
		 * rep(, ALI) | SKIPDEL_MIDDLE | SKIPINS_MIDDLE |
		 * SKIPDEL_END #h;
		 *
		 * DEL = del(, DEL) | ins(, INS) |
		 * rep(, ALI) #h;
		 *
		 * INS = ins(, INS) | rep(, ALI) #h;
		 *
		 * SKIPDEL_MIDDLE = skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * skip_del(,
		 * SKIPDEL_MIDDLE_LOOP)))) #h;
		 *
		 * SKIPDEL_MIDDLE_LOOP = skip_del(, SKIPDEL_MIDDLE_LOOP) |
		 * rep(,ALI) #h;
		 *
		 * SKIPINS_MIDDLE = skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * skip_ins(,
		 * SKIPINS_MIDDLE_LOOP)))) #h;
		 *
		 * SKIPINS_MIDDLE_LOOP = skip_ins(, SKIPINS_MIDDLE_LOOP) |
		 * rep(,ALI) #h;
		 *
		 *
		 * Calculate the rest of the matrix:
		 *
		 * xxxxxx-----
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 * -----------
		 */
		if (M > minMiddleSkips + 1 && N > minMiddleSkips + 1) {
			for (int i = leftPreMiddleSkipsLimit; i >= 0; i--) {
				for (int j = rightPreMiddleSkipsLimit; j >= 0; j--) {
					//calculate SKIPDEL_MIDDLE
					double skipDelMiddle = dp_tables.get(
							Recurrence.SKIPDEL_MIDDLE)[i + minMiddleSkips][j];
					for (int i2 = 0; i2 < minMiddleSkips; i2++) {
						skipDelMiddle += skipDelMatrix[i + i2];
					}
					if (i >= minMiddleSkips) {
						dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i][j] = choice(
								dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
								+ compareMatrix[i][j],
								dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][j]
								+ skipDelMatrix[i]
						);
					}
					//calculate SKIPINS_MIDDLE
					double skipInsMiddle = dp_tables.get(
							Recurrence.SKIPINS_MIDDLE)[i][j + minMiddleSkips];
					for (int j2 = 0; j2 < minMiddleSkips; j2++) {
						skipInsMiddle += skipInsMatrix[j + j2];
					}
					if (j >= minMiddleSkips) {
						dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j] = choice(
								dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
								+ compareMatrix[i][j],
								dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + 1]
								+ skipInsMatrix[j]
						);
					}

					//calculate other choices
					final double[] choices = new double[6];
					choices[0] = dp_tables.get(Recurrence.DEL)[i + 1][j]
							+ delMatrix[i];
					choices[1] = dp_tables.get(Recurrence.INS)[i][j + 1]
							+ insMatrix[j];
					choices[2] = dp_tables.get(Recurrence.ALI)[i + 1][j + 1]
							+ compareMatrix[i][j];
					choices[3] = dp_tables.get(Recurrence.SKIPDEL_END)[i][j];
					choices[4] = skipDelMiddle;
					choices[5] = skipInsMiddle;

					//calculate ALI, DEL and INS.
					dp_tables.get(Recurrence.ALI)[i][j] = choice(choices);
					dp_tables.get(Recurrence.DEL)[i][j] = choice(
							choices[0], choices[1], choices[2]);
					dp_tables.get(Recurrence.INS)[i][j] = choice(
							choices[1], choices[2]);
				}
			}
		}

		/*
		 * After that it gets easy again: We can now compute the SKIPINS_START
		 * recurrence.
		 *
		 * SKIPINS_START = skip_ins(, SKIPINS_START) |
		 * rep(, ALI) | nil() #h;
		 */
		//initialize last column
		for (int j = N - 1; j >= 0; j--) {
			dp_tables.get(Recurrence.SKIPINS_START)[M][j]
					= dp_tables.get(Recurrence.SKIPINS_START)[M][j + 1] + skipInsMatrix[j];
		}
		//initialize second-last row.
		if (N > 0) {
			for (int i = M - 1; i >= 0; i--) {
				dp_tables.get(Recurrence.SKIPINS_START)[i][N - 1]
						= dp_tables.get(Recurrence.ALI)[i + 1][N] + compareMatrix[i][N - 1];
			}
		}
		//calculate the rest of the matrix.
		if (N > 1) {
			for (int i = M - 1; i >= 0; i--) {
				for (int j = N - 2; j >= 0; j--) {
					dp_tables.get(Recurrence.SKIPINS_START)[i][j] = choice(
							dp_tables.get(Recurrence.SKIPINS_START)[i][j + 1] + skipInsMatrix[j],
							dp_tables.get(Recurrence.ALI)[i + 1][j + 1] + compareMatrix[i][j]);
				}
			}
		}
		/*
		 * And finally we can compute the SKIPDEL_START recurrence.
		 *
		 * SKIPDEL_START = skip_del(, SKIPDEL_START) | SKIPINS_START
		 * #h;
		 */
		//as this only goes to left we just need the first row here.
		//initialize last entry.
		dp_tables.get(Recurrence.SKIPDEL_START)[M][0]
				= dp_tables.get(Recurrence.SKIPINS_START)[M][0];
		//calculate the rest of the row
		if (N > 0) {
			for (int i = M - 1; i >= 0; i--) {
				dp_tables.get(Recurrence.SKIPDEL_START)[i][0] = choice(
						dp_tables.get(Recurrence.SKIPDEL_START)[i + 1][0]
						+ skipDelMatrix[i],
						dp_tables.get(Recurrence.SKIPINS_START)[i][0]
				);
			}
		} else {
			for (int i = M - 1; i >= 0; i--) {
				dp_tables.get(Recurrence.SKIPDEL_START)[i][0]
						= dp_tables.get(Recurrence.SKIPDEL_START)[i + 1][0]
						+ skipDelMatrix[i];

			}
		}

		return transformToResult(dp_tables,
				compareMatrix,
				delMatrix, insMatrix,
				skipDelMatrix, skipInsMatrix,
				a, b);
	}

	public static enum Recurrence {

		SKIPDEL_START,
		SKIPINS_START,
		ALI,
		DEL,
		INS,
		SKIPINS_MIDDLE,
		SKIPDEL_MIDDLE,
		SKIPINS_END,
		SKIPDEL_END;
	}

	/**
	 * This should implement the choice function as the term is used
	 * in the Bellman's Gap context. In the strict case this is a minimum,
	 * in the soft case a softmin applied on the input.
	 *
	 * @param choices the costs of all choices.
	 *
	 * @return a cost chosen from the given input.
	 */
	public abstract double choice(double... choices);

	/**
	 * This method should not be called from outside!
	 *
	 * The subclass uses this method to transform the alignment matrix and the
	 * input sequences to the actual alignment result.
	 *
	 * @param dp_tables the dynamic programming matrices used during
	 * calculation.
	 * @param compareMatrix the local cost for replacing node i from the first
	 * sequence with node j from the second sequence.
	 * @param deletionMatrix the local cost for deleting node i from the first
	 * sequence.
	 * @param insertionMatrix the local cost for inserting node j from the
	 * second sequence into the first sequence.
	 * @param skipDeletionMatrix the local cost for skipping node i in the
	 * first sequence.
	 * @param skipInsertionMatrix the local cost for skipping node j in the
	 * second sequence.
	 * @param a the first sequence.
	 * @param b th second sequence.
	 *
	 * @return the actual alignment result.
	 */
	public abstract R transformToResult(
			final EnumMap dp_tables,
			final double[][] compareMatrix,
			final double[] deletionMatrix,
			final double[] insertionMatrix,
			final double[] skipDeletionMatrix,
			final double[] skipInsertionMatrix,
			final Sequence a,
			final Sequence b);

	//This is debugging code i leave here in case I have to debug dynamic programming
	//matrices again.
//	private static void debugMatrixPrint(EnumMap dp_matrices,
//			Recurrence... recurrences) {
//		for (final Recurrence rec : recurrences) {
//			System.out.println("\n" + rec + "\n");
//			final double[][] mat = dp_matrices.get(rec);
//			final int M = mat.length - 1;
//			final int N = mat[0].length - 1;
//			//header.
//			System.out.print("  ");
//			for (int i = 0; i <= M; i++) {
//				System.out.print("| ");
//				final String idxStr = Integer.toString(i);
//				System.out.print(idxStr);
//				for (int blanks = 2 - idxStr.length(); blanks > 0; blanks--) {
//					System.out.print(" ");
//				}
//			}
//			System.out.println("");
//			//lines.
//			for (int j = 0; j <= N; j++) {
//				//front.
//				final String idxStr = Integer.toString(j);
//				System.out.print(idxStr);
//				for (int blanks = 2 - idxStr.length(); blanks > 0; blanks--) {
//					System.out.print(" ");
//				}
//				//entries.
//				for (int i = 0; i <= M; i++) {
//					System.out.print("|");
//					double rounded = (double) Math.round(mat[i][j] * 10) / 10;
//					final String numStr = Double.toString(rounded);
//					System.out.print(numStr);
//					for (int blanks = 3 - numStr.length(); blanks > 0; blanks--) {
//						System.out.print(" ");
//					}
//				}
//				System.out.println("");
//			}
//		}
//	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy