All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.tcs.alignment.StrictAffineAlignmentFullAlgorithm Maven / Gradle / Ivy

/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Random;

/**
 * This implements a local affine alignment similar to Smith-Waterman
 * and Gotoh. More information about this alignment can be found in the
 * AbstractLocalAlignmentAlgorithm.
 *
 * Note that you are allowed to specify higher deletion, insertion or comparison
 * costs than 1 in your comparators here, because it is still possible to skip
 * both sequences entirely in the worst case. It is, however, still recommended
 * to set the local comparator costs between 0 and 1 as well.
 *
 * The end result is a path of an exemplary, optimal alignment. If more than
 * one optimal path exists, a random one is chosen.
 *
 * @author Benjamin Paassen - [email protected]
 */
public class StrictAffineAlignmentFullAlgorithm
		extends AbstractAffineAlignmentAlgorithm {

	private OperationType[] preferredCooptimal = null;

	/**
	 * This allows to specify an order in which operations shall be preferred if
	 * cooptimal alignments occur. The first elements in the input array will be
	 * counted as preferred. If this is set to null co-optimal operations will
	 * be chosen randomly.
	 *
	 * @param preferredCooptimal
	 */
	public void setPreferredCooptimal(OperationType[] preferredCooptimal) {
		// ensure that this contains all operations.
		final EnumSet all = EnumSet.allOf(OperationType.class);
		for (final OperationType t : preferredCooptimal) {
			if (!all.remove(t)) {
				throw new IllegalArgumentException("The given preference array "
						+ "contianed " + t + " multiple times!");
			}
		}
		for (final OperationType r : all) {
			throw new IllegalArgumentException("A preference array of Operation "
					+ "types must contain all OperationTypes but " + r + " was missing!");
		}
		this.preferredCooptimal = preferredCooptimal;
	}

	/**
	 * This allows to specify an order in which operations shall be preferred if
	 * cooptimal alignments occur. The first elements in the input array will be
	 * counted as preferred. If this is set to null co-optimal operations will
	 * be chosen randomly.
	 *
	 * @return
	 */
	public OperationType[] getPreferredCooptimal() {
		return preferredCooptimal;
	}

	public StrictAffineAlignmentFullAlgorithm(
			AlignmentSpecification alignmentSpecification) {
		super(AlignmentPath.class, alignmentSpecification);
	}

	/**
	 * This implements the strict maximum of the given input doubles.
	 *
	 * @return the strict maximum of the given input doubles.
	 */
	@Override
	public double choice(double... choices) {
		if (choices == null || choices.length == 0) {
			throw new IllegalArgumentException("Choice input was invalid!");
		}
		double min = choices[0];
		for (int i = 1; i < choices.length; i++) {
			if (choices[i] < min) {
				min = choices[i];
			}
		}
		return min;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public AlignmentPath transformToResult(EnumMap dp_tables,
			double[][] compareMatrix,
			double[] deletionMatrix, double[] insertionMatrix,
			double[] skipDeletionMatrix, double[] skipInsertionMatrix,
			Sequence a, Sequence b) {
		final double accum_score = dp_tables.get(Recurrence.SKIPDEL_START)[0][0];
		final double score;
		if (accum_score > 0) {
			//normalize it by the cost for skipping both sequences entirely.
			double worstScore = 0;
			for (int i = 0; i < skipDeletionMatrix.length; i++) {
				worstScore += skipDeletionMatrix[i];
			}
			for (int j = 0; j < skipInsertionMatrix.length; j++) {
				worstScore += skipInsertionMatrix[j];
			}
			score = Math.min(1, accum_score / worstScore);
		} else {
			score = 0;
		}
		/*
		 * We reconstruct an optimal alignment path by backtracing. I can not
		 * explain everything in detail here, because the local alignment is a
		 * fairly complicated algorithm, but again I will refer to the
		 * Bellman's Gap code that i wrote and highlight the important passages.
		 */
		final AlignmentPath path = new AlignmentPath(getSpecification(),
				a, b, score);
		int i = 0;
		int j = 0;
		final int M = a.getNodes().size();
		final int N = b.getNodes().size();
		Recurrence current = Recurrence.SKIPDEL_START;
		final Random random = new Random();
		double currentScore;
		while (i < M || j < N) {
			currentScore = dp_tables.get(current)[i][j];
			//determine co-optimals.
			final ArrayList cooptimals = new ArrayList<>();
			final ArrayList cooptimalOps = new ArrayList<>();
			final ArrayList cooptimalCounts = new ArrayList<>();
			switch (current) {
				/*
				 * SKIPDEL_START = skip_del(, SKIPDEL_START) |
				 * SKIPINS_START #h;
				 */
				case SKIPDEL_START: {
					if (j > 0) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "SKIPDEL_START was used in an "
								+ "ill-defined region!");
					}
					if (i < M) {
						//skip_del(, SKIPDEL_START)
						final double skipDelOld
								= dp_tables.get(Recurrence.SKIPDEL_START)[i + 1][j];
						if (skipDelOld + skipDeletionMatrix[i] == currentScore) {
							cooptimals.add(Recurrence.SKIPDEL_START);
							cooptimalOps.add(OperationType.SKIPDELETION);
							cooptimalCounts.add(1);
						}
					}
					//SKIPINS_START
					if (dp_tables.get(Recurrence.SKIPINS_START)[i][j] == currentScore) {
						cooptimals.add(Recurrence.SKIPINS_START);
						cooptimalOps.add(null);
						cooptimalCounts.add(0);
					}
				}
				break;
				/*
				 * SKIPINS_START = skip_ins(, SKIPINS_START) |
				 * rep(, ALI) |
				 * nil() #h;
				 */
				case SKIPINS_START: {
					if (j == N && i < M) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "SKIPINS_START was used in an "
								+ "ill-defined region!");
					}
					if (j < N) {
						//skip_ins(, SKIPINS_START)
						if (j < N - 1 || i == M) {
							final double skipInsOld
									= dp_tables.get(Recurrence.SKIPINS_START)[i][j + 1];
							if (skipInsOld + skipInsertionMatrix[j] == currentScore) {
								cooptimals.add(Recurrence.SKIPINS_START);
								cooptimalOps.add(OperationType.SKIPINSERTION);
								cooptimalCounts.add(1);
							}
						}
						//rep(, ALI)
						if (i < M) {
							final double repOld
									= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
							if (repOld + compareMatrix[i][j] == currentScore) {
								cooptimals.add(Recurrence.ALI);
								cooptimalOps.add(OperationType.REPLACEMENT);
								cooptimalCounts.add(1);
							}
						}
					}
					//nil does not have to be considered here.
				}
				break;
				/*
				 * ALI = del(, DEL) | ins(, INS) |
				 * rep(, ALI) |
				 * SKIPDEL_MIDDLE | SKIPINS_MIDDLE | SKIPDEL_END #h;
				 *
				 */
				case ALI: {
					if (i < M && j < N) {
						//del(, DEL)
						if (i < M - 1) {
							final double delOld
									= dp_tables.get(Recurrence.DEL)[i + 1][j];
							if (delOld + deletionMatrix[i] == currentScore) {
								cooptimals.add(Recurrence.DEL);
								cooptimalOps.add(OperationType.DELETION);
								cooptimalCounts.add(1);
							}
						}
						//ins(, INS)
						if (j < N - 1) {
							final double insOld
									= dp_tables.get(Recurrence.INS)[i][j + 1];
							if (insOld + insertionMatrix[j] == currentScore) {
								cooptimals.add(Recurrence.INS);
								cooptimalOps.add(OperationType.INSERTION);
								cooptimalCounts.add(1);
							}
						}
						//rep(, ALI)
						final double repOld
								= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
						if (repOld + compareMatrix[i][j] == currentScore) {
							cooptimals.add(Recurrence.ALI);
							cooptimalOps.add(OperationType.REPLACEMENT);
							cooptimalCounts.add(1);
						}
						/*
						 * SKIPDEL_MIDDLE or, better said:
						 * skip_del(,
						 * skip_del(,
						 * skip_del(,
						 * skip_del(,
						 * SKIPDEL_MIDDLE_LOOP))))
						 */
						if (i < M - getMinMiddleSkips()) {
							double skipDelMiddleCost
									= dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + getMinMiddleSkips()][j];
							for (int i2 = 0; i2 < getMinMiddleSkips(); i2++) {
								skipDelMiddleCost += skipDeletionMatrix[i + i2];
							}
							if (skipDelMiddleCost == currentScore) {
								cooptimals.add(Recurrence.SKIPDEL_MIDDLE);
								cooptimalOps.add(OperationType.SKIPDELETION);
								cooptimalCounts.add(getMinMiddleSkips());
							}
						}
						/*
						 * SKIPINS_MIDDLE or, better said:
						 * skip_ins(,
						 * skip_ins(,
						 * skip_ins(,
						 * skip_ins(,
						 * SKIPINS_MIDDLE_LOOP)))) #h;
						 */
						if (j < N - getMinMiddleSkips()) {
							double skipInsMiddleCost
									= dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + getMinMiddleSkips()];
							for (int j2 = 0; j2 < getMinMiddleSkips(); j2++) {
								skipInsMiddleCost += skipInsertionMatrix[j + j2];
							}
							if (skipInsMiddleCost == currentScore) {
								cooptimals.add(Recurrence.SKIPINS_MIDDLE);
								cooptimalOps.add(OperationType.SKIPINSERTION);
								cooptimalCounts.add(getMinMiddleSkips());
							}
						}
					}
					//SKIPDEL_END
					if (dp_tables.get(Recurrence.SKIPDEL_END)[i][j] == currentScore) {
						cooptimals.add(Recurrence.SKIPDEL_END);
						cooptimalOps.add(null);
						cooptimalCounts.add(0);
					}
				}
				break;
				/*
				 * DEL = del(, DEL) | ins(, INS) |
				 * rep(, ALI) #h;
				 */
				case DEL: {
					if (i == M || j == N) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "DEL was used in an "
								+ "ill-defined region!");
					}
					if (i < M && j < N) {
						//del(, DEL)
						if (i < M - 1) {
							final double delOld
									= dp_tables.get(Recurrence.DEL)[i + 1][j];
							if (delOld + deletionMatrix[i] == currentScore) {
								cooptimals.add(Recurrence.DEL);
								cooptimalOps.add(OperationType.DELETION);
								cooptimalCounts.add(1);
							}
						}
						//ins(, INS)
						if (j < N - 1) {
							final double insOld
									= dp_tables.get(Recurrence.INS)[i][j + 1];
							if (insOld + insertionMatrix[j] == currentScore) {
								cooptimals.add(Recurrence.INS);
								cooptimalOps.add(OperationType.INSERTION);
								cooptimalCounts.add(1);
							}
						}
						//rep(, ALI)
						final double repOld
								= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
						if (repOld + compareMatrix[i][j] == currentScore) {
							cooptimals.add(Recurrence.ALI);
							cooptimalOps.add(OperationType.REPLACEMENT);
							cooptimalCounts.add(1);
						}
					}
				}
				break;
				/*
				 * INS = ins(, INS) | rep(, ALI) #h;
				 */
				case INS: {
					if (i == M || j == N) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "INS was used in an "
								+ "ill-defined region!");
					}
					if (i < M && j < N) {
						//ins(, INS)
						if (j < N - 1) {
							final double insOld
									= dp_tables.get(Recurrence.INS)[i][j + 1];
							if (insOld + insertionMatrix[j] == currentScore) {
								cooptimals.add(Recurrence.INS);
								cooptimalOps.add(OperationType.INSERTION);
								cooptimalCounts.add(1);
							}
						}
						//rep(, ALI)
						final double repOld
								= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
						if (repOld + compareMatrix[i][j] == currentScore) {
							cooptimals.add(Recurrence.ALI);
							cooptimalOps.add(OperationType.REPLACEMENT);
							cooptimalCounts.add(1);
						}
					}
				}
				break;
				/*
				 * SKIPDEL_MIDDLE_LOOP = skip_del(,
				 * SKIPDEL_MIDDLE_LOOP) |
				 * rep(,ALI) #h;
				 */
				case SKIPDEL_MIDDLE: {
					if (i == M || j == N) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "SKIPDEL_MIDDLE was used in an "
								+ "ill-defined region!");
					}
					//skip_del(, SKIPDEL_MIDDLE_LOOP)
					if (i < M && j < N) {
						if (i < M - 1) {
							final double skipDelOld
									= dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][j];
							if (skipDelOld + skipDeletionMatrix[i] == currentScore) {
								cooptimals.add(Recurrence.SKIPDEL_MIDDLE);
								cooptimalOps.add(OperationType.SKIPDELETION);
								cooptimalCounts.add(1);
							}
						}
						//rep(,ALI)
						final double repOld
								= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
						if (repOld + compareMatrix[i][j] == currentScore) {
							cooptimals.add(Recurrence.ALI);
							cooptimalOps.add(OperationType.REPLACEMENT);
							cooptimalCounts.add(1);
						}
					}
				}
				break;
				/*
				 * SKIPINS_MIDDLE_LOOP = skip_ins(,
				 * SKIPINS_MIDDLE_LOOP) |
				 * rep(,ALI) #h;
				 */
				case SKIPINS_MIDDLE: {
					if (i == M || j == N) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "SKIPINS_MIDDLE was used in an "
								+ "ill-defined region!");
					}
					if (i < M && j < N) {
						if (j < N - 1) {
							//skip_ins(, SKIPINS_MIDDLE_LOOP)
							final double skipInsOld
									= dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + 1];
							if (skipInsOld + skipInsertionMatrix[j] == currentScore) {
								cooptimals.add(Recurrence.SKIPINS_MIDDLE);
								cooptimalOps.add(OperationType.SKIPINSERTION);
								cooptimalCounts.add(1);
							}
						}
						//rep(,ALI) 
						final double repOld
								= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
						if (repOld + compareMatrix[i][j] == currentScore) {
							cooptimals.add(Recurrence.ALI);
							cooptimalOps.add(OperationType.REPLACEMENT);
							cooptimalCounts.add(1);
						}
					}
				}
				break;
				/*
				 * SKIPDEL_END = skip_del(, SKIPDEL_END) |
				 * SKIPINS_END #h;
				 */
				case SKIPDEL_END: {
					if (i < M) {
						final double skipDelOld
								= dp_tables.get(Recurrence.SKIPDEL_END)[i + 1][j];
						if (skipDelOld + skipDeletionMatrix[i] == currentScore) {
							cooptimals.add(Recurrence.SKIPDEL_END);
							cooptimalOps.add(OperationType.SKIPDELETION);
							cooptimalCounts.add(1);
						}
					}
					if (dp_tables.get(Recurrence.SKIPINS_END)[i][j] == currentScore) {
						cooptimals.add(Recurrence.SKIPINS_END);
						cooptimalOps.add(null);
						cooptimalCounts.add(0);
					}
				}
				break;
				case SKIPINS_END: {
					if (i < M) {
						throw new UnsupportedOperationException(
								"Unexpected internal error: "
								+ "SKIPINS_END was used in an "
								+ "ill-defined region!");
					}
					cooptimals.add(Recurrence.SKIPINS_END);
					cooptimalOps.add(OperationType.SKIPINSERTION);
					cooptimalCounts.add(1);
				}
				break;

				default:
					throw new UnsupportedOperationException(
							"Unknown recurrence! " + current);
			}
			if (cooptimals.isEmpty()) {
				throw new RuntimeException("Unexpected internal error!"
						+ "No alignment choices!");
			}
			//choose a cooptimal to continue.
			final Recurrence next;
			final OperationType nextType;
			final int nextCount;
			if (cooptimals.size() > 1) {
				if (preferredCooptimal == null) {
					// choose randomly when we have no explicit preference.
					final int r = random.nextInt(cooptimals.size());
					next = cooptimals.get(r);
					nextType = cooptimalOps.get(r);
					nextCount = cooptimalCounts.get(r);
				} else {
					// otherwise choose according to preference.
					int pref = 0;
					int prefIdx = preferredCooptimal.length + 1;
					for (int r = 0; r < cooptimals.size(); r++) {
						final OperationType op = cooptimalOps.get(r);
						if (op == null) {
							continue;
						}
						int p = 0;
						while (preferredCooptimal[p] != op) {
							p++;
						}
						if (p < prefIdx) {
							prefIdx = p;
							pref = r;
						}
					}
					next = cooptimals.get(pref);
					nextType = cooptimalOps.get(pref);
					nextCount = cooptimalCounts.get(pref);
				}
			} else {
				next = cooptimals.get(0);
				nextType = cooptimalOps.get(0);
				nextCount = cooptimalCounts.get(0);

			}
			for (int s = 0; s < nextCount; s++) {
				final double[] localCosts;
				final Node left;
				final Node right;
				switch (nextType) {
					case SKIPDELETION:
						left = a.getNodes().get(i + s);
						right = null;
						localCosts = getSpecification().
								calculateSkipDeletionCosts(left);
						break;
					case DELETION:
						left = a.getNodes().get(i + s);
						right = null;
						localCosts = getSpecification().
								calculateDeletionCosts(left);
						break;
					case SKIPINSERTION:
						left = null;
						right = b.getNodes().get(j + s);
						localCosts = getSpecification().
								calculateSkipInsertionCosts(right);
						break;
					case INSERTION:
						left = null;
						right = b.getNodes().get(j + s);
						localCosts = getSpecification().
								calculateInsertionCosts(right);
						break;
					case REPLACEMENT:
						left = a.getNodes().get(i + s);
						right = b.getNodes().get(j + s);
						localCosts = getSpecification().
								calculateReplacementCosts(left, right);
						break;
					default:
						throw new UnsupportedOperationException(
								"Unknown operation! " + nextType);
				}
				final Operation op = new Operation(left, right,
						nextType, path);
				op.setComparatorDistances(localCosts);
				path.getOperations().add(op);
			}
			//adjust position
			if (nextType != null) {
				switch (nextType) {
					case SKIPDELETION:
					case DELETION:
						i += nextCount;
						break;
					case SKIPINSERTION:
					case INSERTION:
						j += nextCount;
						break;
					case REPLACEMENT:
						i += nextCount;
						j += nextCount;
						break;
				}
			}
			//continue calculation.
			current = next;
		}

		return path;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy