/*
* TCS Alignment Toolbox Version 3
*
* Copyright (C) 2016
* Benjamin Paaßen
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment.adp;
import de.citec.tcs.alignment.Alignment;
import de.citec.tcs.alignment.Operation;
import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.parallel.MatrixEngine.MatrixCoordinate;
import java.util.EnumMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import lombok.NonNull;
/**
* This algorithm calculates an optimal Alignment using the given ADP grammar and
* algebra/Comparator.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
* @param <X> the class of the elements in the left input sequence.
* @param <Y> the class of the elements in the right input sequence.
* @param <N> the enum listing the nonterminal symbols for the input grammar.
*/
public class SparseStrictADPFullAlgorithm<X, Y, N extends Enum<N>> extends SparseAbstractADPAlgorithm<X, Y, N> {
public SparseStrictADPFullAlgorithm(@NonNull SparseGrammar<N> grammar,
@NonNull Comparator<X, Y> comparator) {
super(grammar, Alignment.class, comparator);
}
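/**
* The choice function of the algebra: among the costs of all applicable
* production rules it returns the strict minimum, so the score computed
* by this algorithm is a dissimilarity.
*/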
@Override
public double choice(double[] choices) {
double min = choices[0];
for (int l = 1; l < choices.length; l++) {
if (choices[l] < min) {
min = choices[l];
}
}
return min;
}
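/*
* A minimal usage sketch. The nonterminal enum "Nonterminal" and the
* variables "grammar" and "comparator" are hypothetical placeholders that
* would have to be set up via the SparseGrammar and Comparator APIs; only
* the constructor call itself is taken from this class:
*
* SparseGrammar<Nonterminal> grammar = ...;
* Comparator<String, String> comparator = ...;
* SparseStrictADPFullAlgorithm<String, String, Nonterminal> algorithm =
* new SparseStrictADPFullAlgorithm<>(grammar, comparator);
*
* The dynamic programming run itself is inherited from
* SparseAbstractADPAlgorithm, which fills the dp tables and eventually
* calls transformToResult below to obtain an Alignment via backtracing.
*/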
@Override
public Alignment<X, Y> transformToResult(@NonNull EnumMap dpTables,
@NonNull EnumMap repMatrices,
@NonNull EnumMap<OperationType, double[]> delVectors,
@NonNull EnumMap<OperationType, double[]> insVectors,
@NonNull List<X> a, @NonNull List<Y> b) {
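// The dp table of the grammar's axiom stores, at coordinate (0, 0),
// the accumulated cost of an optimal alignment of the full sequences.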
final double acum_score = dpTables.get(getGrammar().getAxiom()).get(
new MatrixCoordinate(0, 0));
// we normalize the score
final double normalized_score = normalizeDissimilarity(acum_score, a, b);
// Now we try to reconstruct an optimal path via backtracing.
// The idea is to start at the beginning and check which production
// rules lead to optimal costs.
// If multiple such rules exist (co-optimal rules), we randomly
// pick one of them.
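// A rule is co-optimal at cell (i, j) if its local operation costs plus
// the dp table entry of its successor nonterminal at the advanced cell
// equal the dp table entry of the current nonterminal at (i, j).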
final Alignment<X, Y> alignment = new Alignment<>(getComparator(), a, b, normalized_score);
int i = 0;
int j = 0;
N nonterminal = getGrammar().getAxiom();
final int m = a.size();
final int n = b.size();
final Random rand = new Random();
while (i < m || j < n) {
// calculate the production rules we could apply in the current
// situation.
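// Which rules are applicable depends on how many elements are still
// left in both sequences, which is why the remaining lengths (and the
// overall lengths m and n) are passed to the grammar below.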
final List<ProductionRule<N>> possibleRules;
{
final int leftSize = m - i;
final int rightSize = n - j;
possibleRules = getGrammar().getPossibleRules(nonterminal,
leftSize, rightSize, m, n);
}
// now narrow the choice down to a single rule.
final ProductionRule<N> optimalRule;
if (possibleRules.size() > 1) {
// if we have more than one possible rule, we first restrict
// the set to co-optimal rules only.
final LinkedList<ProductionRule<N>> cooptimals = new LinkedList<>();
final double optimal = dpTables.get(nonterminal).get(
new MatrixCoordinate(i, j));
for (final ProductionRule<N> rule : possibleRules) {
// calculate the cost for applying this rule.
double ruleCost = 0;
int deltaI = 0;
int deltaJ = 0;
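// deltaI and deltaJ count how many elements of a and b this rule
// consumes; they are needed below to look up the dp table entry of the
// successor nonterminal at the cell the rule leads to.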
for (final OperationType op : rule.getOperations()) {
switch (op) {
case REPLACEMENT:
case DELETIONREPLACEMENT:
case INSERTIONREPLACEMENT:
ruleCost += repMatrices.get(op).get(new MatrixCoordinate(i + deltaI, j + deltaJ));
if (op != OperationType.INSERTIONREPLACEMENT) {
deltaI++;
}
if (op != OperationType.DELETIONREPLACEMENT) {
deltaJ++;
}
break;
case DELETION:
case SKIPDELETION:
ruleCost += delVectors.get(op)[i + deltaI];
deltaI++;
break;
case INSERTION:
case SKIPINSERTION:
ruleCost += insVectors.get(op)[j + deltaJ];
deltaJ++;
break;
default:
throw new UnsupportedOperationException("Unsupported operation: " + op);
}
}
// then check if this rule is optimal.
final double old = dpTables.get(rule.getNonterminal()).get(
new MatrixCoordinate(i + deltaI, j + deltaJ));
if (old + ruleCost == optimal) {
// if so we store this rule.
cooptimals.add(rule);
}
}
// check if we have more than one co-optimal rule.
if (cooptimals.size() > 1) {
// if so we have to choose randomly.
optimalRule = cooptimals.get(rand.nextInt(cooptimals.size()));
} else {
// if there is only one optimal rule, we take that.
optimalRule = cooptimals.get(0);
}
} else {
// if only one rule is possible, that has to be the optimal one.
optimalRule = possibleRules.get(0);
}
// transform the optimal rule to the correct format.
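// Each operation of the chosen rule is added to the alignment with its
// local cost, and i and j are advanced exactly as in the cost
// computation above.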
for (final OperationType op : optimalRule.getOperations()) {
X left = null;
Y right = null;
final double cost;
switch (op) {
case REPLACEMENT:
case DELETIONREPLACEMENT:
case INSERTIONREPLACEMENT:
left = a.get(i);
right = b.get(j);
cost = repMatrices.get(op).get(new MatrixCoordinate(i, j));
if (op != OperationType.INSERTIONREPLACEMENT) {
i++;
}
if (op != OperationType.DELETIONREPLACEMENT) {
j++;
}
break;
case DELETION:
case SKIPDELETION:
left = a.get(i);
cost = delVectors.get(op)[i];
i++;
break;
case INSERTION:
case SKIPINSERTION:
right = b.get(j);
cost = insVectors.get(op)[j];
j++;
break;
default:
throw new UnsupportedOperationException("Unsupported operation: " + op);
}
final Operation<X, Y> operation = new Operation<>(left, right, op, cost);
alignment.add(operation);
}
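// continue backtracing with the successor nonterminal of the applied rule.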
nonterminal = optimalRule.getNonterminal();
}
return alignment;
}
}