de.citec.tcs.alignment.adp.StrictADPFullAlgorithm Maven / Gradle / Ivy

Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */
package de.citec.tcs.alignment.adp;

import de.citec.tcs.alignment.AlignmentPath;
import de.citec.tcs.alignment.AlignmentSpecification;
import de.citec.tcs.alignment.Operation;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.EnumMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

/**
 * This algorithm calculates an optimal AlignmentPath using the given
 * ADP grammar and algebra/AlignmentSpecification.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 * @param  the enum listing the nonterminal symbols for the input grammar.
 */
public class StrictADPFullAlgorithm> extends AbstractADPAlgorithm {

	public StrictADPFullAlgorithm(Grammar grammar, AlignmentSpecification alignmentSpecification) {
		super(grammar, AlignmentPath.class, alignmentSpecification);
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	public double choice(double[] choices) {
		double min = choices[0];
		for (int l = 1; l < choices.length; l++) {
			if (choices[l] < min) {
				min = choices[l];
			}
		}
		return min;
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	public AlignmentPath transformToResult(EnumMap dpTables,
			double[][] compareMatrix,
			double[] deletionMatrix, double[] insertionMatrix,
			double[] skipDeletionMatrix, double[] skipInsertionMatrix,
			Sequence a, Sequence b) {
		final double acum_score = dpTables.get(getGrammar().getAxiom())[0][0];
		// we normalize the score by the added length of both sequences.
		// For most skip cost schemes this will never be reached.
		final double normalized_score = normalizeDissimilarity(acum_score, a, b);

		// now we try to reconstruct the (or better: an) optimal bath via
		// backtracing. The idea is to start at the beginning and look
		// which production rules lead to optimal costs.
		// If multiple of such rules exists (co-optimal rules) we randomly
		// decide for one of them.
		final AlignmentPath path = new AlignmentPath(getSpecification(),
				a, b, normalized_score);
		int i = 0;
		int j = 0;
		N nonterminal = getGrammar().getAxiom();
		final int M = a.getNodes().size();
		final int N = b.getNodes().size();
		final Random rand = new Random();
		while (i < M || j < N) {
			// calculate the production rules we could apply in the current
			// situation.
			final List> possibleRules;
			{
				final int leftSize = M - i;
				final int rightSize = N - j;
				possibleRules = getGrammar().getPossibleRules(nonterminal,
						leftSize, rightSize, M, N);
			}
			// now ensure that only one rule is left.
			final ProductionRule optimalRule;
			if (possibleRules.size() > 1) {
				// if we have more than one choice in the beginning we
				// restrict first by only choosing co-optimals.
				final LinkedList> cooptimals = new LinkedList<>();
				final double optimal = dpTables.get(nonterminal)[i][j];
				for (final ProductionRule rule : possibleRules) {
					// calculate the cost for applying this rule.
					double ruleCost = 0;
					int deltaI = 0;
					int deltaJ = 0;
					for (final OperationType op : rule.getOperations()) {
						switch (op) {
							case DELETION:
								ruleCost += deletionMatrix[i + deltaI];
								deltaI++;
								break;
							case INSERTION:
								ruleCost += insertionMatrix[j + deltaJ];
								deltaJ++;
								break;
							case REPLACEMENT:
								ruleCost += compareMatrix[i + deltaI][j + deltaJ];
								deltaI++;
								deltaJ++;
								break;
							case SKIPDELETION:
								ruleCost += skipDeletionMatrix[i + deltaI];
								deltaI++;
								break;
							case SKIPINSERTION:
								ruleCost += skipInsertionMatrix[j + deltaJ];
								deltaJ++;
								break;
							case DELETIONREPLACEMENT:
								ruleCost += compareMatrix[i + deltaI][j + deltaJ];
								deltaI++;
								break;
							case INSERTIONREPLACEMENT:
								ruleCost += compareMatrix[i + deltaI][j + deltaJ];
								deltaJ++;
								break;
							default:
								throw new UnsupportedOperationException("Unsupported OperationType: " + op);
						}
					}
					// then check if this rule is optimal.
					if (dpTables.get(rule.getNonterminal())[i + deltaI][j + deltaJ]
							+ ruleCost == optimal) {
						// if so we store this rule.
						cooptimals.add(rule);
					}
				}
				// check if we have more than one co-optimal rule.
				if (cooptimals.size() > 1) {
					// if so we have to choose randomly.
					optimalRule = cooptimals.get(rand.nextInt(cooptimals.size()));
				} else {
					// if there is only one optimal rule, we take that.
					optimalRule = cooptimals.get(0);
				}
			} else {
				// if only one rule is possible, that has to be the optimal one.
				optimalRule = possibleRules.get(0);
			}
			// transform the optimal rule to the correct format.
			for (final OperationType op : optimalRule.getOperations()) {
				Node left = null;
				switch (op) {
					case REPLACEMENT:
					case DELETION:
					case SKIPDELETION:
					case DELETIONREPLACEMENT:
					case INSERTIONREPLACEMENT:
						left = a.getNodes().get(i);
						i++;
				}
				if (op == OperationType.INSERTIONREPLACEMENT) {
					i--;
				}
				Node right = null;
				switch (op) {
					case REPLACEMENT:
					case INSERTION:
					case SKIPINSERTION:
					case DELETIONREPLACEMENT:
					case INSERTIONREPLACEMENT:
						right = b.getNodes().get(j);
						j++;
				}
				if (op == OperationType.DELETIONREPLACEMENT) {
					j--;
				}

				final Operation operation = new Operation(left, right, op, path);
				operation.setComparatorDistances(
						Operation.calculateComparatorDistances(op, left, right,
								getSpecification()));
				path.getOperations().add(operation);
			}
			nonterminal = optimalRule.getNonterminal();
		}
		return path;
	}

}