
de.citec.tcs.alignment.adp.StrictADPFullAlgorithm Maven / Gradle / Ivy
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
package de.citec.tcs.alignment.adp;
import de.citec.tcs.alignment.AlignmentPath;
import de.citec.tcs.alignment.AlignmentSpecification;
import de.citec.tcs.alignment.Operation;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.EnumMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
/**
* This algorithm calculates an optimal AlignmentPath using the given
* ADP grammar and algebra/AlignmentSpecification.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
* @param the enum listing the nonterminal symbols for the input grammar.
*/
public class StrictADPFullAlgorithm> extends AbstractADPAlgorithm {
public StrictADPFullAlgorithm(Grammar grammar, AlignmentSpecification alignmentSpecification) {
super(grammar, AlignmentPath.class, alignmentSpecification);
}
/**
* {@inheritDoc}
*/
@Override
public double choice(double[] choices) {
double min = choices[0];
for (int l = 1; l < choices.length; l++) {
if (choices[l] < min) {
min = choices[l];
}
}
return min;
}
/**
* {@inheritDoc}
*/
@Override
public AlignmentPath transformToResult(EnumMap dpTables,
double[][] compareMatrix,
double[] deletionMatrix, double[] insertionMatrix,
double[] skipDeletionMatrix, double[] skipInsertionMatrix,
Sequence a, Sequence b) {
final double acum_score = dpTables.get(getGrammar().getAxiom())[0][0];
// we normalize the score by the added length of both sequences.
// For most skip cost schemes this will never be reached.
final double normalized_score = normalizeDissimilarity(acum_score, a, b);
// now we try to reconstruct the (or better: an) optimal bath via
// backtracing. The idea is to start at the beginning and look
// which production rules lead to optimal costs.
// If multiple of such rules exists (co-optimal rules) we randomly
// decide for one of them.
final AlignmentPath path = new AlignmentPath(getSpecification(),
a, b, normalized_score);
int i = 0;
int j = 0;
N nonterminal = getGrammar().getAxiom();
final int M = a.getNodes().size();
final int N = b.getNodes().size();
final Random rand = new Random();
while (i < M || j < N) {
// calculate the production rules we could apply in the current
// situation.
final List> possibleRules;
{
final int leftSize = M - i;
final int rightSize = N - j;
possibleRules = getGrammar().getPossibleRules(nonterminal,
leftSize, rightSize, M, N);
}
// now ensure that only one rule is left.
final ProductionRule optimalRule;
if (possibleRules.size() > 1) {
// if we have more than one choice in the beginning we
// restrict first by only choosing co-optimals.
final LinkedList> cooptimals = new LinkedList<>();
final double optimal = dpTables.get(nonterminal)[i][j];
for (final ProductionRule rule : possibleRules) {
// calculate the cost for applying this rule.
double ruleCost = 0;
int deltaI = 0;
int deltaJ = 0;
for (final OperationType op : rule.getOperations()) {
switch (op) {
case DELETION:
ruleCost += deletionMatrix[i + deltaI];
deltaI++;
break;
case INSERTION:
ruleCost += insertionMatrix[j + deltaJ];
deltaJ++;
break;
case REPLACEMENT:
ruleCost += compareMatrix[i + deltaI][j + deltaJ];
deltaI++;
deltaJ++;
break;
case SKIPDELETION:
ruleCost += skipDeletionMatrix[i + deltaI];
deltaI++;
break;
case SKIPINSERTION:
ruleCost += skipInsertionMatrix[j + deltaJ];
deltaJ++;
break;
case DELETIONREPLACEMENT:
ruleCost += compareMatrix[i + deltaI][j + deltaJ];
deltaI++;
break;
case INSERTIONREPLACEMENT:
ruleCost += compareMatrix[i + deltaI][j + deltaJ];
deltaJ++;
break;
default:
throw new UnsupportedOperationException("Unsupported OperationType: " + op);
}
}
// then check if this rule is optimal.
if (dpTables.get(rule.getNonterminal())[i + deltaI][j + deltaJ]
+ ruleCost == optimal) {
// if so we store this rule.
cooptimals.add(rule);
}
}
// check if we have more than one co-optimal rule.
if (cooptimals.size() > 1) {
// if so we have to choose randomly.
optimalRule = cooptimals.get(rand.nextInt(cooptimals.size()));
} else {
// if there is only one optimal rule, we take that.
optimalRule = cooptimals.get(0);
}
} else {
// if only one rule is possible, that has to be the optimal one.
optimalRule = possibleRules.get(0);
}
// transform the optimal rule to the correct format.
for (final OperationType op : optimalRule.getOperations()) {
Node left = null;
switch (op) {
case REPLACEMENT:
case DELETION:
case SKIPDELETION:
case DELETIONREPLACEMENT:
case INSERTIONREPLACEMENT:
left = a.getNodes().get(i);
i++;
}
if (op == OperationType.INSERTIONREPLACEMENT) {
i--;
}
Node right = null;
switch (op) {
case REPLACEMENT:
case INSERTION:
case SKIPINSERTION:
case DELETIONREPLACEMENT:
case INSERTIONREPLACEMENT:
right = b.getNodes().get(j);
j++;
}
if (op == OperationType.DELETIONREPLACEMENT) {
j--;
}
final Operation operation = new Operation(left, right, op, path);
operation.setComparatorDistances(
Operation.calculateComparatorDistances(op, left, right,
getSpecification()));
path.getOperations().add(operation);
}
nonterminal = optimalRule.getNonterminal();
}
return path;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy