/*
* TCS Alignment Toolbox Version 3
*
* Copyright (C) 2016
* Benjamin Paaßen
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment.adp;
import de.citec.tcs.alignment.Alignment;
import de.citec.tcs.alignment.Operation;
import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.parallel.MatrixEngine.MatrixCoordinate;
import java.util.EnumMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import lombok.NonNull;
/**
* This algorithm calculates an optimal Alignment using the given ADP grammar and
* algebra/Comparator.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
* @param <X> the class of the elements in the left input sequence.
* @param <Y> the class of the elements in the right input sequence.
* @param <N> the enum listing the nonterminal symbols for the input grammar.
*/
public class SparseStrictADPFullAlgorithm<X, Y, N extends Enum<N>> extends SparseAbstractADPAlgorithm<X, Y, N> {
public SparseStrictADPFullAlgorithm(@NonNull SparseGrammar<N> grammar,
@NonNull Comparator<X, Y> comparator) {
super(grammar, Alignment.class, comparator);
}
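/**
* The choice function of the algebra: among the costs of all applicable
* production rules it returns the strict minimum, so the score computed
* by this algorithm is a dissimilarity.
*/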
@Override
public double choice(double[] choices) {
double min = choices[0];
for (int l = 1; l < choices.length; l++) {
if (choices[l] < min) {
min = choices[l];
}
}
return min;
}
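/*
* A minimal usage sketch. The nonterminal enum "Nonterminal" and the
* variables "grammar" and "comparator" are hypothetical placeholders that
* would have to be set up via the SparseGrammar and Comparator APIs; only
* the constructor call itself is taken from this class:
*
* SparseGrammar<Nonterminal> grammar = ...;
* Comparator<String, String> comparator = ...;
* SparseStrictADPFullAlgorithm<String, String, Nonterminal> algorithm =
* new SparseStrictADPFullAlgorithm<>(grammar, comparator);
*
* The dynamic programming run itself is inherited from
* SparseAbstractADPAlgorithm, which fills the dp tables and eventually
* calls transformToResult below to obtain an Alignment via backtracing.
*/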
@Override
public Alignment<X, Y> transformToResult(@NonNull EnumMap dpTables,
@NonNull EnumMap repMatrices,
@NonNull EnumMap<OperationType, double[]> delVectors,
@NonNull EnumMap<OperationType, double[]> insVectors,
@NonNull List<X> a, @NonNull List<Y> b) {
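// The dp table of the grammar's axiom stores, at coordinate (0, 0),
// the accumulated cost of an optimal alignment of the full sequences.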
final double acum_score = dpTables.get(getGrammar().getAxiom()).get(
new MatrixCoordinate(0, 0));
// we normalize the score
final double normalized_score = normalizeDissimilarity(acum_score, a, b);
// Now we try to reconstruct an optimal path via backtracing.
// The idea is to start at the beginning and check which production
// rules lead to optimal costs.
// If multiple such rules exist (co-optimal rules), we randomly
// pick one of them.
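// A rule is co-optimal at cell (i, j) if its local operation costs plus
// the dp table entry of its successor nonterminal at the advanced cell
// equal the dp table entry of the current nonterminal at (i, j).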
final Alignment<X, Y> alignment = new Alignment<>(getComparator(), a, b, normalized_score);
int i = 0;
int j = 0;
N nonterminal = getGrammar().getAxiom();
final int m = a.size();
final int n = b.size();
final Random rand = new Random();
while (i < m || j < n) {
// calculate the production rules we could apply in the current
// situation.
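// Which rules are applicable depends on how many elements are still
// left in both sequences, which is why the remaining lengths (and the
// overall lengths m and n) are passed to the grammar below.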
final List<ProductionRule<N>> possibleRules;
{
final int leftSize = m - i;
final int rightSize = n - j;
possibleRules = getGrammar().getPossibleRules(nonterminal,
leftSize, rightSize, m, n);
}
// now narrow the choice down to a single rule.
final ProductionRule<N> optimalRule;
if (possibleRules.size() > 1) {
// if we have more than one possible rule, we first restrict
// the set to co-optimal rules only.
final LinkedList<ProductionRule<N>> cooptimals = new LinkedList<>();
final double optimal = dpTables.get(nonterminal).get(
new MatrixCoordinate(i, j));
for (final ProductionRule<N> rule : possibleRules) {
// calculate the cost for applying this rule.
double ruleCost = 0;
int deltaI = 0;
int deltaJ = 0;
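// deltaI and deltaJ count how many elements of a and b this rule
// consumes; they are needed below to look up the dp table entry of the
// successor nonterminal at the cell the rule leads to.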
for (final OperationType op : rule.getOperations()) {
switch (op) {
case REPLACEMENT:
case DELETIONREPLACEMENT:
case INSERTIONREPLACEMENT:
ruleCost += repMatrices.get(op).get(new MatrixCoordinate(i + deltaI, j + deltaJ));
if (op != OperationType.INSERTIONREPLACEMENT) {
deltaI++;
}
if (op != OperationType.DELETIONREPLACEMENT) {
deltaJ++;
}
break;
case DELETION:
case SKIPDELETION:
ruleCost += delVectors.get(op)[i + deltaI];
deltaI++;
break;
case INSERTION:
case SKIPINSERTION:
ruleCost += insVectors.get(op)[j + deltaJ];
deltaJ++;
break;
default:
throw new UnsupportedOperationException("Unsupported operation: " + op);
}
}
// then check if this rule is optimal.
final double old = dpTables.get(rule.getNonterminal()).get(
new MatrixCoordinate(i + deltaI, j + deltaJ));
if (old + ruleCost == optimal) {
// if so we store this rule.
cooptimals.add(rule);
}
}
// check if we have more than one co-optimal rule.
if (cooptimals.size() > 1) {
// if so we have to choose randomly.
optimalRule = cooptimals.get(rand.nextInt(cooptimals.size()));
} else {
// if there is only one optimal rule, we take that.
optimalRule = cooptimals.get(0);
}
} else {
// if only one rule is possible, that has to be the optimal one.
optimalRule = possibleRules.get(0);
}
// transform the optimal rule to the correct format.
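// Each operation of the chosen rule is added to the alignment with its
// local cost, and i and j are advanced exactly as in the cost
// computation above.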
for (final OperationType op : optimalRule.getOperations()) {
X left = null;
Y right = null;
final double cost;
switch (op) {
case REPLACEMENT:
case DELETIONREPLACEMENT:
case INSERTIONREPLACEMENT:
left = a.get(i);
right = b.get(j);
cost = repMatrices.get(op).get(new MatrixCoordinate(i, j));
if (op != OperationType.INSERTIONREPLACEMENT) {
i++;
}
if (op != OperationType.DELETIONREPLACEMENT) {
j++;
}
break;
case DELETION:
case SKIPDELETION:
left = a.get(i);
cost = delVectors.get(op)[i];
i++;
break;
case INSERTION:
case SKIPINSERTION:
right = b.get(j);
cost = insVectors.get(op)[j];
j++;
break;
default:
throw new UnsupportedOperationException("Unsupported operation: " + op);
}
final Operation<X, Y> operation = new Operation<>(left, right, op, cost);
alignment.add(operation);
}
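// continue backtracing with the successor nonterminal of the applied rule.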
nonterminal = optimalRule.getNonterminal();
}
return alignment;
}
}