
// de.citec.tcs.alignment.adp.Grammar
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment.adp;
import java.util.EnumSet;
import java.util.List;
/**
* An Algebraic Dynamic Programming (ADP) grammar is a regular tree
* grammar. It produces trees that represent alignments.
*
* Formally it is a 4-tuple (N, axiom, A, Delta)
*
* where N is the set of nonterminal symbols (here an enum), the axiom is one
* particular nonterminal symbol from N that is used at the start, A is a
* subset of N marking the accepting nonterminal symbols and Delta is a set
* of production rules (see there).
*
* Consider the easy example grammar ({ALI}, ALI, {ALI}, Delta) with
* Delta = {
* ALI -> REPLACEMENT(left, ALI, right),
* ALI -> DELETION(left, ALI),
* ALI -> INSERTION(ALI, right)
* }
*
* This grammar formulates a straightforward global alignment. left and right
* can be arbitrary nodes.
*
* Consider as input only string sequences, namely the two sequences "ab" and
* "b". Then all possible trees for this input as constructed by this grammar
* are:
*
* REPLACEMENT('a', DELETION('b', end), 'b'),
* DELETION('a', REPLACEMENT('b', end, 'b')),
* DELETION('a', DELETION('b', INSERTION(end, 'b'))),
* DELETION('a', INSERTION(DELETION('b', end), 'b')),
* INSERTION(DELETION('a', DELETION('b', end)), 'b')
*
* What an ADP algorithm does now is to consider this set of all possible trees
* for the given input, apply an algebra on them that translates them to scores
* and choose the best score using a choice function. That, however, is
* decoupled from the grammar design itself. The ADP algorithms provided in
* this package work on any grammar, that implements this interface here and
* fulfils some very basic conditions (like not containing endless loops).
*
* If you want to create grammars for yourself you can either implement this
* interface here or you can instantiate a FlexibleGrammar object. Implementing
* the interface directly, however, enables you to achieve considerably better
* runtime (with respect to constant factors; the asymptotic runtime is always
* quadratic).
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
* @param <N> the enum that lists all nonterminal symbols for this grammar.
*/
public interface Grammar> {
/**
* Returns the enum class that lists the nonterminal symbols of
* this grammar.
*
* @return the enum class that lists the nonterminal symbols of
* this grammar.
*/
public Class getNonterminalClass();
/**
* Returns the nonterminals of this grammar sorted according to their
* dependencies on one another. A dependency in this sense is a production
* rule of the form A = B;
* If such a rule exists we say that A depends on B (because every entry
* of the dynamic programming table in B has to be pre-computed in order
* to compute the entry in A for the same indices).
* This sortic procedure will ensure that B comes before A in the list.
*
* @return the nonterminals of this grammar sorted according to their
* dependencies on one another.
*/
public N[] dependencySort();
/**
* Returns the axiom of this grammar.
*
* @return the axiom of this grammar.
*/
public N getAxiom();
/**
* Returns the set of all accepting nonterminals of this grammar.
*
* @return the set of all accepting nonterminals of this grammar.
*/
public EnumSet getAccepting();
/**
* Returns all production rules of this grammar that can be applied
* in the current situation.
*
* Assume the simple example grammar from above:
*
* Delta = {
* ALI -> REPLACEMENT(left, ALI, right),
* ALI -> DELETION(left, ALI),
* ALI -> INSERTION(ALI, right)
* }
*
* If we call getPossibleRules(ALI, 0, 0) then no rules can be applied
* because there is no node available that could be replaced, deleted
* or inserted.
*
* If we call getPossibleRules(ALI, 1, 0) then the rule
* ALI -> DELETION(left, ALI)
* should be returned.
*
* Conversely getPossibleRules(ALI, 0, 1) should return the rule
* ALI -> INSERTION(ALI, right)
*
* For all higher input integers, that is leftSize >0 _and_ rightSize >0
* all three rules should be returned.
*
* This method is called very often during alignment calculation. Therefore
* computational efficiency in the implementation here is key.
*
* @param nonterminal the nonterminal symbol on the left side of all
* returned production rules.
* @param leftSize the nodes available in the left input sequence.
* @param rightSize the nodes available in the right right sequence.
* @return all production rules of this grammar that can be applied
* in the current situation.
*/
public List> getPossibleRules(N nonterminal, int leftSize, int rightSize, int M, int N);
/**
* This should return true if and only if this grammar contains
* at least one production rule with the OperationType DELETION
* or INSERTION.
*
* @return true if and only if this grammar contains
* at least one production rule with the OperationType DELETION
* or INSERTION.
*/
public boolean containsGaps();
/**
* This should return true if and only if this grammar contains
* at least one production rule with the OperationType SKIPDELETION
* or SKIPINSERTION.
*
* @return true if and only if this grammar contains
* at least one production rule with the OperationType SKIPDELETION
* or SKIPINSERTION.
*/
public boolean containsSkips();
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy