// de.citec.tcs.alignment.adp.Grammar (Maven / Gradle / Ivy)
//
// Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment.adp;

import java.util.EnumSet;
import java.util.List;

/**
 * An Algebraic Dynamic Programming (ADP) grammar is a regular tree
 * grammar. It produces trees that represent alignments.
 *
 * Formally it is a 4-tuple (N, axiom, A, Delta)
 *
 * where N is the set of nonterminal symbols (here an enum), the axiom is one
 * particular nonterminal symbol from N that is used at the start, A is a
 * subset of N marking the accepting nonterminal symbols and Delta is a set
 * of production rules (see there).
 *
 * Consider the easy example grammar ({ALI}, ALI, {ALI}, Delta) with
 * Delta = {
 * ALI -> REPLACEMENT(left, ALI, right),
 * ALI -> DELETION(left, ALI),
 * ALI -> INSERTION(ALI, right)
 * }
 *
 * This grammar formulates a straightforward global alignment. left and right
 * can be arbitrary nodes.
 *
 * Consider as input only string sequences, namely the two sequences "ab" and
 * "b". Then all possible trees for this input as constructed by this grammar
 * are:
 *
 * REPLACEMENT('a', DELETION('b', end), 'b'),
 * DELETION('a', REPLACEMENT('b', end, 'b')),
 * DELETION('a', DELETION('b', INSERTION(end, 'b'))),
 * DELETION('a', INSERTION(DELETION('b', end), 'b')),
 * INSERTION(DELETION('a', DELETION('b', end)), 'b')
 *
 * What an ADP algorithm does now is to consider this set of all possible trees
 * for the given input, apply an algebra on them that translates them to scores
 * and choose the best score using a choice function. That, however, is
 * decoupled from the grammar design itself. The ADP algorithms provided in
 * this package work on any grammar, that implements this interface here and
 * fulfils some very basic conditions (like not containing endless loops).
 *
 * If you want to create grammars for yourself you can either implement this
 * interface here or you can instantiate a FlexibleGrammar object. Implementing
 * the interface directly, however, enables you to achieve considerably better
 * runtime (regarding constant factors. The asymptotic runtime is always
 * quadratic).
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 * @param  the enum that lists all nonterminal symbols for this grammar.
 */
public interface Grammar> {

	/**
	 * Returns the enum class that lists the nonterminal symbols of
	 * this grammar.
	 *
	 * @return the enum class that lists the nonterminal symbols of
	 * this grammar.
	 */
	public Class getNonterminalClass();

	/**
	 * Returns the nonterminals of this grammar sorted according to their
	 * dependencies on one another. A dependency in this sense is a production
	 * rule of the form A = B;
	 * If such a rule exists we say that A depends on B (because every entry
	 * of the dynamic programming table in B has to be pre-computed in order
	 * to compute the entry in A for the same indices).
	 * This sortic procedure will ensure that B comes before A in the list.
	 *
	 * @return the nonterminals of this grammar sorted according to their
	 * dependencies on one another.
	 */
	public N[] dependencySort();

	/**
	 * Returns the axiom of this grammar.
	 *
	 * @return the axiom of this grammar.
	 */
	public N getAxiom();

	/**
	 * Returns the set of all accepting nonterminals of this grammar.
	 *
	 * @return the set of all accepting nonterminals of this grammar.
	 */
	public EnumSet getAccepting();

	/**
	 * Returns all production rules of this grammar that can be applied
	 * in the current situation.
	 *
	 * Assume the simple example grammar from above:
	 *
	 * Delta = {
	 * ALI -> REPLACEMENT(left, ALI, right),
	 * ALI -> DELETION(left, ALI),
	 * ALI -> INSERTION(ALI, right)
	 * }
	 *
	 * If we call getPossibleRules(ALI, 0, 0) then no rules can be applied
	 * because there is no node available that could be replaced, deleted
	 * or inserted.
	 *
	 * If we call getPossibleRules(ALI, 1, 0) then the rule
	 * ALI -> DELETION(left, ALI)
	 * should be returned.
	 *
	 * Conversely getPossibleRules(ALI, 0, 1) should return the rule
	 * ALI -> INSERTION(ALI, right)
	 *
	 * For all higher input integers, that is leftSize >0 _and_ rightSize >0
	 * all three rules should be returned.
	 * 
	 * This method is called very often during alignment calculation. Therefore
	 * computational efficiency in the implementation here is key.
	 *
	 * @param nonterminal the nonterminal symbol on the left side of all
	 * returned production rules.
	 * @param leftSize the nodes available in the left input sequence.
	 * @param rightSize the nodes available in the right right sequence.
	 * @return all production rules of this grammar that can be applied
	 * in the current situation.
	 */
	public List> getPossibleRules(N nonterminal, int leftSize, int rightSize, int M, int N);

	/**
	 * This should return true if and only if this grammar contains
	 * at least one production rule with the OperationType DELETION
	 * or INSERTION.
	 *
	 * @return true if and only if this grammar contains
	 * at least one production rule with the OperationType DELETION
	 * or INSERTION.
	 */
	public boolean containsGaps();

	/**
	 * This should return true if and only if this grammar contains
	 * at least one production rule with the OperationType SKIPDELETION
	 * or SKIPINSERTION.
	 *
	 * @return true if and only if this grammar contains
	 * at least one production rule with the OperationType SKIPDELETION
	 * or SKIPINSERTION.
	 */
	public boolean containsSkips();
}