All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.tcs.alignment.sequence.Sequence Maven / Gradle / Ivy

Go to download

This module contains the sequence data structure of the TCS Alignment Toolbox. It defines the possible value sets in the ValueType enum as well as the different KeywordSpecification classes, namely: 1.) StringKeywordSpecification for string type values; 2.) SymbolicKeywordSpecification for values from a discrete alphabet (also refer to the Alphabet class); 3.) VectorialKeywordSpecification for vectors of some length (or for scalars). A NodeSpecification is a vector of such KeywordSpecifications and defines the order of value sets. A node, then, is defined as a vector of values from these value sets (also refer to the Value interface as well as the StringValue, SymbolicValue and VectorialValue classes). Finally, a sequence is defined as a list of such nodes.

There is a newer version: 3.1.1
Show newest version
/* 
 * TCS Alignment Toolbox Version 3
 * 
 * Copyright (C) 2016
 * Benjamin Paaßen
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.tcs.alignment.sequence;

import java.util.ArrayList;
import java.util.TreeSet;
import lombok.NonNull;

/**
 * Usually a sequence for alignments is defined as a sequence of symbols.
 * However we provide a more general definition here: We define a sequence of
 * nodes that aggregate key-value pairs.
 *
 * More formally: A sequence is built according to a NodeSpecification
 * that defines a set of permitted keywords K as well as for each keyword k in K
 * the allowed space of values V_k.
 *
 * A Node is defined as a mapping from keywords to concrete values n(k)
 * in V_k.
 *
 * The sequence then is defined as a sequence of such Nodes that are all valid
 * to the same NodeSpecification.
 *
 * @author Benjamin Paassen - [email protected]
 */
public class Sequence extends ArrayList<Node> {

	/**
	 * The keyword used by the string-based constructors, which build sequences
	 * with exactly one (symbolic) keyword.
	 */
	public static final String DEFAULTKEYWORD = "content";

	private final NodeSpecification nodeSpecification;

	/**
	 * Sets up an empty sequence with the given NodeSpecification as basis.
	 *
	 * @param nodeSpecficiation The NodeSpecification that is the basis for this
	 * Sequence.
	 */
	public Sequence(@NonNull NodeSpecification nodeSpecficiation) {
		this.nodeSpecification = nodeSpecficiation;
	}

	/**
	 * Sets up a trivial sequence with only one keyword (DEFAULTKEYWORD) and treats
	 * the string content between | delimiters in the input string as symbolic
	 * values. If no delimiters are there the characters of the input string are
	 * treated as symbolic values.
	 *
	 * The alphabet is derived from the input: it consists of the distinct
	 * symbols found in the input string, in their natural (sorted) order.
	 *
	 * @param sequenceString an input string.
	 */
	public Sequence(@NonNull String sequenceString) {
		final String[] symbols = splitIntoSymbols(sequenceString);
		//a TreeSet removes duplicates and yields a deterministic symbol order.
		final TreeSet<String> distinctSymbols = new TreeSet<>();
		for (final String symbol : symbols) {
			distinctSymbols.add(symbol);
		}
		final Alphabet alphabet = new Alphabet(distinctSymbols.toArray(new String[0]));
		//the specification has to be in place before any Node is created for this sequence.
		this.nodeSpecification = symbolicSpecification(alphabet);
		appendSymbols(symbols, alphabet);
	}

	/**
	 * Sets up a trivial sequence with only one keyword (DEFAULTKEYWORD) and treats
	 * the string content between | delimiters in the input string as symbolic
	 * values. If no delimiters are there the characters of the input string are
	 * treated as symbolic values.
	 *
	 * @param sequenceString an input string.
	 * @param alphabet the alphabet that is the basis for the given input string
	 * and contains all symbols that are used in the input sequence.
	 */
	public Sequence(@NonNull String sequenceString, @NonNull Alphabet alphabet) {
		//the specification has to be in place before any Node is created for this sequence.
		this.nodeSpecification = symbolicSpecification(alphabet);
		appendSymbols(splitIntoSymbols(sequenceString), alphabet);
	}

	/**
	 * Splits an input string into the symbols of the sequence it describes.
	 *
	 * If the input contains no | delimiter, every Unicode code point becomes
	 * one symbol, such that a surrogate pair is kept together as a single
	 * symbol instead of being torn into two invalid halves.
	 *
	 * Otherwise the input is split at the | delimiters. Empty tokens are only
	 * kept if they are neither the first nor the last entry of the split
	 * result, i.e. a leading delimiter does not create an empty symbol.
	 * Trailing empty tokens are already discarded by String.split.
	 *
	 * @param sequenceString the input string; must not be null.
	 *
	 * @return the symbols of the sequence in order of occurrence.
	 */
	private static String[] splitIntoSymbols(String sequenceString) {
		final ArrayList<String> symbols = new ArrayList<>();
		if (!sequenceString.contains("|")) {
			int index = 0;
			while (index < sequenceString.length()) {
				final int width = Character.charCount(sequenceString.codePointAt(index));
				symbols.add(sequenceString.substring(index, index + width));
				index += width;
			}
		} else {
			final String[] split = sequenceString.split("\\|");
			for (int i = 0; i < split.length; i++) {
				if (!split[i].isEmpty() || (i > 0 && i < split.length - 1)) {
					symbols.add(split[i]);
				}
			}
		}
		return symbols.toArray(new String[0]);
	}

	/**
	 * Creates a NodeSpecification that consists of a single symbolic keyword
	 * (DEFAULTKEYWORD) over the given alphabet.
	 *
	 * @param alphabet the alphabet of the single keyword.
	 *
	 * @return the NodeSpecification for a trivial, purely symbolic sequence.
	 */
	private static NodeSpecification symbolicSpecification(Alphabet alphabet) {
		final SymbolicKeywordSpecification spec
				= new SymbolicKeywordSpecification(alphabet, DEFAULTKEYWORD);
		return new NodeSpecification(new KeywordSpecification[]{spec});
	}

	/**
	 * Appends one Node per given symbol to this sequence and stores the symbol
	 * as symbolic value for the DEFAULTKEYWORD. The nodeSpecification of this
	 * sequence has to be set before this method is called.
	 *
	 * @param symbols the symbols to append, in order.
	 * @param alphabet the alphabet the symbols belong to.
	 */
	private void appendSymbols(String[] symbols, Alphabet alphabet) {
		for (final String symbol : symbols) {
			final Node node = new Node(this);
			node.setValue(DEFAULTKEYWORD, new SymbolicValue(alphabet, symbol));
			this.add(node);
		}
	}

	/**
	 * Returns the NodeSpecification all Nodes in this sequence belong to.
	 *
	 * @return the NodeSpecification all Nodes in this sequence belong to.
	 */
	public NodeSpecification getNodeSpecification() {
		return nodeSpecification;
	}

	/**
	 * Renders this sequence as a string in which nodes are separated by |.
	 *
	 * If the NodeSpecification has only one keyword, each node is rendered as
	 * its plain value. If it has more than one keyword, each node is rendered
	 * as keyword#value entries separated by ;. Values that are null are
	 * rendered as empty strings.
	 *
	 * @return the string representation of this sequence.
	 */
	@Override
	public String toString() {
		final StringBuilder builder = new StringBuilder();
		final int numKeywords = nodeSpecification.size();
		final boolean labeling = numKeywords > 1;
		for (final Node node : this) {
			//append all values for each node and label them if necessary
			for (int k = 0; k < numKeywords; k++) {
				if (labeling) {
					builder.append(nodeSpecification.getKeyword(k));
					builder.append("#");
				}
				final Value value = node.getValue(k);
				if (value != null) {
					builder.append(value.toString());
				}
				if (labeling) {
					builder.append(";");
				}
			}
			if (labeling) {
				//drop the ; behind the last keyword of this node
				builder.setLength(builder.length() - 1);
			}
			builder.append("|");
		}
		if (builder.length() > 0) {
			//drop the | behind the last node
			builder.setLength(builder.length() - 1);
		}
		return builder.toString();
	}

	@Override
	public int hashCode() {
		int hash = 7;
		hash = 79 * hash + (this.nodeSpecification != null ? this.nodeSpecification.hashCode() : 0);
		hash = 79 * hash + super.hashCode();
		return hash;
	}

	/**
	 * Two sequences are equal if they are of the same class, have equal
	 * NodeSpecifications and contain equal nodes in the same order.
	 *
	 * @param obj the object to compare with.
	 *
	 * @return true if and only if the given object is equal to this sequence.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		final Sequence other = (Sequence) obj;
		final boolean sameSpecification
				= this.nodeSpecification == other.nodeSpecification
				|| (this.nodeSpecification != null
				&& this.nodeSpecification.equals(other.nodeSpecification));
		return sameSpecification && super.equals(other);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy