All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.automatalib.words.Word Maven / Gradle / Ivy

Go to download

This artifact contains the API of AutomataLib, which mainly consists of interfaces for the various concepts and automaton models supported by the AutomataLib core. In addition to that, it also defines some fundamental classes for dealing with words of symbols.

There is a newer version: 0.11.0
Show newest version
/* Copyright (C) 2013 TU Dortmund
 * This file is part of AutomataLib, http://www.automatalib.net/.
 * 
 * AutomataLib is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 3.0 as published by the Free Software Foundation.
 * 
 * AutomataLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with AutomataLib; if not, see
 * http://www.gnu.de/documents/lgpl.en.html.
 */
package net.automatalib.words;

import java.io.IOException;
import java.util.AbstractList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

import net.automatalib.commons.util.array.ArrayWritable;
import net.automatalib.commons.util.strings.AbstractPrintable;
import net.automatalib.commons.util.strings.StringUtil;

/**
 * A word is an ordered sequence of symbols. {@link Word}s are generally immutable,
 * i.e., a single {@link Word} object will never change (unless symbol objects are modified,
 * which is however highly discouraged).
 * 

* This class provides the following static methods for creating words in the most common scenarios: *

    *
  • {@link #epsilon()} returns the empty word of length 0 *
  • {@link #fromLetter(Object)} turns a single letter into a word of length 1 *
  • {@link #fromSymbols(Object...)} creates a word from an array of symbols *
  • {@link #fromArray(Object[], int, int)} creates a word from a subrange of a symbols array *
  • {@link #fromList(List)} creates a word from a {@link List} of symbols *
*

* Modification operations like {@link #append(Object)} or {@link #concat(Word...)} create * new objects, subsequently invoking these operations on the respective objects returned is * therefore highly inefficient. If words need to be dynamically created, a {@link WordBuilder} * should be used. *

* This is an abstract base class for word representations. Implementing classes only need to implement *

    *
  • {@link #getSymbol(int)} *
  • {@link #length()} *
*

* However, for the sake of efficiency it is highly encouraged to overwrite the other methods * as well, providing specialized realizations. * * @param symbol class * * @author Malte Isberner */ public abstract class Word extends AbstractPrintable implements ArrayWritable, Iterable { /** * Retrieves the empty word. * @return the empty word. * @see Collections#emptyList() */ @SuppressWarnings("unchecked") public static Word epsilon() { return (Word)(Word)EmptyWord.INSTANCE; } /** * Constructs a word from a single letter. * @param letter the letter * @return a word consisting of only this letter */ public static Word fromLetter(I letter) { return new LetterWord<>(letter); } /** * Creates a word from an array of symbols. * @param symbols the symbol array * @return a word containing the symbols in the specified array */ @SafeVarargs public static Word fromSymbols(I ...symbols) { if(symbols.length == 0) return epsilon(); if(symbols.length == 1) return fromLetter(symbols[0]); Object[] array = new Object[symbols.length]; System.arraycopy(symbols, 0, array, 0, symbols.length); return new SharedWord<>(symbols); } /** * Creates a word from a subrange of an array of symbols. Note that to * ensure immutability, internally a copy of the array is made. * @param symbols the symbols array * @param offset the starting index in the array * @param length the length of the resulting word (from the starting index on) * @return the word consisting of the symbols in the range */ public static Word fromArray(I[] symbols, int offset, int length) { if(length == 0) return epsilon(); if(length == 1) return fromLetter(symbols[offset]); Object[] array = new Object[length]; System.arraycopy(symbols, offset, array, 0, length); return new SharedWord<>(symbols); } /** * Creates a word from a list of symbols * @param symbolList the list of symbols * @return the resulting word */ public static Word fromList(List symbolList) { int siz = symbolList.size(); if(siz == 0) return epsilon(); if(siz == 1) return Word.fromLetter(symbolList.get(0)); return new SharedWord<>(symbolList); } public static Word fromString(String str) { int len = str.length(); Character[] chars = new Character[str.length()]; for(int i = 0; i < len; i++) chars[i] = Character.valueOf(str.charAt(i)); return new SharedWord<>(chars); } /* * General word iterator */ private class Iterator implements java.util.Iterator { private int index = 0; /* * (non-Javadoc) * @see java.util.Iterator#hasNext() */ @Override public boolean hasNext() { return (index < Word.this.length()); } /* * (non-Javadoc) * @see java.util.Iterator#next() */ @Override public I next() { return Word.this.getSymbol(index++); } /* * (non-Javadoc) * @see java.util.Iterator#remove() */ @Override public void remove() { throw new UnsupportedOperationException(); } } /* * Representing a word as a list. */ private class AsList extends AbstractList { /* * (non-Javadoc) * @see java.util.AbstractList#get(int) */ @Override public I get(int index) { return Word.this.getSymbol(index); } /* * (non-Javadoc) * @see java.util.AbstractCollection#size() */ @Override public int size() { return Word.this.length(); } /* * (non-Javadoc) * @see java.util.AbstractList#iterator() */ @Override public java.util.Iterator iterator() { return Word.this.iterator(); } } /** * Return symbol that is at the specified position * @param i the position * @return symbol at position i, null if no such symbol exists */ public abstract I getSymbol(int index); /** * Retrieves the length of this word. * @return the length of this word. */ public abstract int length(); /* * (non-Javadoc) * @see java.lang.Object#hashCode() */ @Override public int hashCode() { int hash = 5; for(I sym : this) { hash *= 89; hash += (sym != null) ? sym.hashCode() : 0; } return hash; } /* * (non-Javadoc) * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals(Object other) { if(this == other) return true; if(other == null) return false; if(!(other instanceof Word)) return false; Word otherWord = (Word)other; int len = otherWord.length(); if(len != length()) return false; java.util.Iterator thisIt = iterator(); java.util.Iterator otherIt = otherWord.iterator(); while(thisIt.hasNext()) { I thisSym = thisIt.next(); Object otherSym = otherIt.next(); if(!Objects.equals(thisSym, otherSym)) return false; } return true; } /* * (non-Javadoc) * @see net.automatalib.commons.util.strings.Printable#print(java.lang.Appendable) */ @Override public void print(Appendable a) throws IOException { StringUtil.appendIterable(a, this, " "); } /* * (non-Javadoc) * @see net.automatalib.commons.util.array.ArrayWritable#size() */ @Override public final int size() { return length(); } /** * Retrieves a word representing the specified subrange of this word. * As words are immutable, this function usually can be realized quite efficient * (implementing classes should take care of this). * * @param fromIndex the first index, inclusive. * @param toIndex the last index, exclusive. * @return the word representing the specified subrange. */ public final Word subWord(int fromIndex, int toIndex) { if(fromIndex < 0 || toIndex < fromIndex || toIndex > length()) throw new IndexOutOfBoundsException("Invalid subword range [" + fromIndex + ", " + toIndex + ")"); return _subWord(fromIndex, toIndex); } /** * Retrieves the subword of this word starting at the given index and extending * until the end of this word. Calling this method is equivalent to calling *

w.subWord(fromIndex, w.length())
* @param fromIndex the first index, inclusive * @return the word representing the specified subrange */ public final Word subWord(int fromIndex) { if(fromIndex <= 0) { if(fromIndex == 0) return this; throw new IndexOutOfBoundsException("Invalid subword range [" + fromIndex + ",)"); } return _subWord(fromIndex, length()); } /** * Internal subword operation implementation. In contrast to {@link #subWord(int, int)}, * no range checks need to be performed. As this method is flagged as protected, * implementations may rely on the specified indices being valid. * @param fromIndex the first index, inclusive (guaranteed to be valid) * @param toIndex the last index, exclusive (guaranteed to be valid) * @return the word representing the specified subrange */ protected Word _subWord(int fromIndex, int toIndex) { int len = toIndex - fromIndex; Object[] array = new Object[len]; writeToArray(fromIndex, array, 0, len); return new SharedWord<>(array); } /* * (non-Javadoc) * @see java.lang.Iterable#iterator() */ @Override public java.util.Iterator iterator() { return new Iterator(); } /* * (non-Javadoc) * @see net.automatalib.commons.util.array.ArrayWritable#writeToArray(int, java.lang.Object[], int, int) */ @Override public void writeToArray(int offset, Object[] array, int tgtOffset, int length) { int idx = offset, arrayIdx = tgtOffset; while(length-- > 0) array[arrayIdx++] = getSymbol(idx++); } /** * Retrieves a {@link List} view on the contents of this word. * @return an unmodifiable list of the contained symbols. */ public List asList() { return new AsList(); } /** * Retrieves a prefix of the given length. If length * is negative, then a prefix consisting of all but the last * -length symbols is returned. * * @param prefixLen the length of the prefix (may be negative, see above). * @return the prefix of the given length. */ public final Word prefix(int prefixLen) { if(prefixLen < 0) prefixLen = length() + prefixLen; return subWord(0, prefixLen); } /** * Retrieves a suffix of the given length. If length is * negative, then a suffix consisting of all but the first * -length symbols is returned. * * @param suffixLen the length of the suffix (may be negative, see above). * @return the suffix of the given length. */ public final Word suffix(int suffixLen) { int wordLen = length(); int startIdx = (suffixLen < 0) ? -suffixLen : (wordLen - suffixLen); return subWord(startIdx, wordLen); } /** * Retrieves the list of all prefixes of this word. In the default implementation, * the prefixes are lazily instantiated upon the respective calls of {@link List#get(int)} * or {@link Iterator#next()}. * @param longestFirst whether to start with the longest prefix (otherwise, the first prefix * in the list will be the shortest). * @return a (non-materialized) list containing all prefixes */ public List> prefixes(boolean longestFirst) { return new SubwordList<>(this, true, longestFirst); } /** * Retrieves the list of all suffixes of this word. In the default implementation, * the suffixes are lazily instantiated upon the respective calls of {@link List#get(int)} * or {@link Iterator#next()}. * @param longestFirst whether to start with the longest suffix (otherwise, the first suffix * in the list will be the shortest). * @return a (non-materialized) list containing all suffix */ public List> suffixes(boolean longestFirst) { return new SubwordList<>(this, false, longestFirst); } /** * Retrieves the next word after this in canonical order. Figuratively * speaking, if there are k alphabet symbols, one can think of * a word of length n as an n-digit radix-k * representation of the number. The next word in canonical order * is the representation for the number represented by this word plus one. * @param sigma the alphabet * @return the next word in canonical order */ public Word canonicalNext(Alphabet sigma) { int len = length(); Object[] symbols = new Object[len]; writeToArray(0, symbols, 0, len); int alphabetSize = sigma.size(); int i = 0; boolean overflow = true; for(I sym : this) { int nextIdx = (sigma.getSymbolIndex(sym) + 1) % alphabetSize; symbols[i++] = sigma.getSymbol(nextIdx); if(nextIdx != 0) { overflow = false; break; } } while(i < len) { symbols[i] = getSymbol(i); i++; } if(overflow) { Object[] newSymbols = new Object[len+1]; newSymbols[0] = sigma.getSymbol(0); System.arraycopy(symbols, 0, newSymbols, 1, len); symbols = newSymbols; } return new SharedWord<>(symbols); } /** * Retrieves the last symbol of this word. * @return the last symbol of this word. */ public I lastSymbol() { return getSymbol(length() - 1); } /** * Retrieves the first symbol of this word. * @return the first symbol of this word */ public I firstSymbol() { return getSymbol(0); } /** * Appends a symbol to this word and returns the result as a new word. * @param symbol the symbol to append * @return the word plus the given symbol */ public Word append(I symbol) { int len = length(); Object[] array = new Object[len + 1]; writeToArray(0, array, 0, len); array[len] = symbol; return new SharedWord<>(array); } /** * Prepends a symbol to this word and returns the result as a new word. * @param symbol the symbol to prepend * @return the given symbol plus to word. */ public Word prepend(I symbol) { int len = length(); Object[] array = new Object[len+1]; array[0] = symbol; writeToArray(0, array, 1, len); return new SharedWord<>(array); } /** * Concatenates this word with several other words and returns the result * as a new word. * * Note that this method cannot be overridden. Implementing classes need to * override the {@link #_concat(Word...)} method instead. * * @param words the words to concatenate with this word * @return the result of the concatenation * @see #_concat(Word...) */ @SafeVarargs public final Word concat(Word ...words) { return _concat(words); } /** * Realizes the concatenation of this word with several other words. * @param words the words to concatenate * @return the results of the concatenation */ @SuppressWarnings("unchecked") protected Word _concat(Word ...words) { if(words.length == 0) return this; int len = length(); int totalSize = len; for(int i = 0; i < words.length; i++) totalSize += words[i].length(); Object[] array = new Object[totalSize]; writeToArray(0, array, 0, len); int currOfs = len; for(int i = 0; i < words.length; i++) { Word w = words[i]; int wLen = w.length(); w.writeToArray(0, array, currOfs, wLen); currOfs += wLen; } return new SharedWord<>(array); } /** * Checks if this word is a prefix of another word. * @param other the other word * @return true if this word is a prefix of the other word, false * otherwise. */ public boolean isPrefixOf(Word other) { int len = length(), otherLen = other.length(); if(otherLen < len) return false; for(int i = 0; i < len; i++) { I sym1 = getSymbol(i), sym2 = other.getSymbol(i); if(!Objects.equals(sym1, sym2)) return false; } return true; } /** * Determines the longest common prefix of this word and another * word. * @param other the other word * @return the longest common prefix of this word and the other word */ public Word longestCommonPrefix(Word other) { int len = length(), otherLen = other.length(); int maxIdx = (len < otherLen) ? len : otherLen; int i = 0; while(i < maxIdx) { I sym1 = getSymbol(i), sym2 = getSymbol(i); if(!Objects.equals(sym1, sym2)) break; i++; } return prefix(i); } /** * Checks if this word is a suffix of another word * @param other the other word * @return true if this word is a suffix of the other word, false * otherwise. */ public boolean isSuffixOf(Word other) { int len = length(), otherLen = other.length(); if(otherLen < len) return false; int ofs = otherLen - len; for(int i = 0; i < len; i++) { I sym1 = getSymbol(i), sym2 = other.getSymbol(ofs + i); if(!Objects.equals(sym1, sym2)) return false; } return true; } /** * Determines the longest common suffix of this word and another word. * @param other the other word * @return the longest common suffix */ public Word longestCommonSuffix(Word other) { int len = length(), otherLen = other.length(); int minLen = (len < otherLen) ? len : otherLen; int idx1 = len, idx2 = otherLen; int i = 0; while(i < minLen) { I sym1 = getSymbol(--idx1), sym2 = other.getSymbol(--idx2); if(!Objects.equals(sym1, sym2)) break; i++; } return suffix(i); } /** * Retrieves a "flattened" version of this word, i.e., without any hierarchical structure * attached. * This can be helpful if {@link Word} is subclassed to allow representing, e.g., a concatenation * dynamically, but due to performance concerns not too many levels of indirection * should be introduced. * @return a flattened version of this word. */ public Word flatten() { int len = length(); Object[] array = new Object[len]; writeToArray(0, array, 0, len); return new SharedWord<>(array); } public Word trimmed() { int len = length(); Object[] array = new Object[len]; writeToArray(0, array, 0, len); return new SharedWord<>(array); } /** * Checks if this word is empty, i.e., contains no symbols. * @return true if this word is empty, false otherwise. */ public boolean isEmpty() { return (length() == 0); } }