eu.interedition.collatex.medite.SuffixTree Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of collatex-core Show documentation
Show all versions of collatex-core Show documentation
A Java library for collating textual sources, for example, to produce an apparatus.
/*
* Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see .
*/
package eu.interedition.collatex.medite;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* @author Gregor Middell
*/
class SuffixTree {
final Comparator comparator;
final Comparator sourceComparator;
final T[] source;
final Node root;
@SafeVarargs
static SuffixTree build(Comparator comparator, T... source) {
return new SuffixTree<>(comparator, source).build();
}
@SafeVarargs
private SuffixTree(Comparator comparator, T... source) {
this.comparator = comparator;
this.sourceComparator = new SentinelAwareComparator(comparator);
this.source = source;
this.root = new Node();
}
public Cursor cursor() {
return new Cursor();
}
public Iterable match(final Iterable str) {
return () -> new Iterator() {
final Iterator it = str.iterator();
Optional cursor = Optional.ofNullable(it.hasNext() ? cursor().move(it.next()) : null);
@Override
public boolean hasNext() {
return cursor.isPresent();
}
@Override
public EquivalenceClass next() {
final EquivalenceClass next = cursor.get().matchedClass();
cursor = Optional.ofNullable(it.hasNext() ? cursor.get().move(it.next()) : null);
return next;
}
};
}
private SuffixTree build() {
for (int suffixStart = 0; suffixStart <= source.length; suffixStart++) {
root.addSuffix(suffixStart);
}
compactNodes(root);
return this;
}
private void compactNodes(Node node) {
for (Node child : node.children) {
while (child.children.size() == 1) {
final Node firstGrandChild = child.children.iterator().next();
child.incomingLabel.add(firstGrandChild.incomingLabel.getFirst());
child.children = firstGrandChild.children;
for (Node formerGrandchild : child.children) {
formerGrandchild.parent = child;
}
}
compactNodes(child);
}
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
final Deque nodes = new ArrayDeque<>(Collections.singleton(root));
while (!nodes.isEmpty()) {
final Node node = nodes.remove();
sb.append(IntStream.range(0, node.depth()).mapToObj(i -> "\t").collect(Collectors.joining())).append(node).append("\n");
node.children.forEach(nodes::addFirst);
}
return sb.toString();
}
/**
* @author Gregor Middell
*/
class Node {
final LinkedList incomingLabel;
Node parent;
List children = new ArrayList<>();
public Node(Node parent, int firstIndex) {
this.parent = parent;
this.incomingLabel = new LinkedList<>(Collections.singleton(new EquivalenceClass(firstIndex)));
}
public Node() {
this.parent = null;
this.incomingLabel = null;
}
public int depth() {
int depth = 0;
for (Node parent = this.parent; parent != null; parent = parent.parent) {
depth++;
}
return depth;
}
public void addSuffix(int start) {
addSuffix(this, start);
}
private Node addSuffix(Node node, int start) {
for (Node child : node.children) {
EquivalenceClass childClass = child.incomingLabel.getFirst();
if (childClass.isMember(start)) {
childClass.add(start);
start++;
if (start == (source.length + 1)) {
return child;
}
return addSuffix(child, start);
}
}
while (start <= source.length) {
Node child = new Node(node, start);
node.children.add(child);
node = child;
start++;
}
return node;
}
@Override
public String toString() {
return Optional.ofNullable(incomingLabel).map(label -> label.stream().map(Object::toString).collect(Collectors.joining(", "))).orElse("");
}
}
class EquivalenceClass implements Comparable {
int[] members = new int[2];
int length = 0;
EquivalenceClass(int first) {
members[length++] = first;
}
void add(int member) {
if (length == members.length) {
members = Arrays.copyOf(members, members.length * 2);
}
members[length++] = member;
}
boolean isMember(int index) {
return sourceComparator.compare(index, members[0]) == 0;
}
public boolean isMember(T symbol) {
return (members[0] != source.length && comparator.compare(symbol, source[members[0]]) == 0);
}
@Override
public boolean equals(Object obj) {
if (obj != null && obj instanceof SuffixTree>.EquivalenceClass) {
return members[0] == ((SuffixTree>.EquivalenceClass) obj).members[0];
}
return super.equals(obj);
}
@Override
public int hashCode() {
return members[0];
}
@Override
public int compareTo(EquivalenceClass o) {
return (members[0] - o.members[0]);
}
@Override
public String toString() {
return String.format("{%s}", Arrays.stream(members, 0, length)
.mapToObj(member -> "<[" + member + "] " + (member == source.length ? "$" : source[member].toString()) + ">")
.collect(Collectors.joining(", ")));
}
}
class SentinelAwareComparator implements Comparator {
final Comparator comparator;
SentinelAwareComparator(Comparator comparator) {
this.comparator = comparator;
}
@Override
public int compare(Integer o1, Integer o2) {
if (o1 == source.length || o2 == source.length) {
return (o2 - o1);
}
return comparator.compare(source[o1], source[o2]);
}
}
public class Cursor {
final Node node;
final int offset;
Cursor() {
this(root, 0);
}
Cursor(Node node, int offset) {
this.node = node;
this.offset = offset;
}
public Cursor move(T symbol) {
if (node.incomingLabel == null || (offset + 1) == node.incomingLabel.size()) {
for (Node child : node.children) {
final Cursor next = new Cursor(child, 0);
if (next.matchedClass().isMember(symbol)) {
return next;
}
}
return null;
}
return (node.incomingLabel.get(offset + 1).isMember(symbol) ? new Cursor(node, offset + 1) : null);
}
EquivalenceClass matchedClass() {
return node.incomingLabel.get(offset);
}
}
}