org.apache.lucene.search.suggest.analyzing.FSTUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-suggest Show documentation
Show all versions of lucene-suggest Show documentation
Apache Lucene (module: suggest)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest.analyzing;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
// TODO: move to core? nobody else uses it yet though...
/**
* Exposes a utility method to enumerate all paths intersecting an {@link Automaton} with an {@link
* FST}.
*/
public class FSTUtil {
private FSTUtil() {}
/** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */
public static final class Path {
/** Node in the automaton where path ends: */
public final int state;
/** Node in the FST where path ends: */
public final FST.Arc fstNode;
/** Output of the path so far: */
public final T output;
/** Input of the path so far: */
public final IntsRefBuilder input;
/** Sole constructor. */
public Path(int state, FST.Arc fstNode, T output, IntsRefBuilder input) {
this.state = state;
this.fstNode = fstNode;
this.output = output;
this.input = input;
}
}
/**
* Enumerates all minimal prefix paths in the automaton that also intersect the FST, accumulating
* the FST end node and output for each path.
*/
public static List> intersectPrefixPaths(Automaton a, FST fst) throws IOException {
assert a.isDeterministic();
final List> queue = new ArrayList<>();
final List> endNodes = new ArrayList<>();
if (a.getNumStates() == 0) {
return endNodes;
}
queue.add(
new Path<>(
0, fst.getFirstArc(new FST.Arc()), fst.outputs.getNoOutput(), new IntsRefBuilder()));
final FST.Arc scratchArc = new FST.Arc<>();
final FST.BytesReader fstReader = fst.getBytesReader();
Transition t = new Transition();
while (queue.size() != 0) {
final Path path = queue.remove(queue.size() - 1);
if (a.isAccept(path.state)) {
endNodes.add(path);
// we can stop here if we accept this path,
// we accept all further paths too
continue;
}
IntsRefBuilder currentInput = path.input;
int count = a.initTransition(path.state, t);
for (int i = 0; i < count; i++) {
a.getNextTransition(t);
final int min = t.min;
final int max = t.max;
if (min == max) {
final FST.Arc nextArc = fst.findTargetArc(t.min, path.fstNode, scratchArc, fstReader);
if (nextArc != null) {
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
newInput.append(t.min);
queue.add(
new Path<>(
t.dest,
new FST.Arc().copyFrom(nextArc),
fst.outputs.add(path.output, nextArc.output()),
newInput));
}
} else {
// TODO: if this transition's TO state is accepting, and
// it accepts the entire range possible in the FST (ie. 0 to 255),
// we can simply use the prefix as the accepted state instead of
// looking up all the ranges and terminate early
// here. This just shifts the work from one queue
// (this one) to another (the completion search
// done in AnalyzingSuggester).
FST.Arc nextArc = Util.readCeilArc(min, fst, path.fstNode, scratchArc, fstReader);
while (nextArc != null && nextArc.label() <= max) {
assert nextArc.label() <= max;
assert nextArc.label() >= min : nextArc.label() + " " + min;
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
newInput.append(nextArc.label());
queue.add(
new Path<>(
t.dest,
new FST.Arc().copyFrom(nextArc),
fst.outputs.add(path.output, nextArc.output()),
newInput));
final int label = nextArc.label(); // used in assert
nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, fstReader);
assert nextArc == null || label < nextArc.label()
: "last: " + label + " next: " + nextArc.label();
}
}
}
}
return endNodes;
}
}