All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.suggest.analyzing.FSTUtil Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.analyzing;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;

// TODO: move to core?  nobody else uses it yet though...

/**
 * Exposes a utility method to enumerate all paths intersecting an {@link Automaton} with an {@link
 * FST}.
 */
public class FSTUtil {

  private FSTUtil() {}

  /** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */
  public static final class Path {

    /** Node in the automaton where path ends: */
    public final int state;

    /** Node in the FST where path ends: */
    public final FST.Arc fstNode;

    /** Output of the path so far: */
    public final T output;

    /** Input of the path so far: */
    public final IntsRefBuilder input;

    /** Sole constructor. */
    public Path(int state, FST.Arc fstNode, T output, IntsRefBuilder input) {
      this.state = state;
      this.fstNode = fstNode;
      this.output = output;
      this.input = input;
    }
  }

  /**
   * Enumerates all minimal prefix paths in the automaton that also intersect the FST, accumulating
   * the FST end node and output for each path.
   */
  public static  List> intersectPrefixPaths(Automaton a, FST fst) throws IOException {
    assert a.isDeterministic();
    final List> queue = new ArrayList<>();
    final List> endNodes = new ArrayList<>();
    if (a.getNumStates() == 0) {
      return endNodes;
    }

    queue.add(
        new Path<>(
            0, fst.getFirstArc(new FST.Arc()), fst.outputs.getNoOutput(), new IntsRefBuilder()));

    final FST.Arc scratchArc = new FST.Arc<>();
    final FST.BytesReader fstReader = fst.getBytesReader();

    Transition t = new Transition();

    while (queue.size() != 0) {
      final Path path = queue.remove(queue.size() - 1);
      if (a.isAccept(path.state)) {
        endNodes.add(path);
        // we can stop here if we accept this path,
        // we accept all further paths too
        continue;
      }

      IntsRefBuilder currentInput = path.input;
      int count = a.initTransition(path.state, t);
      for (int i = 0; i < count; i++) {
        a.getNextTransition(t);
        final int min = t.min;
        final int max = t.max;
        if (min == max) {
          final FST.Arc nextArc = fst.findTargetArc(t.min, path.fstNode, scratchArc, fstReader);
          if (nextArc != null) {
            final IntsRefBuilder newInput = new IntsRefBuilder();
            newInput.copyInts(currentInput.get());
            newInput.append(t.min);
            queue.add(
                new Path<>(
                    t.dest,
                    new FST.Arc().copyFrom(nextArc),
                    fst.outputs.add(path.output, nextArc.output()),
                    newInput));
          }
        } else {
          // TODO: if this transition's TO state is accepting, and
          // it accepts the entire range possible in the FST (ie. 0 to 255),
          // we can simply use the prefix as the accepted state instead of
          // looking up all the ranges and terminate early
          // here.  This just shifts the work from one queue
          // (this one) to another (the completion search
          // done in AnalyzingSuggester).
          FST.Arc nextArc = Util.readCeilArc(min, fst, path.fstNode, scratchArc, fstReader);
          while (nextArc != null && nextArc.label() <= max) {
            assert nextArc.label() <= max;
            assert nextArc.label() >= min : nextArc.label() + " " + min;
            final IntsRefBuilder newInput = new IntsRefBuilder();
            newInput.copyInts(currentInput.get());
            newInput.append(nextArc.label());
            queue.add(
                new Path<>(
                    t.dest,
                    new FST.Arc().copyFrom(nextArc),
                    fst.outputs.add(path.output, nextArc.output()),
                    newInput));
            final int label = nextArc.label(); // used in assert
            nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, fstReader);
            assert nextArc == null || label < nextArc.label()
                : "last: " + label + " next: " + nextArc.label();
          }
        }
      }
    }
    return endNodes;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy