All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.suggest.analyzing.FSTUtil Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.analyzing;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;

// TODO: move to core?  nobody else uses it yet though...

/**
 * Exposes a utility method to enumerate all paths
 * intersecting an {@link Automaton} with an {@link FST}.
 */
public class FSTUtil {

  private FSTUtil() {
  }

  /** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */
  public static final class Path {

    /** Node in the automaton where path ends: */
    public final int state;

    /** Node in the FST where path ends: */
    public final FST.Arc fstNode;

    /** Output of the path so far: */
    public final T output;

    /** Input of the path so far: */
    public final IntsRefBuilder input;

    /** Sole constructor. */
    public Path(int state, FST.Arc fstNode, T output, IntsRefBuilder input) {
      this.state = state;
      this.fstNode = fstNode;
      this.output = output;
      this.input = input;
    }
  }

  /**
   * Enumerates all minimal prefix paths in the automaton that also intersect the FST,
   * accumulating the FST end node and output for each path.
   */
  public static  List> intersectPrefixPaths(Automaton a, FST fst)
      throws IOException {
    assert a.isDeterministic();
    final List> queue = new ArrayList<>();
    final List> endNodes = new ArrayList<>();
    if (a.getNumStates() == 0) {
      return endNodes;
    }

    queue.add(new Path<>(0, fst
        .getFirstArc(new FST.Arc()), fst.outputs.getNoOutput(),
        new IntsRefBuilder()));
    
    final FST.Arc scratchArc = new FST.Arc<>();
    final FST.BytesReader fstReader = fst.getBytesReader();

    Transition t = new Transition();

    while (queue.size() != 0) {
      final Path path = queue.remove(queue.size() - 1);
      if (a.isAccept(path.state)) {
        endNodes.add(path);
        // we can stop here if we accept this path,
        // we accept all further paths too
        continue;
      }
      
      IntsRefBuilder currentInput = path.input;
      int count = a.initTransition(path.state, t);
      for (int i=0;i nextArc = fst.findTargetArc(t.min,
              path.fstNode, scratchArc, fstReader);
          if (nextArc != null) {
            final IntsRefBuilder newInput = new IntsRefBuilder();
            newInput.copyInts(currentInput.get());
            newInput.append(t.min);
            queue.add(new Path<>(t.dest, new FST.Arc()
                .copyFrom(nextArc), fst.outputs
                .add(path.output, nextArc.output()), newInput));
          }
        } else {
          // TODO: if this transition's TO state is accepting, and
          // it accepts the entire range possible in the FST (ie. 0 to 255),
          // we can simply use the prefix as the accepted state instead of
          // looking up all the ranges and terminate early
          // here.  This just shifts the work from one queue
          // (this one) to another (the completion search
          // done in AnalyzingSuggester).
          FST.Arc nextArc = Util.readCeilArc(min, fst, path.fstNode,
              scratchArc, fstReader);
          while (nextArc != null && nextArc.label() <= max) {
            assert nextArc.label() <=  max;
            assert nextArc.label() >= min : nextArc.label() + " "
                + min;
            final IntsRefBuilder newInput = new IntsRefBuilder();
            newInput.copyInts(currentInput.get());
            newInput.append(nextArc.label());
            queue.add(new Path<>(t.dest, new FST.Arc()
                .copyFrom(nextArc), fst.outputs
                .add(path.output, nextArc.output()), newInput));
            final int label = nextArc.label(); // used in assert
            nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc,
                fstReader);
            assert nextArc == null || label < nextArc.label() : "last: " + label
                + " next: " + nextArc.label();
          }
        }
      }
    }
    return endNodes;
  }
  
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy