All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.util.fst.package-info Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Finite state transducers
 * 

 * <p>
 * This package implements Finite State Transducers with the following
 * characteristics:
    *
 * <ul>
 *   <li>Fast and low memory overhead construction of the minimal FST
 *       (but inputs must be provided in sorted order)</li>
 *   <li>Low object overhead and quick deserialization (byte[] representation)</li>
 *   <li>Optional two-pass compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
 *   <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the
 *       outputs are in sorted order (e.g., ordinals or file pointers)</li>
 *   <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li>
 *   <li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by
 *       weight</li>
 *   <li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and
 *       {@link org.apache.lucene.util.fst.BytesRefFSTEnum BytesRef}) that behave like
 *       {@link java.util.SortedMap SortedMap} iterators</li>
 * </ul>

 * <p>
 * FST Construction example:
 * <pre>
 *     // Input values (keys). These must be provided to Builder in Unicode sorted order!
 *     String inputValues[] = {"cat", "dog", "dogs"};
 *     long outputValues[] = {5, 7, 12};
 *
 *     PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
 *     Builder&lt;Long&gt; builder = new Builder&lt;Long&gt;(INPUT_TYPE.BYTE1, outputs);
 *     BytesRef scratchBytes = new BytesRef();
 *     IntsRefBuilder scratchInts = new IntsRefBuilder();
 *     for (int i = 0; i &lt; inputValues.length; i++) {
 *       scratchBytes.copyChars(inputValues[i]);
 *       builder.add(Util.toIntsRef(scratchBytes, scratchInts), outputValues[i]);
 *     }
 *     FST&lt;Long&gt; fst = builder.finish();
 * </pre>
 * Retrieval by key:
 * <pre>
 *     Long value = Util.get(fst, new BytesRef("dog"));
 *     System.out.println(value); // 7
 * </pre>
 * Retrieval by value:
 * <pre>
 *     // Only works because outputs are also in sorted order
 *     IntsRef key = Util.getByOutput(fst, 12);
 *     System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs
 * </pre>
 * Iterate over key-value pairs in sorted order:
 * <pre>
 *     // Like TermsEnum, this also supports seeking (advance)
 *     BytesRefFSTEnum&lt;Long&gt; iterator = new BytesRefFSTEnum&lt;Long&gt;(fst);
 *     while (iterator.next() != null) {
 *       InputOutput&lt;Long&gt; mapEntry = iterator.current();
 *       System.out.println(mapEntry.input.utf8ToString());
 *       System.out.println(mapEntry.output);
 *     }
 * </pre>
 * N-shortest paths by weight:
 * <pre>
 *     Comparator&lt;Long&gt; comparator = new Comparator&lt;Long&gt;() {
 *       public int compare(Long left, Long right) {
 *         return left.compareTo(right);
 *       }
 *     };
 *     Arc&lt;Long&gt; firstArc = fst.getFirstArc(new Arc&lt;Long&gt;());
 *     MinResult&lt;Long&gt; paths[] = Util.shortestPaths(fst, firstArc, comparator, 2);
 *     System.out.println(Util.toBytesRef(paths[0].input, scratchBytes).utf8ToString()); // cat
 *     System.out.println(paths[0].output); // 5
 *     System.out.println(Util.toBytesRef(paths[1].input, scratchBytes).utf8ToString()); // dog
 *     System.out.println(paths[1].output); // 7
 * </pre>
*/ package org.apache.lucene.util.fst;




© 2015 - 2025 Weber Informatics LLC | Privacy Policy