
org.apache.lucene.util.automaton.CompiledAutomaton Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.automaton;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
/**
* Immutable class holding compiled details for a given
* Automaton. The Automaton is deterministic, must not have
* dead states but is not necessarily minimal.
*
* @lucene.experimental
*/
public class CompiledAutomaton {
/**
 * The internal form an automaton is compiled into. The form is picked
 * according to the language the automaton accepts, so that execution is
 * as efficient as possible.
 */
public enum AUTOMATON_TYPE {
  /** The automaton accepts no strings at all. */
  NONE,

  /** The automaton accepts every possible string. */
  ALL,

  /** The automaton accepts exactly one fixed string. */
  SINGLE,

  /** Any automaton that does not fall into one of the other categories. */
  NORMAL
}
/**
* The form this automaton was compiled into.
* If simplify is true this will be the "simplified" type; else, this is
* {@link AUTOMATON_TYPE#NORMAL}.
*/
public final AUTOMATON_TYPE type;
/**
* For {@link AUTOMATON_TYPE#SINGLE} this is the singleton term.
*/
public final BytesRef term;
/**
* Matcher for quickly determining if a byte[] is accepted.
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
*/
public final ByteRunAutomaton runAutomaton;
/**
* Automaton whose transitions, indexed by state number, are used
* for traversal. The state numbering is consistent with
* {@link #runAutomaton}.
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
*/
public final Automaton automaton;
/**
* Shared common suffix accepted by the automaton. Only valid
* for {@link AUTOMATON_TYPE#NORMAL}, and only when the
* automaton accepts an infinite language. This will be null
* if the common suffix is length 0.
*/
public final BytesRef commonSuffixRef;
/**
* Indicates if the automaton accepts a finite set of strings.
* Null if this was not computed.
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
*/
public final Boolean finite;
/** Which state, if any, accepts all suffixes, else -1. */
public final int sinkState;
/**
* Create this, passing simplify=true and finite=null, so that we try
* to simplify the automaton and determine if it is finite.
*
* @param automaton the automaton to compile; forwarded unchanged to the
* three-argument constructor together with {@code null} and {@code true}
*/
public CompiledAutomaton(Automaton automaton) {
this(automaton, null, true);
}
/** Returns sink state, if present, else -1. */
private static int findSinkState(Automaton automaton) {
int numStates = automaton.getNumStates();
Transition t = new Transition();
int foundState = -1;
for (int s=0;s© 2015 - 2025 Weber Informatics LLC | Privacy Policy