
org.apache.lucene.util.automaton.XCompiledAutomaton Maven / Gradle / Ivy
package org.apache.lucene.util.automaton;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.XBytesRefBuilder;
/**
* Immutable class holding compiled details for a given
* Automaton. The Automaton is deterministic, must not have
* dead states but is not necessarily minimal.
*
* @lucene.experimental
*/
public class XCompiledAutomaton {
/**
 * The internal form an automaton is compiled into. The form is chosen
 * from the language the automaton accepts so that execution can be as
 * efficient as possible.
 */
public enum AUTOMATON_TYPE {

  /** The automaton accepts no strings at all. */
  NONE,

  /** The automaton accepts every possible string. */
  ALL,

  /** The automaton accepts exactly one fixed string. */
  SINGLE,

  /** The automaton accepts every string that starts with a constant prefix. */
  PREFIX,

  /** Fallback for any automaton that is none of the above. */
  NORMAL
}
/** If simplify is true this will be the "simplified" type; else, this is NORMAL */
public final AUTOMATON_TYPE type;
/**
* For {@link AUTOMATON_TYPE#PREFIX}, this is the prefix term;
* for {@link AUTOMATON_TYPE#SINGLE} this is the singleton term.
*/
public final BytesRef term;
/**
* Matcher for quickly determining if a byte[] is accepted.
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
*/
public final XByteRunAutomaton runAutomaton;
/**
* The automaton held by {@link #runAutomaton}, exposed for traversing
* transitions by state number. The state numbering is consistent with
* {@link #runAutomaton}.
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
*/
public final XAutomaton automaton;
/**
* Shared common suffix accepted by the automaton. Only valid
* for {@link AUTOMATON_TYPE#NORMAL}, and only when the
* automaton accepts an infinite language.
*/
public final BytesRef commonSuffixRef;
/**
* Indicates if the automaton accepts a finite set of strings.
* Null if this was not computed.
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
*/
public final Boolean finite;
/**
* Create this with default settings. Delegates to
* {@link #XCompiledAutomaton(XAutomaton, Boolean, boolean)} passing
* simplify=true and finite=null, so that we try to simplify the
* automaton and determine if it is finite.
*
* @param automaton the automaton to compile
*/
public XCompiledAutomaton(XAutomaton automaton) {
this(automaton, null, true);
}
/**
* Create this, using
* {@link XOperations#DEFAULT_MAX_DETERMINIZED_STATES} as the limit on
* determinized states. If finite is null, we use
* {@link XOperations#isFinite} to determine whether it is finite. If
* simplify is true, we run possibly expensive operations to determine
* if the automaton is one of the cases in
* {@link XCompiledAutomaton.AUTOMATON_TYPE}.
*
* @param automaton the automaton to compile
* @param finite whether the automaton accepts a finite set of strings,
*        or null to have this computed
* @param simplify whether to test for the simple automaton types
*/
public XCompiledAutomaton(XAutomaton automaton, Boolean finite, boolean simplify) {
this(automaton, finite, simplify, XOperations.DEFAULT_MAX_DETERMINIZED_STATES);
}
/**
 * Compiles the given automaton.
 *
 * <p>When {@code simplify} is true the automaton is first tested, using
 * possibly expensive operations, against the simple cases of
 * {@link XCompiledAutomaton.AUTOMATON_TYPE} (NONE, ALL, SINGLE, PREFIX).
 * If one of them applies, only {@link #type} and, for SINGLE and PREFIX,
 * {@link #term} are populated; every other field is left null. If
 * simplifying requires determinizing the automaton then at most
 * {@code maxDeterminizedStates} states will be created; any more than
 * that will cause a TooComplexToDeterminizeException.
 *
 * <p>Otherwise the type is NORMAL and the run automaton, the traversal
 * automaton, the finiteness flag and (for a non-finite automaton) the
 * common suffix are populated.
 *
 * @param automaton the automaton to compile
 * @param finite whether the automaton accepts a finite set of strings;
 *        if null, {@link XOperations#isFinite} is used to find out
 * @param simplify whether to test for the simple automaton types
 * @param maxDeterminizedStates limit passed to every determinization
 */
public XCompiledAutomaton(XAutomaton automaton, Boolean finite, boolean simplify,
    int maxDeterminizedStates) {
  // An automaton with no states is swapped for a fresh one holding a single state.
  if (automaton.getNumStates() == 0) {
    automaton = new XAutomaton();
    automaton.createState();
  }

  if (simplify) {
    // Probe for a "simple" form; when one matches, no runAutomaton is
    // built. On a large automaton these probes could be costly.

    if (XOperations.isEmpty(automaton)) {
      // Accepts nothing.
      this.type = AUTOMATON_TYPE.NONE;
      this.term = null;
      this.commonSuffixRef = null;
      this.runAutomaton = null;
      this.automaton = null;
      this.finite = null;
      return;
    }

    // NOTE: only approximate, because automaton may not be minimal:
    if (XOperations.isTotal(automaton)) {
      // Accepts every possible string.
      this.type = AUTOMATON_TYPE.ALL;
      this.term = null;
      this.commonSuffixRef = null;
      this.runAutomaton = null;
      this.automaton = null;
      this.finite = null;
      return;
    }

    automaton = XOperations.determinize(automaton, maxDeterminizedStates);
    final String prefix = XOperations.getCommonPrefix(automaton);

    // SINGLE and PREFIX are only considered for a non-empty common prefix.
    if (prefix.length() > 0) {
      if (XOperations.sameLanguage(automaton, XAutomata.makeString(prefix))) {
        // Accepts exactly the common prefix, i.e. one fixed string.
        this.type = AUTOMATON_TYPE.SINGLE;
        this.term = new BytesRef(prefix);
        this.commonSuffixRef = null;
        this.runAutomaton = null;
        this.automaton = null;
        this.finite = null;
        return;
      }

      // Build "prefix followed by anything" and compare languages.
      XAutomaton prefixThenAnything =
          XOperations.concatenate(XAutomata.makeString(prefix), XAutomata.makeAnyString());
      prefixThenAnything = XOperations.determinize(prefixThenAnything, maxDeterminizedStates);
      assert !XOperations.hasDeadStates(prefixThenAnything);
      if (XOperations.sameLanguage(automaton, prefixThenAnything)) {
        // Accepts every string starting with the constant prefix.
        this.type = AUTOMATON_TYPE.PREFIX;
        this.term = new BytesRef(prefix);
        this.commonSuffixRef = null;
        this.runAutomaton = null;
        this.automaton = null;
        this.finite = null;
        return;
      }
    }
  }

  // No simple form applied (or simplification was not requested).
  this.type = AUTOMATON_TYPE.NORMAL;
  this.term = null;
  this.finite = (finite != null) ? finite : Boolean.valueOf(XOperations.isFinite(automaton));

  final XAutomaton utf8Automaton = new XUTF32ToUTF8().convert(automaton);

  // The common suffix is only computed for a non-finite automaton.
  this.commonSuffixRef = this.finite
      ? null
      : XOperations.getCommonSuffixBytesRef(utf8Automaton, maxDeterminizedStates);

  this.runAutomaton = new XByteRunAutomaton(utf8Automaton, true, maxDeterminizedStates);
  this.automaton = this.runAutomaton.automaton;
}
private XTransition transition = new XTransition();
//private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
private BytesRef addTail(int state, XBytesRefBuilder term, int idx, int leadLabel) {
//System.out.println("addTail state=" + state + " term=" + term.utf8ToString() + " idx=" + idx + " leadLabel=" + (char) leadLabel);
//System.out.println(automaton.toDot());
// Find biggest transition that's < label
// TODO: use binary search here
int maxIndex = -1;
int numTransitions = automaton.initTransition(state, transition);
for(int i=0;i
Related Artifacts
Related Groups
-->
© 2015 - 2025 Weber Informatics LLC | Privacy Policy