org.apache.lucene.util.automaton.Automaton Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.automaton;
//import java.io.IOException;
//import java.io.PrintWriter;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Sorter;
// TODO
// - could use packed int arrays instead
// - could encode dest w/ delta from to?
/** Represents an automaton and all its states and transitions. States
* are integers and must be created using {@link #createState}. Mark a
* state as an accept state using {@link #setAccept}. Add transitions
* using {@link #addTransition}. Each state must have all of its
* transitions added at once; if this is too restrictive then use
* {@link Automaton.Builder} instead. State 0 is always the
* initial state. Once a state is finished, either
* because you've started adding transitions to another state or you
* call {@link #finishState}, then that state's transitions are sorted
* (first by min, then max, then dest) and reduced (transitions with
* adjacent labels going to the same dest are combined).
*
* @lucene.experimental */
public class Automaton implements Accountable {
/** Where we next write to the int[] states; this increments by 2 for
* each added state because we pack a pointer to the transitions
* array and a count of how many transitions leave the state. */
private int nextState;
/** Where we next write to in int[] transitions; this
* increments by 3 for each added transition because we
* pack min, max, dest in sequence. */
private int nextTransition;
/** Current state we are adding transitions to; the caller
* must add all transitions for this state before moving
* onto another state. */
private int curState = -1;
/** Index in the transitions array, where this state's
* leaving transitions are stored, or -1 if this state
* has not added any transitions yet, followed by number
* of transitions. */
private int[] states;
private final BitSet isAccept;
/** Holds toState, min, max for each transition. */
private int[] transitions;
/** True if no state has two transitions leaving with the same label. */
private boolean deterministic = true;
/** Creates an automaton with no states, delegating to
* {@link #Automaton(int, int)} with initial room for 2 states and
* 2 transitions. */
public Automaton() {
this(2, 2);
}
/**
 * Creates an empty automaton whose backing arrays are pre-sized for the
 * requested number of states and transitions.
 *
 * @param numStates
 *          Number of states to reserve room for (two ints per state).
 * @param numTransitions
 *          Number of transitions to reserve room for (three ints per
 *          transition).
 */
public Automaton(int numStates, int numTransitions) {
  this.isAccept = new BitSet(numStates);
  this.states = new int[2 * numStates];
  this.transitions = new int[3 * numTransitions];
}
/**
 * Allocates a new state and returns its number.
 *
 * @return the number of the newly created state
 */
public int createState() {
  growStates();
  final int slot = nextState;
  // -1 in the first slot marks "no transitions added yet" for this state.
  states[slot] = -1;
  nextState = slot + 2;
  return slot / 2;
}
/**
 * Marks the given state as accepting, or removes that mark.
 *
 * @param state the state number to update
 * @param accept {@code true} to make the state an accept state,
 *        {@code false} to clear it
 * @throws IllegalArgumentException if {@code state} is not below the
 *         current number of states
 */
public void setAccept(int state, boolean accept) {
  final int numStates = getNumStates();
  if (state >= numStates) {
    throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + numStates + ")");
  }
  isAccept.set(state, accept);
}
/** Sugar to get all transitions for all states. This is
* object-heavy; it's better to iterate state by state instead. */
public Transition[][] getSortedTransitions() {
int numStates = getNumStates();
Transition[][] transitions = new Transition[numStates][];
// NOTE(review): the next line is not valid Java. Text appears to have been
// lost between the loop header and "= nextState/2". The statements that
// follow use source, dest, min and max, none of which are declared in this
// method, so presumably the rest of this method and the header of the
// transition-adding method are missing. Restore them from the upstream file
// before compiling.
for(int s=0;s= nextState/2) {
throw new IllegalArgumentException("source=" + source + " is out of bounds (maxState is " + (nextState/2-1) + ")");
}
// dest must also name an existing state.
if (dest >= nextState/2) {
throw new IllegalArgumentException("dest=" + dest + " is out of bounds (max state is " + (nextState/2-1) + ")");
}
// Make sure the transitions array has room for one more 3-int entry.
growTransitions();
if (curState != source) {
// Switching to a different source state: finish the one being filled, if any.
if (curState != -1) {
finishCurrentState();
}
// Move to next source:
curState = source;
// createState stores -1 here; any other value means this state was
// already given transitions.
if (states[2*curState] != -1) {
throw new IllegalStateException("from state (" + source + ") already had transitions added");
}
assert states[2*curState+1] == 0;
// Remember where this state's transitions begin in the transitions array.
states[2*curState] = nextTransition;
}
// Each transition is stored as three consecutive ints: dest, min, max.
transitions[nextTransition++] = dest;
transitions[nextTransition++] = min;
transitions[nextTransition++] = max;
// Increment transition count for this state
states[2*curState+1]++;
}
/** Add a [virtual] epsilon transition between source and dest.
* Dest state must already have all transitions added because this
* method simply copies those same transitions over to source. */
public void addEpsilon(int source, int dest) {
Transition t = new Transition();
int count = initTransition(dest, t);
// NOTE(review): the next line is not valid Java. Text appears to have been
// lost after "for(int i=0;i". The code that follows works on curState and an
// undeclared numTransitions, so presumably the rest of addEpsilon and the
// header of the state-finishing method are missing. Restore them from the
// upstream file before compiling.
for(int i=0;i 0;
int offset = states[2*curState];
// The sorters index whole transitions (3 ints each), hence offset/3.
int start = offset/3;
destMinMaxSorter.sort(start, start+numTransitions);
// Reduce any "adjacent" transitions:
// upto counts the merged transitions written back so far; min, max and dest
// hold the transition currently being accumulated (dest == -1 means none yet).
int upto = 0;
int min = -1;
int max = -1;
int dest = -1;
// NOTE(review): the loop header below is garbled (text lost after
// "for(int i=0;i"). The declarations of tDest, tMin and tMax and the merge
// conditions are not visible here.
for(int i=0;i max) {
max = tMax;
}
} else {
// Write out the pending transition, then start a new range.
if (dest != -1) {
transitions[offset+3*upto] = dest;
transitions[offset+3*upto+1] = min;
transitions[offset+3*upto+2] = max;
upto++;
}
min = tMin;
max = tMax;
}
} else {
// Write out the pending transition, then start accumulating for the new dest.
if (dest != -1) {
transitions[offset+3*upto] = dest;
transitions[offset+3*upto+1] = min;
transitions[offset+3*upto+2] = max;
upto++;
}
dest = tDest;
min = tMin;
max = tMax;
}
}
if (dest != -1) {
// Last transition
transitions[offset+3*upto] = dest;
transitions[offset+3*upto+1] = min;
transitions[offset+3*upto+2] = max;
upto++;
}
// Give back the array slots freed by merging and record the reduced count.
nextTransition -= (numTransitions-upto)*3;
states[2*curState+1] = upto;
// Sort transitions by min/max/dest:
minMaxDestSorter.sort(start, start+upto);
if (deterministic && upto > 1) {
int lastMax = transitions[offset+2];
// NOTE(review): the next line is garbled (text lost after "for(int i=1;i").
// The rest of this check and the header of the following method are not
// visible. The remaining statements return the count stored for a state;
// presumably they are the tail of getNumTransitions(int) - confirm against
// the upstream file.
for(int i=1;i= 0;
int count = states[2*state+1];
// A stored count of -1 is reported as zero transitions.
if (count == -1) {
return 0;
} else {
return count;
}
}
/** Ensures the states array can hold the two ints of one more state. */
private void growStates() {
  final int needed = nextState + 2;
  if (states.length < needed) {
    states = ArrayUtil.grow(states, needed);
  }
}
/** Ensures the transitions array can hold the three ints of one more transition. */
private void growTransitions() {
  final int needed = nextTransition + 3;
  if (transitions.length < needed) {
    transitions = ArrayUtil.grow(transitions, needed);
  }
}
/**
 * Orders transitions by destination state first, then by min label, then by
 * max label, all ascending. Indices passed to the sorter count whole
 * transitions; each one occupies three consecutive ints (dest, min, max) in
 * the transitions array.
 */
private final Sorter destMinMaxSorter = new InPlaceMergeSorter() {
  @Override
  protected void swap(int i, int j) {
    final int a = 3 * i;
    final int b = 3 * j;
    // Exchange all three ints of the two transitions.
    for (int k = 0; k < 3; k++) {
      final int tmp = transitions[a + k];
      transitions[a + k] = transitions[b + k];
      transitions[b + k] = tmp;
    }
  }

  @Override
  protected int compare(int i, int j) {
    final int a = 3 * i;
    final int b = 3 * j;
    // First dest:
    int cmp = Integer.compare(transitions[a], transitions[b]);
    if (cmp != 0) {
      return cmp;
    }
    // Then min:
    cmp = Integer.compare(transitions[a + 1], transitions[b + 1]);
    if (cmp != 0) {
      return cmp;
    }
    // Then max:
    return Integer.compare(transitions[a + 2], transitions[b + 2]);
  }
};
/**
 * Orders transitions by min label first, then by max label, then by
 * destination state, all ascending. Indices passed to the sorter count whole
 * transitions; each one occupies three consecutive ints (dest, min, max) in
 * the transitions array.
 */
private final Sorter minMaxDestSorter = new InPlaceMergeSorter() {
  @Override
  protected void swap(int i, int j) {
    final int a = 3 * i;
    final int b = 3 * j;
    // Exchange all three ints of the two transitions.
    for (int k = 0; k < 3; k++) {
      final int tmp = transitions[a + k];
      transitions[a + k] = transitions[b + k];
      transitions[b + k] = tmp;
    }
  }

  @Override
  protected int compare(int i, int j) {
    final int a = 3 * i;
    final int b = 3 * j;
    // First min:
    int cmp = Integer.compare(transitions[a + 1], transitions[b + 1]);
    if (cmp != 0) {
      return cmp;
    }
    // Then max:
    cmp = Integer.compare(transitions[a + 2], transitions[b + 2]);
    if (cmp != 0) {
      return cmp;
    }
    // Then dest:
    return Integer.compare(transitions[a], transitions[b]);
  }
};
/**
 * Prepares the supplied {@link Transition} for iterating over the
 * transitions that leave {@code state}. Call {@link #getNextTransition}
 * once per transition to read them.
 *
 * @param state the state whose outgoing transitions will be iterated
 * @param t the transition object to position at the state's first transition
 * @return the number of transitions leaving {@code state}
 */
public int initTransition(int state, Transition t) {
  assert state < nextState/2: "state=" + state + " nextState=" + nextState;
  t.source = state;
  // Slot 2*state holds the index of this state's first transition.
  final int firstTransition = states[state * 2];
  t.transitionUpto = firstTransition;
  return getNumTransitions(state);
}
/**
 * Advances the supplied {@link Transition} to the next transition of its
 * source state, filling in dest, min and max.
 *
 * @param t a transition previously set up for iteration over its source state
 */
public void getNextTransition(Transition t) {
  final int stateSlot = 2 * t.source;
  // There must be at least one unread transition left for this state:
  assert t.transitionUpto + 3 - states[stateSlot] <= 3 * states[stateSlot + 1];

  // The upcoming transition must sort after the one read previously:
  assert transitionSorted(t);

  int cursor = t.transitionUpto;
  t.dest = transitions[cursor++];
  t.min = transitions[cursor++];
  t.max = transitions[cursor++];
  t.transitionUpto = cursor;
}
/**
 * Checks that the transition about to be read sorts strictly after the one
 * currently held in {@code t}, comparing min, then max, then dest.
 *
 * @return {@code true} if {@code t} has not read any transition yet or the
 *         upcoming transition is strictly greater; {@code false} otherwise,
 *         including when the two are fully equal
 */
private boolean transitionSorted(Transition t) {
  final int next = t.transitionUpto;
  if (next == states[2 * t.source]) {
    // Transition isn't initialized yet (this is the first transition); don't check:
    return true;
  }

  final int upcomingDest = transitions[next];
  final int upcomingMin = transitions[next + 1];
  final int upcomingMax = transitions[next + 2];

  // Min decides unless it ties:
  if (upcomingMin != t.min) {
    return upcomingMin > t.min;
  }
  // Then max decides unless it ties as well:
  if (upcomingMax != t.max) {
    return upcomingMax > t.max;
  }
  // Finally dest; fully equal transitions should never occur, so a tie is a failure:
  return upcomingDest > t.dest;
}
/**
 * Copies the {@code index}'th transition leaving {@code state} into the
 * supplied {@link Transition}.
 *
 * @param state the source state
 * @param index zero-based position of the transition among those leaving
 *        {@code state}
 * @param t receives source, dest, min and max
 */
public void getTransition(int state, int index, Transition t) {
  // Each transition occupies three consecutive ints: dest, min, max.
  final int base = states[2 * state] + 3 * index;
  t.source = state;
  t.dest = transitions[base];
  t.min = transitions[base + 1];
  t.max = transitions[base + 2];
}
/**
 * Appends a label for code point {@code c} to {@code b}. Characters in the
 * range 0x21 to 0x7e, other than backslash and double quote, are appended
 * as-is. Everything else is appended as two backslashes, {@code U} and the
 * lower-case hex value, left-padded with zeros.
 *
 * @param c the code point to render
 * @param b the builder to append to
 */
static void appendCharString(int c, StringBuilder b) {
  final boolean printable = c >= 0x21 && c <= 0x7e;
  if (printable && c != '\\' && c != '"') {
    b.appendCodePoint(c);
    return;
  }

  b.append("\\\\U");
  // One leading zero for every hex digit position the value does not reach,
  // with at most seven zeros.
  for (int limit = 0x10000000; limit >= 0x10 && c < limit; limit >>>= 4) {
    b.append('0');
  }
  b.append(Integer.toHexString(c));
}
/*
public void writeDot(String fileName) {
if (fileName.indexOf('/') == -1) {
fileName = "/l/la/lucene/core/" + fileName + ".dot";
}
try {
PrintWriter pw = new PrintWriter(fileName);
pw.println(toDot());
pw.close();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
*/
/** Returns the dot (graphviz) representation of this automaton.
* This is extremely useful for visualizing the automaton. */
public String toDot() {
// TODO: breadth first search so we can get layered output...
StringBuilder b = new StringBuilder();
b.append("digraph Automaton {\n");
b.append(" rankdir = LR\n");
b.append(" node [width=0.2, height=0.2, fontsize=8]\n");
final int numStates = getNumStates();
// With at least one state, draw an unlabeled "initial" node pointing at state 0.
if (numStates > 0) {
b.append(" initial [shape=plaintext,label=\"\"]\n");
b.append(" initial -> 0\n");
}
Transition t = new Transition();
// NOTE(review): the next line is not valid Java. Text appears to have been
// lost after "for(int state=0;state"; the per-state output and the inner
// loop over transitions are not visible. Restore them from the upstream
// file before compiling.
for(int state=0;state ");
b.append(t.dest);
// The edge label is the min label, followed by "-" and the max label when they differ.
b.append(" [label=\"");
appendCharString(t.min, b);
if (t.max != t.min) {
b.append('-');
appendCharString(t.max, b);
}
b.append("\"]\n");
//System.out.println(" t=" + t);
}
}
b.append('}');
return b.toString();
}
/**
* Returns sorted array of all interval start points.
*/
int[] getStartPoints() {
Set pointset = new HashSet<>();
pointset.add(Character.MIN_CODE_POINT);
//System.out.println("getStartPoints");
for (int s=0;s
© 2015 - 2025 Weber Informatics LLC | Privacy Policy