
org.apache.lucene.util.automaton.Automaton Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene Show documentation
Show all versions of lucene Show documentation
Libraries for Elasticsearch
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.automaton;
//import java.io.IOException;
//import java.io.PrintWriter;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.FutureObjects;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Sorter;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;
// TODO
// - could use packed int arrays instead
// - could encode dest w/ delta from to?
/** Represents an automaton and all its states and transitions. States
* are integers and must be created using {@link #createState}. Mark a
* state as an accept state using {@link #setAccept}. Add transitions
* using {@link #addTransition}. Each state must have all of its
* transitions added at once; if this is too restrictive then use
* {@link Builder} instead. State 0 is always the
* initial state. Once a state is finished, either
* because you've started adding transitions to another state or you
* call {@link #finishState}, then that state's transitions are sorted
* (first by min, then max, then dest) and reduced (transitions with
* adjacent labels going to the same dest are combined).
*
* @lucene.experimental */
public class Automaton implements Accountable {
/** Where we next write to the int[] states; this increments by 2 for
* each added state because we pack a pointer to the transitions
* array and a count of how many transitions leave the state. */
private int nextState;
/** Where we next write to in int[] transitions; this
* increments by 3 for each added transition because we
* pack dest, min, max in sequence. */
private int nextTransition;
/** Current state we are adding transitions to; the caller
* must add all transitions for this state before moving
* onto another state. */
private int curState = -1;
/** Index in the transitions array, where this state's
* leaving transitions are stored, or -1 if this state
* has not added any transitions yet, followed by number
* of transitions. */
private int[] states;
private final BitSet isAccept;
/** Holds toState, min, max for each transition. */
private int[] transitions;
/** True if no state has two transitions leaving with the same label. */
private boolean deterministic = true;
/** Default constructor; creates an automaton with no states. Delegates to
* {@link #Automaton(int, int)}, requesting initial room for 2 states and
* 2 transitions. */
public Automaton() {
this(2, 2);
}
/**
 * Creates an empty automaton whose backing storage is pre-sized for the
 * expected number of states and transitions. No state is created here;
 * the sizes only determine the initial array capacities.
 *
 * @param numStates
 *          Expected number of states; two ints are reserved per state and
 *          the accept bit set is sized to this many bits.
 * @param numTransitions
 *          Expected number of transitions; three ints are reserved per
 *          transition.
 */
public Automaton(int numStates, int numTransitions) {
  // Per state: [offset of first transition, transition count]
  this.states = new int[2 * numStates];
  this.isAccept = new BitSet(numStates);
  // Per transition: [dest, min, max]
  this.transitions = new int[3 * numTransitions];
}
/**
 * Allocates a new state and returns its number.
 *
 * <p>The state's slot in the {@code states} array is marked with -1 to
 * record that it has no transitions yet.
 *
 * @return the number of the newly created state
 */
public int createState() {
  growStates();
  final int slot = nextState;
  states[slot] = -1;
  // Each state occupies two ints, so the write position advances by 2.
  nextState = slot + 2;
  return slot / 2;
}
/**
 * Marks the given state as accepting, or removes that mark.
 *
 * @param state  state number; validated against {@link #getNumStates()}
 *               via {@code FutureObjects.checkIndex} before any change is made
 * @param accept {@code true} to make the state an accept state,
 *               {@code false} to make it a non-accept state
 */
public void setAccept(int state, boolean accept) {
  FutureObjects.checkIndex(state, getNumStates());
  if (accept) {
    isAccept.set(state);
  } else {
    isAccept.clear(state);
  }
}
/** Sugar to get all transitions for all states. This is
* object-heavy; it's better to iterate state by state instead. */
public Transition[][] getSortedTransitions() {
int numStates = getNumStates();
Transition[][] transitions = new Transition[numStates][];
for(int s=0;s 0;
int offset = states[2*curState];
int start = offset/3;
destMinMaxSorter.sort(start, start+numTransitions);
// Reduce any "adjacent" transitions:
int upto = 0;
int min = -1;
int max = -1;
int dest = -1;
for(int i=0;i max) {
max = tMax;
}
} else {
if (dest != -1) {
transitions[offset+3*upto] = dest;
transitions[offset+3*upto+1] = min;
transitions[offset+3*upto+2] = max;
upto++;
}
min = tMin;
max = tMax;
}
} else {
if (dest != -1) {
transitions[offset+3*upto] = dest;
transitions[offset+3*upto+1] = min;
transitions[offset+3*upto+2] = max;
upto++;
}
dest = tDest;
min = tMin;
max = tMax;
}
}
if (dest != -1) {
// Last transition
transitions[offset+3*upto] = dest;
transitions[offset+3*upto+1] = min;
transitions[offset+3*upto+2] = max;
upto++;
}
nextTransition -= (numTransitions-upto)*3;
states[2*curState+1] = upto;
// Sort transitions by min/max/dest:
minMaxDestSorter.sort(start, start+upto);
if (deterministic && upto > 1) {
int lastMax = transitions[offset+2];
for(int i=1;i= 0;
int count = states[2*state+1];
if (count == -1) {
return 0;
} else {
return count;
}
}
/** Ensures the {@code states} array has room for one more state (two ints)
 *  at the current write position, growing it if necessary. */
private void growStates() {
  final int required = nextState + 2;
  if (states.length < required) {
    states = ArrayUtil.grow(states, required);
  }
}
/** Ensures the {@code transitions} array has room for one more transition
 *  (three ints) at the current write position, growing it if necessary. */
private void growTransitions() {
  final int required = nextTransition + 3;
  if (transitions.length < required) {
    transitions = ArrayUtil.grow(transitions, required);
  }
}
/**
 * Sorts transitions by dest ascending, then min label ascending, then max
 * label ascending. The indices handed to {@code swap} and {@code compare}
 * are transition numbers; each transition occupies three consecutive ints
 * (dest, min, max) in the {@code transitions} array.
 */
private final Sorter destMinMaxSorter = new InPlaceMergeSorter() {

  @Override
  protected void swap(int i, int j) {
    int a = 3 * i;
    int b = 3 * j;
    // Exchange all three ints of the two transitions, slot by slot.
    for (int k = 0; k < 3; k++, a++, b++) {
      final int tmp = transitions[a];
      transitions[a] = transitions[b];
      transitions[b] = tmp;
    }
  }

  @Override
  protected int compare(int i, int j) {
    final int a = 3 * i;
    final int b = 3 * j;
    // Primary key: dest
    int cmp = Integer.compare(transitions[a], transitions[b]);
    if (cmp == 0) {
      // Secondary key: min
      cmp = Integer.compare(transitions[a + 1], transitions[b + 1]);
      if (cmp == 0) {
        // Final key: max
        cmp = Integer.compare(transitions[a + 2], transitions[b + 2]);
      }
    }
    return cmp;
  }
};
/**
 * Sorts transitions by min label ascending, then max label ascending, then
 * dest ascending. The indices handed to {@code swap} and {@code compare}
 * are transition numbers; each transition occupies three consecutive ints
 * (dest, min, max) in the {@code transitions} array.
 */
private final Sorter minMaxDestSorter = new InPlaceMergeSorter() {

  @Override
  protected void swap(int i, int j) {
    int a = 3 * i;
    int b = 3 * j;
    // Exchange all three ints of the two transitions, slot by slot.
    for (int k = 0; k < 3; k++, a++, b++) {
      final int tmp = transitions[a];
      transitions[a] = transitions[b];
      transitions[b] = tmp;
    }
  }

  @Override
  protected int compare(int i, int j) {
    final int a = 3 * i;
    final int b = 3 * j;
    // Primary key: min (second int of the triple)
    int cmp = Integer.compare(transitions[a + 1], transitions[b + 1]);
    if (cmp == 0) {
      // Secondary key: max (third int of the triple)
      cmp = Integer.compare(transitions[a + 2], transitions[b + 2]);
      if (cmp == 0) {
        // Final key: dest (first int of the triple)
        cmp = Integer.compare(transitions[a], transitions[b]);
      }
    }
    return cmp;
  }
};
/**
 * Prepares the provided {@link Transition} for iterating over every
 * transition leaving {@code state}. After this call, invoke
 * {@link #getNextTransition} once per transition to read them in turn.
 *
 * @param state the state whose outgoing transitions will be iterated
 * @param t     the transition object to (re)use as the iteration cursor
 * @return the number of transitions leaving {@code state}
 */
public int initTransition(int state, Transition t) {
  assert state < nextState/2: "state=" + state + " nextState=" + nextState;
  t.source = state;
  // Position the cursor at the state's first stored transition.
  final int firstOffset = states[2 * state];
  t.transitionUpto = firstOffset;
  return getNumTransitions(state);
}
/** Iterate to the next transition after the provided one.
* Reads the next (dest, min, max) triple for {@code t.source} out of the
* transitions array into {@code t} and advances {@code t.transitionUpto}
* past it. The transition must first have been set up with
* {@link #initTransition}. */
public void getNextTransition(Transition t) {
// Make sure there is still a transition left: the end of the triple we are
// about to read, measured from the state's first transition offset, must not
// exceed three ints per transition recorded for the state.
assert (t.transitionUpto+3 - states[2*t.source]) <= 3*states[2*t.source+1];
// Make sure transitions are in fact sorted:
assert transitionSorted(t);
// Each transition is stored as three consecutive ints: dest, min, max.
t.dest = transitions[t.transitionUpto++];
t.min = transitions[t.transitionUpto++];
t.max = transitions[t.transitionUpto++];
}
/**
 * Assertion helper: checks that the transition about to be read at
 * {@code t.transitionUpto} sorts strictly after the one currently held in
 * {@code t}, comparing by min, then max, then dest.
 *
 * @return {@code true} if {@code t} has not yet read any transition of its
 *         source state, or if the upcoming transition is strictly greater;
 *         {@code false} otherwise (including when both are fully equal)
 */
private boolean transitionSorted(Transition t) {
  final int cursor = t.transitionUpto;
  if (cursor == states[2 * t.source]) {
    // Transition isn't initialized yet (this is the first transition); don't check:
    return true;
  }

  final int upcomingDest = transitions[cursor];
  final int upcomingMin = transitions[cursor + 1];
  final int upcomingMax = transitions[cursor + 2];

  if (upcomingMin != t.min) {
    return upcomingMin > t.min;
  }
  // Min is equal, so max decides:
  if (upcomingMax != t.max) {
    return upcomingMax > t.max;
  }
  // Max is also equal, so dest decides; fully equal transitions should
  // never be seen here and are reported as unsorted:
  return upcomingDest > t.dest;
}
/**
 * Loads the {@code index}'th transition leaving {@code state} into the
 * provided {@link Transition}.
 *
 * @param state the source state
 * @param index zero-based position of the transition among those leaving
 *              {@code state}
 * @param t     receives source, dest, min and max
 */
public void getTransition(int state, int index, Transition t) {
  // Three ints per transition, starting at the state's first transition offset.
  final int base = states[2 * state] + 3 * index;
  t.source = state;
  t.dest = transitions[base];
  t.min = transitions[base + 1];
  t.max = transitions[base + 2];
}
/**
 * Appends a label-safe rendering of the code point {@code c} to {@code b}.
 *
 * <p>Code points in the range 0x21..0x7e, other than backslash and double
 * quote, are appended unchanged. Every other value is appended as two
 * backslash characters followed by {@code U} and the lower-case hex form of
 * {@code c}, left-padded with zeros so that non-negative values always use
 * eight hex digits.
 *
 * @param c the code point to render
 * @param b the builder to append to
 */
static void appendCharString(int c, StringBuilder b) {
  final boolean printable = c >= 0x21 && c <= 0x7e && c != '\\' && c != '"';
  if (printable) {
    b.appendCodePoint(c);
    return;
  }

  b.append("\\\\U");
  // Emit one leading zero for each hex digit position the value does not
  // reach, checking thresholds from 0x10000000 down to 0x10 (at most seven).
  for (int limit = 0x10000000; limit >= 0x10 && c < limit; limit >>>= 4) {
    b.append('0');
  }
  b.append(Integer.toHexString(c));
}
/*
public void writeDot(String fileName) {
if (fileName.indexOf('/') == -1) {
fileName = "/l/la/lucene/core/" + fileName + ".dot";
}
try {
PrintWriter pw = new PrintWriter(fileName);
pw.println(toDot());
pw.close();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
*/
/** Returns the dot (graphviz) representation of this automaton.
* This is extremely useful for visualizing the automaton. */
public String toDot() {
// TODO: breadth first search so we can get layered output...
StringBuilder b = new StringBuilder();
b.append("digraph Automaton {\n");
b.append(" rankdir = LR\n");
b.append(" node [width=0.2, height=0.2, fontsize=8]\n");
final int numStates = getNumStates();
if (numStates > 0) {
b.append(" initial [shape=plaintext,label=\"\"]\n");
b.append(" initial -> 0\n");
}
Transition t = new Transition();
for(int state=0;state ");
b.append(t.dest);
b.append(" [label=\"");
appendCharString(t.min, b);
if (t.max != t.min) {
b.append('-');
appendCharString(t.max, b);
}
b.append("\"]\n");
//System.out.println(" t=" + t);
}
}
b.append('}');
return b.toString();
}
/**
* Returns sorted array of all interval start points.
*/
public int[] getStartPoints() {
Set pointset = new HashSet<>();
pointset.add(Character.MIN_CODE_POINT);
//System.out.println("getStartPoints");
for (int s=0;s
Related Artifacts
Related Groups
-->
© 2015 - 2025 Weber Informatics LLC | Privacy Policy