
// com.almondtools.stringsandchars.search.BNDM Maven / Gradle / Ivy
package com.almondtools.stringsandchars.search;
import static com.almondtools.util.text.CharUtils.computeMaxChar;
import static com.almondtools.util.text.CharUtils.computeMinChar;
import static java.util.Arrays.fill;
import com.almondtools.stringsandchars.io.CharProvider;
import com.almondtools.util.map.CharLongMap;
import com.almondtools.util.map.CharObjectMap;
/**
 * An implementation of the String Search Algorithm BNDM (Backward Nondeterministic Dawg Matching).
 *
 * This algorithm takes a single pattern as input and generates a finder which can find this pattern in documents.
 *
 * The search state is a bit set with one bit per pattern character. Patterns of up to 64 characters keep that
 * state in a single {@code long}; longer patterns keep it in a {@code long[]} whose element 0 holds the most
 * significant bits.
 *
 * NOTE(review): an empty pattern is not special-cased; what happens then depends on
 * {@code CharUtils.computeMinChar}/{@code computeMaxChar} and on the {@code CharProvider} - confirm before relying on it.
 */
public class BNDM implements StringSearchAlgorithm {

	/** Number of state bits that fit into one {@code long} word. */
	private static final int WORD_BITS = Long.SIZE;

	private final int patternLength;
	private final BitMapStates states;

	/**
	 * Creates the search algorithm for one pattern and precomputes its per-character bit masks.
	 *
	 * @param pattern the pattern to search for
	 */
	public BNDM(String pattern) {
		this.patternLength = pattern.length();
		this.states = computeStates(pattern.toCharArray());
	}

	/**
	 * Selects the mask table implementation: array-indexed ("Quick") when the pattern's characters span a compact
	 * range, map-based ("Smart") otherwise; single-word for up to 64 characters, multi-word beyond that.
	 */
	private static BitMapStates computeStates(char[] pattern) {
		boolean multiWord = pattern.length > WORD_BITS;
		if (isCompactRange(pattern)) {
			if (multiWord) {
				return new QuickMultiLongStates(pattern);
			}
			return new QuickSingleLongStates(pattern);
		}
		if (multiWord) {
			return new SmartMultiLongStates(pattern);
		}
		return new SmartSingleLongStates(pattern);
	}

	/**
	 * @return {@code true} if the distance between the smallest and the largest pattern character is below 256
	 *         or below twice the pattern length
	 */
	private static boolean isCompactRange(char[] pattern) {
		char minChar = computeMinChar(pattern);
		char maxChar = computeMaxChar(pattern);
		int span = maxChar - minChar;
		return span < 256 || span < pattern.length * 2;
	}

	@Override
	public int getPatternLength() {
		return patternLength;
	}

	/**
	 * Creates a finder over {@code chars}: a {@link LongFinder} when the mask table supports single-word lookups,
	 * a {@link MultiLongFinder} otherwise.
	 */
	@Override
	public StringFinder createFinder(CharProvider chars, StringFinderOption... options) {
		if (states.supportsSingle()) {
			return new LongFinder(chars, options);
		}
		return new MultiLongFinder(chars, options);
	}

	/**
	 * Common base of both finders: holds the character source and the two bit masks shared by the single-word
	 * and the multi-word variant.
	 */
	private abstract class Finder extends AbstractStringFinder {

		/**
		 * The bit assigned to the first pattern character (pattern index 0). In the multi-word layout this bit
		 * lives in word 0.
		 */
		protected final long finalstate;
		/** {@link #finalstate} and every bit below it, i.e. the bits of the most significant word that are in use. */
		protected final long activeStates;
		protected final CharProvider chars;

		public Finder(CharProvider chars, StringFinderOption... options) {
			super(options);
			this.finalstate = 1L << ((patternLength - 1) % WORD_BITS);
			this.activeStates = (finalstate - 1) | finalstate;
			this.chars = chars;
		}

		/**
		 * Builds the match starting at the provider's current position and spanning {@code patternLength} characters.
		 */
		protected StringMatch createMatch() {
			long start = chars.current();
			long end = start + patternLength;
			String s = chars.slice(start, end);
			return new StringMatch(start, end, s);
		}
	}

	/** Finder for patterns whose whole state fits into one {@code long}. */
	private class LongFinder extends Finder {

		private long state;

		public LongFinder(CharProvider chars, StringFinderOption... options) {
			super(chars, options);
			this.state = activeStates;
		}

		@Override
		public void skipTo(long pos) {
			chars.move(pos);
			state = activeStates;
		}

		/**
		 * Searches the next occurrence of the pattern.
		 *
		 * @return the next match, or {@code null} once {@code chars.finished(patternLength - 1)} reports {@code true}
		 */
		@Override
		public StringMatch findNext() {
			while (!chars.finished(patternLength - 1)) {
				state = activeStates;
				// the window is read backwards, from lookahead offset patternLength - 1 down to 0
				int j = patternLength - 1;
				// distance to advance after this window; narrowed whenever finalstate is hit at an offset j > 0
				int last = patternLength;
				while (state != 0L) {
					char currentChar = chars.lookahead(j);
					state &= states.single(currentChar);
					if ((state & finalstate) != 0L) {
						if (j > 0) {
							last = j;
						} else {
							// finalstate reached at offset 0: report the match, then advance for the next call
							StringMatch match = createMatch();
							chars.forward(last);
							return match;
						}
					}
					j--;
					state = (state << 1) & activeStates;
				}
				chars.forward(last);
			}
			return null;
		}
	}

	/** Finder for patterns longer than one word; the state is a {@code long[]} with the highest bits in element 0. */
	private class MultiLongFinder extends Finder {

		/** State buffer owned by this finder; it is reset and updated in place. */
		private final long[] state;

		public MultiLongFinder(CharProvider chars, StringFinderOption... options) {
			super(chars, options);
			this.state = initial(patternLength);
		}

		/** Allocates a state buffer with one word per started group of 64 pattern characters and initializes it. */
		private long[] initial(int patternLength) {
			return init(new long[((patternLength - 1) / WORD_BITS) + 1]);
		}

		/** Resets {@code state} in place: all bits set, word 0 restricted to {@code activeStates}. */
		private long[] init(long[] state) {
			fill(state, -1L);
			state[0] = activeStates;
			return state;
		}

		@Override
		public void skipTo(long pos) {
			chars.move(pos);
			init(state);
		}

		/**
		 * Searches the next occurrence of the pattern.
		 *
		 * @return the next match, or {@code null} once {@code chars.finished(patternLength - 1)} reports {@code true}
		 */
		@Override
		public StringMatch findNext() {
			while (!chars.finished(patternLength - 1)) {
				// reuse the existing buffer instead of allocating a new array for every window
				init(state);
				// the window is read backwards, from lookahead offset patternLength - 1 down to 0
				int j = patternLength - 1;
				// distance to advance after this window; narrowed whenever finalstate is hit at an offset j > 0
				int last = patternLength;
				while (isNonZero(state)) {
					char currentChar = chars.lookahead(j);
					join(state, states.all(currentChar));
					if ((state[0] & finalstate) != 0L) {
						if (j > 0) {
							last = j;
						} else {
							// finalstate reached at offset 0: report the match, then advance for the next call
							StringMatch match = createMatch();
							chars.forward(last);
							return match;
						}
					}
					j--;
					next(state);
				}
				chars.forward(last);
			}
			return null;
		}

		/**
		 * Shifts the whole multi-word state left by one bit in place: the top bit of each following word is
		 * carried into the lowest bit of the word before it, then word 0 is masked with {@code activeStates}.
		 */
		private long[] next(long[] state) {
			for (int i = 0; i < state.length; i++) {
				int following = i + 1;
				long carry = following < state.length ? state[following] >>> 63 : 0L;
				state[i] = state[i] << 1 | carry;
			}
			state[0] &= activeStates;
			return state;
		}

		/** @return {@code true} if at least one word of {@code state} has a bit set */
		private boolean isNonZero(long[] state) {
			for (long word : state) {
				if (word != 0L) {
					return true;
				}
			}
			return false;
		}

		/** ANDs {@code bits} into {@code state} word by word; {@code bits} itself is not modified. */
		private long[] join(long[] state, long[] bits) {
			for (int i = 0; i < state.length; i++) {
				state[i] &= bits[i];
			}
			return state;
		}
	}

	/** Factory creating a {@link BNDM} instance for a given pattern. */
	public static class Factory implements StringSearchAlgorithmFactory {

		@Override
		public StringSearchAlgorithm of(String pattern) {
			return new BNDM(pattern);
		}
	}

	/** Base for mask tables whose masks fit into one {@code long}. */
	private abstract static class SingleLongBitMapStates implements BitMapStates {

		@Override
		public boolean supportsSingle() {
			return true;
		}

		/** Wraps the single-word mask of {@code c} into a freshly allocated one-element array. */
		@Override
		public long[] all(char c) {
			return new long[] { single(c) };
		}
	}

	/** Single-word mask table backed by an array indexed by {@code c - minChar}. */
	private static class QuickSingleLongStates extends SingleLongBitMapStates {

		private final char minChar;
		private final char maxChar;
		private final long[] characters;

		public QuickSingleLongStates(char[] pattern) {
			this.minChar = computeMinChar(pattern);
			this.maxChar = computeMaxChar(pattern);
			this.characters = computeStates(pattern, this.minChar, this.maxChar);
		}

		/** Sets, for every pattern index {@code i}, bit {@code pattern.length - i - 1} in the mask of {@code pattern[i]}. */
		private static long[] computeStates(char[] pattern, char min, char max) {
			long[] characters = new long[max - min + 1];
			for (int i = 0; i < pattern.length; i++) {
				int bit = pattern.length - i - 1;
				characters[pattern[i] - min] |= 1L << bit;
			}
			return characters;
		}

		/** @return the mask of {@code c}, or {@code 0} if {@code c} lies outside {@code [minChar, maxChar]} */
		@Override
		public long single(char c) {
			if (c < minChar || c > maxChar) {
				return 0L;
			}
			return characters[c - minChar];
		}
	}

	/** Single-word mask table backed by a {@code CharLongMap} built with default value {@code 0}. */
	private static class SmartSingleLongStates extends SingleLongBitMapStates {

		private final CharLongMap states;

		public SmartSingleLongStates(char[] pattern) {
			this.states = computeStates(pattern);
		}

		/** Sets, for every pattern index {@code i}, bit {@code pattern.length - i - 1} in the mask of {@code pattern[i]}. */
		private static CharLongMap computeStates(char[] pattern) {
			CharLongMap.Builder mapBuilder = new CharLongMap.Builder(0L);
			for (int i = 0; i < pattern.length; i++) {
				char c = pattern[i];
				int bit = pattern.length - i - 1;
				mapBuilder.put(c, mapBuilder.get(c) | (1L << bit));
			}
			return mapBuilder.perfectMinimal();
		}

		@Override
		public long single(char c) {
			return states.get(c);
		}
	}

	/** Base for mask tables whose masks span several {@code long} words (word 0 = most significant bits). */
	private abstract static class MultiLongBitMapStates implements BitMapStates {

		/** @return a zero-filled mask with one word per started group of 64 pattern characters */
		public static long[] computeZero(int length) {
			return new long[((length - 1) / WORD_BITS) + 1];
		}

		/**
		 * Sets logical bit {@code bit} in the multi-word mask {@code words}. Logical bits are counted from the
		 * end of the array: bits 0..63 live in the last used word, the highest bits in word 0.
		 */
		protected static void mark(long[] words, int patternLength, int bit) {
			int slot = ((patternLength - 1) / WORD_BITS) - bit / WORD_BITS;
			int offset = bit % WORD_BITS;
			words[slot] |= 1L << offset;
		}

		@Override
		public boolean supportsSingle() {
			return false;
		}

		/** Not available for multi-word masks. */
		@Override
		public long single(char c) {
			throw new UnsupportedOperationException();
		}
	}

	/** Multi-word mask table backed by an array indexed by {@code c - minChar}. */
	private static class QuickMultiLongStates extends MultiLongBitMapStates {

		private final char minChar;
		private final char maxChar;
		private final long[][] characters;
		/** Shared all-zero mask returned for characters outside {@code [minChar, maxChar]}. */
		private final long[] zero;

		public QuickMultiLongStates(char[] pattern) {
			this.minChar = computeMinChar(pattern);
			this.maxChar = computeMaxChar(pattern);
			this.characters = computeStates(pattern, this.minChar, this.maxChar);
			this.zero = computeZero(pattern.length);
		}

		/**
		 * Allocates one zero mask per character in {@code [min, max]} and sets, for every pattern index {@code i},
		 * logical bit {@code pattern.length - i - 1} in the mask of {@code pattern[i]}.
		 */
		private static long[][] computeStates(char[] pattern, char min, char max) {
			long[][] characters = new long[max - min + 1][];
			for (int c = min; c <= max; c++) {
				characters[c - min] = computeZero(pattern.length);
			}
			for (int i = 0; i < pattern.length; i++) {
				mark(characters[pattern[i] - min], pattern.length, pattern.length - i - 1);
			}
			return characters;
		}

		/**
		 * @return the mask of {@code c}; the returned array is an internal, shared instance and must not be
		 *         modified by the caller
		 */
		@Override
		public long[] all(char c) {
			if (c < minChar || c > maxChar) {
				return zero;
			}
			return characters[c - minChar];
		}
	}

	/** Multi-word mask table backed by a {@code CharObjectMap} whose default value is an all-zero mask. */
	private static class SmartMultiLongStates extends MultiLongBitMapStates {

		private final CharObjectMap<long[]> states;

		public SmartMultiLongStates(char[] pattern) {
			this.states = computeStates(pattern);
		}

		/**
		 * Sets, for every pattern index {@code i}, logical bit {@code pattern.length - i - 1} in the mask of
		 * {@code pattern[i]}.
		 */
		private static CharObjectMap<long[]> computeStates(char[] pattern) {
			long[] zero = computeZero(pattern.length);
			CharObjectMap.Builder<long[]> mapBuilder = new CharObjectMap.Builder<>(zero);
			for (int i = 0; i < pattern.length; i++) {
				char c = pattern[i];
				long[] mask = mapBuilder.get(c);
				if (mask == zero) {
					// identity check: the builder handed back its default, so this character has no own mask yet;
					// never write into the shared default instance
					mask = computeZero(pattern.length);
				}
				mark(mask, pattern.length, pattern.length - i - 1);
				mapBuilder.put(c, mask);
			}
			return mapBuilder.perfectMinimal();
		}

		@Override
		public long[] all(char c) {
			return states.get(c);
		}
	}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy