gw.internal.gosu.util.RabinKarpHash Maven / Gradle / Ivy
/*
* Copyright 2014 Guidewire Software, Inc.
*/
package gw.internal.gosu.util;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
/**
* Fast multi-pattern string matcher.
*
* Quick and dirty implementation of Rabin-Karp algorithm. Used for paths matching.
*
* We mach patterns from the end to reduce amount of possible collisions (filesystem paths tend to have common prefixes,
* like package names, etc).
*
* FIXME: Tests...
*
* @see Rabin–Karp algorithm
*/
public class RabinKarpHash {
private final static int A = 31;
private final int _block; // block length
private final int _Apowblock; // 'a' constant in power of block length
private Set _hashes = new HashSet();
private String[] _patterns;
// Hash values for characters
private static int CHAR_HASHES[] = new int[1 << 16];
static {
// Generate random hashes for characters
Random r = new Random();
for (int k = 0; k < CHAR_HASHES.length; ++k) {
CHAR_HASHES[k] = r.nextInt();
}
}
public RabinKarpHash(String... patterns) {
_block = minLen(patterns);
int apowbl = 1;
for (int i = 0; i < _block; ++i) {
apowbl *= A;
}
_Apowblock = apowbl;
// Remember hashes last 'block' characters of all patterns.
_patterns = patterns;
for (String pattern : patterns) {
int hash = reverseHash(pattern);
_hashes.add(hash);
}
}
/**
* Find the shortest of all patterns.
* @param patterns
* @return
*/
private static int minLen(String... patterns) {
int minLen = patterns[0].length();
for (String str : patterns) {
if (str.length() < minLen) {
minLen = str.length();
}
}
return minLen;
}
public boolean matches(String str) {
// String is shorter than longest pattern
int len = str.length();
if (len < _block) {
return false;
}
// Get reverse hash of the tail of the string
int hash = reverseHash(str);
// Start matching the hash
for(int i = 0; i < len - _block; i++) {
if (_hashes.contains(hash) && exactMatch(str, i)) {
return true;
}
hash = rollHash(hash, str, i);
}
// Last iteration
return _hashes.contains(hash) && exactMatch(str, len - _block);
}
/**
* Check for exact match.
* FIXME:
* @param str
* @param i
* @return
*/
private boolean exactMatch(String str, int i) {
int end = str.length() - i;
for (String p : _patterns) {
int start = end - p.length();
if (start >= 0 && str.regionMatches(start, p, 0, p.length())) {
return true;
}
}
return false;
}
/**
* Update rolling hash values.
* @param hashvalue
* @param str
* @param i
* @return
*/
private int rollHash(int hashvalue, String str, int i) {
// 'roll' hash
char outchar = str.charAt(str.length() - 1 - i);
char inchar = str.charAt(str.length() - _block - 1 - i);
hashvalue = A * hashvalue + CHAR_HASHES[inchar] - _Apowblock * CHAR_HASHES[outchar];
return hashvalue;
}
/**
* Take rolling hash of last 'block' characters. Start from the end of the string.
* @param str
* @return
*/
private int reverseHash(String str) {
int hash = 0;
int len = str.length();
for (int i = 0; i < _block; i++) {
char c = str.charAt(len - i - 1);
hash = A * hash + CHAR_HASHES[c];
}
return hash;
}
}