All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gw.internal.gosu.util.RabinKarpHash Maven / Gradle / Ivy

There is a newer version: 1.18.2
Show newest version
/*
 * Copyright 2014 Guidewire Software, Inc.
 */

package gw.internal.gosu.util;

import java.util.HashSet;
import java.util.Random;
import java.util.Set;

/**
 * Fast multi-pattern string matcher.
 *
 * Quick and dirty implementation of Rabin-Karp algorithm. Used for paths matching.
 *
 * We match patterns from the end to reduce the number of possible collisions (filesystem paths tend to have common
 * prefixes, like package names, etc).
 *
 * FIXME: Tests...
 *
 * @see <a href="https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm">Rabin–Karp algorithm</a>
 */
public class RabinKarpHash {

  /** Multiplier of the polynomial rolling hash. */
  private static final int A = 31;

  /** Hash window length: the length of the shortest pattern (0 when there are no patterns). */
  private final int _block;

  /** {@code A} raised to the power {@code _block} (with int wrap-around); used to drop the outgoing character. */
  private final int _Apowblock;

  /** Hashes of the last {@code _block} characters of every pattern. */
  private final Set<Integer> _hashes = new HashSet<Integer>();

  /** Private copy of the patterns, used to confirm a hash hit with a real comparison. */
  private final String[] _patterns;

  /** Hash value for every possible {@code char}, indexed by the char value. */
  private static final int[] CHAR_HASHES = new int[1 << 16];
  static {
    // Values are drawn from an unseeded Random, so they differ from one class load to the next.
    Random r = new Random();
    for (int k = 0; k < CHAR_HASHES.length; ++k) {
      CHAR_HASHES[k] = r.nextInt();
    }
  }

  /**
   * Creates a matcher for the given patterns.
   *
   * @param patterns the strings to look for; may be empty, in which case the matcher never matches
   * @throws NullPointerException if {@code patterns} or any of its elements is {@code null}
   */
  public RabinKarpHash(String... patterns) {
    // Copy the array: the hashes computed below must stay in sync with the patterns used by exactMatch(),
    // even if the caller modifies its array afterwards.
    _patterns = patterns.clone();
    _block = minLen(_patterns);

    int apowbl = 1;
    for (int i = 0; i < _block; ++i) {
      apowbl *= A;
    }
    _Apowblock = apowbl;

    // Remember hashes of the last 'block' characters of all patterns.
    for (String pattern : _patterns) {
      _hashes.add(reverseHash(pattern));
    }
  }

  /**
   * Finds the length of the shortest pattern.
   *
   * @param patterns the patterns to inspect
   * @return the smallest pattern length, or 0 if {@code patterns} is empty
   */
  private static int minLen(String... patterns) {
    if (patterns.length == 0) {
      return 0;
    }
    int minLen = patterns[0].length();
    for (String str : patterns) {
      if (str.length() < minLen) {
        minLen = str.length();
      }
    }
    return minLen;
  }

  /**
   * Tests whether at least one pattern occurs somewhere in the given string.
   *
   * @param str the string to scan
   * @return {@code true} if any pattern is a substring of {@code str}; {@code false} otherwise, including
   *         when this matcher was created without patterns
   */
  public boolean matches(String str) {
    if (_patterns.length == 0) {
      return false;
    }
    // String is shorter than the shortest pattern, so nothing can match
    int len = str.length();
    if (len < _block) {
      return false;
    }
    // Get reverse hash of the tail of the string
    int hash = reverseHash(str);

    // Slide the window from the end of the string towards its start
    int lastShift = len - _block;
    for (int i = 0; i < lastShift; i++) {
      if (_hashes.contains(hash) && exactMatch(str, i)) {
        return true;
      }
      hash = rollHash(hash, str, i);
    }

    // Last iteration: the window now covers the first 'block' characters
    return _hashes.contains(hash) && exactMatch(str, lastShift);
  }

  /**
   * Confirms a hash hit by comparing characters: checks whether some pattern ends exactly where the
   * current hash window ends. All patterns are tried, not only those whose hash matched.
   *
   * @param str the string being scanned
   * @param i   number of characters between the end of the window and the end of {@code str}
   * @return {@code true} if a pattern occupies the characters ending at {@code str.length() - i}
   */
  private boolean exactMatch(String str, int i) {
    int end = str.length() - i;
    for (String p : _patterns) {
      int start = end - p.length();
      // Patterns longer than the window may not fit before 'end'
      if (start >= 0 && str.regionMatches(start, p, 0, p.length())) {
        return true;
      }
    }
    return false;
  }

  /**
   * Moves the hash window one character towards the start of the string.
   *
   * @param hashvalue hash of the window that ends {@code i} characters before the end of {@code str}
   * @param str       the string being scanned
   * @param i         current shift of the window from the end of {@code str}
   * @return hash of the window shifted by {@code i + 1}
   */
  private int rollHash(int hashvalue, String str, int i) {
    // The rightmost character leaves the window, the one just left of the window enters it
    char outchar = str.charAt(str.length() - 1 - i);
    char inchar = str.charAt(str.length() - _block - 1 - i);
    return A * hashvalue + CHAR_HASHES[inchar] - _Apowblock * CHAR_HASHES[outchar];
  }

  /**
   * Hashes the last 'block' characters of the string, walking from its end.
   *
   * @param str a string of at least {@code _block} characters
   * @return the hash of the trailing window
   */
  private int reverseHash(String str) {
    int hash = 0;
    int len = str.length();
    for (int i = 0; i < _block; i++) {
      char c = str.charAt(len - i - 1);
      hash = A * hash + CHAR_HASHES[c];
    }
    return hash;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy