All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.gwt.dev.util.editdistance.PatternBitmap Maven / Gradle / Ivy

/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.gwt.dev.util.editdistance;

/**
 * A bitmap array generator for the positions that a given character
 * appears within a pattern.  The array of bitmaps is indexed by a
 * CharIndex that represents the alphabet for the pattern.
 * One bitmap is produced for each character.  The bitmap for
 * a character "c" has the (1< bit set for each position "i"
 * where that character appears in the pattern.
 *
 * Several implementations are provided for different string
 * sizes: int, long, int[].  The caller provides a (zeroed) array of the
 * desired type, sized according to the CharIndex; the array is
 * returned as a convenience.
 */
public class PatternBitmap {
  /**
   * Computes a pattern bitmap for a short character sequence.
   * For each character in the alphabet (represented by a CharIndex),
   * bits are set where that character appears in the sequence.
   * For this generator, the pattern must fit in an "int": the
   * character sequence must not be longer than Integer.SIZE.
   * @param s the character sequence defining the bits to be set
   * @param idx the alphabet for which bitmaps should be produced
   * @param result an array (of size idx.size() or greater) to
   *        hold the resulting bitmaps
   * @return the result array passed as a parameter, for convenience
   */
  public static int[] map(CharSequence s, CharIndex idx, int[] result) {
    int len = s.length();
    assert (len <= Integer.SIZE);
    for (int i = 0; i < len; i++) {
      result[idx.lookup(s.charAt(i))] |= (1 << i);
    }
    return result;
  }

  /**
   * Computes a pattern bitmap for a character sequence of any length.
   * For each character in the alphabet (represented by a CharIndex),
   * bits are set where that character appears in the sequence.
   * Each (per-character) bitmap is represented by an "int[]".
   * @param s the character sequence defining the bits to be set
   * @param idx the alphabet for which bitmaps should be produced
   * @param result an array (of size idx.size() or greater) to
   *        hold the resulting bitmaps
   * @param width how many bits to be use in each word, no more
   *        than Integer.SIZE
   * @return the result array passed as a parameter, for convenience
   */
  public static int[][] map(CharSequence s, CharIndex idx,
                             int[][] result, int width) {
    assert (width <= Integer.SIZE);
    int len = s.length();
    int rowSize = (len + width - 1) / width;

    /*
     * Use one zero-filled bitmap for alphabet characters not in the pattern
     */
    int[] nullElement = new int[rowSize];
    java.util.Arrays.fill(result, nullElement);

    int wordIndex = 0;          /* Which word we are on now */
    int bitWithinWord = 0;      /* Which bit within that word */

    for (int i = 0; i < s.length(); i++) {
      int[] r = result[idx.lookup(s.charAt(i))];
      if (r == nullElement) {

        /* Create a separate zero-filled bitmap for this alphabet character */
        r = result[idx.lookup(s.charAt(i))] = new int[rowSize];
      }
      r[wordIndex] |= (1 << bitWithinWord);

      /* Step to the next bit (and word if appropriate) */
      if (++bitWithinWord == width) {
        bitWithinWord = 0;
        wordIndex++;
      }
    }
    return result;
  }

  /**
   * Computes a pattern bitmap for a medium character sequence.
   * For each character in the alphabet (represented by a CharIndex),
   * bits are set where that character appears in the sequence.
   * For this generator, the pattern must fit in a "long": the
   * character sequence must not be longer than Long.SIZE.
   * @param s the character sequence defining the bits to be set
   * @param idx the alphabet for which bitmaps should be produced
   * @param result an array (of size idx.size() or greater) to
   *        hold the resulting bitmaps
   * @return the result array passed as a parameter, for convenience
   */
  public static long[] map(CharSequence s, CharIndex idx, long[] result) {
    int len = s.length();
    assert (len <= Long.SIZE);
    for (int i = 0; i < len; i++) {
      result[idx.lookup(s.charAt(i))] |= (1L << i);
    }
    return result;
  }
  private PatternBitmap() { /* Prevent instantiation */ }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy