All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.javascript.jscomp.regex.CharRanges Maven / Gradle / Ivy

/*
 * Copyright 2011 The Closure Compiler Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.javascript.jscomp.regex;

import static java.lang.Math.max;
import static java.lang.Math.min;

import java.util.Arrays;

/**
 * An immutable sparse bitset that deals well where the data is chunky:
 * where P(bit[x+1] == bit[x]) is high, i.e. adjacent bits usually agree.
 * E.g. [101,102,103,104,105,1001,1002,1003,1004] is chunky.
 */
final class CharRanges {
  /**
   * A strictly increasing set of bit indices where even members are the
   * inclusive starts of ranges, and odd members are the exclusive ends.
   * <p>
   * E.g., { 1, 5, 6, 10 } represents the set ( 1, 2, 3, 4, 6, 7, 8, 9 ).
   */
  private final int[] ranges;

  /** The set with no members. */
  public static final CharRanges EMPTY = new CharRanges(new int[0]);

  /** The set of all values in [0, 0x10000). */
  public static final CharRanges ALL_CODE_UNITS
      = new CharRanges(new int[] { 0, 0x10000 });

  /**
   * Returns an instance containing every value in [start, end].
   *
   * @param start the least member, inclusive.
   * @param end the greatest member, inclusive.
   * @throws IndexOutOfBoundsException if {@code start > end}, or if
   *     {@code end} is {@link Integer#MAX_VALUE} so that the exclusive end
   *     boundary {@code end + 1} cannot be represented as an {@code int}.
   */
  public static CharRanges inclusive(int start, int end) {
    if (start > end) {
      throw new IndexOutOfBoundsException(start + " > " + end);
    }
    if (end == Integer.MAX_VALUE) {
      // end + 1 would wrap around to Integer.MIN_VALUE and produce a range
      // array that is not strictly increasing.
      throw new IndexOutOfBoundsException(
          "exclusive end of " + end + " is not representable");
    }
    return new CharRanges(new int[] { start, end + 1 });
  }

  /**
   * Returns an instance containing all and only the given members.
   * The argument array is not modified.
   */
  public static CharRanges withMembers(int... members) {
    // intArrayToRanges sorts its argument in place, so hand it a copy
    // instead of reordering the caller's array.
    return new CharRanges(intArrayToRanges(members.clone()));
  }

  /**
   * Returns an instance containing the given ranges.
   * The argument array is copied, so later writes to it do not affect the
   * returned instance.
   *
   * @param ranges An even-length ordered sequence of non-overlapping,
   *     non-contiguous, [inclusive start, exclusive end) ranges.
   * @throws IllegalArgumentException if ranges has odd length or is not
   *     strictly increasing.
   */
  public static CharRanges withRanges(int... ranges) {
    // Copy before validating so that the validated values are the stored ones.
    int[] copy = ranges.clone();
    if ((copy.length & 1) != 0) {
      throw new IllegalArgumentException(
          "odd number of range boundaries: " + copy.length);
    }
    for (int i = 1; i < copy.length; ++i) {
      if (copy[i] <= copy[i - 1]) {
        throw new IllegalArgumentException(copy[i] + " <= " + copy[i - 1]);
      }
    }
    return new CharRanges(copy);
  }

  /** Wraps the given array without copying; callers must not retain it. */
  private CharRanges(int[] ranges) {
    this.ranges = ranges;
  }

  /**
   * Converts a bag of members into the [start, end) boundary representation.
   * Sorts {@code members} in place.
   */
  private static int[] intArrayToRanges(int[] members) {
    int nMembers = members.length;
    if (nMembers == 0) {
      return new int[0];
    }

    Arrays.sort(members);

    // Count the number of runs.
    int nRuns = 1;
    for (int i = 1; i < nMembers; ++i) {
      int current = members[i], last = members[i - 1];
      if (current == last) { continue; }  // duplicate member
      if (current != last + 1) { ++nRuns; }
    }

    int[] ranges = new int[nRuns * 2];
    ranges[0] = members[0];
    int k = 0;
    // Stop once every boundary except the final exclusive end is written.
    for (int i = 1; k + 2 < ranges.length; ++i) {
      int current = members[i], last = members[i - 1];
      if (current == last) { continue; }
      if (current != last + 1) {
        ranges[++k] = last + 1;  // add 1 to make end exclusive
        ranges[++k] = current;
      }
    }
    ranges[++k] = members[nMembers - 1] + 1;  // add 1 to make end exclusive
    return ranges;
  }

  /** Returns true iff the given bit is a member of this set. */
  public boolean contains(int bit) {
    // By the contract of Arrays.binarySearch, its result is either the
    // position of bit in ranges or it is the bitwise inverse of the position
    // of the least element greater than bit (the insertion point).
    //
    // case (idx >= 0)
    //   We ended up exactly on a range boundary.
    //   Starts are inclusive and ends are exclusive, so this contains
    //   bit iff idx is even.
    //
    // case (idx < 0)
    //   If the insertion point ~idx is odd, then bit is greater than a start
    //   and less than the matching end, so it is contained.
    //   If bit is greater than all elements, the insertion point is
    //   ranges.length, which is even, so bit is not contained.
    //   Otherwise the insertion point is even and bit lies in the gap between
    //   two runs, so it is not contained.
    //
    // Those two cases lead to
    //   idx >= 0 ? ((idx & 1) == 0) : ((~idx & 1) == 1)
    // But (~n & 1) == 1 <=> (n & 1) == 0, so both arms reduce to
    //   (idx & 1) == 0
    return (Arrays.binarySearch(ranges, bit) & 1) == 0;
  }

  /** Returns true iff this set has no members. */
  public boolean isEmpty() {
    return ranges.length == 0;
  }

  /** Returns the number of disjoint [start, end) ranges in this set. */
  public int getNumRanges() {
    return ranges.length >> 1;
  }

  /** Returns the inclusive start of the i-th range. */
  public int start(int i) {
    return ranges[i << 1];
  }

  /** Returns the exclusive end of the i-th range. */
  public int end(int i) {
    return ranges[(i << 1) | 1];
  }

  /**
   * Returns the set of bits that are in this set, in other, or in both.
   * Ranges that overlap or touch are merged into a single range.
   */
  public CharRanges union(CharRanges other) {
    // The input boundary arrays.
    int[] q = this.ranges, r = other.ranges;
    // Lengths of the inputs
    int m = q.length, n = r.length;

    if (m == 0) { return other; }
    if (n == 0) { return this; }

    // The output array.  The length is m+n in the worst case when all the
    // ranges in q are disjoint from the ranges in r.
    int[] out = new int[m + n];

    // Indexes into the various arrays
    int i = 0, j = 0, k = 0;
    // Since there are three arrays, and indices into them the following
    // should never occur in this function:
    // (1) q[j] or q[k]         -- q is indexed by i
    // (2) r[i] or r[k]         -- r is indexed by j
    // (3) out[i] or out[j]     -- out is indexed by k
    // (4) i < n or j < m       -- index compared to wrong limit

    // This loop exits because we always increment at least one of i,j.
    while (i < m && j < n) {
      // Range starts and ends.
      int a0 = q[i], a1 = q[i + 1],
          b0 = r[j], b1 = r[j + 1];
      if (a1 < b0) {  // [a0, a1) ends before [b0, b1) starts
        out[k++] = a0;
        out[k++] = a1;
        i += 2;
      } else if (b1 < a0) {  // [b0, b1) ends before [a0, a1) starts
        out[k++] = b0;
        out[k++] = b1;
        j += 2;
      } else {  // ranges overlap or touch
        // We need to compute a new range based on the set of ranges that
        // transitively overlap.
        //       AAAAAAAAA AAA
        //     BBB  BBB* BBB
        // In the range above, the start comes from one set, and the end from
        // another.  The range with the asterisk next to it is subsumed
        // entirely by a range from the other, and so not all ranges on the
        // input contribute a value to the output.
        // The last BBB run serves only as a bridge -- it overlaps two
        // disjoint ranges in the other one so establishes that they
        // transitively overlap.
        int start = min(a0, b0);
        // Guess at the end, and lookahead to come up with a more complete
        // estimate.
        int end = max(a1, b1);
        i += 2;
        j += 2;
        while (i < m || j < n) {
          if (i < m && q[i] <= end) {
            end = max(end, q[i + 1]);
            i += 2;
          } else if (j < n && r[j] <= end) {
            end = max(end, r[j + 1]);
            j += 2;
          } else {
            break;
          }
        }
        out[k++] = start;
        out[k++] = end;
      }
    }
    // There may be unprocessed ranges at the end of one of the inputs.
    if (i < m) {
      System.arraycopy(q, i, out, k, m - i);
      k += m - i;
    } else if (j < n) {
      System.arraycopy(r, j, out, k, n - j);
      k += n - j;
    }
    // We guessed at the output length above.  Cut off the tail.
    if (k != out.length) {
      out = Arrays.copyOf(out, k);
    }
    return new CharRanges(out);
  }

  /** Returns the set of bits that are in both this set and other. */
  public CharRanges intersection(CharRanges other) {
    int[] aRanges = ranges, bRanges = other.ranges;
    int aLen = aRanges.length, bLen = bRanges.length;
    if (aLen == 0) { return this; }
    if (bLen == 0) { return other; }

    int aIdx = 0, bIdx = 0;
    // Initial guess at the output size; grown below if it proves too small.
    int[] intersection = new int[min(aLen, bLen)];
    int intersectionIdx = 0;

    int pos = min(aRanges[0], bRanges[0]);
    while (aIdx < aLen && bIdx < bLen) {
      if (aRanges[aIdx + 1] <= pos) {
        aIdx += 2;
      } else if (bRanges[bIdx + 1] <= pos) {
        bIdx += 2;
      } else {
        int start = max(aRanges[aIdx], bRanges[bIdx]);
        if (pos < start) {  // Advance to start of common block.
          pos = start;
        } else {
          // Now we know that pos is less than the ends of the two ranges and
          // greater or equal to the starts of the two ranges.
          int end = min(aRanges[aIdx + 1], bRanges[bIdx + 1]);
          if (intersectionIdx != 0
              && pos == intersection[intersectionIdx - 1]) {
            // Contiguous with the previous output range, so extend it.
            intersection[intersectionIdx - 1] = end;
          } else {
            if (intersectionIdx == intersection.length) {
              intersection = Arrays.copyOf(intersection, intersectionIdx * 2);
            }
            intersection[intersectionIdx++] = pos;
            intersection[intersectionIdx++] = end;
          }
          pos = end;
        }
      }
    }

    if (intersectionIdx != intersection.length) {
      intersection = Arrays.copyOf(intersection, intersectionIdx);
    }
    return new CharRanges(intersection);
  }

  /**
   * Returns the set of bits that are in this set but not in
   * subtrahendRanges.
   */
  public CharRanges difference(CharRanges subtrahendRanges) {
    // difference = minuend - subtrahend
    int[] minuend = this.ranges;
    int[] subtrahend = subtrahendRanges.ranges;

    int mn = minuend.length, sn = subtrahend.length;
    if (mn == 0 || sn == 0) { return this; }

    // Initial guess at the output size; grown below if it proves too small.
    int[] difference = new int[minuend.length];

    // Indices into minuend.ranges, subtrahend.ranges, and difference.
    int mIdx = 0, sIdx = 0, dIdx = 0;

    int pos = minuend[0];
    while (mIdx < mn) {
      if (pos >= minuend[mIdx + 1]) {
        mIdx += 2;
      } else if (pos < minuend[mIdx]) {
        // Skip gaps in the minuend.
        pos = minuend[mIdx];
      } else if (sIdx < sn && pos >= subtrahend[sIdx]) {
        // Skip over a removed part.
        pos = subtrahend[sIdx + 1];
        sIdx += 2;
      } else {
        // Now we know that pos is between [minuend[i], minuend[i + 1])
        // and outside [subtrahend[j], subtrahend[j + 1]).
        int end = sIdx < sn
            ? min(minuend[mIdx + 1], subtrahend[sIdx]) : minuend[mIdx + 1];
        if (dIdx != 0 && difference[dIdx - 1] == pos) {
          // Contiguous with the previous output range, so extend it to end.
          difference[dIdx - 1] = end;
        } else {
          if (dIdx == difference.length) {
            difference = Arrays.copyOf(difference, dIdx * 2);
          }
          difference[dIdx++] = pos;
          difference[dIdx++] = end;
        }
        pos = end;
      }
    }

    if (dIdx != difference.length) {
      difference = Arrays.copyOf(difference, dIdx);
    }
    return new CharRanges(difference);
  }

  /** Returns true iff every member of sub is also a member of this set. */
  public boolean containsAll(CharRanges sub) {
    int[] superRanges = this.ranges;
    int[] subRanges = sub.ranges;
    int superIdx = 0, subIdx = 0;
    int superLen = superRanges.length, subLen = subRanges.length;
    while (subIdx < subLen) {
      if (superIdx == superLen) {
        return false;
      }
      if (superRanges[superIdx + 1] <= subRanges[subIdx]) {
        // Super range ends before subRange starts.
        superIdx += 2;
      } else if (superRanges[superIdx] > subRanges[subIdx]) {
        // Uncontained portion at start of sub range.
        return false;
      } else if (superRanges[superIdx + 1] >= subRanges[subIdx + 1]) {
        // A sub range is completely contained in the super range.
        // We know this because of the above condition and we have already
        // ruled out that subRanges[subIdx] < superRanges[superIdx].
        subIdx += 2;
      } else {
        // Uncontained portion at end of sub range.
        return false;
      }
    }
    // Every sub range was matched against a containing super range.
    return true;
  }

  /**
   * Shifts the bits matched by the given delta.
   * So if this has the bits (a, b, c, ..., z) set then the result has the bits
   * ((a + delta), (b + delta), (c + delta), ...., (z + delta)) set.
   *
   * @throws IndexOutOfBoundsException if shifting by delta would cause an
   *     overflow or underflow in a 32 bit {@code signed int} range boundary.
   *     Since the end boundaries of ranges are exclusive, even if there is no
   *     range containing {@link Integer#MAX_VALUE}, shifting by a delta of 1
   *     can cause an overflow.
   */
  public CharRanges shift(int delta) {
    int n = ranges.length;
    if (delta == 0 || n == 0) { return this; }
    // Test overflow/underflow.  The boundary is widened to long BEFORE the
    // addition; adding two ints first would wrap around and hide the overflow.
    if (delta < 0) {
      long lmin = (long) ranges[0] + delta;
      if (lmin < Integer.MIN_VALUE) { throw new IndexOutOfBoundsException(); }
    } else {
      long lmax = (long) ranges[n - 1] + delta;
      if (lmax > Integer.MAX_VALUE) { throw new IndexOutOfBoundsException(); }
    }
    // Create a shifted range.
    int[] shiftedRanges = new int[n];
    for (int i = n; --i >= 0;) {
      shiftedRanges[i] = ranges[i] + delta;
    }
    return new CharRanges(shiftedRanges);
  }

  /**
   * Returns a form like {@code [0x1-0x4 0x6]} listing each range with
   * inclusive hex endpoints; single-member ranges show only the member.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append('[');
    for (int i = 0; i < ranges.length; ++i) {
      // Skip the end of a range that holds exactly one member.
      if ((i & 1) != 0 && ranges[i] == ranges[i - 1] + 1) { continue; }
      if (i != 0) { sb.append((i & 1) == 0 ? ' ' : '-'); }
      // Subtract 1 from (odd-indexed) exclusive ends to print them inclusive.
      sb.append("0x").append(Integer.toString(ranges[i] - (i & 1), 16));
    }
    sb.append(']');
    return sb.toString();
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof CharRanges)) { return false; }
    return Arrays.equals(this.ranges, ((CharRanges) o).ranges);
  }

  @Override
  public int hashCode() {
    int hc = 0;
    // Only the first 16 boundaries contribute to the hash.
    for (int i = 0, n = min(16, ranges.length); i < n; ++i) {
      hc = (hc << 2) + ranges[i];
    }
    return hc;
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy