All downloads are free. Search and download functionality uses the official Maven repository.

goog.i18n.graphemebreak.js Maven / Gradle / Ivy

// Copyright 2006 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview Detects whether a Grapheme Cluster Break occurs between a pair
 * of codepoints. Follows Unicode 5.1 UAX#29. Tailoring for Virama × Indic
 * Consonants is used.
 *
 */

goog.provide('goog.i18n.GraphemeBreak');
goog.require('goog.structs.InversionMap');


/**
 * Enum for all Grapheme Cluster Break properties.
 * These values correspond directly to the Grapheme_Cluster_Break property
 * values listed in http://unicode.org/reports/tr29 table 2. VIRAMA and
 * INDIC_CONSONANT are for the Virama × Base tailoring mentioned in the notes.
 *
 * CR and LF are placed near the bottom of the list because each of them occurs
 * only once, which makes them good candidates to take two-decimal-digit
 * values. REGIONAL_INDICATOR follows them as the last entry.
 * @enum {number}
 * @protected
 */
goog.i18n.GraphemeBreak.property = {
  ANY: 0,
  CONTROL: 1,
  EXTEND: 2,
  PREPEND: 3,
  SPACING_MARK: 4,
  // Tailoring-only values (Virama × Indic consonant).
  INDIC_CONSONANT: 5,
  VIRAMA: 6,
  // Hangul syllable-type values.
  L: 7,
  V: 8,
  T: 9,
  LV: 10,
  LVT: 11,
  // Line terminators.
  CR: 12,
  LF: 13,
  REGIONAL_INDICATOR: 14
};


/**
 * Grapheme Cluster Break property values for all codepoints, stored as an
 * inversion map. Starts out as null and is constructed lazily by
 * goog.i18n.GraphemeBreak.getBreakProp_ the first time a lookup needs it.
 *
 * @type {goog.structs.InversionMap}
 * @private
 */
goog.i18n.GraphemeBreak.inversions_ = null;


/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * applies the rules that are common to both (the legacy rules) to a pair of
 * break properties.
 *
 * The rules are evaluated in priority order and the first matching rule
 * decides the result; if no rule matches, there is a break.
 *
 * @param {number} prop_a The property enum value of the first character.
 * @param {number} prop_b The property enum value of the second character.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 * @private
 */
goog.i18n.GraphemeBreak.applyLegacyBreakRules_ = function(prop_a, prop_b) {

  var prop = goog.i18n.GraphemeBreak.property;

  // CR followed by LF is a single cluster.
  if (prop_a == prop.CR && prop_b == prop.LF) {
    return false;
  }
  // Otherwise always break after a control, CR or LF ...
  if (prop_a == prop.CONTROL || prop_a == prop.CR || prop_a == prop.LF) {
    return true;
  }
  // ... and always break before one.
  if (prop_b == prop.CONTROL || prop_b == prop.CR || prop_b == prop.LF) {
    return true;
  }
  // Hangul syllable sequences: L × (L | V | LV | LVT).
  if ((prop_a == prop.L) && (prop_b == prop.L || prop_b == prop.V ||
                             prop_b == prop.LV || prop_b == prop.LVT)) {
    return false;
  }
  // Hangul syllable sequences: (LV | V) × (V | T).
  if ((prop_a == prop.LV || prop_a == prop.V) &&
      (prop_b == prop.V || prop_b == prop.T)) {
    return false;
  }
  // Hangul syllable sequences: (LVT | T) × T.
  if ((prop_a == prop.LVT || prop_a == prop.T) && (prop_b == prop.T)) {
    return false;
  }
  // Never break before an extending character (or a virama, per tailoring).
  if (prop_b == prop.EXTEND || prop_b == prop.VIRAMA) {
    return false;
  }
  // Tailoring: a virama joins with the Indic consonant that follows it.
  if (prop_a == prop.VIRAMA && prop_b == prop.INDIC_CONSONANT) {
    return false;
  }
  // Regional indicator symbols pair up (e.g. to form flag sequences), so two
  // adjacent regional indicators must not be split. Without this rule the
  // REGIONAL_INDICATOR property was assigned to codepoints but never used.
  if (prop_a == prop.REGIONAL_INDICATOR &&
      prop_b == prop.REGIONAL_INDICATOR) {
    return false;
  }
  return true;
};


/**
 * Returns the property enum value of a codepoint. Codepoints in the range
 * 0xAC00-0xD7A3 are classified arithmetically as Hangul LV or LVT; every other
 * codepoint is looked up in the inversion map, which is built on first use and
 * cached in goog.i18n.GraphemeBreak.inversions_.
 * @param {number} acode The code point value of the character.
 * @return {number} Property enum value of codepoint.
 * @private
 */
goog.i18n.GraphemeBreak.getBreakProp_ = function(acode) {
  if (0xAC00 <= acode && acode <= 0xD7A3) {
    var prop = goog.i18n.GraphemeBreak.property;
    // 0xAC00 % 0x1C is 0x10, so this remainder test selects every 28th
    // codepoint counting from 0xAC00. Those are reported as LV; all other
    // codepoints in the range are reported as LVT.
    if (acode % 0x1C == 0x10) {
      return prop.LV;
    }
    return prop.LVT;
  } else {
    // Build the inversion map once; later calls reuse the cached instance.
    if (!goog.i18n.GraphemeBreak.inversions_) {
      goog.i18n.GraphemeBreak.inversions_ = new goog.structs.InversionMap(
          // First array: the range data of the inversion map.
          // NOTE(review): together with the trailing `true` argument these
          // look like delta-encoded range starts - confirm against
          // goog.structs.InversionMap.
          [
            0,    10,  1,      2,   1,     18,    95,  33,    13,   1,    594,
            112,  275, 7,      263, 45,    1,     1,   1,     2,    1,    2,
            1,    1,   56,     5,   11,    11,    48,  21,    16,   1,    101,
            7,    1,   1,      6,   2,     2,     1,   4,     33,   1,    1,
            1,    30,  27,     91,  11,    58,    9,   34,    4,    1,    9,
            1,    3,   1,      5,   43,    3,     136, 31,    1,    17,   37,
            1,    1,   1,      1,   3,     8,     4,   1,     2,    1,    7,
            8,    2,   2,      21,  8,     1,     2,   17,    39,   1,    1,
            1,    2,   6,      6,   1,     9,     5,   4,     2,    2,    12,
            2,    15,  2,      1,   17,    39,    2,   3,     12,   4,    8,
            6,    17,  2,      3,   14,    1,     17,  39,    1,    1,    3,
            8,    4,   1,      20,  2,     29,    1,   2,     17,   39,   1,
            1,    2,   1,      6,   6,     9,     6,   4,     2,    2,    13,
            1,    16,  1,      18,  41,    1,     1,   1,     12,   1,    9,
            1,    41,  3,      17,  37,    4,     3,   5,     7,    8,    3,
            2,    8,   2,      30,  2,     17,    39,  1,     1,    1,    1,
            2,    1,   3,      1,   5,     1,     8,   9,     1,    3,    2,
            30,   2,   17,     38,  3,     1,     2,   5,     7,    1,    9,
            1,    10,  2,      30,  2,     22,    48,  5,     1,    2,    6,
            7,    19,  2,      13,  46,    2,     1,   1,     1,    6,    1,
            12,   8,   50,     46,  2,     1,     1,   1,     9,    11,   6,
            14,   2,   58,     2,   27,    1,     1,   1,     1,    1,    4,
            2,    49,  14,     1,   4,     1,     1,   2,     5,    48,   9,
            1,    57,  33,     12,  4,     1,     6,   1,     2,    2,    2,
            1,    16,  2,      4,   2,     2,     4,   3,     1,    3,    2,
            7,    3,   4,      13,  1,     1,     1,   2,     6,    1,    1,
            14,   1,   98,     96,  72,    88,    349, 3,     931,  15,   2,
            1,    14,  15,     2,   1,     14,    15,  2,     15,   15,   14,
            35,   17,  2,      1,   7,     8,     1,   2,     9,    1,    1,
            9,    1,   45,     3,   155,   1,     87,  31,    3,    4,    2,
            9,    1,   6,      3,   20,    19,    29,  44,    9,    3,    2,
            1,    69,  23,     2,   3,     4,     45,  6,     2,    1,    1,
            1,    8,   1,      1,   1,     2,     8,   6,     13,   128,  4,
            1,    14,  33,     1,   1,     5,     1,   1,     5,    1,    1,
            1,    7,   31,     9,   12,    2,     1,   7,     23,   1,    4,
            2,    2,   2,      2,   2,     11,    3,   2,     36,   2,    1,
            1,    2,   3,      1,   1,     3,     2,   12,    36,   8,    8,
            2,    2,   21,     3,   128,   3,     1,   13,    1,    7,    4,
            1,    4,   2,      1,   203,   64,    523, 1,     2,    2,    24,
            7,    49,  16,     96,  33,    3070,  3,   141,   1,    96,   32,
            554,  6,   105,    2,   30164, 4,     1,   10,    33,   1,    80,
            2,    272, 1,      3,   1,     4,     1,   23,    2,    2,    1,
            24,   30,  4,      4,   3,     8,     1,   1,     13,   2,    16,
            34,   16,  1,      27,  18,    24,    24,  4,     8,    2,    23,
            11,   1,   1,      12,  32,    3,     1,   5,     3,    3,    36,
            1,    2,   4,      2,   1,     3,     1,   69,    35,   6,    2,
            2,    2,   2,      12,  1,     8,     1,   1,     18,   16,   1,
            3,    6,   1,      5,   48,    1,     1,   3,     2,    2,    5,
            2,    1,   1,      32,  9,     1,     2,   2,     5,    1,    1,
            201,  14,  2,      1,   1,     9,     8,   2,     1,    2,    1,
            2,    1,   1,      1,   18,    11184, 27,  49,    1028, 1024, 6942,
            1,    737, 16,     16,  7,     216,   1,   158,   2,    89,   3,
            513,  1,   2051,   15,  40,    7,     1,   1472,  1,    1,    1,
            53,   14,  1,      57,  2,     1,     45,  3,     4,    2,    1,
            1,    2,   1,      66,  3,     36,    5,   1,     6,    2,    75,
            2,    1,   48,     3,   9,     1,     1,   1258,  1,    1,    1,
            2,    6,   1,      1,   22681, 62,    4,   25042, 1,    1,    3,
            3,    1,   5,      8,   8,     2,     7,   30,    4,    148,  3,
            8097, 26,  790017, 255
          ],
          // Second array: the property enum value for each range above
          // (values of goog.i18n.GraphemeBreak.property).
          [
            1, 13, 1, 12, 1, 0, 1, 0, 1,  0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
            0, 2,  0, 1,  0, 2, 0, 2, 0,  2, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 1,
            0, 2,  0, 2,  0, 2, 0, 2, 0,  2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 4,
            0, 5,  2, 4,  2, 0, 4, 2, 4,  6, 4, 0, 2, 5, 0, 2, 0, 5, 2, 4, 0,
            5, 2,  0, 2,  4, 2, 4, 6, 0,  2, 5, 0, 2, 0, 5, 0, 2, 4, 0, 5, 2,
            4, 2,  6, 2,  5, 0, 2, 0, 2,  4, 0, 5, 2, 0, 4, 2, 4, 6, 0, 2, 0,
            2, 4,  0, 5,  2, 0, 2, 4, 2,  4, 6, 2, 5, 0, 2, 0, 5, 0, 2, 0, 5,
            2, 4,  2, 4,  6, 0, 2, 0, 4,  0, 5, 0, 2, 4, 2, 6, 2, 5, 0, 2, 0,
            4, 0,  5, 2,  0, 4, 2, 4, 2,  4, 2, 4, 2, 6, 2, 5, 0, 2, 0, 4, 0,
            5, 0,  2, 4,  2, 4, 6, 0, 2,  0, 2, 0, 4, 0, 5, 6, 2, 4, 2, 4, 2,
            4, 0,  5, 0,  2, 0, 4, 2, 6,  0, 2, 0, 5, 0, 2, 0, 4, 2, 0, 2, 0,
            5, 0,  2, 0,  2, 0, 2, 0, 2,  0, 4, 5, 2, 4, 2, 6, 0, 2, 0, 2, 0,
            2, 0,  5, 0,  2, 4, 2, 0, 6,  4, 2, 5, 0, 5, 0, 4, 2, 5, 2, 5, 0,
            5, 0,  5, 2,  5, 2, 0, 4, 2,  0, 2, 5, 0, 2, 0, 7, 8, 9, 0, 2, 0,
            5, 2,  6, 0,  5, 2, 6, 0, 5,  2, 0, 5, 2, 5, 0, 2, 4, 2, 4, 2, 4,
            2, 6,  2, 0,  2, 0, 2, 0, 2,  0, 5, 2, 4, 2, 4, 2, 4, 2, 0, 5, 0,
            5, 0,  4, 0,  4, 0, 5, 2, 4,  0, 5, 0, 5, 4, 2, 4, 2, 6, 0, 2, 0,
            2, 4,  2, 0,  2, 4, 0, 5, 2,  4, 2, 4, 2, 4, 2, 4, 6, 5, 0, 2, 0,
            2, 4,  0, 5,  4, 2, 4, 2, 6,  4, 5, 0, 5, 0, 5, 0, 2, 4, 2, 4, 2,
            4, 2,  6, 0,  5, 4, 2, 4, 2,  0, 5, 0, 2, 0, 2, 4, 2, 0, 2, 0, 4,
            2, 0,  2, 0,  1, 2, 1, 0, 1,  0, 1, 0, 2, 0, 2, 0, 6, 0, 2, 0, 2,
            0, 2,  0, 2,  0, 2, 0, 2, 0,  2, 0, 2, 0, 6, 5, 2, 5, 4, 2, 4, 0,
            5, 0,  5, 0,  5, 0, 5, 0, 4,  0, 5, 4, 6, 0, 2, 0, 5, 0, 2, 0, 5,
            2, 4,  6, 0,  7, 2, 4, 0, 5,  0, 5, 2, 4, 2, 4, 2, 4, 6, 0, 5, 2,
            4, 2,  4, 2,  0, 2, 0, 2, 4,  0, 5, 0, 5, 0, 5, 0, 5, 2, 0, 2, 0,
            2, 0,  2, 0,  2, 0, 5, 4, 2,  4, 0, 4, 6, 0, 5, 0, 5, 0, 5, 0, 4,
            2, 4,  2, 4,  0, 4, 6, 0, 11, 8, 9, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1,
            0, 2,  0, 1,  0, 2, 0, 2, 0,  2, 6, 0, 4, 2, 4, 0, 2, 6, 0, 2, 4,
            0, 4,  2, 4,  6, 2, 0, 1, 0,  2, 0, 2, 4, 2, 6, 0, 2, 4, 0, 4, 2,
            4, 6,  0, 2,  4, 2, 4, 2, 6,  2, 0, 4, 2, 0, 2, 4, 2, 0, 4, 2, 1,
            2, 0,  2, 0,  2, 0, 2, 0, 14, 0, 1, 2
          ],
          true);
    }
    return /** @type {number} */ (
        goog.i18n.GraphemeBreak.inversions_.at(acode));
  }
};


/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * checks for both, using a boolean flag to switch between them.
 *
 * The legacy rules are applied first. In extended mode, a break reported by
 * the legacy rules is additionally suppressed after a PREPEND character and
 * before a SPACING_MARK character - unless the break is caused by a CONTROL,
 * CR or LF on either side, because those breaks take precedence over the
 * extended rules (UAX #29 rules GB4/GB5 are ordered before GB9a/GB9b).
 *
 * @param {number} a The code point value of the first character.
 * @param {number} b The code point value of the second character.
 * @param {boolean=} opt_extended If true, indicates extended grapheme cluster;
 *     If false, indicates legacy cluster.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 */
goog.i18n.GraphemeBreak.hasGraphemeBreak = function(a, b, opt_extended) {

  var prop_a = goog.i18n.GraphemeBreak.getBreakProp_(a);
  var prop_b = goog.i18n.GraphemeBreak.getBreakProp_(b);
  var prop = goog.i18n.GraphemeBreak.property;

  // No break under the legacy rules means no break under either rule set.
  if (!goog.i18n.GraphemeBreak.applyLegacyBreakRules_(prop_a, prop_b)) {
    return false;
  }
  // Legacy clusters: the legacy verdict (a break) is final.
  if (!opt_extended) {
    return true;
  }
  // A break next to a control, CR or LF can never be removed by the extended
  // rules. Previously e.g. LF followed by a spacing mark was wrongly reported
  // as belonging to one cluster in extended mode.
  if (prop_a == prop.CONTROL || prop_a == prop.CR || prop_a == prop.LF ||
      prop_b == prop.CONTROL || prop_b == prop.CR || prop_b == prop.LF) {
    return true;
  }
  // Extended-only rules: no break after PREPEND or before SPACING_MARK.
  return !(prop_a == prop.PREPEND || prop_b == prop.SPACING_MARK);
};




© 2015 - 2025 Weber Informatics LLC | Privacy Policy