All downloads are free. Search and download functionality uses the official Maven repository.

goog.string.string.js Maven / Gradle / Ivy

Go to download

The Google Closure Library is a collection of JavaScript code designed for use with the Google Closure JavaScript Compiler. This non-official distribution was prepared by the ClojureScript team at http://clojure.org/

There is a newer version: 0.0-20230227-c7c0a541
Show newest version
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview Utilities for string manipulation.
 * @author [email protected] (Erik Arvidsson)
 */


/**
 * Namespace for string utilities. Declares both the {@code goog.string}
 * namespace and the {@code goog.string.Unicode} enum that this file defines.
 */
goog.provide('goog.string');
goog.provide('goog.string.Unicode');


/**
 * @define {boolean} Enables HTML escaping of lowercase letter "e" which helps
 * with detection of double-escaping as this letter is frequently used.
 * Read by goog.string.htmlEscape and by the goog.string.ALL_RE_ pre-check
 * pattern. Defaults to false.
 */
goog.define('goog.string.DETECT_DOUBLE_ESCAPING', false);


/**
 * @define {boolean} Whether to force non-dom html unescaping. When true,
 * goog.string.unescapeEntities skips the DOM-based path and falls back to the
 * pure XML entity unescaper even if a document is available. Defaults to
 * false.
 */
goog.define('goog.string.FORCE_NON_DOM_HTML_UNESCAPING', false);


/**
 * Common Unicode string characters.
 * @enum {string}
 */
goog.string.Unicode = {
  // Non-breaking space, U+00A0.
  NBSP: '\xa0'
};


/**
 * Tests whether a string begins with a given prefix.
 * @param {string} str The string to inspect.
 * @param {string} prefix The candidate leading substring of {@code str}.
 * @return {boolean} True if {@code str} starts with {@code prefix}.
 */
goog.string.startsWith = function(str, prefix) {
  // A backwards search that starts at index 0 can only ever match at index 0,
  // so the scan never walks the rest of the string.
  var matchIndex = str.lastIndexOf(prefix, 0);
  return matchIndex === 0;
};


/**
 * Tests whether a string finishes with a given suffix.
 * @param {string} str The string to inspect.
 * @param {string} suffix The candidate trailing substring of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix}.
 */
goog.string.endsWith = function(str, suffix) {
  // Index at which the suffix would have to start.
  var offset = str.length - suffix.length;
  if (!(offset >= 0)) {
    // The suffix is longer than the string, so it cannot match.
    return false;
  }
  return str.indexOf(suffix, offset) === offset;
};


/**
 * Tests whether a string begins with a given prefix, ignoring case.
 * @param {string} str The string to inspect.
 * @param {string} prefix A string to look for at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveStartsWith = function(str, prefix) {
  // Only the leading prefix.length characters can take part in the match.
  var head = str.substr(0, prefix.length);
  return goog.string.caseInsensitiveCompare(prefix, head) === 0;
};


/**
 * Tests whether a string finishes with a given suffix, ignoring case.
 * @param {string} str The string to inspect.
 * @param {string} suffix A string to look for at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveEndsWith = function(str, suffix) {
  var suffixLength = suffix.length;
  // Only the trailing suffixLength characters can take part in the match.
  var tail = str.substr(str.length - suffixLength, suffixLength);
  return goog.string.caseInsensitiveCompare(suffix, tail) === 0;
};


/**
 * Compares two strings for equality after lower-casing both.
 * @param {string} str1 First string to check.
 * @param {string} str2 Second string to check.
 * @return {boolean} True if {@code str1} and {@code str2} are the same string,
 *     ignoring case.
 */
goog.string.caseInsensitiveEquals = function(str1, str2) {
  var folded1 = str1.toLowerCase();
  var folded2 = str2.toLowerCase();
  return folded1 === folded2;
};


/**
 * Performs simple python-style positional substitution of {@code %s}
 * placeholders. For example, subs("foo%s hot%s", "bar", "dog") yields
 * "foobar hotdog". Placeholders without a matching argument are left in place
 * as a literal {@code %s}; surplus arguments are ignored.
 * @param {string} str The string containing the pattern.
 * @param {...*} var_args The items to substitute into the pattern.
 * @return {string} A copy of {@code str} in which each occurrence of
 *     {@code %s} has been replaced by an argument from {@code var_args}.
 */
goog.string.subs = function(str, var_args) {
  var pieces = str.split('%s');
  var values = Array.prototype.slice.call(arguments, 1);

  // Values are inserted in the gaps between pieces, so at most
  // pieces.length - 1 of them can be consumed.
  var fillCount = Math.min(values.length, pieces.length - 1);

  var result = '';
  for (var i = 0; i < fillCount; i++) {
    result += pieces[i] + values[i];
  }

  // Whatever is left keeps its original '%s' separators.
  return result + pieces.slice(fillCount).join('%s');
};


/**
 * Collapses every run of whitespace (spaces, non-breaking spaces, new lines
 * and tabs) into a single space and removes leading and trailing whitespace.
 * @param {string} str Input string.
 * @return {string} A copy of {@code str} with collapsed whitespace.
 */
goog.string.collapseWhitespace = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to keep the
  // result the same in every browser.
  var collapsed = str.replace(/[\s\xa0]+/g, ' ');
  // After collapsing, at most one space remains at either end.
  return collapsed.replace(/^\s+|\s+$/g, '');
};


/**
 * Reports whether a string has no characters other than whitespace. The empty
 * string qualifies.
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty or whitespace only.
 */
goog.string.isEmptyOrWhitespace = function(str) {
  // A length == 0 fast path was measured to be slower in all browsers (about
  // the same in Opera), so the regular expression is always used.
  // IE does not treat the non-breaking space (0xa0) as part of \s, contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to keep the
  // result the same in every browser.
  var whitespaceOnly = /^[\s\xa0]*$/;
  return whitespaceOnly.test(str);
};


/**
 * Reports whether a string has zero length. Whitespace counts as content
 * here.
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty.
 */
goog.string.isEmptyString = function(str) {
  var charCount = str.length;
  return charCount === 0;
};


/**
 * Checks if a string is empty or contains only whitespaces.
 *
 * This is an alias: it refers to the very same function object as
 * goog.string.isEmptyOrWhitespace.
 *
 * TODO(user): Deprecate this when clients have been switched over to
 * goog.string.isEmptyOrWhitespace.
 *
 * @param {string} str The string to check.
 * @return {boolean} Whether {@code str} is empty or whitespace only.
 */
goog.string.isEmpty = goog.string.isEmptyOrWhitespace;


/**
 * Checks if a value is null, undefined, empty or contains only whitespaces.
 * The value is first passed through goog.string.makeSafe and the result is
 * handed to goog.string.isEmptyOrWhitespace.
 * @param {*} str The string to check.
 * @return {boolean} Whether {@code str} is null, undefined, empty, or
 *     whitespace only.
 * @deprecated Use goog.string.isEmptyOrWhitespace(goog.string.makeSafe(str))
 *     instead.
 */
goog.string.isEmptyOrWhitespaceSafe = function(str) {
  var safeStr = goog.string.makeSafe(str);
  return goog.string.isEmptyOrWhitespace(safeStr);
};


/**
 * Checks if a string is null, undefined, empty or contains only whitespaces.
 *
 * This is an alias: it refers to the very same function object as
 * goog.string.isEmptyOrWhitespaceSafe.
 *
 * TODO(user): Deprecate this when clients have been switched over to
 * goog.string.isEmptyOrWhitespaceSafe.
 *
 * @param {*} str The string to check.
 * @return {boolean} Whether {@code str} is null, undefined, empty, or
 *     whitespace only.
 */
goog.string.isEmptySafe = goog.string.isEmptyOrWhitespaceSafe;


/**
 * Reports whether a string consists solely of breaking whitespace: tab, line
 * feed, carriage return and space. The empty string qualifies.
 * @param {string} str The string to check.
 * @return {boolean} Whether the string is all breaking whitespace.
 */
goog.string.isBreakingWhitespace = function(str) {
  var nonBreakingWhitespaceChar = /[^\t\n\r ]/;
  var hasOtherChar = nonBreakingWhitespaceChar.test(str);
  return !hasOtherChar;
};


/**
 * Reports whether a string is made up only of the ASCII letters a-z and A-Z.
 * The empty string qualifies.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} consists entirely of letters.
 */
goog.string.isAlpha = function(str) {
  var nonLetter = /[^a-zA-Z]/;
  var hasNonLetter = nonLetter.test(str);
  return !hasNonLetter;
};


/**
 * Reports whether a value, once converted to a string, is made up only of the
 * ASCII digits 0-9. Signs, decimal points and exponents are not accepted; the
 * empty string qualifies.
 * @param {*} str string to check. If not a string, it will be
 *     casted to one.
 * @return {boolean} True if {@code str} is numeric.
 */
goog.string.isNumeric = function(str) {
  var nonDigit = /[^0-9]/;
  // RegExp.prototype.test performs the string conversion of its argument.
  var hasNonDigit = nonDigit.test(str);
  return !hasNonDigit;
};


/**
 * Reports whether a string is made up only of ASCII letters and digits. The
 * empty string qualifies.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} is alphanumeric.
 */
goog.string.isAlphaNumeric = function(str) {
  var nonAlphaNumeric = /[^a-zA-Z0-9]/;
  var hasOtherChar = nonAlphaNumeric.test(str);
  return !hasOtherChar;
};


/**
 * Reports whether a character is the plain space character (U+0020). Other
 * kinds of whitespace do not count.
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a space.
 */
goog.string.isSpace = function(ch) {
  var SPACE = ' ';
  // Loose equality is kept on purpose so that results stay the same for
  // callers passing values that merely coerce to a space.
  return ch == SPACE;
};


/**
 * Checks if a character is a valid unicode character: a single UTF-16 code
 * unit that is either printable ASCII (space through '~') or lies in the range
 * U+0080 through U+FFFD. Control characters below space, DEL (U+007F), U+FFFE
 * and U+FFFF are rejected.
 *
 * The single-character requirement applies to both ranges. Previously,
 * operator precedence meant the length check only guarded the ASCII range, so
 * a multi-character string such as '\u00e9abc' was reported as a valid
 * character. A supplementary character passed as a two-unit surrogate pair
 * has length 2 and is therefore rejected as well.
 *
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a valid unicode character.
 */
goog.string.isUnicodeChar = function(ch) {
  if (ch.length != 1) {
    return false;
  }
  var isPrintableAscii = ch >= ' ' && ch <= '~';
  var isBeyondAscii = ch >= '\u0080' && ch <= '\uFFFD';
  return isPrintableAscii || isBeyondAscii;
};


/**
 * Replaces each run of one or more line breaks (\r\n, \r or \n) with a single
 * space.
 * @param {string} str The string from which to strip newlines.
 * @return {string} A copy of {@code str} stripped of newlines.
 */
goog.string.stripNewlines = function(str) {
  var newlineRun = /(\r\n|\r|\n)+/g;
  return str.replace(newlineRun, ' ');
};


/**
 * Converts Windows (\r\n) and old Mac (\r) line breaks to unix style (\n).
 * @param {string} str The string in which to canonicalize newlines.
 * @return {string} A copy of {@code str} with canonicalized newlines.
 */
goog.string.canonicalizeNewlines = function(str) {
  // \r\n is listed first so that a Windows line break becomes one \n, not two.
  var anyNewline = /(\r\n|\r|\n)/g;
  return str.replace(anyNewline, '\n');
};


/**
 * Replaces every individual whitespace character, including the non-breaking
 * space, with a plain space. Runs are not collapsed, so the length of the
 * string is unchanged.
 * @param {string} str The string in which to normalize whitespace.
 * @return {string} A copy of {@code str} with all whitespace normalized.
 */
goog.string.normalizeWhitespace = function(str) {
  var singleWhitespace = /\xa0|\s/g;
  return str.replace(singleWhitespace, ' ');
};


/**
 * Replaces each run of consecutive spaces and tabs with a single space, and
 * each non-breaking space with a space. Non-breaking spaces are replaced one
 * by one rather than being collapsed.
 * @param {string} str The string in which to normalize spaces.
 * @return {string} A copy of {@code str} with all consecutive spaces and tabs
 *    replaced with a single space.
 */
goog.string.normalizeSpaces = function(str) {
  var nbspOrSpaceRun = /\xa0|[ \t]+/g;
  return str.replace(nbspOrSpaceRun, ' ');
};


/**
 * Strips breaking spaces (tab, carriage return, line feed, space) from both
 * ends of the string and collapses each inner run of them into one space.
 * The original and the result strings render the same way in HTML.
 * @param {string} str A string in which to collapse spaces.
 * @return {string} Copy of the string with normalized breaking spaces.
 */
goog.string.collapseBreakingSpaces = function(str) {
  var collapsed = str.replace(/[\t\r\n ]+/g, ' ');
  return collapsed.replace(/^[\t\r\n ]+|[\t\r\n ]+$/g, '');
};


/**
 * Trims white spaces to the left and right of a string. The implementation is
 * picked once, when this file is evaluated: the native String.prototype.trim
 * when goog.TRUSTED_SITE is set and the method exists, a regular expression
 * otherwise.
 * @param {string} str The string to trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trim = (function() {
  if (goog.TRUSTED_SITE && String.prototype.trim) {
    return function(str) {
      return str.trim();
    };
  }
  return function(str) {
    // IE does not treat the non-breaking space (0xa0) as part of \s, contrary
    // to section 7.2 of the ECMAScript spec, so it is listed explicitly to
    // keep the result the same in every browser.
    return str.replace(/^[\s\xa0]+|[\s\xa0]+$/g, '');
  };
})();


/**
 * Removes whitespace from the start of a string.
 * @param {string} str The string to left trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimLeft = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to keep the
  // result the same in every browser.
  var leadingWhitespace = /^[\s\xa0]+/;
  return str.replace(leadingWhitespace, '');
};


/**
 * Removes whitespace from the end of a string.
 * @param {string} str The string to right trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimRight = function(str) {
  // IE does not treat the non-breaking space (0xa0) as part of \s, contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to keep the
  // result the same in every browser.
  var trailingWhitespace = /[\s\xa0]+$/;
  return str.replace(trailingWhitespace, '');
};


/**
 * A string comparator that ignores case. Both arguments are converted with
 * String() and lower-cased before being compared.
 * -1 = str1 less than str2
 *  0 = str1 equals str2
 *  1 = str1 greater than str2
 *
 * @param {string} str1 The string to compare.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} The comparator result, as described above.
 */
goog.string.caseInsensitiveCompare = function(str1, str2) {
  var lower1 = String(str1).toLowerCase();
  var lower2 = String(str2).toLowerCase();

  if (lower1 === lower2) {
    return 0;
  }
  return lower1 < lower2 ? -1 : 1;
};


/**
 * Compares two strings, treating their numeric substrings as numbers.
 *
 * Both strings are lower-cased and split into tokens with
 * {@code tokenizerRegExp}; tokens are then compared pairwise. An empty (or
 * otherwise falsy) string sorts before any non-empty one.
 *
 * @param {string} str1 First string.
 * @param {string} str2 Second string.
 * @param {!RegExp} tokenizerRegExp Splits a string into substrings of
 *     non-negative integers, non-numeric characters and optionally fractional
 *     numbers starting with a decimal point.
 * @return {number} Negative if str1 < str2, 0 is str1 == str2, positive if
 *     str1 > str2.
 * @private
 */
goog.string.numberAwareCompare_ = function(str1, str2, tokenizerRegExp) {
  if (str1 == str2) {
    return 0;
  }
  if (!str1) {
    return -1;
  }
  if (!str2) {
    return 1;
  }

  // Tokenizing the whole string up front with match() turns out to be faster
  // for most inputs than RegExp.exec or walking the characters one by one.
  var leftTokens = str1.toLowerCase().match(tokenizerRegExp);
  var rightTokens = str2.toLowerCase().match(tokenizerRegExp);

  var sharedCount = Math.min(leftTokens.length, rightTokens.length);

  for (var idx = 0; idx < sharedCount; idx++) {
    var left = leftTokens[idx];
    var right = rightTokens[idx];

    if (left == right) {
      continue;
    }

    // The first differing pair of tokens decides the result. A numeric
    // comparison is only needed when both tokens are integers; decimal
    // fractions are sorted as strings (e.g., '.09' < '.1').
    var leftNum = parseInt(left, 10);
    if (!isNaN(leftNum)) {
      var rightNum = parseInt(right, 10);
      var delta = leftNum - rightNum;
      if (!isNaN(rightNum) && delta) {
        return delta;
      }
    }
    return left < right ? -1 : 1;
  }

  // One token list is a prefix of the other: the shorter one sorts first.
  var lengthDelta = leftTokens.length - rightTokens.length;
  if (lengthDelta != 0) {
    return lengthDelta;
  }

  // The strings differ only by case (exact equality was handled at the top).
  // Fall back to the default string comparison to keep the sort stable.
  return str1 < str2 ? -1 : 1;
};


/**
 * String comparison function that handles non-negative integer numbers in a
 * way humans might expect. With this function 'File 2.jpg' sorts before
 * 'File 10.jpg', and 'Version 1.9' before 'Version 1.10'. The comparison is
 * mostly case-insensitive, though strings that are identical except for case
 * are sorted with the upper-case strings before lower-case.
 *
 * This comparison function is up to 50x slower than either the default or the
 * case-insensitive compare. It should not be used in time-critical code, but
 * should be fast enough to sort several hundred short strings (like filenames)
 * with a reasonable delay.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.intAwareCompare = function(str1, str2) {
  // Tokens are either runs of digits or runs of non-digits.
  var digitsOrNonDigits = /\d+|\D+/g;
  return goog.string.numberAwareCompare_(str1, str2, digitsOrNonDigits);
};


/**
 * String comparison function that handles non-negative integer and fractional
 * numbers in a way humans might expect. With this function 'File 2.jpg' sorts
 * before 'File 10.jpg', and '3.14' before '3.2'. Equivalent to
 * {@link goog.string.intAwareCompare} apart from the way it interprets dots.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.floatAwareCompare = function(str1, str2) {
  // Tokens are runs of digits, a dot followed by digits (a fractional part),
  // or runs of non-digits.
  var numberOrText = /\d+|\.\d+|\D+/g;
  return goog.string.numberAwareCompare_(str1, str2, numberOrText);
};


/**
 * Alias for {@link goog.string.floatAwareCompare}; both names refer to the
 * same function object.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.numerateCompare = goog.string.floatAwareCompare;


/**
 * URL-encodes a value after converting it to a string.
 * @param {*} str The string to url-encode.
 * @return {string} An encoded copy of {@code str} that is safe for urls.
 *     Note that '#', ':', and other characters used to delimit portions
 *     of URLs *will* be encoded.
 */
goog.string.urlEncode = function(str) {
  var text = String(str);
  return encodeURIComponent(text);
};


/**
 * URL-decodes a string. '+' characters are turned into spaces first, because
 * decodeURIComponent does not do that conversion itself.
 * @param {string} str The string to url decode.
 * @return {string} The decoded {@code str}.
 */
goog.string.urlDecode = function(str) {
  var withSpaces = str.replace(/\+/g, ' ');
  return decodeURIComponent(withSpaces);
};


/**
 * Converts \n to 
s or
s. * @param {string} str The string in which to convert newlines. * @param {boolean=} opt_xml Whether to use XML compatible tags. * @return {string} A copy of {@code str} with converted newlines. */ goog.string.newLineToBr = function(str, opt_xml) { return str.replace(/(\r\n|\r|\n)/g, opt_xml ? '
' : '
'); }; /** * Escapes double quote '"' and single quote '\'' characters in addition to * '&', '<', and '>' so that a string can be included in an HTML tag attribute * value within double or single quotes. * * It should be noted that > doesn't need to be escaped for the HTML or XML to * be valid, but it has been decided to escape it for consistency with other * implementations. * * With goog.string.DETECT_DOUBLE_ESCAPING, this function escapes also the * lowercase letter "e". * * NOTE(user): * HtmlEscape is often called during the generation of large blocks of HTML. * Using statics for the regular expressions and strings is an optimization * that can more than half the amount of time IE spends in this function for * large apps, since strings and regexes both contribute to GC allocations. * * Testing for the presence of a character before escaping increases the number * of function calls, but actually provides a speed increase for the average * case -- since the average case often doesn't require the escaping of all 4 * characters and indexOf() is much cheaper than replace(). * The worst case does suffer slightly from the additional calls, therefore the * opt_isLikelyToContainHtmlChars option has been included for situations * where all 4 HTML entities are very likely to be present and need escaping. * * Some benchmarks (times tended to fluctuate +-0.05ms): * FireFox IE6 * (no chars / average (mix of cases) / all 4 chars) * no checks 0.13 / 0.22 / 0.22 0.23 / 0.53 / 0.80 * indexOf 0.08 / 0.17 / 0.26 0.22 / 0.54 / 0.84 * indexOf + re test 0.07 / 0.17 / 0.28 0.19 / 0.50 / 0.85 * * An additional advantage of checking if replace actually needs to be called * is a reduction in the number of object allocations, so as the size of the * application grows the difference between the various methods would increase. * * @param {string} str string to be escaped. 
* @param {boolean=} opt_isLikelyToContainHtmlChars Don't perform a check to see * if the character needs replacing - use this option if you expect each of * the characters to appear often. Leave false if you expect few html * characters to occur in your strings, such as if you are escaping HTML. * @return {string} An escaped copy of {@code str}. */ goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) { if (opt_isLikelyToContainHtmlChars) { str = str.replace(goog.string.AMP_RE_, '&') .replace(goog.string.LT_RE_, '<') .replace(goog.string.GT_RE_, '>') .replace(goog.string.QUOT_RE_, '"') .replace(goog.string.SINGLE_QUOTE_RE_, ''') .replace(goog.string.NULL_RE_, '�'); if (goog.string.DETECT_DOUBLE_ESCAPING) { str = str.replace(goog.string.E_RE_, 'e'); } return str; } else { // quick test helps in the case when there are no chars to replace, in // worst case this makes barely a difference to the time taken if (!goog.string.ALL_RE_.test(str)) return str; // str.indexOf is faster than regex.test in this case if (str.indexOf('&') != -1) { str = str.replace(goog.string.AMP_RE_, '&'); } if (str.indexOf('<') != -1) { str = str.replace(goog.string.LT_RE_, '<'); } if (str.indexOf('>') != -1) { str = str.replace(goog.string.GT_RE_, '>'); } if (str.indexOf('"') != -1) { str = str.replace(goog.string.QUOT_RE_, '"'); } if (str.indexOf('\'') != -1) { str = str.replace(goog.string.SINGLE_QUOTE_RE_, '''); } if (str.indexOf('\x00') != -1) { str = str.replace(goog.string.NULL_RE_, '�'); } if (goog.string.DETECT_DOUBLE_ESCAPING && str.indexOf('e') != -1) { str = str.replace(goog.string.E_RE_, 'e'); } return str; } }; /** * Regular expression that matches an ampersand, for use in escaping. * @const {!RegExp} * @private */ goog.string.AMP_RE_ = /&/g; /** * Regular expression that matches a less than sign, for use in escaping. * @const {!RegExp} * @private */ goog.string.LT_RE_ = //g; /** * Regular expression that matches a double quote, for use in escaping. 
* @const {!RegExp} * @private */ goog.string.QUOT_RE_ = /"/g; /** * Regular expression that matches a single quote, for use in escaping. * @const {!RegExp} * @private */ goog.string.SINGLE_QUOTE_RE_ = /'/g; /** * Regular expression that matches null character, for use in escaping. * @const {!RegExp} * @private */ goog.string.NULL_RE_ = /\x00/g; /** * Regular expression that matches a lowercase letter "e", for use in escaping. * @const {!RegExp} * @private */ goog.string.E_RE_ = /e/g; /** * Regular expression that matches any character that needs to be escaped. * @const {!RegExp} * @private */ goog.string.ALL_RE_ = (goog.string.DETECT_DOUBLE_ESCAPING ? /[\x00&<>"'e]/ : /[\x00&<>"']/); /** * Unescapes an HTML string. * * @param {string} str The string to unescape. * @return {string} An unescaped copy of {@code str}. */ goog.string.unescapeEntities = function(str) { if (goog.string.contains(str, '&')) { // We are careful not to use a DOM if we do not have one or we explicitly // requested non-DOM html unescaping. if (!goog.string.FORCE_NON_DOM_HTML_UNESCAPING && 'document' in goog.global) { return goog.string.unescapeEntitiesUsingDom_(str); } else { // Fall back on pure XML entities return goog.string.unescapePureXmlEntities_(str); } } return str; }; /** * Unescapes a HTML string using the provided document. * * @param {string} str The string to unescape. * @param {!Document} document A document to use in escaping the string. * @return {string} An unescaped copy of {@code str}. */ goog.string.unescapeEntitiesWithDocument = function(str, document) { if (goog.string.contains(str, '&')) { return goog.string.unescapeEntitiesUsingDom_(str, document); } return str; }; /** * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric * entities. This function is XSS-safe and whitespace-preserving. * @private * @param {string} str The string to unescape. * @param {Document=} opt_document An optional document to use for creating * elements. 
If this is not specified then the default window.document * will be used. * @return {string} The unescaped {@code str} string. */ goog.string.unescapeEntitiesUsingDom_ = function(str, opt_document) { /** @type {!Object} */ var seen = {'&': '&', '<': '<', '>': '>', '"': '"'}; var div; if (opt_document) { div = opt_document.createElement('div'); } else { div = goog.global.document.createElement('div'); } // Match as many valid entity characters as possible. If the actual entity // happens to be shorter, it will still work as innerHTML will return the // trailing characters unchanged. Since the entity characters do not include // open angle bracket, there is no chance of XSS from the innerHTML use. // Since no whitespace is passed to innerHTML, whitespace is preserved. return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) { // Check for cached entity. var value = seen[s]; if (value) { return value; } // Check for numeric entity. if (entity.charAt(0) == '#') { // Prefix with 0 so that hex entities (e.g. ) parse as hex numbers. var n = Number('0' + entity.substr(1)); if (!isNaN(n)) { value = String.fromCharCode(n); } } // Fall back to innerHTML otherwise. if (!value) { // Append a non-entity character to avoid a bug in Webkit that parses // an invalid entity at the end of innerHTML text as the empty string. div.innerHTML = s + ' '; // Then remove the trailing character from the result. value = div.firstChild.nodeValue.slice(0, -1); } // Cache and return. return seen[s] = value; }); }; /** * Unescapes XML entities. * @private * @param {string} str The string to unescape. * @return {string} An unescaped copy of {@code str}. */ goog.string.unescapePureXmlEntities_ = function(str) { return str.replace(/&([^;]+);/g, function(s, entity) { switch (entity) { case 'amp': return '&'; case 'lt': return '<'; case 'gt': return '>'; case 'quot': return '"'; default: if (entity.charAt(0) == '#') { // Prefix with 0 so that hex entities (e.g. ) parse as hex. 
var n = Number('0' + entity.substr(1)); if (!isNaN(n)) { return String.fromCharCode(n); } } // For invalid entities we just return the entity return s; } }); }; /** * Regular expression that matches an HTML entity. * See also HTML5: Tokenization / Tokenizing character references. * @private * @type {!RegExp} */ goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g; /** * Do escaping of whitespace to preserve spatial formatting. We use character * entity #160 to make it safer for xml. * @param {string} str The string in which to escape whitespace. * @param {boolean=} opt_xml Whether to use XML compatible tags. * @return {string} An escaped copy of {@code str}. */ goog.string.whitespaceEscape = function(str, opt_xml) { // This doesn't use goog.string.preserveSpaces for backwards compatibility. return goog.string.newLineToBr(str.replace(/ /g, '  '), opt_xml); }; /** * Preserve spaces that would be otherwise collapsed in HTML by replacing them * with non-breaking space Unicode characters. * @param {string} str The string in which to preserve whitespace. * @return {string} A copy of {@code str} with preserved whitespace. */ goog.string.preserveSpaces = function(str) { return str.replace(/(^|[\n ]) /g, '$1' + goog.string.Unicode.NBSP); }; /** * Strip quote characters around a string. The second argument is a string of * characters to treat as quotes. This can be a single character or a string of * multiple character and in that case each of those are treated as possible * quote characters. For example: * *
 * goog.string.stripQuotes('"abc"', '"`') --> 'abc'
 * goog.string.stripQuotes('`abc`', '"`') --> 'abc'
 * 
* * @param {string} str The string to strip. * @param {string} quoteChars The quote characters to strip. * @return {string} A copy of {@code str} without the quotes. */ goog.string.stripQuotes = function(str, quoteChars) { var length = quoteChars.length; for (var i = 0; i < length; i++) { var quoteChar = length == 1 ? quoteChars : quoteChars.charAt(i); if (str.charAt(0) == quoteChar && str.charAt(str.length - 1) == quoteChar) { return str.substring(1, str.length - 1); } } return str; }; /** * Truncates a string to a certain length and adds '...' if necessary. The * length also accounts for the ellipsis, so a maximum length of 10 and a string * 'Hello World!' produces 'Hello W...'. * @param {string} str The string to truncate. * @param {number} chars Max number of characters. * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped * characters from being cut off in the middle. * @return {string} The truncated {@code str} string. */ goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) { if (opt_protectEscapedCharacters) { str = goog.string.unescapeEntities(str); } if (str.length > chars) { str = str.substring(0, chars - 3) + '...'; } if (opt_protectEscapedCharacters) { str = goog.string.htmlEscape(str); } return str; }; /** * Truncate a string in the middle, adding "..." if necessary, * and favoring the beginning of the string. * @param {string} str The string to truncate the middle of. * @param {number} chars Max number of characters. * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped * characters from being cutoff in the middle. * @param {number=} opt_trailingChars Optional number of trailing characters to * leave at the end of the string, instead of truncating as close to the * middle as possible. * @return {string} A truncated copy of {@code str}. 
*/ goog.string.truncateMiddle = function( str, chars, opt_protectEscapedCharacters, opt_trailingChars) { if (opt_protectEscapedCharacters) { str = goog.string.unescapeEntities(str); } if (opt_trailingChars && str.length > chars) { if (opt_trailingChars > chars) { opt_trailingChars = chars; } var endPoint = str.length - opt_trailingChars; var startPoint = chars - opt_trailingChars; str = str.substring(0, startPoint) + '...' + str.substring(endPoint); } else if (str.length > chars) { // Favor the beginning of the string: var half = Math.floor(chars / 2); var endPos = str.length - half; half += chars % 2; str = str.substring(0, half) + '...' + str.substring(endPos); } if (opt_protectEscapedCharacters) { str = goog.string.htmlEscape(str); } return str; }; /** * Special chars that need to be escaped for goog.string.quote. * @private {!Object} */ goog.string.specialEscapeChars_ = { '\0': '\\0', '\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', '\x0B': '\\x0B', // '\v' is not supported in JScript '"': '\\"', '\\': '\\\\', // To support the use case of embedding quoted strings inside of script // tags, we have to make sure HTML comments and opening/closing script tags do // not appear in the resulting string. The specific strings that must be // escaped are documented at: // http://www.w3.org/TR/html51/semantics.html#restrictions-for-contents-of-script-elements '<': '\x3c' }; /** * Character mappings used internally for goog.string.escapeChar. * @private {!Object} */ goog.string.jsEscapeCache_ = { '\'': '\\\'' }; /** * Encloses a string in double quotes and escapes characters so that the * string is a valid JS string. The resulting string is safe to embed in * `