com.squarespace.less.core.EncodeUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of less-compiler Show documentation
Squarespace LESS compiler
The newest version!
/**
 * Copyright (c) 2014 SQUARESPACE, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.squarespace.less.core;

import static com.squarespace.less.core.CharClass.CLASSIFIER;
import static com.squarespace.less.core.CharClass.DIGIT;
import static com.squarespace.less.core.CharClass.ENCODE_URI;
import static com.squarespace.less.core.CharClass.ENCODE_URI_COMPONENT;
import static com.squarespace.less.core.CharClass.LOWERCASE;
import static com.squarespace.less.core.CharClass.UPPERCASE;


/**
 * Implements the encodeURI and encodeURIComponent functions, which are available
 * as built-ins for JavaScript, but which no efficient version could be found for
 * Java.
 *
 * Straightforward implementation following the V8 engine, with one exception:
 * this avoids raising an error for invalid Unicode character sequences, choosing
 * instead to ignore and skip over them.
 */
public class EncodeUtils {

  /**
   * Defines the character classes that are transformed by the
   * {@link #encodeURI(String)} method.
   */
  private static final int ENCODE_URI_CLASS = ENCODE_URI | LOWERCASE | UPPERCASE | DIGIT;

  /**
   * Defines the character classes that are transformed by the
   * {@link #encodeURIComponent(String)} method.
   */
  private static final int ENCODE_URI_COMPONENT_CLASS = ENCODE_URI_COMPONENT | LOWERCASE | UPPERCASE | DIGIT;

  private EncodeUtils() {
  }

  /**
   * Interface for testing a character's membership in a class.
   */
  private interface CharTest {
    boolean member(char ch);
  }

  /**
   * Tests character membership for {@link #encodeURI(String)}
   */
  private static final CharTest ENCODE_URI_TEST = new CharTest() {
    @Override
    public boolean member(char ch) {
      return CLASSIFIER.isMember(ch, ENCODE_URI_CLASS);
    }
  };

  /**
   * Tests character membership for {@link #encodeURIComponent(String)}
   */
  private static final CharTest ENCODE_URI_COMPONENT_TEST = new CharTest() {
    @Override
    public boolean member(char ch) {
      return CLASSIFIER.isMember(ch, ENCODE_URI_COMPONENT_CLASS);
    }
  };

  /**
   * Tests character membership for {@link #escape(String)}
   */
  private static final CharTest ESCAPE_TEST = new CharTest() {
    @Override
    public boolean member(char ch) {
      return CLASSIFIER.isMember(ch, ENCODE_URI_CLASS) && !CLASSIFIER.isMember(ch, CharClass.ESCAPE);
    }
  };

  /**
   * Implementation of JavaScript's encodeURI function.
   */
  public static String encodeURI(String uri) {
    return encodeChars(uri, ENCODE_URI_TEST);
  }

  /**
   * Implementation of JavaScript's encodeURIComponent function.
   */
  public static String encodeURIComponent(String uri) {
    return encodeChars(uri, ENCODE_URI_COMPONENT_TEST);
  }

  /**
   * Extends encodeURI with further character escapes.
   */
  public static String escape(String uri) {
    return encodeChars(uri, ESCAPE_TEST);
  }

  /**
   * Emits UTF-8 hex escape sequences for all characters which are not members of
   * the given character class. It ignores bad Unicode sequences.
   */
  private static String encodeChars(String uri, CharTest charTest) {
    StringBuilder buf = new StringBuilder();
    int size = uri.length();
    int i = 0;
    while (i < size) {
      char ch = uri.charAt(i);
      if (charTest.member(ch)) {
        buf.append(ch);
        i++;
        continue;

      } else if (ch <= 0x7F) {
        hexoctet(buf, ch);
        i++;
        continue;

      } else if (ch >= 0xDC00 && ch <= 0xDFFF) {
        // skip
        i++;
        continue;

      } else if (ch < 0xD800 || ch > 0xDBFF) {
        // Unicode single char
        // Emit UTF-8 escape sequence
        int x = (ch >> 12) & 0xF;
        int y = (ch >> 6) & 0x3F;
        int z = ch & 0x3F;
        if (ch <= 0x07FF) {
          hexoctet(buf, y + 192);
          hexoctet(buf, z + 128);
        } else {
          hexoctet(buf, x + 224);
          hexoctet(buf, y + 128);
          hexoctet(buf, z + 128);
        }
        i++;
        continue;
      }

      // Unicode surrogate pair
      i++;
      if (i == size) {
        break;
      }
      char ch2 = uri.charAt(i);
      if (ch2 < 0xDC00 || ch2 > 0xDFFF) {
        // skip
        continue;
      }

      // Emit UTF-8 escape sequence
      int u = ((ch >> 6) & 0xF) + 1;
      int w = (ch >> 2) & 0xF;
      int x = ch & 3;
      int y = (ch2 >> 6) & 0xF;
      int z = ch2 & 0x3F;
      hexoctet(buf, (u >> 2) + 240);
      hexoctet(buf, (((u & 3) << 4) | w) + 128);
      hexoctet(buf, ((x << 4) | y) + 128);
      hexoctet(buf, z + 128);
      i++;
    }
    return buf.toString();
  }

  /**
   * Appends a hexadecimal encoding of the character code.
   */
  private static void hexoctet(StringBuilder buf, int octet) {
    buf.append('%').append(Chars.hexchar(octet >> 4)).append(Chars.hexchar(octet & 0xF));
  }

}