All downloads are free. Search and download functionality uses the official Maven repository.

com.thaiopensource.util.UriEncoder Maven / Gradle / Ivy

package com.thaiopensource.util;

/**
 * Selectively percent-encodes characters in a URI.
 *
 * <p>Which characters are encoded is controlled by a bit mask built from the
 * flag constants below.  A selected character is replaced by the
 * percent-encoded form of the bytes that {@code Utf8.encode} returns for its
 * code point; every other character is copied through unchanged.  The
 * {@code '%'} character is never selected, so escape sequences that are
 * already present in the input are left as they are.
 */
public class UriEncoder {
  /**
   * Flag to include U+0000 - U+001F.
   */
  private static final int C0_CONTROL = 0x01;
  /**
   * Flag to include U+0020
   */
  private static final int SPACE = 0x02;
  /**
   * Flag to include '<', '>', '"'
   */
  private static final int DELIM = 0x04;
  /**
   * Flag to include '{', '}', '|', '\\', '^', U+0060 (grave accent)
   */
  private static final int UNWISE = 0x08;
  /**
   * Flag to include U+007F
   */
  private static final int DELETE = 0x10;
  /**
   * Flag to include U+0080 - U+009F
   */
  private static final int C1_CONTROL = 0x20;
  /**
   * Flag to include any non-ASCII character with category Zs, Zl, and Zp
   */
  private static final int NON_ASCII_SEPARATOR = 0x40;
  /**
   * Flag to include any other character with code-point >= U+0080
   */
  private static final int OTHER_NON_ASCII = 0x80;

  /** ASCII control characters: U+0000 - U+001F and U+007F. */
  private static final int ASCII_CONTROL = C0_CONTROL | DELETE;
  /** All control characters: the ASCII ones plus U+0080 - U+009F. */
  private static final int CONTROL = ASCII_CONTROL | C1_CONTROL;
  /** U+0020 plus the non-ASCII separators. */
  private static final int SEPARATOR = NON_ASCII_SEPARATOR | SPACE;
  /** The ASCII punctuation covered by {@link #DELIM} and {@link #UNWISE}. */
  private static final int ASCII_GRAPHIC_FORBIDDEN = DELIM | UNWISE;
  /** {@link #ASCII_GRAPHIC_FORBIDDEN} plus U+0020. */
  private static final int ASCII_PRINTABLE_FORBIDDEN = ASCII_GRAPHIC_FORBIDDEN | SPACE;
  /** Every ASCII character that any flag can select. */
  private static final int ASCII_FORBIDDEN = ASCII_CONTROL | ASCII_PRINTABLE_FORBIDDEN;
  /** Every character with code point >= U+0080. */
  private static final int NON_ASCII = C1_CONTROL | NON_ASCII_SEPARATOR | OTHER_NON_ASCII;
  /** Every flag except {@link #OTHER_NON_ASCII}; the mask used by {@link #encode(String)}. */
  private static final int JAVA_URI_FORBIDDEN = CONTROL | SEPARATOR | ASCII_PRINTABLE_FORBIDDEN;
  /** Every flag; the mask used by {@link #encodeAsAscii(String)}. */
  private static final int URI_FORBIDDEN = ASCII_FORBIDDEN | NON_ASCII;

  /** Upper-case hexadecimal digits, indexed by nibble value. */
  private static final String HEX_DIGITS = "0123456789ABCDEF";

  /**
   * Percent-encodes control characters, separators (including U+0020) and the
   * ASCII characters '<', '>', '"', '{', '}', '|', '\\', '^' and '`'.  Other
   * non-ASCII characters are left unencoded.
   *
   * @param s the string to encode; must not be null
   * @return the encoded string, or {@code s} itself when no character needed encoding
   * @throws NullPointerException if {@code s} is null
   */
  public static String encode(String s) {
    return encode(s, JAVA_URI_FORBIDDEN);
  }

  /**
   * Percent-encodes everything {@link #encode(String)} does and, in addition,
   * every remaining character with code point >= U+0080.
   *
   * @param s the string to encode; must not be null
   * @return the encoded string, or {@code s} itself when no character needed encoding
   * @throws NullPointerException if {@code s} is null
   */
  public static String encodeAsAscii(String s) {
    return encode(s, URI_FORBIDDEN);
  }

  /**
   * Percent-encodes the characters of {@code s} selected by {@code flags}.
   *
   * @param s the string to encode
   * @param flags bitwise OR of the flag constants of this class
   * @return the encoded string, or {@code s} itself when no character was selected
   */
  private static String encode(String s, int flags) {
    // Allocated lazily so that the nothing-to-encode case returns s itself.
    StringBuilder encoded = null;
    final int len = s.length();
    for (int i = 0; i < len; i++) {
      final char c = s.charAt(i);
      if (!mustEncode(c, flags)) {
        if (encoded != null)
          encoded.append(c);
        continue;
      }
      if (encoded == null) {
        // First character that needs encoding: carry over the untouched prefix.
        encoded = new StringBuilder(len + 16);
        encoded.append(s, 0, i);
      }
      int codePoint = c;
      if (Utf16.isSurrogate1(c) && i + 1 < len) {
        final char next = s.charAt(i + 1);
        if (Utf16.isSurrogate2(next)) {
          // A surrogate pair is encoded as one code point; skip its second half.
          codePoint = Utf16.scalarValue(c, next);
          i++;
        }
      }
      // An unpaired surrogate reaches this point with codePoint == c.
      encoded.append(percentEncode(Utf8.encode(codePoint)));
    }
    return encoded != null ? encoded.toString() : s;
  }

  /**
   * Decides whether a single UTF-16 code unit is selected by {@code flags}.
   *
   * @param c the code unit to classify
   * @param flags bitwise OR of the flag constants of this class
   * @return true if {@code c} belongs to a class of characters enabled in {@code flags}
   */
  private static boolean mustEncode(char c, int flags) {
    switch (c) {
    case '<':
    case '>':
    case '"':
      return (flags & DELIM) != 0;
    case '{':
    case '}':
    case '|':
    case '\\':
    case '^':
    case '`':
      return (flags & UNWISE) != 0;
    case 0x20:
      return (flags & SPACE) != 0;
    case 0x7F:
      return (flags & DELETE) != 0;
    default:
      break;
    }
    if (c < 0x20)
      return (flags & C0_CONTROL) != 0;
    if (c < 0x80)
      return false;
    final int nonAsciiFlags = flags & NON_ASCII;
    // When all or none of the non-ASCII flags are set, the category lookups are unnecessary.
    if (nonAsciiFlags == NON_ASCII)
      return true;
    if (nonAsciiFlags == 0)
      return false;
    if (Character.isISOControl(c))
      return (flags & C1_CONTROL) != 0;
    if (Character.isSpaceChar(c))
      return (flags & NON_ASCII_SEPARATOR) != 0;
    return (flags & OTHER_NON_ASCII) != 0;
  }

  /**
   * Converts each byte to a three-character {@code %XX} escape using
   * upper-case hexadecimal digits.
   *
   * @param bytes the bytes to escape
   * @return a new array of length {@code bytes.length * 3} holding the escapes in order
   */
  static char[] percentEncode(byte[] bytes) {
    char[] buf = new char[bytes.length * 3];
    int j = 0;
    for (int i = 0; i < bytes.length; i++) {
      // The byte is sign-extended to int; the 0xF masks keep only the wanted nibble.
      int b = bytes[i];
      buf[j++] = '%';
      buf[j++] = HEX_DIGITS.charAt((b >> 4) & 0xF);
      buf[j++] = HEX_DIGITS.charAt(b & 0xF);
    }
    return buf;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy