All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.xfa.formcalc.BuiltinEncode Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * ADOBE CONFIDENTIAL
 *
 * Copyright 2007 Adobe Systems Incorporated All Rights Reserved.
 *
 * NOTICE: All information contained herein is, and remains the property of
 * Adobe Systems Incorporated and its suppliers, if any. The intellectual and
 * technical concepts contained herein are proprietary to Adobe Systems
 * Incorporated and its suppliers and may be covered by U.S. and Foreign
 * Patents, patents in process, and are protected by trade secret or copyright
 * law. Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained from
 * Adobe Systems Incorporated.
 */
package com.adobe.xfa.formcalc;

import java.io.UnsupportedEncodingException;

import com.adobe.xfa.formcalc.CalcParser.LegacyVersion;


/**
 * This class defines static methods to implement
 * the FormCalc encode and decode functions.
 *
 * @author Mike P. Tardif
 *
 * @exclude from published api.
 */
final class BuiltinEncode {

	/*
	 *  Disallow instances of this class.
	 *  The class only exposes static helpers, so the sole constructor
	 *  is private and does nothing.
	 */
	private BuiltinEncode() {
	}

	/*
	 * Hexadecimal digit alphabet.  Offsets 0-15 hold the lower-case digits,
	 * which is what an encoder gets from hexDigits.charAt(nibble).  The
	 * digits are repeated in upper case at offsets 16-31 so that
	 * (hexDigits.indexOf(c) & 0xF) yields the numeric value of a hex digit
	 * in either case, while indexOf returns -1 for any other character.
	 */
	private static final String hexDigits = "0123456789abcdef0123456789ABCDEF";

	/*
	 * Decimal digit alphabet: decDigits.indexOf(c) is the numeric value of
	 * the digit c, or -1 when c is not a decimal digit.
	 */
	private static final String decDigits = "0123456789";

    /*
     * URL-encoding safety table for the characters 0x20 through 0x7F,
     * indexed by (character - 0x20).  A 1 marks a character that urlEncode
     * copies through unchanged; a 0 marks one that is percent-encoded.
     * Each row covers sixteen characters; the trailing comment on a row
     * lists them in order.  Note that '+' is marked 0 (it is encoded) and
     * ',' is marked 1 (it is left as is).
     */
    private static final byte safe[] = {
        /*     0 1 2 3 4 5 6 7 8 9 A B C D E F */
        /*20*/ 0,1,0,0,1,0,0,1,1,1,1,0,1,1,1,0, /*  !"#$%&'()*+,-./ */
        /*30*/ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
        /*40*/ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
        /*50*/ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
        /*60*/ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
        /*70*/ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0  /* pqrstuvwxyz{|}~? */
    };

	/*
	 *  urlEncode -- URL encode a string.
	 *
	 * RFC 1738 allows only alphanumerics, the special characters
	 * $-_.+!*'(), and the reserved characters ;/?:@&= (when used for
	 * their reserved purposes) to appear unencoded in an URL.  Which
	 * characters are actually copied through is decided by the safe[]
	 * table; everything else is written as %hh with lower-case hex.
	 *
	 * Watson 2321416: the current W3C recommendation is to URL-encode
	 * the UTF-8 representation of Unicode characters, so unless legacy
	 * scripting is requested the UTF-8 bytes of src are encoded.  In
	 * legacy mode the UTF-16 units of src are encoded instead, and only
	 * the low eight bits of each unit appear in the escape.
	 *
	 * Returns src itself when nothing needs escaping.
	 */
	private static String urlEncode(String src, boolean bLegacyScripting) {
		byte[] utf8 = new byte[0];
		if (! bLegacyScripting) {
			try {
				utf8 = src.getBytes("UTF-8");
			} catch (UnsupportedEncodingException e) {
				assert false; // not possible - UTF-8 is always supported
			}
		}
		final int count = bLegacyScripting ? src.length() : utf8.length;
		//
		// First pass: count the units that must be escaped.
		//
		int escapes = 0;
		for (int k = 0; k < count; k++) {
			// bytes are signed, so UTF-8 lead/continuation bytes are negative here.
			int unit = bLegacyScripting ? src.charAt(k) : utf8[k];
			boolean inTable = unit >= 0x20 && unit <= 0x7F;
			if (! inTable || safe[unit - 32] == 0)
				escapes++;
		}
		if (escapes == 0)
			return src;
		//
		// Second pass: copy safe units, escape the rest.
		//
		StringBuilder out = new StringBuilder(src.length() + 2 * escapes);
		for (int k = 0; k < count; k++) {
			int unit = bLegacyScripting ? src.charAt(k) : utf8[k];
			if (unit >= 0x20 && unit <= 0x7F && safe[unit - 32] != 0) {
				out.append((char) unit);
				continue;
			}
			out.append('%');
			out.append(hexDigits.charAt((unit >> 4) & 0xF));
			out.append(hexDigits.charAt(unit & 0xF));
		}
		return out.toString();
	}


	/*
	 * htmlEncode -- HTML-encode a string
	 *
	 * Note:  Character entity references in HTML 3.2 have the format &#nnn;
	 * where n is any decimal digit.  Note that the number must be zero
	 * padded to 3 digit value, which effectively limits entities to 8-bit
	 * characters in the 0-255 range.  But then, HTML 3.2 is only meant to
	 * handle 8-bit characters.
	 * In HTML 4.0, character entity references are as per XML 1.0.
	 */
	private static String htmlEncode(String src) {
		int needs_encoding = 0;
		for (int i = 0, n = src.length(); i < n; i++) {
			int chr = src.charAt(i);
			if (chr == '<' || chr == '>' || chr == '&' || chr == '\"')
				needs_encoding++;
			else if (chr > '\u007F')
				needs_encoding++;
		}
		if (needs_encoding == 0)
			return src;
		StringBuilder dst = new StringBuilder(src.length() + needs_encoding * 10);
		for (int i = 0, n = src.length(); i < n; ) {
			int chr = src.codePointAt(i);
			i += (chr <= 0xFFFF) ? 1 : 2;
			if (chr == '<')
				dst.append("<");
			else if (chr == '>')
				dst.append(">");
			else if (chr == '&')
				dst.append("&");
			else if (chr == '\"')
				dst.append(""");
			else if (chr > '\u007F') { /* as per HTML 4.1 spec */
				dst.append("&#x");
				if ((chr & 0xF00000) != 0) {
					dst.append(hexDigits.charAt(chr >> 20 & 0xF));
					dst.append(hexDigits.charAt(chr >> 16 & 0xF));
					dst.append(hexDigits.charAt(chr >> 12 & 0xF));
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0xF0000) != 0) {
					dst.append(hexDigits.charAt(chr >> 16 & 0xF));
					dst.append(hexDigits.charAt(chr >> 12 & 0xF));
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0xF000) != 0) {
					dst.append(hexDigits.charAt(chr >> 12 & 0xF));
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0x0F00) != 0) {
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0x00F0) != 0) {
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else {
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				dst.append(';');
			}
			else
				dst.append((char) chr);
		}
		return dst.toString();  
	}


	/*
	 * xmlEncode -- XML-encode a string
	 *
	 * In XML 1.0, character entity references have the format &#n...n; and
	 * &#xh...h; where n is any decimal digit, and h is any hex digit, and
	 * where the number of digits is unlimited.
	 */
	private static String xmlEncode(String src) {
		int needs_encoding = 0;
		for (int i = 0, n = src.length(); i < n; i++) {
			int chr = src.charAt(i);
			if (chr == '<' || chr == '>' || chr == '&'
			|| chr == '\'' || chr == '\"')
				needs_encoding++;
			else if (chr > '\u007F')
				needs_encoding++;
		}
		if (needs_encoding == 0)
			return src;
		StringBuilder dst = new StringBuilder(src.length() + needs_encoding * 10);
		for (int i = 0, n = src.length(); i < n; ) {
			int chr = src.codePointAt(i);
			i += (chr <= 0xFFFF) ? 1 : 2;
			if (chr == '<')
				dst.append("<");
			else if (chr == '>')
				dst.append(">");
			else if (chr == '&')
				dst.append("&");
			else if (chr == '\'')
				dst.append("'");
			else if (chr == '\"')
				dst.append(""");
			else if (chr > '\u007F') { /* as per XML 1.1 spec */
				dst.append("&#x");
				if ((chr & 0xF00000) != 0) {
					dst.append(hexDigits.charAt(chr >> 20 & 0xF));
					dst.append(hexDigits.charAt(chr >> 16 & 0xF));
					dst.append(hexDigits.charAt(chr >> 12 & 0xF));
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0xF0000) != 0) {
					dst.append(hexDigits.charAt(chr >> 16 & 0xF));
					dst.append(hexDigits.charAt(chr >> 12 & 0xF));
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0xF000) != 0) {
					dst.append(hexDigits.charAt(chr >> 12 & 0xF));
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0x0F00) != 0) {
					dst.append(hexDigits.charAt(chr >> 8  & 0xF));
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else if ((chr & 0x00F0) != 0) {
					dst.append(hexDigits.charAt(chr >> 4  & 0xF));
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				else {
					dst.append(hexDigits.charAt(chr       & 0xF));
				}
				dst.append(';');
			}
			else
				dst.append((char) chr);
		}
		return dst.toString();  
	}


	/*
	 *  urlDecode -- URL decode a string.
	 *
	 * A '+' becomes a space and every well-formed escape %hh (two hex
	 * digits, either case) becomes the character whose code is hh.  Each
	 * escape is decoded on its own to a single character in the range
	 * 0-255; consecutive escapes are not combined into multi-byte
	 * sequences.
	 *
	 * A '%' that is not followed by two hex digits -- because the digits
	 * are invalid or the string ends first -- is not an escape and is
	 * copied through unchanged rather than being decoded to an arbitrary
	 * character or silently dropped.
	 *
	 * Returns src itself when nothing was decoded.
	 */
	private static String urlDecode(String src) {
		final int n = src.length();
		StringBuilder dst = new StringBuilder(n);
		boolean needsDecoding = false;
		for (int i = 0; i < n; i++) {
			char chr = src.charAt(i);
			if (chr == '+') {
				dst.append(' ');
				needsDecoding = true;
			}
			//
			// A percent sign with room for two more characters: decode it
			// only if both of them are ASCII hex digits.
			//
			else if (chr == '%' && i + 2 < n) {
				char c1 = src.charAt(i + 1);
				char c2 = src.charAt(i + 2);
				// Character.digit also accepts non-ASCII digits; exclude them.
				int hi = (c1 < 0x80) ? Character.digit(c1, 16) : -1;
				int lo = (c2 < 0x80) ? Character.digit(c2, 16) : -1;
				if (hi >= 0 && lo >= 0) {
					dst.append((char) (hi * 16 + lo));
					i += 2;
					needsDecoding = true;
				}
				else
					dst.append(chr);
			}
			//
			// Else its just a regular character (or a stray percent sign).
			//
			else
				dst.append(chr);
		}
		return needsDecoding ? dst.toString() : src;
	}


	/*
	 * One row of the named-entity table: an HTML entity name (without the
	 * leading '&' and trailing ';') paired with the character it denotes.
	 */
	private static class HTMLEntity {
		String name;
		char value;

		HTMLEntity(String entityName, char entityValue) {
			this.name = entityName;
			this.value = entityValue;
		}
	}

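	/*
	 * Table of HTML named character entities.
	 *
	 * Keep the entries sorted by name in String.compareTo order, i.e. by
	 * character code, so every upper-case letter sorts before every
	 * lower-case one ("Zeta" before "aacute", "dArr" before "dagger").
	 * htmlDecode binary-searches this table and will miss entries that
	 * are out of order.
	 */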
    private static final HTMLEntity[] entity = {
		new HTMLEntity("AElig",		'\u00c6' ), //	Latin capital letter ae
		new HTMLEntity("Aacute",	'\u00c1' ), //	Latin capital letter a with acute
		new HTMLEntity("Acirc",		'\u00c2' ), //	Latin capital letter a with circumflex
		new HTMLEntity("Agrave",	'\u00c0' ), //	Latin capital letter a with grave
		new HTMLEntity("Alpha",		'\u0391' ), //	Greek capital letter alpha
		new HTMLEntity("Aring",		'\u00c5' ), //	Latin capital letter a with ring above
		new HTMLEntity("Atilde",	'\u00c3' ), //	Latin capital letter a with tilde
		new HTMLEntity("Auml",		'\u00c4' ), //	Latin capital letter a with diaeresis
		new HTMLEntity("Beta",		'\u0392' ), //	Greek capital letter beta
		new HTMLEntity("Ccedil",	'\u00c7' ), //	Latin capital letter c with cedilla
		new HTMLEntity("Chi",		'\u03a7' ), //	Greek capital letter chi
		new HTMLEntity("Dagger",	'\u2021' ), //	double dagger
		new HTMLEntity("Delta",		'\u0394' ), //	Greek capital letter delta
		new HTMLEntity("ETH",		'\u00d0' ), //	Latin capital letter eth
		new HTMLEntity("Eacute",	'\u00c9' ), //	Latin capital letter e with acute
		new HTMLEntity("Ecirc",		'\u00ca' ), //	Latin capital letter e with circumflex
		new HTMLEntity("Egrave",	'\u00c8' ), //	Latin capital letter e with grave
		new HTMLEntity("Epsilon",	'\u0395' ), //	Greek capital letter epsilon
		new HTMLEntity("Eta",		'\u0397' ), //	Greek capital letter eta
		new HTMLEntity("Euml",		'\u00cb' ), //	Latin capital letter e with diaeresis
		new HTMLEntity("Gamma",		'\u0393' ), //	Greek capital letter gamma
		new HTMLEntity("Iacute",	'\u00cd' ), //	Latin capital letter i with acute
		new HTMLEntity("Icirc",		'\u00ce' ), //	Latin capital letter i with circumflex
		new HTMLEntity("Igrave",	'\u00cc' ), //	Latin capital letter i with grave
		new HTMLEntity("Iota",		'\u0399' ), //	Greek capital letter iota
		new HTMLEntity("Iuml",		'\u00cf' ), //	Latin capital letter i with diaeresis
		new HTMLEntity("Kappa",		'\u039a' ), //	Greek capital letter kappa
		new HTMLEntity("Lambda",	'\u039b' ), //	Greek capital letter lambda
		new HTMLEntity("Mu",		'\u039c' ), //	Greek capital letter mu
		new HTMLEntity("Ntilde",	'\u00d1' ), //	Latin capital letter n with tilde
		new HTMLEntity("Nu",		'\u039d' ), //	Greek capital letter nu
		new HTMLEntity("OElig",		'\u0152' ), //	Latin capital ligature oe
		new HTMLEntity("Oacute",	'\u00d3' ), //	Latin capital letter o with acute
		new HTMLEntity("Ocirc",		'\u00d4' ), //	Latin capital letter o with circumflex
		new HTMLEntity("Ograve",	'\u00d2' ), //	Latin capital letter o with grave
		new HTMLEntity("Omega",		'\u03a9' ), //	Greek capital letter omega
		new HTMLEntity("Omicron",	'\u039f' ), //	Greek capital letter omicron
		new HTMLEntity("Oslash",	'\u00d8' ), //	Latin capital letter o with stroke
		new HTMLEntity("Otilde",	'\u00d5' ), //	Latin capital letter o with tilde
		new HTMLEntity("Ouml",		'\u00d6' ), //	Latin capital letter o with diaeresis
		new HTMLEntity("Phi",		'\u03a6' ), //	Greek capital letter phi
		new HTMLEntity("Pi",		'\u03a0' ), //	Greek capital letter pi
		new HTMLEntity("Prime",		'\u2033' ), //	double prime
		new HTMLEntity("Psi",		'\u03a8' ), //	Greek capital letter psi
		new HTMLEntity("Rho",		'\u03a1' ), //	Greek capital letter rho
		new HTMLEntity("Scaron",	'\u0160' ), //	Latin capital letter s with caron
		new HTMLEntity("Sigma",		'\u03a3' ), //	Greek capital letter sigma
		new HTMLEntity("THORN",		'\u00de' ), //	Latin capital letter thorn
		new HTMLEntity("Tau",		'\u03a4' ), //	Greek capital letter tau
		new HTMLEntity("Theta",		'\u0398' ), //	Greek capital letter theta
		new HTMLEntity("Uacute",	'\u00da' ), //	Latin capital letter u with acute
		new HTMLEntity("Ucirc",		'\u00db' ), //	Latin capital letter u with circumflex
		new HTMLEntity("Ugrave",	'\u00d9' ), //	Latin capital letter u with grave
		new HTMLEntity("Upsilon",	'\u03a5' ), //	Greek capital letter upsilon
		new HTMLEntity("Uuml",		'\u00dc' ), //	Latin capital letter u with diaeresis
		new HTMLEntity("Xi",		'\u039e' ), //	Greek capital letter xi
		new HTMLEntity("Yacute",	'\u00dd' ), //	Latin capital letter y with acute
		new HTMLEntity("Yuml",		'\u0178' ), //	Latin capital letter y with diaeresis
		new HTMLEntity("Zeta",		'\u0396' ), //	Greek capital letter zeta
		new HTMLEntity("aacute",	'\u00e1' ), //	Latin small letter a with acute
		new HTMLEntity("acirc",		'\u00e2' ), //	Latin small letter a with circumflex
		new HTMLEntity("acute",		'\u00b4' ), //	acute accent
		new HTMLEntity("aelig",		'\u00e6' ), //	Latin lowercase ligature ae
		new HTMLEntity("agrave",	'\u00e0' ), //	Latin small letter a with grave
		new HTMLEntity("alefsym",	'\u2135' ), //	alef symbol
		new HTMLEntity("alpha",		'\u03b1' ), //	Greek small letter alpha
		new HTMLEntity("amp",		'\u0026' ), //	ampersand
		new HTMLEntity("and",		'\u2227' ), //	logical and
		new HTMLEntity("ang",		'\u2220' ), //	angle
		new HTMLEntity("aring",		'\u00e5' ), //	Latin small letter a with ring above
		new HTMLEntity("asymp",		'\u2248' ), //	almost equal to
		new HTMLEntity("atilde",	'\u00e3' ), //	Latin small letter a with tilde
		new HTMLEntity("auml",		'\u00e4' ), //	Latin small letter a with diaeresis
		new HTMLEntity("bdquo",		'\u201e' ), //	double low-9 quotation mark
		new HTMLEntity("beta",		'\u03b2' ), //	Greek small letter beta
		new HTMLEntity("brvbar",	'\u00a6' ), //	broken bar
		new HTMLEntity("bull",		'\u2022' ), //	bullet
		new HTMLEntity("cap",		'\u2229' ), //	intersection
		new HTMLEntity("ccedil",	'\u00e7' ), //	Latin small letter c with cedilla
		new HTMLEntity("cedil",		'\u00b8' ), //	cedilla
		new HTMLEntity("cent",		'\u00a2' ), //	cent sign
		new HTMLEntity("chi",		'\u03c7' ), //	Greek small letter chi
		new HTMLEntity("circ",		'\u02c6' ), //	modifier letter circumflex accent
		new HTMLEntity("clubs",		'\u2663' ), //	black club suit
		new HTMLEntity("cong",		'\u2245' ), //	congruent to
		new HTMLEntity("copy",		'\u00a9' ), //	copyright sign
		new HTMLEntity("crarr",		'\u21b5' ), //	downwards arrow with corner leftwards
		new HTMLEntity("cup",		'\u222a' ), //	union
		new HTMLEntity("curren",	'\u00a4' ), //	currency sign
		new HTMLEntity("dArr",		'\u21d3' ), //	downwards double arrow
		new HTMLEntity("dagger",	'\u2020' ), //	dagger
		new HTMLEntity("darr",		'\u2193' ), //	downwards arrow
		new HTMLEntity("deg",		'\u00b0' ), //	degree sign
		new HTMLEntity("delta",		'\u03b4' ), //	Greek small letter delta
		new HTMLEntity("diams",		'\u2666' ), //	black diamond suit
		new HTMLEntity("divide",	'\u00f7' ), //	division sign
		new HTMLEntity("eacute",	'\u00e9' ), //	Latin small letter e with acute
		new HTMLEntity("ecirc",		'\u00ea' ), //	Latin small letter e with circumflex
		new HTMLEntity("egrave",	'\u00e8' ), //	Latin small letter e with grave
		new HTMLEntity("empty",		'\u2205' ), //	empty set
		new HTMLEntity("emsp",		'\u2003' ), //	em space [2]
		new HTMLEntity("ensp",		'\u2002' ), //	en space [1]
		new HTMLEntity("epsilon",	'\u03b5' ), //	Greek small letter epsilon
		new HTMLEntity("equiv",		'\u2261' ), //	identical to
		new HTMLEntity("eta",		'\u03b7' ), //	Greek small letter eta
		new HTMLEntity("eth",		'\u00f0' ), //	Latin small letter eth
		new HTMLEntity("euml",		'\u00eb' ), //	Latin small letter e with diaeresis
		new HTMLEntity("euro",		'\u20ac' ), //	euro sign
		new HTMLEntity("exist",		'\u2203' ), //	there exists
		new HTMLEntity("fnof",		'\u0192' ), //	Latin small letter f with hook
		new HTMLEntity("forall",	'\u2200' ), //	for all
		new HTMLEntity("frac12",	'\u00bd' ), //	vulgar fraction one half
		new HTMLEntity("frac14",	'\u00bc' ), //	vulgar fraction one quarter
		new HTMLEntity("frac34",	'\u00be' ), //	vulgar fraction three quarters
		new HTMLEntity("frasl",		'\u2044' ), //	fraction slash
		new HTMLEntity("gamma",		'\u03b3' ), //	Greek small letter gamma
		new HTMLEntity("ge",		'\u2265' ), //	greater-than or equal to
		new HTMLEntity("gt",		'\u003e' ), //	greater-than sign
		new HTMLEntity("hArr",		'\u21d4' ), //	left right double arrow
		new HTMLEntity("harr",		'\u2194' ), //	left right arrow
		new HTMLEntity("hearts",	'\u2665' ), //	black heart suit
		new HTMLEntity("hellip",	'\u2026' ), //	horizontal ellipsis
		new HTMLEntity("iacute",	'\u00ed' ), //	Latin small letter i with acute
		new HTMLEntity("icirc",		'\u00ee' ), //	Latin small letter i with circumflex
		new HTMLEntity("iexcl",		'\u00a1' ), //	inverted exclamation mark
		new HTMLEntity("igrave",	'\u00ec' ), //	Latin small letter i with grave
		new HTMLEntity("image",		'\u2111' ), //	black-letter capital i
		new HTMLEntity("infin",		'\u221e' ), //	infinity
		new HTMLEntity("int",		'\u222b' ), //	integral
		new HTMLEntity("iota",		'\u03b9' ), //	Greek small letter iota
		new HTMLEntity("iquest",	'\u00bf' ), //	inverted question mark
		new HTMLEntity("isin",		'\u2208' ), //	element of
		new HTMLEntity("iuml",		'\u00ef' ), //	Latin small letter i with diaeresis
		new HTMLEntity("kappa",		'\u03ba' ), //	Greek small letter kappa
		new HTMLEntity("lArr",		'\u21d0' ), //	leftwards double arrow
		new HTMLEntity("lambda",	'\u03bb' ), //	Greek small letter lambda
		new HTMLEntity("lang",		'\u2329' ), //	left-pointing angle bracket
		new HTMLEntity("laquo",		'\u00ab' ), //	left-pointing double angle quotation mark
		new HTMLEntity("larr",		'\u2190' ), //	leftwards arrow
		new HTMLEntity("lceil",		'\u2308' ), //	left ceiling
		new HTMLEntity("ldquo",		'\u201c' ), //	left double quotation mark
		new HTMLEntity("le",		'\u2264' ), //	less-than or equal to
		new HTMLEntity("lfloor",	'\u230a' ), //	left floor
		new HTMLEntity("lowast",	'\u2217' ), //	asterisk operator
		new HTMLEntity("loz",		'\u25ca' ), //	lozenge
		new HTMLEntity("lrm",		'\u200e' ), //	left-to-right mark
		new HTMLEntity("lsaquo",	'\u2039' ), //	single left-pointing angle quotation mark
		new HTMLEntity("lsquo",		'\u2018' ), //	left single quotation mark
		new HTMLEntity("lt",		'\u003c' ), //	less-than sign
		new HTMLEntity("macr",		'\u00af' ), //	macron
		new HTMLEntity("mdash",		'\u2014' ), //	em dash
		new HTMLEntity("micro",		'\u00b5' ), //	micro sign
		new HTMLEntity("middot",	'\u00b7' ), //	middle dot
		new HTMLEntity("minus",		'\u2212' ), //	minus sign
		new HTMLEntity("mu",		'\u03bc' ), //	Greek small letter mu
		new HTMLEntity("nabla",		'\u2207' ), //	nabla
		new HTMLEntity("nbsp",		'\u00a0' ), //	no-break space
		new HTMLEntity("ndash",		'\u2013' ), //	en dash
		new HTMLEntity("ne",		'\u2260' ), //	not equal to
		new HTMLEntity("ni",		'\u220b' ), //	contains as member
		new HTMLEntity("not",		'\u00ac' ), //	not sign
		new HTMLEntity("notin",		'\u2209' ), //	not an element of
		new HTMLEntity("nsub",		'\u2284' ), //	not a subset of
		new HTMLEntity("ntilde",	'\u00f1' ), //	Latin small letter n with tilde
		new HTMLEntity("nu",		'\u03bd' ), //	Greek small letter nu
		new HTMLEntity("oacute",	'\u00f3' ), //	Latin small letter o with acute
		new HTMLEntity("ocirc",		'\u00f4' ), //	Latin small letter o with circumflex
		new HTMLEntity("oelig",		'\u0153' ), //	Latin small ligature oe
		new HTMLEntity("ograve",	'\u00f2' ), //	Latin small letter o with grave
		new HTMLEntity("oline",		'\u203e' ), //	overline
		new HTMLEntity("omega",		'\u03c9' ), //	Greek small letter omega
		new HTMLEntity("omicron",	'\u03bf' ), //	Greek small letter omicron
		new HTMLEntity("oplus",		'\u2295' ), //	circled plus
		new HTMLEntity("or",		'\u2228' ), //	logical or
		new HTMLEntity("ordf",		'\u00aa' ), //	feminine ordinal indicator
		new HTMLEntity("ordm",		'\u00ba' ), //	masculine ordinal indicator
		new HTMLEntity("oslash",	'\u00f8' ), //	Latin small letter o with stroke
		new HTMLEntity("otilde",	'\u00f5' ), //	Latin small letter o with tilde
		new HTMLEntity("otimes",	'\u2297' ), //	circled times
		new HTMLEntity("ouml",		'\u00f6' ), //	Latin small letter o with diaeresis
		new HTMLEntity("para",		'\u00b6' ), //	pilcrow sign
		new HTMLEntity("part",		'\u2202' ), //	partial differential
		new HTMLEntity("permil",	'\u2030' ), //	per mille sign
		new HTMLEntity("perp",		'\u22a5' ), //	up tack
		new HTMLEntity("phi",		'\u03c6' ), //	Greek small letter phi
		new HTMLEntity("pi",		'\u03c0' ), //	Greek small letter pi
		new HTMLEntity("piv",		'\u03d6' ), //	Greek pi symbol
		new HTMLEntity("plusmn",	'\u00b1' ), //	plus-minus sign
		new HTMLEntity("pound",		'\u00a3' ), //	pound sign
		new HTMLEntity("prime",		'\u2032' ), //	prime
		new HTMLEntity("prod",		'\u220f' ), //	n-ary product
		new HTMLEntity("prop",		'\u221d' ), //	proportional to
		new HTMLEntity("psi",		'\u03c8' ), //	Greek small letter psi
		new HTMLEntity("quot",		'\u0022' ), //	quotation mark
		new HTMLEntity("rArr",		'\u21d2' ), //	rightwards double arrow
		new HTMLEntity("radic",		'\u221a' ), //	square root
		new HTMLEntity("rang",		'\u232a' ), //	right-pointing angle bracket
		new HTMLEntity("raquo",		'\u00bb' ), //	right-pointing double angle quotation mark
		new HTMLEntity("rarr",		'\u2192' ), //	rightwards arrow
		new HTMLEntity("rceil",		'\u2309' ), //	right ceiling
		new HTMLEntity("rdquo",		'\u201d' ), //	right double quotation mark
		new HTMLEntity("real",		'\u211c' ), //	black-letter capital r
		new HTMLEntity("reg",		'\u00ae' ), //	registered sign
		new HTMLEntity("rfloor",	'\u230b' ), //	right floor
		new HTMLEntity("rho",		'\u03c1' ), //	Greek small letter rho
		new HTMLEntity("rlm",		'\u200f' ), //	right-to-left mark
		new HTMLEntity("rsaquo",	'\u203a' ), //	single right-pointing angle quotation mark
		new HTMLEntity("rsquo",		'\u2019' ), //	right single quotation mark
		new HTMLEntity("sbquo",		'\u201a' ), //	single low-9 quotation mark
		new HTMLEntity("scaron",	'\u0161' ), //	Latin small letter s with caron
		new HTMLEntity("sdot",		'\u22c5' ), //	dot operator
		new HTMLEntity("sect",		'\u00a7' ), //	section sign
		new HTMLEntity("shy",		'\u00ad' ), //	soft hyphen
		new HTMLEntity("sigma",		'\u03c3' ), //	Greek small letter sigma
		new HTMLEntity("sigmaf",	'\u03c2' ), //	Greek small letter final sigma
		new HTMLEntity("sim",		'\u223c' ), //	tilde operator
		new HTMLEntity("spades",	'\u2660' ), //	black spade suit
		new HTMLEntity("sub",		'\u2282' ), //	subset of
		new HTMLEntity("sube",		'\u2286' ), //	subset of or equal to
		new HTMLEntity("sum",		'\u2211' ), //	n-ary summation
		new HTMLEntity("sup",		'\u2283' ), //	superset of
		new HTMLEntity("sup1",		'\u00b9' ), //	superscript one
		new HTMLEntity("sup2",		'\u00b2' ), //	superscript two
		new HTMLEntity("sup3",		'\u00b3' ), //	superscript three
		new HTMLEntity("supe",		'\u2287' ), //	superset of or equal to
		new HTMLEntity("szlig",		'\u00df' ), //	Latin small letter sharp s
		new HTMLEntity("tau",		'\u03c4' ), //	Greek small letter tau
		new HTMLEntity("there4",	'\u2234' ), //	therefore
		new HTMLEntity("theta",		'\u03b8' ), //	Greek small letter theta
		new HTMLEntity("thetasym",	'\u03d1' ), //	Greek theta symbol
		new HTMLEntity("thinsp",	'\u2009' ), //	thin space [3]
		new HTMLEntity("thorn",		'\u00fe' ), //	Latin small letter thorn
		new HTMLEntity("tilde",		'\u02dc' ), //	small tilde
		new HTMLEntity("times",		'\u00d7' ), //	multiplication sign
		new HTMLEntity("trade",		'\u2122' ), //	trademark sign
		new HTMLEntity("uArr",		'\u21d1' ), //	upwards double arrow
		new HTMLEntity("uacute",	'\u00fa' ), //	Latin small letter u with acute
		new HTMLEntity("uarr",		'\u2191' ), //	upwards arrow
		new HTMLEntity("ucirc",		'\u00fb' ), //	Latin small letter u with circumflex
		new HTMLEntity("ugrave",	'\u00f9' ), //	Latin small letter u with grave
		new HTMLEntity("uml",		'\u00a8' ), //	diaeresis
		new HTMLEntity("upsih",		'\u03d2' ), //	Greek upsilon with hook symbol
		new HTMLEntity("upsilon",	'\u03c5' ), //	Greek small letter upsilon
		new HTMLEntity("uuml",		'\u00fc' ), //	Latin small letter u with diaeresis
		new HTMLEntity("weierp",	'\u2118' ), //	script capital p
		new HTMLEntity("xi",		'\u03be' ), //	Greek small letter xi
		new HTMLEntity("yacute",	'\u00fd' ), //	Latin small letter y with acute
		new HTMLEntity("yen",		'\u00a5' ), //	yen sign
		new HTMLEntity("yuml",		'\u00ff' ), //	Latin small letter y with diaeresis
		new HTMLEntity("zeta",		'\u03b6' ), //	Greek small letter zeta
		new HTMLEntity("zwj",		'\u200d' ), //	zero width joiner
		new HTMLEntity("zwnj",		'\u200c' )  //	zero width non-joiner
    };

	/*
	 * htmlDecode -- html-decode a string
	 *
	 * Every span "&...;" (an ampersand up to the next semicolon) is treated
	 * as a character reference:
	 *   - a name found in the entity table is replaced by its character;
	 *   - "#" followed by decimal digits, or "#x"/"#X" followed by hex
	 *     digits, is replaced by that code point (supplementary code
	 *     points are emitted as a surrogate pair);
	 *   - anything else, including a numeric reference with no digits,
	 *     a stray non-digit, or a value above U+10FFFF, is dropped.
	 * An ampersand with no semicolon anywhere after it is not a reference
	 * and is copied through unchanged.
	 *
	 * Returns src itself when no reference was found.
	 */
	private static String htmlDecode(String src) {
		assert(src != null);
		final int n = src.length();
		StringBuilder dst = new StringBuilder(n);
		boolean needsDecoding = false;
		for (int s = 0; s < n; s++) {
			char c = src.charAt(s);
			// p is the index of the terminating ';', or -1 if this is not a reference.
			int p = (c == '&') ? src.indexOf(';', s + 1) : -1;
			if (p < 0) {
				dst.append(c);
				continue;
			}
			String ent = src.substring(s + 1, p);
			if (ent.length() > 1 && ent.charAt(0) == '#') {
				//
				// Numeric character reference.
				//
				int radix = 10;
				int k = 1;
				if (ent.charAt(1) == 'x' || ent.charAt(1) == 'X') {
					radix = 16;
					k = 2;
				}
				boolean valid = k < ent.length(); // at least one digit is required
				int d = 0;
				while (valid && k < ent.length()) {
					char ch = ent.charAt(k++);
					// Character.digit also accepts non-ASCII digits; exclude them.
					int q = (ch < 0x80) ? Character.digit(ch, radix) : -1;
					if (q < 0)
						valid = false;
					else {
						d = d * radix + q;
						// stop before the accumulator can overflow.
						if (d > 0x10FFFF)
							valid = false;
					}
				}
				if (valid)
					dst.appendCodePoint(d);
			}
			else {
				//
				// Named reference: binary search of the sorted entity table.
				//
				int lo = 0;
				int hi = entity.length - 1;
				while (lo <= hi) {
					int mid = (lo + hi) >>> 1;
					int cmp = ent.compareTo(entity[mid].name);
					if (cmp == 0) {
						dst.append(entity[mid].value);
						break;
					}
					else if (cmp < 0)
						hi = mid - 1;
					else /* if (cmp > 0) */
						lo = mid + 1;
				}
			}
			s = p; // resume after the ';'
			needsDecoding = true;
		}
		return needsDecoding ? dst.toString() : src;
	}


	/*
	 * xmlDecode -- XML-decode a string
	 *
	 * Every span "&...;" (an ampersand up to the next semicolon) is treated
	 * as a reference:
	 *   - the predefined names lt, gt, amp, apos and quot are replaced by
	 *     the characters they stand for;
	 *   - "#" followed by decimal digits, or "#x"/"#X" followed by hex
	 *     digits, is replaced by that code point (supplementary code
	 *     points are emitted as a surrogate pair);
	 *   - anything else, including a numeric reference with no digits,
	 *     a stray non-digit, or a value above U+10FFFF, is dropped.
	 * An ampersand with no semicolon anywhere after it is not a reference
	 * and is copied through unchanged.
	 *
	 * Returns src itself when no reference was found.
	 */
	private static String xmlDecode(String src) {
		assert(src != null);
		final int n = src.length();
		StringBuilder dst = new StringBuilder(n);
		boolean needsDecoding = false;
		for (int s = 0; s < n; s++) {
			char c = src.charAt(s);
			// p is the index of the terminating ';', or -1 if this is not a reference.
			int p = (c == '&') ? src.indexOf(';', s + 1) : -1;
			if (p < 0) {
				dst.append(c);
				continue;
			}
			String ent = src.substring(s + 1, p);
			if (ent.equals("lt"))
				dst.append('<');
			else if (ent.equals("gt"))
				dst.append('>');
			else if (ent.equals("amp"))
				dst.append('&');
			else if (ent.equals("apos"))
				dst.append('\'');
			else if (ent.equals("quot"))
				dst.append('\"');
			else if (ent.length() > 1 && ent.charAt(0) == '#') {
				//
				// Numeric character reference.
				//
				int radix = 10;
				int k = 1;
				if (ent.charAt(1) == 'x' || ent.charAt(1) == 'X') {
					radix = 16;
					k = 2;
				}
				boolean valid = k < ent.length(); // at least one digit is required
				int d = 0;
				while (valid && k < ent.length()) {
					char ch = ent.charAt(k++);
					// Character.digit also accepts non-ASCII digits; exclude them.
					int q = (ch < 0x80) ? Character.digit(ch, radix) : -1;
					if (q < 0)
						valid = false;
					else {
						d = d * radix + q;
						// stop before the accumulator can overflow.
						if (d > 0x10FFFF)
							valid = false;
					}
				}
				if (valid)
					dst.appendCodePoint(d);
			}
			s = p; // resume after the ';'
			needsDecoding = true;
		}
		return needsDecoding ? dst.toString() : src;
	}


	/*
	 * javaEncode -- Java(Script)-encode a string
	 *
	 * Encode each character to its java(Script) Unicode-escaped sequence:
	 * a backslash, the letter u and four lower-case hex digits.
	 * For example, character A gets encoded to string \u0041.
	 *
	 * Supplementary code points (above U+FFFF) depend on the mode:
	 *   - legacy scripting: a backslash, a capital U and eight hex digits;
	 *   - otherwise: two consecutive four-digit escapes, one for the high
	 *     surrogate and one for the low surrogate.
	 *
	 * Every character is escaped, so the result is never src itself
	 * unless src is empty, in which case an empty string is returned.
	 */
	private static String javaEncode(String src, boolean bLegacyScripting) {
		final int n = src.length();
		StringBuilder dst = new StringBuilder(n * 10);
		for (int i = 0; i < n; ) {
			int chr = src.codePointAt(i);
			i += Character.charCount(chr);
			int digits = 4;
			if (chr <= 0xFFFF) {
				// the prefix is needed in both modes, not just legacy mode.
				dst.append("\\u");
			}
			else if (bLegacyScripting) {
				dst.append("\\U");
				digits = 8;
			}
			else {
				// compute and emit the high surrogate.
				int offset = chr - 0x10000;
				int hs = (offset >> 10) | 0xD800;
				dst.append("\\u");
				for (int shift = 12; shift >= 0; shift -= 4)
					dst.append(Character.forDigit((hs >> shift) & 0xF, 16));
				// the low surrogate is emitted by the common code below.
				dst.append("\\u");
				chr = (offset & 0x3FF) | 0xDC00;
			}
			// zero-padded hex, most significant nibble first.
			for (int shift = (digits - 1) * 4; shift >= 0; shift -= 4)
				dst.append(Character.forDigit((chr >> shift) & 0xF, 16));
		}
		return dst.toString();
	}


	/*
	 * javaDecode -- java-decode a string
	 *
	 * Decode each java(Script) Unicode-escaped sequence to its character.
	 * For example, the string \u0041 gets decoded to character A.
	 *
	 * Recognised sequences are a backslash followed by
	 *   - the letter u and exactly four hex digits, or
	 *   - (legacy scripting only) a capital U and exactly eight hex
	 *     digits; a value above U+FFFF is emitted as a surrogate pair.
	 * Illegal sequences are ignored: a backslash followed by any other
	 * character drops both, a sequence with too few digits drops the
	 * backslash, the letter and the digits that were present, and a
	 * sequence whose value is zero or above U+10FFFF produces nothing.
	 * Decoding never reads past the end of src, and the character that
	 * terminates a short sequence is kept for normal processing.
	 *
	 * Outside legacy mode, surrogate pairs written as two four-digit
	 * escapes decode to the two corresponding UTF-16 units.
	 *
	 * Returns src itself when it contains no backslash.
	 */
	private static String javaDecode(String src, boolean bLegacyScripting) {
		assert(src != null);
		final int n = src.length();
		StringBuilder dst = new StringBuilder(n);
		boolean needsDecoding = false;
		for (int s = 0; s < n; s++) {
			char c = src.charAt(s);
			if (c != '\\') {
				dst.append(c);
				continue;
			}
			needsDecoding = true;
			if (s + 1 >= n)
				break; // trailing backslash: nothing left to decode.
			char kind = src.charAt(++s);
			int want;
			if (kind == 'u')
				want = 4;
			else if (bLegacyScripting && kind == 'U') // for \UXXXXXXXX-style sequences.
				want = 8;
			else
				continue; // unknown escape: ignore it.
			//
			// Consume up to 'want' hex digits, leaving s on the last
			// character that belongs to the sequence.
			//
			long d = 0; // eight hex digits do not fit in a signed int.
			int got = 0;
			while (got < want && s + 1 < n) {
				char h = src.charAt(s + 1);
				// Character.digit also accepts non-ASCII digits; exclude them.
				int q = (h < 0x80) ? Character.digit(h, 16) : -1;
				if (q < 0)
					break;
				d = d * 16 + q;
				got++;
				s++;
			}
			if (got == want && d > 0 && d <= 0x10FFFF) // ignore illegal sequences.
				dst.appendCodePoint((int) d);
		}
		return needsDecoding ? dst.toString() : src;
	}


	/*
	 *  Encode(s1, s2)
	 *      where s1 is the string to be encoded,
	 *      and s2 names the type of encoding to perform (compared
	 *      ignoring case), where:
	 *      "html" requests HTML(entity) encoded
	 *      "xml" requests XML(entity) encoded
	 *      "javascript" requests java(Script) encoded
	 *      anything else requests URL encoded
	 *
	 *  The resulting symbol is pushed on the parser's stack.  Should a
	 *  CalcException be raised along the way, the symbol it carries is
	 *  pushed instead, and the parser is flagged as being in a throw
	 *  unless that symbol is of null type.
	 */
	static void Encode(CalcParser oParser, CalcSymbol[] oArgSym) {
		final int argCount = oArgSym.length;
		CalcSymbol result;
		try {
			// verify the argument count against the required two.
			Builtins.minArgs(argCount, 2);
			Builtins.maxArgs(argCount, 2);

			// screen the args for error, return and null values.
			Builtins.limitExceptionArgs(oArgSym);
			Builtins.limitNullArgs(oParser, argCount, oArgSym);

			// fetch the text and the name of the encoding.
			final String text = oParser.getString(oArgSym[0]);
			final String scheme = oParser.getString(oArgSym[1]);

			// dispatch on the encoding name.
			final String encoded;
			if (scheme.equalsIgnoreCase("html")) {
				encoded = htmlEncode(text);
			}
			else if (scheme.equalsIgnoreCase("xml")) {
				encoded = xmlEncode(text);
			}
			else {
				// both remaining encoders take the legacy scripting flag.
				final boolean legacy = oParser.moLegacyScripting.contains(LegacyVersion.V30_SCRIPTING);
				encoded = scheme.equalsIgnoreCase("javascript")
						? javaEncode(text, legacy)
						: urlEncode(text, legacy);
			}
			result = new CalcSymbol(encoded);
		}
		catch (CalcException e) {
			result = e.getSymbol();
			if (result.getType() != CalcSymbol.TypeNull) {
				oParser.mbInThrow = true;
			}
		}
		// hand the outcome back via the parser's stack.
		oParser.mStack.push(result);
	}

	/*
	 *  Decode(s1, s2)
	 *      where s1 is the string to be decoded,
	 *      and s2 names the type of decoding to perform (compared
	 *      ignoring case), where:
	 *      "html" requests HTML(entity) decoded
	 *      "xml" requests XML(entity) decoded
	 *      "javascript" requests java(Script) decoded
	 *      anything else requests URL decoded
	 *
	 *  The resulting symbol is pushed on the parser's stack.  Should a
	 *  CalcException be raised along the way, the symbol it carries is
	 *  pushed instead, and the parser is flagged as being in a throw
	 *  unless that symbol is of null type.
	 */
	static void Decode(CalcParser oParser, CalcSymbol[] oArgSym) {
		final int argCount = oArgSym.length;
		CalcSymbol result;
		try {
			// verify the argument count against the required two.
			Builtins.minArgs(argCount, 2);
			Builtins.maxArgs(argCount, 2);

			// screen the args for error, return and null values.
			Builtins.limitExceptionArgs(oArgSym);
			Builtins.limitNullArgs(oParser, argCount, oArgSym);

			// fetch the text and the name of the decoding.
			final String text = oParser.getString(oArgSym[0]);
			final String scheme = oParser.getString(oArgSym[1]);

			// dispatch on the decoding name; URL is the fallback.
			final String decoded;
			if (scheme.equalsIgnoreCase("html")) {
				decoded = htmlDecode(text);
			}
			else if (scheme.equalsIgnoreCase("xml")) {
				decoded = xmlDecode(text);
			}
			else if (scheme.equalsIgnoreCase("javascript")) {
				// only the java(Script) decoder takes the legacy scripting flag.
				decoded = javaDecode(text, oParser.moLegacyScripting.contains(LegacyVersion.V30_SCRIPTING));
			}
			else {
				decoded = urlDecode(text);
			}
			result = new CalcSymbol(decoded);
		}
		catch (CalcException e) {
			result = e.getSymbol();
			if (result.getType() != CalcSymbol.TypeNull) {
				oParser.mbInThrow = true;
			}
		}
		// hand the outcome back via the parser's stack.
		oParser.mStack.push(result);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy