// com.caucho.quercus.lib.HtmlModule Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of quercus Show documentation
// Show all versions of quercus Show documentation
// A PHP engine implemented in 100% Java
/*
* Copyright (c) 1998-2012 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
*
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.quercus.lib;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.caucho.quercus.QuercusModuleException;
import com.caucho.quercus.annotation.Optional;
import com.caucho.quercus.env.ArrayValue;
import com.caucho.quercus.env.ArrayValueImpl;
import com.caucho.quercus.env.ConstArrayValue;
import com.caucho.quercus.env.Env;
import com.caucho.quercus.env.LongValue;
import com.caucho.quercus.env.StringBuilderValue;
import com.caucho.quercus.env.StringValue;
import com.caucho.quercus.env.UnicodeBuilderValue;
import com.caucho.quercus.env.Value;
import com.caucho.quercus.lib.i18n.Decoder;
import com.caucho.quercus.lib.i18n.Encoder;
import com.caucho.quercus.module.AbstractQuercusModule;
import com.caucho.util.L10N;
import com.caucho.vfs.Encoding;
import com.caucho.vfs.i18n.EncodingWriter;
/**
* PHP functions implementing html code.
*/
public class HtmlModule extends AbstractQuercusModule {
// Logger used by html_entity_decode to record an IOException at FINE level.
private static final Logger log
= Logger.getLogger(HtmlModule.class.getName());
// Localized message factory; used for the htmlentities encoding warning.
private static final L10N L = new L10N(HtmlModule.class);
// Table selectors accepted by get_html_translation_table.
public static final int HTML_SPECIALCHARS = 0;
public static final int HTML_ENTITIES = 1;
// Quote-style bit flags; the functions below test them with a bitwise AND.
public static final int ENT_HTML_QUOTE_NONE = 0;
public static final int ENT_HTML_QUOTE_SINGLE = 1;
public static final int ENT_HTML_QUOTE_DOUBLE = 2;
// ENT_COMPAT selects double quotes only.
public static final int ENT_COMPAT = ENT_HTML_QUOTE_DOUBLE;
// ENT_QUOTES selects both single and double quotes.
public static final int ENT_QUOTES =
ENT_HTML_QUOTE_SINGLE | ENT_HTML_QUOTE_DOUBLE;
// ENT_NOQUOTES selects neither quote character.
public static final int ENT_NOQUOTES = ENT_HTML_QUOTE_NONE;
// Entity string per char value; a 65536-slot array filled by entity() from
// the static initializer and read by htmlentities. Unmapped slots are null.
private static StringValue []HTML_SPECIALCHARS_MAP;
// Translation table whose keys are "<", ">" and "&"; built in the static
// initializer.
private static ArrayValue HTML_SPECIALCHARS_ARRAY;
// Translation table (character string to entity string) holding every pair
// registered through entity() in the static initializer.
private static ArrayValue HTML_ENTITIES_ARRAY;
// Reverse table: entity string to the LongValue of its character code;
// used by html_entity_decode.
private static ArrayValue HTML_ENTITIES_ARRAY_ENTITY_KEY;
// Unicode-string variants of the three tables above. Each stays null until
// first needed and is then built with toUnicodeArray; no synchronization is
// visible around that lazy initialization.
private static ArrayValueImpl HTML_ENTITIES_ARRAY_UNICODE;
private static ArrayValueImpl HTML_SPECIALCHARS_ARRAY_UNICODE;
private static ArrayValueImpl HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY;
/**
 * Creates the module. The constructor body is empty; the lookup tables are
 * static and are filled by the static initializer.
 */
public HtmlModule()
{
}
/**
 * Builds a constant copy of an array in which every string key and every
 * string value has been converted with {@code toUnicodeValue}. Keys and
 * values that are not strings are copied unchanged.
 *
 * @param env the calling environment
 * @param array the array to convert; it is only iterated
 * @return a new {@code ConstArrayValue} wrapping the converted entries
 */
private static ConstArrayValue toUnicodeArray(Env env, ArrayValue array)
{
  ArrayValueImpl copy = new ArrayValueImpl();

  // The type arguments are required: the entry's key and value are assigned
  // to Value variables below without a cast.
  Iterator<Map.Entry<Value,Value>> iter = array.getIterator(env);

  while (iter.hasNext()) {
    Map.Entry<Value,Value> entry = iter.next();

    Value key = entry.getKey();
    Value value = entry.getValue();

    if (key.isString()) {
      key = key.toUnicodeValue(env);
    }

    if (value.isString()) {
      value = value.toUnicodeValue(env);
    }

    copy.put(key, value);
  }

  return new ConstArrayValue(copy);
}
/**
* Returns HTML translation tables.
*/
public Value get_html_translation_table(
Env env,
@Optional("HTML_SPECIALCHARS") int table,
@Optional("ENT_COMPAT") int quoteStyle) {
Value result;
if (! env.isUnicodeSemantics()) {
if (table == HTML_ENTITIES)
result = HTML_ENTITIES_ARRAY.copy();
else
result = HTML_SPECIALCHARS_ARRAY.copy();
}
else {
if (table == HTML_ENTITIES) {
if (HTML_ENTITIES_ARRAY_UNICODE == null) {
HTML_ENTITIES_ARRAY_UNICODE = toUnicodeArray(
env, HTML_ENTITIES_ARRAY);
}
result = HTML_ENTITIES_ARRAY_UNICODE.copy();
}
else {
if (HTML_SPECIALCHARS_ARRAY_UNICODE == null) {
HTML_SPECIALCHARS_ARRAY_UNICODE = toUnicodeArray(
env, HTML_SPECIALCHARS_ARRAY);
}
result = HTML_SPECIALCHARS_ARRAY_UNICODE.copy();
}
}
if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0)
result.put(env.createString('\''), env.createString("'"));
if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0)
result.put(env.createString('"'), env.createString("""));
return result;
}
/**
 * Converts the entities produced by htmlspecialchars back to characters.
 *
 * <p>The recognized entities are {@code &amp;}, {@code &lt;},
 * {@code &gt;}, {@code &quot;} (only when the double-quote flag is set in
 * {@code quoteStyle}) and {@code &#039;} (only when the single-quote flag
 * is set). Every other ampersand, including one at the very end of the
 * string, is copied unchanged.
 *
 * @param env the calling environment
 * @param str escaped string
 * @param quoteStyle optional quote style used
 * @return the decoded string
 */
public static StringValue htmlspecialchars_decode(Env env,
    StringValue str,
    @Optional("ENT_COMPAT") int quoteStyle)
{
  int len = str.length();

  StringValue sb = str.createStringBuilder(len * 4 / 5);

  for (int i = 0; i < len; i++) {
    char ch = str.charAt(i);

    if (ch != '&') {
      sb.append(ch);
      continue;
    }

    // Each match is bounds-checked inside the helper, so a trailing '&'
    // never causes a read past the end of the string.
    if (startsWithAt(str, i, "&amp;")) {
      sb.append('&');
      i += 4;
    }
    else if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0
             && startsWithAt(str, i, "&quot;")) {
      sb.append('"');
      i += 5;
    }
    else if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0
             && startsWithAt(str, i, "&#039;")) {
      sb.append('\'');
      i += 5;
    }
    else if (startsWithAt(str, i, "&lt;")) {
      sb.append('<');
      i += 3;
    }
    else if (startsWithAt(str, i, "&gt;")) {
      sb.append('>');
      i += 3;
    }
    else {
      // Not a recognized entity: keep the ampersand as it is.
      sb.append('&');
    }
  }

  return sb;
}

/**
 * Returns true if {@code token} occurs in {@code text} starting exactly at
 * {@code offset}; returns false when fewer characters than the token
 * length remain.
 */
private static boolean startsWithAt(CharSequence text,
                                    int offset,
                                    String token)
{
  int tokenLen = token.length();

  if (text.length() - offset < tokenLen) {
    return false;
  }

  for (int k = 0; k < tokenLen; k++) {
    if (text.charAt(offset + k) != token.charAt(k)) {
      return false;
    }
  }

  return true;
}
/**
* Escapes HTML
*
* @param env the calling environment
* @param string the string to be trimmed
* @param quoteStyleV optional quote style
* @param charsetV optional charset style
* @return the trimmed string
*/
public static Value htmlspecialchars(Env env,
StringValue string,
@Optional("ENT_COMPAT") int quoteStyle,
@Optional String charset,
@Optional("true") boolean isDoubleEncode)
{
int len = string.length();
StringValue sb = string.createStringBuilder(len * 5 / 4);
forLoop:
for (int i = 0; i < len; i++) {
char ch = string.charAt(i);
switch (ch) {
case '&':
if (! isDoubleEncode) {
for (int j = i + 1; j < len && j < i + 12; j++) {
char ch2 = string.charAt(j);
if (ch2 == ';') {
sb.append(string, i, j + 1);
i = j;
continue forLoop;
}
}
}
sb.append("&");
break;
case '"':
if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0)
sb.append(""");
else
sb.append(ch);
break;
case '\'':
if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0)
sb.append("'");
else
sb.append(ch);
break;
case '<':
sb.append("<");
break;
case '>':
sb.append(">");
break;
default:
sb.append(ch);
break;
}
}
return sb;
}
/**
* Escapes HTML
*
* @param env the calling environment
* @param stringV the string to be trimmed
* @param quoteStyleV optional quote style
* @param charsetV optional charset style
* @return the trimmed string
*/
public static Value htmlentities(Env env,
StringValue string,
@Optional("ENT_COMPAT") int quoteStyle,
@Optional String charset)
{
if (charset == null || charset.length() == 0) {
// php 5.4.0
charset = "UTF-8";
}
CharSequence unicodeStr;
if (string.isUnicode()) {
unicodeStr = string;
}
else {
try {
Decoder decoder = Decoder.create(charset);
decoder.setAllowMalformedOut(true);
unicodeStr = decoder.decode(env, string);
}
catch (Exception e) {
env.warning(L.l("unsupported encoding, defaulting to utf-8"), e);
charset = "UTF-8";
Decoder decoder = Decoder.create(charset);
decoder.setAllowMalformedOut(true);
unicodeStr = decoder.decode(env, string);
}
}
UnicodeBuilderValue sb = new UnicodeBuilderValue();
int len = unicodeStr.length();
for (int i = 0; i < len; i++) {
char ch = unicodeStr.charAt(i);
StringValue entity = HTML_SPECIALCHARS_MAP[ch & 0xffff];
if (ch == '"') {
if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0)
sb.append(""");
else
sb.append('"');
}
else if (ch == '\'') {
if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0)
sb.append("'");
else
sb.append('\'');
}
else if (entity != null) {
sb.append(entity);
}
else {
sb.append((char) ch);
}
}
if (string.isUnicode()) {
return sb;
}
else {
Encoder encoder = Encoder.create(charset);
StringValue result = env.createBinaryBuilder();
return encoder.encode(result, sb);
}
}
/**
 * Decodes named HTML entities back to characters.
 *
 * <p>An entity is an ampersand, a run of ASCII letters and digits, and a
 * closing {@code ;}. Entities found in the entity table are replaced by
 * their character; unknown entities and malformed or unterminated
 * sequences are copied to the result unchanged.
 *
 * <p>If writing a decoded character raises an {@code IOException}, the
 * exception is logged at FINE level and the text built so far is returned.
 *
 * @param env the calling environment
 * @param string the string to decode
 * @param quoteStyle optional quote style; not referenced by this
 *                   implementation
 * @param charset optional target charset for non-unicode mode; null or
 *                empty means the runtime encoding
 * @return the decoded string
 */
public static StringValue html_entity_decode(Env env,
    StringValue string,
    @Optional int quoteStyle,
    @Optional String charset)
{
  if (string.length() == 0)
    return env.getEmptyString();

  ArrayValue htmlEntities = null;
  boolean isUnicode = env.isUnicodeSemantics();

  if (isUnicode) {
    // The unicode-keyed reverse table is derived on first use.
    if (HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY == null) {
      HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY = toUnicodeArray(
          env, HTML_ENTITIES_ARRAY_ENTITY_KEY);
    }

    htmlEntities = HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY;
  }
  else
    htmlEntities = HTML_ENTITIES_ARRAY_ENTITY_KEY;

  EncodingWriter out = null;

  if (! isUnicode) {
    if (charset == null || charset.length() == 0)
      charset = env.getRuntimeEncoding();

    out = Encoding.getWriteEncoding(charset);
  }

  int len = string.length();
  // Index of the '&' of the entity being scanned, or -1 outside an entity.
  int htmlEntityStart = -1;
  StringValue result = env.createStringBuilder();

  try {
    // Loop through each character
    for (int i = 0; i < len; i++) {
      char ch = string.charAt(i);

      // Check whether it's a html entity
      // i.e. starts with '&' and ends with ';'
      if (ch == '&' && htmlEntityStart < 0) {
        htmlEntityStart = i;
      }
      else if (htmlEntityStart < 0) {
        // else add it to result.
        result.append(ch);
      }
      else if (ch == ';') {
        // If so substitute the entity and add it to result.
        StringValue entity = string.substring(htmlEntityStart, i + 1);
        Value value = htmlEntities.get(entity);

        if (value.isNull()) {
          result.append(entity);
        }
        else if (isUnicode) {
          result.append((char) value.toInt());
        }
        else {
          out.write(result, (char) value.toInt());
        }

        htmlEntityStart = -1;
      }
      else if (('a' <= ch && ch <= 'z')
               || ('A' <= ch && ch <= 'Z')
               || ('0' <= ch && ch <= '9')) {
        // Still inside a candidate entity name. Digits are accepted
        // because table entries such as sup2 and frac12 contain them.
      }
      else {
        // Not an entity after all: emit the '&' and rescan the characters
        // that followed it as ordinary text.
        result.append('&');
        i = htmlEntityStart;
        htmlEntityStart = -1;
      }
    }

    // An unterminated entity at the end is copied verbatim. The test must
    // be >= 0 so a candidate that starts at index 0 is not dropped.
    if (htmlEntityStart >= 0) {
      result.append(string, htmlEntityStart, len);
    }
  } catch (IOException e) {
    log.log(Level.FINE, e.toString(), e);
  }

  return result;
}
/**
* Replaces newlines with HTML breaks.
*
* @param env the calling environment
*/
public static Value nl2br(Env env, StringValue string)
{
int strLen = string.length();
StringValue sb = string.createStringBuilder(strLen * 5 / 4);
for (int i = 0; i < strLen; i++) {
char ch = string.charAt(i);
if (ch == '\n') {
sb.append("
\n");
}
else if (ch == '\r') {
if (i + 1 < strLen && string.charAt(i + 1) == '\n') {
sb.append("
\r\n");
i++;
}
else {
sb.append("
\r");
}
}
else {
sb.append(ch);
}
}
return sb;
}
/**
 * Registers one character/entity pair in all three lookup structures.
 *
 * @param array translation table: character string to entity string
 * @param map entity value indexed by the low 16 bits of the character code
 * @param revMap reverse table: entity value to character code
 * @param ch the character code
 * @param entity the entity text for the character
 */
private static void entity(ArrayValue array, StringValue []map,
                           ArrayValue revMap, int ch, String entity)
{
  // XXX: i18n and optimize static variables usage
  final String characterKey = String.valueOf((char) ch);
  final StringValue encoded = new StringBuilderValue(entity);
  final int slot = ch & 0xffff;

  array.put(characterKey, entity);
  map[slot] = encoded;
  revMap.put(encoded, LongValue.create(ch));
}
// Fills the static translation tables: the special-characters table, the
// per-character entity map, the full entity table and its reverse table.
static {
  ArrayValueImpl array = new ArrayValueImpl();

  array.put("<", "&lt;");
  array.put(">", "&gt;");
  array.put("&", "&amp;");

  HTML_SPECIALCHARS_ARRAY = new ConstArrayValue(array);

  StringValue []map = new StringValue[65536];
  HTML_SPECIALCHARS_MAP = map;

  ArrayValue revMap = new ArrayValueImpl();
  HTML_ENTITIES_ARRAY_ENTITY_KEY = revMap;

  array = new ArrayValueImpl();

  entity(array, map, revMap, '<', "&lt;");
  entity(array, map, revMap, '>', "&gt;");
  entity(array, map, revMap, '&', "&amp;");

  // Entity names for the code points 160 through 255, in code point order
  // (eight names per row).
  String []latin1Names = {
    "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
    "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
    "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
    "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
    "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
    "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
    "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
    "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
    "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
    "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
    "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
    "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
  };

  for (int i = 0; i < latin1Names.length; i++) {
    entity(array, map, revMap, 160 + i, "&" + latin1Names[i] + ";");
  }

  // XXX: charset, order it.
  entity(array, map, revMap, 0x2002, "&ensp;");
  entity(array, map, revMap, 0x2009, "&thinsp;");
  entity(array, map, revMap, 0x2018, "&lsquo;");
  entity(array, map, revMap, 0x2020, "&dagger;");
  entity(array, map, revMap, 0x2032, "&prime;");
  entity(array, map, revMap, 0x2044, "&frasl;");
  entity(array, map, revMap, 0x20ac, "&euro;");

  HTML_ENTITIES_ARRAY = new ConstArrayValue(array);
}
}