All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.caucho.quercus.lib.HtmlModule Maven / Gradle / Ivy

There is a newer version: 4.0.66
Show newest version
/*
 * Copyright (c) 1998-2012 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *
 *   Free Software Foundation, Inc.
 *   59 Temple Place, Suite 330
 *   Boston, MA 02111-1307  USA
 *
 * @author Scott Ferguson
 */

package com.caucho.quercus.lib;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.caucho.quercus.QuercusModuleException;
import com.caucho.quercus.annotation.Optional;
import com.caucho.quercus.env.ArrayValue;
import com.caucho.quercus.env.ArrayValueImpl;
import com.caucho.quercus.env.ConstArrayValue;
import com.caucho.quercus.env.Env;
import com.caucho.quercus.env.LongValue;
import com.caucho.quercus.env.StringBuilderValue;
import com.caucho.quercus.env.StringValue;
import com.caucho.quercus.env.UnicodeBuilderValue;
import com.caucho.quercus.env.Value;
import com.caucho.quercus.lib.i18n.Decoder;
import com.caucho.quercus.lib.i18n.Encoder;
import com.caucho.quercus.module.AbstractQuercusModule;
import com.caucho.util.L10N;
import com.caucho.vfs.Encoding;
import com.caucho.vfs.i18n.EncodingWriter;

/**
 * PHP functions implementing html code.
 */
public class HtmlModule extends AbstractQuercusModule {
  // Logger used for non-fatal diagnostics raised while decoding.
  private static final Logger log
  = Logger.getLogger(HtmlModule.class.getName());

  // Localization helper for warning messages.
  private static final L10N L = new L10N(HtmlModule.class);

  // Table selectors accepted by get_html_translation_table().
  public static final int HTML_SPECIALCHARS = 0;
  public static final int HTML_ENTITIES = 1;

  // Bit flags describing which quote characters are converted.
  public static final int ENT_HTML_QUOTE_NONE = 0;
  public static final int ENT_HTML_QUOTE_SINGLE = 1;
  public static final int ENT_HTML_QUOTE_DOUBLE = 2;

  // PHP-visible quote styles, expressed as combinations of the flags above:
  // ENT_COMPAT converts double quotes only, ENT_QUOTES converts both,
  // ENT_NOQUOTES converts neither.
  public static final int ENT_COMPAT = ENT_HTML_QUOTE_DOUBLE;
  public static final int ENT_QUOTES =
      ENT_HTML_QUOTE_SINGLE | ENT_HTML_QUOTE_DOUBLE;
  public static final int ENT_NOQUOTES = ENT_HTML_QUOTE_NONE;

  // Entity string for each character, indexed by (ch & 0xffff); null when
  // the character has no entity. Filled by the static initializer.
  private static StringValue []HTML_SPECIALCHARS_MAP;

  // character -> entity tables, filled by the static initializer.
  private static ArrayValue HTML_SPECIALCHARS_ARRAY;
  private static ArrayValue HTML_ENTITIES_ARRAY;
  // Reverse table: entity string -> character code.
  private static ArrayValue HTML_ENTITIES_ARRAY_ENTITY_KEY;

  // Unicode-string variants of the tables above. They start out null and
  // are created on first use via toUnicodeArray().
  private static ArrayValueImpl HTML_ENTITIES_ARRAY_UNICODE;
  private static ArrayValueImpl HTML_SPECIALCHARS_ARRAY_UNICODE;
  private static ArrayValueImpl HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY;

  // Default constructor; the module keeps no per-instance state here.
  public HtmlModule()
  {
  }

  /**
   * Builds a constant copy of an array in which every string key and every
   * string value has been converted with {@code toUnicodeValue(env)}.
   * Keys and values that are not strings are copied unchanged.
   *
   * @param env the calling environment
   * @param array the array to convert
   * @return a new constant array holding the converted entries
   */
  private static ConstArrayValue toUnicodeArray(Env env, ArrayValue array)
  {
    ArrayValueImpl copy = new ArrayValueImpl();

    Iterator<Map.Entry<Value,Value>> iter = array.getIterator(env);

    while (iter.hasNext()) {
      Map.Entry<Value,Value> entry = iter.next();

      Value key = entry.getKey();
      Value value = entry.getValue();

      if (key.isString()) {
        key = key.toUnicodeValue(env);
      }

      if (value.isString()) {
        value = value.toUnicodeValue(env);
      }

      copy.put(key, value);
    }

    return new ConstArrayValue(copy);
  }

  /**
   * Returns HTML translation tables.
   */
  public Value get_html_translation_table(
      Env env,
      @Optional("HTML_SPECIALCHARS") int table,
      @Optional("ENT_COMPAT") int quoteStyle) {
    Value result;

    if (! env.isUnicodeSemantics()) {
      if (table == HTML_ENTITIES)
        result = HTML_ENTITIES_ARRAY.copy();
      else
        result = HTML_SPECIALCHARS_ARRAY.copy();
    }
    else {
      if (table == HTML_ENTITIES) {
        if (HTML_ENTITIES_ARRAY_UNICODE == null) {
          HTML_ENTITIES_ARRAY_UNICODE = toUnicodeArray(
              env, HTML_ENTITIES_ARRAY);
        }

        result = HTML_ENTITIES_ARRAY_UNICODE.copy();
      }
      else {
        if (HTML_SPECIALCHARS_ARRAY_UNICODE == null) {
          HTML_SPECIALCHARS_ARRAY_UNICODE = toUnicodeArray(
              env, HTML_SPECIALCHARS_ARRAY);
        }

        result = HTML_SPECIALCHARS_ARRAY_UNICODE.copy();
      }
    }

    if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0)
      result.put(env.createString('\''), env.createString("'"));

    if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0)
      result.put(env.createString('"'), env.createString("""));

    return result;
  }

  /**
   * Converts the entities {@code &amp;}, {@code &lt;} and {@code &gt;} back
   * to characters. {@code &quot;} is converted only when quoteStyle contains
   * ENT_HTML_QUOTE_DOUBLE, and {@code &#039;} only when it contains
   * ENT_HTML_QUOTE_SINGLE. Any other text, including an unrecognized
   * {@code &} sequence, is copied through unchanged.
   *
   * @param env the calling environment
   * @param str escaped string
   * @param quoteStyle optional quote style used
   * @return the decoded string
   */
  public static StringValue htmlspecialchars_decode(Env env,
                                        StringValue str,
                                        @Optional("ENT_COMPAT") int quoteStyle)
  {
    int len = str.length();

    StringValue sb = str.createStringBuilder(len * 4 / 5);

    for (int i = 0; i < len; i++) {
      char ch = str.charAt(i);

      // A '&' that is the last character cannot start an entity; copying it
      // here also keeps the charAt(i + 1) lookup below within bounds.
      if (ch != '&' || i + 1 >= len) {
        sb.append(ch);

        continue;
      }

      switch (str.charAt(i + 1)) {
        case 'a':
          // "&amp;" decodes to '&' and a non-matching '&' is copied as '&',
          // so the output character is the same either way.
          sb.append('&');
          if (i + 4 < len
              && str.charAt(i + 2) == 'm'
              && str.charAt(i + 3) == 'p'
              && str.charAt(i + 4) == ';') {
            i += 4;
          }
          break;

        case 'q':
          if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0
              && i + 5 < len
              && str.charAt(i + 2) == 'u'
              && str.charAt(i + 3) == 'o'
              && str.charAt(i + 4) == 't'
              && str.charAt(i + 5) == ';') {
            i += 5;
            sb.append('"');
          }
          else {
            sb.append('&');
          }
          break;

        case '#':
          if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0
              && i + 5 < len
              && str.charAt(i + 2) == '0'
              && str.charAt(i + 3) == '3'
              && str.charAt(i + 4) == '9'
              && str.charAt(i + 5) == ';') {
            i += 5;
            sb.append('\'');
          }
          else {
            sb.append('&');
          }
          break;

        case 'l':
          if (i + 3 < len
              && str.charAt(i + 2) == 't'
              && str.charAt(i + 3) == ';') {
            i += 3;
            sb.append('<');
          }
          else {
            sb.append('&');
          }
          break;

        case 'g':
          if (i + 3 < len
              && str.charAt(i + 2) == 't'
              && str.charAt(i + 3) == ';') {
            i += 3;
            sb.append('>');
          }
          else {
            sb.append('&');
          }
          break;

        default:
          sb.append('&');
          break;
      }
    }

    return sb;
  }

  /**
   * Escapes HTML
   *
   * @param env the calling environment
   * @param string the string to be trimmed
   * @param quoteStyleV optional quote style
   * @param charsetV optional charset style
   * @return the trimmed string
   */
  public static Value htmlspecialchars(Env env,
                                       StringValue string,
                                       @Optional("ENT_COMPAT") int quoteStyle,
                                       @Optional String charset,
                                       @Optional("true") boolean isDoubleEncode)
  {
    int len = string.length();

    StringValue sb = string.createStringBuilder(len * 5 / 4);

    forLoop:
    for (int i = 0; i < len; i++) {
      char ch = string.charAt(i);

      switch (ch) {
        case '&':
          if (! isDoubleEncode) {
            for (int j = i + 1; j < len && j < i + 12; j++) {
              char ch2 = string.charAt(j);

              if (ch2 == ';') {
                sb.append(string, i, j + 1);

                i = j;

                continue forLoop;
              }
            }
          }

          sb.append("&");
          break;
        case '"':
          if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0)
            sb.append(""");
          else
            sb.append(ch);
          break;
        case '\'':
          if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0)
            sb.append("'");
          else
            sb.append(ch);
          break;
        case '<':
          sb.append("<");
          break;
        case '>':
          sb.append(">");
          break;
        default:
          sb.append(ch);
          break;
      }
    }

    return sb;
  }

  /**
   * Escapes HTML
   *
   * @param env the calling environment
   * @param stringV the string to be trimmed
   * @param quoteStyleV optional quote style
   * @param charsetV optional charset style
   * @return the trimmed string
   */
  public static Value htmlentities(Env env,
                                   StringValue string,
                                   @Optional("ENT_COMPAT") int quoteStyle,
                                   @Optional String charset)
  {
    if (charset == null || charset.length() == 0) {
      // php 5.4.0
      charset = "UTF-8";
    }

    CharSequence unicodeStr;

    if (string.isUnicode()) {
      unicodeStr = string;
    }
    else {
      try {
        Decoder decoder = Decoder.create(charset);
        decoder.setAllowMalformedOut(true);

        unicodeStr = decoder.decode(env, string);
      }
      catch (Exception e) {
        env.warning(L.l("unsupported encoding, defaulting to utf-8"), e);

        charset = "UTF-8";

        Decoder decoder = Decoder.create(charset);
        decoder.setAllowMalformedOut(true);

        unicodeStr = decoder.decode(env, string);
      }
    }

    UnicodeBuilderValue sb = new UnicodeBuilderValue();

    int len = unicodeStr.length();

    for (int i = 0; i < len; i++) {
      char ch = unicodeStr.charAt(i);

      StringValue entity = HTML_SPECIALCHARS_MAP[ch & 0xffff];

      if (ch == '"') {
        if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0)
          sb.append(""");
        else
          sb.append('"');
      }
      else if (ch == '\'') {
        if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0)
          sb.append("'");
        else
          sb.append('\'');
      }
      else if (entity != null) {
        sb.append(entity);
      }
      else {
        sb.append((char) ch);
      }
    }

    if (string.isUnicode()) {
      return sb;
    }
    else {
      Encoder encoder = Encoder.create(charset);

      StringValue result = env.createBinaryBuilder();
      return encoder.encode(result, sb);
    }
  }

  /**
   * Converts named HTML entities found in the entity table back to their
   * characters. Text that does not form a known entity is copied through
   * unchanged. Numeric references such as {@code &#039;} are not decoded
   * because '#' is not accepted as part of an entity name.
   *
   * @param env the calling environment
   * @param string the string to decode
   * @param quoteStyle optional quote style (not used by this implementation)
   * @param charset optional output charset for non-unicode mode; the runtime
   *   encoding is used when null or empty
   * @return the decoded string
   */
  public static StringValue html_entity_decode(Env env,
                                               StringValue string,
                                               @Optional int quoteStyle,
                                               @Optional String charset)
  {
    if (string.length() == 0)
      return env.getEmptyString();

    ArrayValue htmlEntities = null;

    boolean isUnicode = env.isUnicodeSemantics();

    if (isUnicode) {
      if (HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY == null) {
        HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY = toUnicodeArray(
            env, HTML_ENTITIES_ARRAY_ENTITY_KEY);
      }

      htmlEntities = HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY;
    }
    else {
      htmlEntities = HTML_ENTITIES_ARRAY_ENTITY_KEY;
    }

    EncodingWriter out = null;

    if (! isUnicode) {
      if (charset == null || charset.length() == 0)
        charset = env.getRuntimeEncoding();

      out = Encoding.getWriteEncoding(charset);
    }

    int len = string.length();
    // Index of the '&' of the entity currently being scanned, or -1.
    int htmlEntityStart = -1;
    StringValue result = env.createStringBuilder();

    try {
      for (int i = 0; i < len; i++) {
        char ch = string.charAt(i);

        if (ch == '&' && htmlEntityStart < 0) {
          // Possible start of an entity; hold output until it is resolved.
          htmlEntityStart = i;
        }
        else if (htmlEntityStart < 0) {
          // Ordinary character outside any entity.
          result.append(ch);
        }
        else if (ch == ';') {
          // End of a candidate entity: substitute it if the table knows it.
          StringValue entity = string.substring(htmlEntityStart, i + 1);
          Value value = htmlEntities.get(entity);

          if (value.isNull()) {
            result.append(entity);
          }
          else if (isUnicode) {
            result.append((char)value.toInt());
          }
          else {
            out.write(result, (char)value.toInt());
          }

          htmlEntityStart = -1;
        }
        else if (('a' <= ch && ch <= 'z')
                 || ('A' <= ch && ch <= 'Z')
                 || ('0' <= ch && ch <= '9')) {
          // Still inside an entity name. Digits are accepted so that names
          // such as "&sup2;" and "&frac12;" can be matched.
        }
        else {
          // Not an entity after all: emit the '&' literally and rescan from
          // the character that follows it.
          result.append('&');
          i = htmlEntityStart;
          htmlEntityStart = -1;
        }
      }

      // An unterminated entity at the end of the input is copied verbatim.
      // The test is ">= 0" so that one starting at index 0 is kept too.
      if (htmlEntityStart >= 0) {
        result.append(string, htmlEntityStart, len);
      }
    } catch (IOException e) {
      // Best effort: log the failure and return what was decoded so far.
      log.log(Level.FINE, e.toString(), e);
    }

    return result;
  }

  /**
   * Replaces newlines with HTML breaks.
   *
   * @param env the calling environment
   */
  public static Value nl2br(Env env, StringValue string)
  {
    int strLen = string.length();

    StringValue sb = string.createStringBuilder(strLen * 5 / 4);

    for (int i = 0; i < strLen; i++) {
      char ch = string.charAt(i);

      if (ch == '\n') {
        sb.append("
\n"); } else if (ch == '\r') { if (i + 1 < strLen && string.charAt(i + 1) == '\n') { sb.append("
\r\n"); i++; } else { sb.append("
\r"); } } else { sb.append(ch); } } return sb; } private static void entity(ArrayValue array, StringValue []map, ArrayValue revMap, int ch, String entity) { // XXX: i18n and optimize static variables usage array.put("" + (char) ch, entity); StringValue entityValue = new StringBuilderValue(entity); map[ch & 0xffff] = entityValue; revMap.put(entityValue, LongValue.create(ch)); } static { ArrayValueImpl array = new ArrayValueImpl(); array.put("<", "<"); array.put(">", ">"); array.put("&", "&"); HTML_SPECIALCHARS_ARRAY = new ConstArrayValue(array); StringValue []map = new StringValue[65536]; HTML_SPECIALCHARS_MAP = map; ArrayValue revMap = new ArrayValueImpl(); HTML_ENTITIES_ARRAY_ENTITY_KEY = revMap; array = new ArrayValueImpl(); entity(array, map, revMap, '<', "<"); entity(array, map, revMap, '>', ">"); entity(array, map, revMap, '&', "&"); entity(array, map, revMap, 160, " "); entity(array, map, revMap, 161, "¡"); entity(array, map, revMap, 162, "¢"); entity(array, map, revMap, 163, "£"); entity(array, map, revMap, 164, "¤"); entity(array, map, revMap, 165, "¥"); entity(array, map, revMap, 166, "¦"); entity(array, map, revMap, 167, "§"); entity(array, map, revMap, 168, "¨"); entity(array, map, revMap, 169, "©"); entity(array, map, revMap, 170, "ª"); entity(array, map, revMap, 171, "«"); entity(array, map, revMap, 172, "¬"); entity(array, map, revMap, 173, "­"); entity(array, map, revMap, 174, "®"); entity(array, map, revMap, 175, "¯"); entity(array, map, revMap, 176, "°"); entity(array, map, revMap, 177, "±"); entity(array, map, revMap, 178, "²"); entity(array, map, revMap, 179, "³"); entity(array, map, revMap, 180, "´"); entity(array, map, revMap, 181, "µ"); entity(array, map, revMap, 182, "¶"); entity(array, map, revMap, 183, "·"); entity(array, map, revMap, 184, "¸"); entity(array, map, revMap, 185, "¹"); entity(array, map, revMap, 186, "º"); entity(array, map, revMap, 187, "»"); entity(array, map, revMap, 188, "¼"); entity(array, map, revMap, 189, "½"); 
entity(array, map, revMap, 190, "¾"); entity(array, map, revMap, 191, "¿"); entity(array, map, revMap, 192, "À"); entity(array, map, revMap, 193, "Á"); entity(array, map, revMap, 194, "Â"); entity(array, map, revMap, 195, "Ã"); entity(array, map, revMap, 196, "Ä"); entity(array, map, revMap, 197, "Å"); entity(array, map, revMap, 198, "Æ"); entity(array, map, revMap, 199, "Ç"); entity(array, map, revMap, 200, "È"); entity(array, map, revMap, 201, "É"); entity(array, map, revMap, 202, "Ê"); entity(array, map, revMap, 203, "Ë"); entity(array, map, revMap, 204, "Ì"); entity(array, map, revMap, 205, "Í"); entity(array, map, revMap, 206, "Î"); entity(array, map, revMap, 207, "Ï"); entity(array, map, revMap, 208, "Ð"); entity(array, map, revMap, 209, "Ñ"); entity(array, map, revMap, 210, "Ò"); entity(array, map, revMap, 211, "Ó"); entity(array, map, revMap, 212, "Ô"); entity(array, map, revMap, 213, "Õ"); entity(array, map, revMap, 214, "Ö"); entity(array, map, revMap, 215, "×"); entity(array, map, revMap, 216, "Ø"); entity(array, map, revMap, 217, "Ù"); entity(array, map, revMap, 218, "Ú"); entity(array, map, revMap, 219, "Û"); entity(array, map, revMap, 220, "Ü"); entity(array, map, revMap, 221, "Ý"); entity(array, map, revMap, 222, "Þ"); entity(array, map, revMap, 223, "ß"); entity(array, map, revMap, 224, "à"); entity(array, map, revMap, 225, "á"); entity(array, map, revMap, 226, "â"); entity(array, map, revMap, 227, "ã"); entity(array, map, revMap, 228, "ä"); entity(array, map, revMap, 229, "å"); entity(array, map, revMap, 230, "æ"); entity(array, map, revMap, 231, "ç"); entity(array, map, revMap, 232, "è"); entity(array, map, revMap, 233, "é"); entity(array, map, revMap, 234, "ê"); entity(array, map, revMap, 235, "ë"); entity(array, map, revMap, 236, "ì"); entity(array, map, revMap, 237, "í"); entity(array, map, revMap, 238, "î"); entity(array, map, revMap, 239, "ï"); entity(array, map, revMap, 240, "ð"); entity(array, map, revMap, 241, "ñ"); entity(array, map, 
revMap, 242, "ò"); entity(array, map, revMap, 243, "ó"); entity(array, map, revMap, 244, "ô"); entity(array, map, revMap, 245, "õ"); entity(array, map, revMap, 246, "ö"); entity(array, map, revMap, 247, "÷"); entity(array, map, revMap, 248, "ø"); entity(array, map, revMap, 249, "ù"); entity(array, map, revMap, 250, "ú"); entity(array, map, revMap, 251, "û"); entity(array, map, revMap, 252, "ü"); entity(array, map, revMap, 253, "ý"); entity(array, map, revMap, 254, "þ"); entity(array, map, revMap, 255, "ÿ"); // XXX: charset, order it. entity(array, map, revMap, 0x2002, " "); entity(array, map, revMap, 0x2009, " "); entity(array, map, revMap, 0x2018, "‘"); entity(array, map, revMap, 0x2020, "†"); entity(array, map, revMap, 0x2032, "′"); entity(array, map, revMap, 0x2044, "⁄"); entity(array, map, revMap, 0x20ac, "€"); HTML_ENTITIES_ARRAY = new ConstArrayValue(array); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy