// com.phloc.commons.xml.serialize.HTMLdtd (Maven / Gradle / Ivy)
/**
* Copyright (C) 2006-2014 phloc systems
* http://www.phloc.com
* office[at]phloc[dot]com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.phloc.commons.xml.serialize;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.annotation.Nonnull;
/**
 * Utility class for accessing information specific to HTML documents. The
 * relevant parts of the HTML DTD are expressed as two static lookup tables:
 * <ul>
 * <li>a table of per-element flags, queried by {@link #isEmptyTag},
 * {@link #isElementContent}, {@link #isPreserveSpace},
 * {@link #isOptionalClosing}, {@link #isOnlyOpening} and {@link #isClosing}</li>
 * <li>a table of boolean attributes per element, queried by
 * {@link #isBoolean}</li>
 * </ul>
 * Additionally {@link #isURI} identifies attributes that carry a URI. All tag
 * and attribute names are matched case insensitively. Both tables are filled
 * once in the static initializer of this class and are only read afterwards.
 *
 * @version $Revision: 699892 $ $Date: 2008-09-28 17:08:27 -0400 (Sun, 28 Sep
 *          2008) $
 * @author Assaf Arkin
 */
public final class HTMLdtd
{
  /**
   * Holds element definitions. Maps the upper case tag name to the bitwise OR
   * of the flag constants defined below.
   */
  private static final Map <String, Integer> s_aElemDefs = new HashMap <String, Integer> ();

  /**
   * Holds boolean attribute definitions. Maps the upper case tag name to the
   * names of the attributes that are boolean on that element.
   */
  private static final Map <String, String []> s_aBoolAttrs = new HashMap <String, String []> ();

  /**
   * Only opening tag should be printed.
   */
  private static final int ONLY_OPENING = 0x0001;

  /**
   * Element contains element content only.
   */
  private static final int ELEM_CONTENT = 0x0002;

  /**
   * Element preserve spaces.
   */
  private static final int PRESERVE = 0x0004;

  /**
   * Optional closing tag.
   */
  private static final int OPT_CLOSING = 0x0008;

  /**
   * Element is empty (also means only opening tag)
   */
  private static final int EMPTY = 0x0010 | ONLY_OPENING;

  /**
   * Allowed to appear in head.
   */
  private static final int ALLOWED_HEAD = 0x0020;

  /**
   * When opened, closes P.
   */
  private static final int CLOSE_P = 0x0040;

  /**
   * When opened, closes DD or DT.
   */
  private static final int CLOSE_DD_DT = 0x0080;

  /**
   * When opened, closes itself.
   */
  private static final int CLOSE_SELF = 0x0100;

  /**
   * When opened, closes another table section.
   */
  private static final int CLOSE_TABLE = 0x0200;

  /**
   * When opened, closes TH or TD.
   */
  private static final int CLOSE_TH_TD = 0x0400;

  // Utility class - not meant to be instantiated
  private HTMLdtd ()
  {}

  /**
   * Check whether the passed element is defined and has all bits of the passed
   * flag set.
   *
   * @param sTagName
   *        The element tag name (case insensitive)
   * @param nFlag
   *        The flag (or combination of flags) to test
   * @return <code>true</code> if the element is known and all bits of
   *         <code>nFlag</code> are set in its definition, <code>false</code>
   *         otherwise (including unknown elements)
   */
  private static boolean _isElement (@Nonnull final String sTagName, final int nFlag)
  {
    // Table keys are upper case; use a fixed locale so that the conversion
    // does not depend on the default locale of the JVM
    final Integer aFlags = s_aElemDefs.get (sTagName.toUpperCase (Locale.US));
    return aFlags != null && ((aFlags.intValue () & nFlag) == nFlag);
  }

  /**
   * Returns true if element is declared to be empty. HTML elements are defined
   * as empty in the DTD, not by the document syntax.
   *
   * @param sTagName
   *        The element tag name (case insensitive)
   * @return True if element is empty
   */
  public static boolean isEmptyTag (@Nonnull final String sTagName)
  {
    return _isElement (sTagName, EMPTY);
  }

  /**
   * Returns true if element is declared to have element content. Whitespaces
   * appearing inside element content will be ignored, other text will simply
   * report an error.
   *
   * @param sTagName
   *        The element tag name (case insensitive)
   * @return True if element content
   */
  public static boolean isElementContent (@Nonnull final String sTagName)
  {
    return _isElement (sTagName, ELEM_CONTENT);
  }

  /**
   * Returns true if element's textual contents preserves spaces. In the table
   * of this class this applies to PRE, TEXTAREA, SCRIPT, NOSCRIPT and STYLE,
   * all other HTML elements do not preserve space.
   *
   * @param sTagName
   *        The element tag name (case insensitive)
   * @return True if element's text content preserves spaces
   */
  public static boolean isPreserveSpace (@Nonnull final String sTagName)
  {
    return _isElement (sTagName, PRESERVE);
  }

  /**
   * Returns true if element's closing tag is optional and need not exist. An
   * error will not be reported for such elements if they are not closed. For
   * example, LI is most often not closed.
   *
   * @param sTagName
   *        The element tag name (case insensitive)
   * @return True if closing tag implied
   */
  public static boolean isOptionalClosing (@Nonnull final String sTagName)
  {
    return _isElement (sTagName, OPT_CLOSING);
  }

  /**
   * Returns true if element's closing tag is generally not printed. For
   * example, LI should not print the closing tag. As the EMPTY flag includes
   * the ONLY_OPENING bit, this is also true for all empty elements.
   *
   * @param sTagName
   *        The element tag name (case insensitive)
   * @return True if only opening tag should be printed
   */
  public static boolean isOnlyOpening (@Nonnull final String sTagName)
  {
    return _isElement (sTagName, ONLY_OPENING);
  }

  /**
   * Returns true if the opening of one element (<code>sTagName</code>) implies
   * the closing of another open element (<code>sOpenTagName</code>). For
   * example, every opening LI will close the previously open LI, and every
   * opening BODY will close the previously open HEAD.
   *
   * @param sTagName
   *        The newly opened element (case insensitive)
   * @param sOpenTagName
   *        The already opened element (case insensitive)
   * @return True if opening <code>sTagName</code> closes the open
   *         <code>sOpenTagName</code>
   */
  public static boolean isClosing (@Nonnull final String sTagName, @Nonnull final String sOpenTagName)
  {
    // Every element that is not explicitly allowed in HEAD closes the HEAD.
    // This includes elements that are not contained in the table at all.
    if (sOpenTagName.equalsIgnoreCase ("HEAD"))
      return !_isElement (sTagName, ALLOWED_HEAD);

    // P is closed by block level elements and by itself
    if (sOpenTagName.equalsIgnoreCase ("P"))
      return _isElement (sTagName, CLOSE_P);

    // DT closes DD, DD closes DT
    if (sOpenTagName.equalsIgnoreCase ("DT") || sOpenTagName.equalsIgnoreCase ("DD"))
      return _isElement (sTagName, CLOSE_DD_DT);

    // LI and OPTION close themselves
    if (sOpenTagName.equalsIgnoreCase ("LI") || sOpenTagName.equalsIgnoreCase ("OPTION"))
      return _isElement (sTagName, CLOSE_SELF);

    // Each of these table sections closes all the others
    if (sOpenTagName.equalsIgnoreCase ("THEAD") ||
        sOpenTagName.equalsIgnoreCase ("TFOOT") ||
        sOpenTagName.equalsIgnoreCase ("TBODY") ||
        sOpenTagName.equalsIgnoreCase ("TR") ||
        sOpenTagName.equalsIgnoreCase ("COLGROUP"))
      return _isElement (sTagName, CLOSE_TABLE);

    // TD closes TH and TH closes TD
    if (sOpenTagName.equalsIgnoreCase ("TH") || sOpenTagName.equalsIgnoreCase ("TD"))
      return _isElement (sTagName, CLOSE_TH_TD);

    return false;
  }

  /**
   * Returns true if the specified attribute is a URI and should be escaped
   * appropriately. In HTML URIs are escaped differently than normal attributes.
   * The current implementation only looks at the attribute name
   * (<code>href</code> or <code>src</code>) and ignores the tag name.
   *
   * @param sTagName
   *        The element's tag name (currently not evaluated)
   * @param sAttrName
   *        The attribute's name (case insensitive)
   * @return <code>true</code> if the passed combination is an URI attribute
   */
  public static boolean isURI (@Nonnull final String sTagName, @Nonnull final String sAttrName)
  {
    // Stupid checks.
    return sAttrName.equalsIgnoreCase ("href") || sAttrName.equalsIgnoreCase ("src");
  }

  /**
   * Returns true if the specified attribute is a boolean and should be printed
   * without the value. This applies to attributes that are true if they exist,
   * such as selected (OPTION/INPUT).
   *
   * @param sTagName
   *        The element's tag name (case insensitive)
   * @param sAttrName
   *        The attribute's name (case insensitive)
   * @return <code>true</code> if the passed combination is a boolean value,
   *         <code>false</code> if not or if no boolean attributes are defined
   *         for the element
   */
  public static boolean isBoolean (@Nonnull final String sTagName, @Nonnull final String sAttrName)
  {
    // Table keys are upper case; use a fixed locale for the conversion
    final String [] aAttrNames = s_aBoolAttrs.get (sTagName.toUpperCase (Locale.US));
    if (aAttrNames != null)
      for (final String sCurAttrName : aAttrNames)
        if (sCurAttrName.equalsIgnoreCase (sAttrName))
          return true;
    return false;
  }

  /**
   * Register the flags of an element. An existing definition of the same name
   * is overwritten.
   *
   * @param name
   *        The element tag name. Must be upper case, as lookups convert the
   *        requested name to upper case.
   * @param nFlags
   *        Bitwise OR of the flag constants of this class
   */
  private static void _defineElement (@Nonnull final String name, final int nFlags)
  {
    s_aElemDefs.put (name, Integer.valueOf (nFlags));
  }

  /**
   * Register the boolean attributes of an element. An existing definition of
   * the same element is overwritten.
   *
   * @param sTagName
   *        The element tag name. Must be upper case, as lookups convert the
   *        requested name to upper case.
   * @param aAttrNames
   *        The names of all boolean attributes of the element
   */
  private static void _defineBoolean (@Nonnull final String sTagName, @Nonnull final String... aAttrNames)
  {
    s_aBoolAttrs.put (sTagName, aAttrNames);
  }

  static
  {
    // Element definitions
    _defineElement ("ADDRESS", CLOSE_P);
    _defineElement ("AREA", EMPTY);
    _defineElement ("BASE", EMPTY | ALLOWED_HEAD);
    _defineElement ("BASEFONT", EMPTY);
    _defineElement ("BLOCKQUOTE", CLOSE_P);
    _defineElement ("BODY", OPT_CLOSING);
    _defineElement ("BR", EMPTY);
    _defineElement ("COL", EMPTY);
    _defineElement ("COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE);
    _defineElement ("DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT);
    _defineElement ("DIV", CLOSE_P);
    _defineElement ("DL", ELEM_CONTENT | CLOSE_P);
    _defineElement ("DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT);
    _defineElement ("FIELDSET", CLOSE_P);
    _defineElement ("FORM", CLOSE_P);
    _defineElement ("FRAME", EMPTY | OPT_CLOSING);
    _defineElement ("H1", CLOSE_P);
    _defineElement ("H2", CLOSE_P);
    _defineElement ("H3", CLOSE_P);
    _defineElement ("H4", CLOSE_P);
    _defineElement ("H5", CLOSE_P);
    _defineElement ("H6", CLOSE_P);
    _defineElement ("HEAD", ELEM_CONTENT | OPT_CLOSING);
    _defineElement ("HR", EMPTY | CLOSE_P);
    _defineElement ("HTML", ELEM_CONTENT | OPT_CLOSING);
    _defineElement ("IMG", EMPTY);
    _defineElement ("INPUT", EMPTY);
    _defineElement ("ISINDEX", EMPTY | ALLOWED_HEAD);
    _defineElement ("LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF);
    _defineElement ("LINK", EMPTY | ALLOWED_HEAD);
    _defineElement ("MAP", ALLOWED_HEAD);
    _defineElement ("META", EMPTY | ALLOWED_HEAD);
    _defineElement ("OL", ELEM_CONTENT | CLOSE_P);
    _defineElement ("OPTGROUP", ELEM_CONTENT);
    _defineElement ("OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF);
    _defineElement ("P", OPT_CLOSING | CLOSE_P | CLOSE_SELF);
    _defineElement ("PARAM", EMPTY);
    _defineElement ("PRE", PRESERVE | CLOSE_P);
    _defineElement ("SCRIPT", ALLOWED_HEAD | PRESERVE);
    _defineElement ("NOSCRIPT", ALLOWED_HEAD | PRESERVE);
    _defineElement ("SELECT", ELEM_CONTENT);
    _defineElement ("STYLE", ALLOWED_HEAD | PRESERVE);
    _defineElement ("TABLE", ELEM_CONTENT | CLOSE_P);
    _defineElement ("TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE);
    _defineElement ("TD", OPT_CLOSING | CLOSE_TH_TD);
    _defineElement ("TEXTAREA", PRESERVE);
    _defineElement ("TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE);
    _defineElement ("TH", OPT_CLOSING | CLOSE_TH_TD);
    _defineElement ("THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE);
    _defineElement ("TITLE", ALLOWED_HEAD);
    _defineElement ("TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE);
    _defineElement ("UL", ELEM_CONTENT | CLOSE_P);

    // Boolean attribute definitions
    // NOTE(review): the entries AREA/href, IMAGE/ismap and LINK/link look
    // questionable (HTML 4.01 presumably means AREA/nohref and IMG/ismap) but
    // are kept unchanged so that the output of existing callers does not
    // change - TODO confirm against the HTML 4.01 attribute index
    _defineBoolean ("AREA", "href");
    _defineBoolean ("BUTTON", "disabled");
    _defineBoolean ("DIR", "compact");
    _defineBoolean ("DL", "compact");
    _defineBoolean ("FRAME", "noresize");
    _defineBoolean ("HR", "noshade");
    _defineBoolean ("IMAGE", "ismap");
    _defineBoolean ("INPUT", "defaultchecked", "checked", "readonly", "disabled");
    _defineBoolean ("LINK", "link");
    _defineBoolean ("MENU", "compact");
    _defineBoolean ("OBJECT", "declare");
    _defineBoolean ("OL", "compact");
    _defineBoolean ("OPTGROUP", "disabled");
    _defineBoolean ("OPTION", "default-selected", "selected", "disabled");
    _defineBoolean ("SCRIPT", "defer");
    _defineBoolean ("SELECT", "multiple", "disabled");
    _defineBoolean ("STYLE", "disabled");
    _defineBoolean ("TD", "nowrap");
    _defineBoolean ("TH", "nowrap");
    _defineBoolean ("TEXTAREA", "disabled", "readonly");
    _defineBoolean ("UL", "compact");
  }
}