All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.java.net.htmlparser.jericho.HTMLElements Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML.

There is a newer version: 3.4
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;
import static net.htmlparser.jericho.HTMLElementName.*;

/**
 * Contains static methods which group {@linkplain HTMLElementName HTML element names} by the characteristics of their associated
 * elements.
 * 

* An HTML element is a normal element with a * {@linkplain Element#getName() name} that matches one of the {@linkplain HTMLElementName HTML element names} (ignoring case). * This type of element spans the logical HTML element as described in the * HTML 4.01 specification section 3.2.1, * which may be implicitly terminated if it specifies an * {@linkplain #getEndTagOptionalElementNames() optional end tag}. *

* The term Non-HTML element refers to a normal element * with a {@linkplain Element#getName() name} that does not match one of the {@linkplain HTMLElementName HTML element names}. * This type of element must be either a single tag element or * explicitly terminated. *

* All of the sets returned by the methods in this class may be modified to customise the behaviour of the parser. * Care must be taken however to ensure that the sets only contain tag names in lower case. *

* Below is a table summarising the default characteristics of each HTML element. See also the * index of elements in the HTML 4.01 specification * for the official table containing similar information. *

* *

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
{@linkplain Element#getName() Name}Box Type{@linkplain #getStartTagOptionalElementNames() Start Tag}{@linkplain #getEndTagOptionalElementNames() End Tag}{@linkplain #getNestingForbiddenElementNames() Nest}{@linkplain #getDeprecatedElementNames() Depr.}Description / Specification
{@link HTMLElementName#A A}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} anchor
{@link HTMLElementName#ABBR ABBR}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  abbreviated form (e.g., WWW, HTTP, etc.)
{@link HTMLElementName#ACRONYM ACRONYM}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  acronym
{@link HTMLElementName#ADDRESS ADDRESS}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} information on author
{@link HTMLElementName#APPLET APPLET}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF}DJava applet
{@link HTMLElementName#AREA AREA}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} client-side image map area
{@link HTMLElementName#B B}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  bold text style
{@link HTMLElementName#BASE BASE}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} document base URI
{@link HTMLElementName#BASEFONT BASEFONT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF}Dbase font size
{@link HTMLElementName#BDO BDO}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  I18N BiDi over-ride
{@link HTMLElementName#BIG BIG}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  large text style
{@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  long quotation
{@link HTMLElementName#BODY BODY} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} document body
{@link HTMLElementName#BR BR}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} forced line break
{@link HTMLElementName#BUTTON BUTTON}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} push button
{@link HTMLElementName#CAPTION CAPTION}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} table caption
{@link HTMLElementName#CENTER CENTER}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required} Dshorthand for DIV align=center
{@link HTMLElementName#CITE CITE}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  citation
{@link HTMLElementName#CODE CODE}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  computer code fragment
{@link HTMLElementName#COL COL}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} table column
{@link HTMLElementName#COLGROUP COLGROUP}  {@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} table column group
{@link HTMLElementName#DD DD}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  definition description
{@link HTMLElementName#DEL DEL}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  deleted text
{@link HTMLElementName#DFN DFN}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  instance definition
{@link HTMLElementName#DIR DIR}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required} Ddirectory list
{@link HTMLElementName#DIV DIV}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  generic language/style container
{@link HTMLElementName#DL DL}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  definition list
{@link HTMLElementName#DT DT}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  definition term
{@link HTMLElementName#EM EM}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  emphasis
{@link HTMLElementName#FIELDSET FIELDSET}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  form control group
{@link HTMLElementName#FONT FONT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dlocal change to font
{@link HTMLElementName#FORM FORM}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} interactive form
{@link HTMLElementName#FRAME FRAME}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} subwindow
{@link HTMLElementName#FRAMESET FRAMESET}  {@linkplain #getEndTagRequiredElementNames() Required}  window subdivision
{@link HTMLElementName#H1 H1}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
{@link HTMLElementName#H2 H2}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
{@link HTMLElementName#H3 H3}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
{@link HTMLElementName#H4 H4}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
{@link HTMLElementName#H5 H5}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
{@link HTMLElementName#H6 H6}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
{@link HTMLElementName#HEAD HEAD} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} document head
{@link HTMLElementName#HR HR}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} horizontal rule
{@link HTMLElementName#HTML HTML} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} document root element
{@link HTMLElementName#I I}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  italic text style
{@link HTMLElementName#IFRAME IFRAME}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} inline subwindow
{@link HTMLElementName#IMG IMG}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} Embedded image
{@link HTMLElementName#INPUT INPUT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} form control
{@link HTMLElementName#INS INS}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  inserted text
{@link HTMLElementName#ISINDEX ISINDEX}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF}Dsingle line prompt
{@link HTMLElementName#KBD KBD}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  text to be entered by the user
{@link HTMLElementName#LABEL LABEL}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} form field label text
{@link HTMLElementName#LEGEND LEGEND}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} fieldset legend
{@link HTMLElementName#LI LI}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  list item
{@link HTMLElementName#LINK LINK}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} a media-independent link
{@link HTMLElementName#MAP MAP}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  client-side image map
{@link HTMLElementName#MENU MENU}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required} Dmenu list
{@link HTMLElementName#META META}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} generic metainformation
{@link HTMLElementName#NOFRAMES NOFRAMES}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  alternate content container for non frame-based rendering
{@link HTMLElementName#NOSCRIPT NOSCRIPT}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  alternate content container for non script-based rendering
{@link HTMLElementName#OBJECT OBJECT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  generic embedded object
{@link HTMLElementName#OL OL}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  ordered list
{@link HTMLElementName#OPTGROUP OPTGROUP}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} option group
{@link HTMLElementName#OPTION OPTION}  {@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} selectable choice
{@link HTMLElementName#P P}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} paragraph
{@link HTMLElementName#PARAM PARAM}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} named property value
{@link HTMLElementName#PRE PRE}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  preformatted text
{@link HTMLElementName#Q Q}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  short inline quotation
{@link HTMLElementName#S S}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dstrike-through text style
{@link HTMLElementName#SAMP SAMP}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  sample program output, scripts, etc.
{@link HTMLElementName#SCRIPT SCRIPT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} script statements
{@link HTMLElementName#SELECT SELECT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} option selector
{@link HTMLElementName#SMALL SMALL}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  small text style
{@link HTMLElementName#SPAN SPAN}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  generic language/style container
{@link HTMLElementName#STRIKE STRIKE}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dstrike-through text
{@link HTMLElementName#STRONG STRONG}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  strong emphasis
{@link HTMLElementName#STYLE STYLE}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} style info
{@link HTMLElementName#SUB SUB}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  subscript
{@link HTMLElementName#SUP SUP}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  superscript
{@link HTMLElementName#TABLE TABLE}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  table
{@link HTMLElementName#TBODY TBODY} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details)  table body
{@link HTMLElementName#TD TD}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table data cell
{@link HTMLElementName#TEXTAREA TEXTAREA}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} multi-line text field
{@link HTMLElementName#TFOOT TFOOT}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table footer
{@link HTMLElementName#TH TH}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table header cell
{@link HTMLElementName#THEAD THEAD}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table header
{@link HTMLElementName#TITLE TITLE}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} document title
{@link HTMLElementName#TR TR}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table row
{@link HTMLElementName#TT TT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  teletype or monospaced text style
{@link HTMLElementName#U U}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dunderlined text style
{@link HTMLElementName#UL UL}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  unordered list
{@link HTMLElementName#VAR VAR}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  instance of a variable or program argument
* * @see HTMLElementName * @see Element */ public final class HTMLElements { private static final List ALL=new ArrayList(Arrays.asList(new String[] {A,ABBR,ACRONYM,ADDRESS,APPLET,AREA,B,BASE,BASEFONT,BDO,BIG,BLOCKQUOTE,BODY,BR,BUTTON,CAPTION,CENTER,CITE,CODE,COL,COLGROUP,DD,DEL,DFN,DIR,DIV,DL,DT,EM,FIELDSET,FONT,FORM,FRAME,FRAMESET,H1,H2,H3,H4,H5,H6,HEAD,HR,HTML,I,IFRAME,IMG,INPUT,INS,ISINDEX,KBD,LABEL,LEGEND,LI,LINK,MAP,MENU,META,NOFRAMES,NOSCRIPT,OBJECT,OL,OPTGROUP,OPTION,P,PARAM,PRE,Q,S,SAMP,SCRIPT,SELECT,SMALL,SPAN,STRIKE,STRONG,STYLE,SUB,SUP,TABLE,TBODY,TD,TEXTAREA,TFOOT,TH,THEAD,TITLE,TR,TT,U,UL,VAR})); private static final HTMLElementNameSet BLOCK=new HTMLElementNameSet(new String[] {P,H1,H2,H3,H4,H5,H6,UL,OL,DIR,MENU,PRE,DL,DIV,CENTER,NOSCRIPT,NOFRAMES,BLOCKQUOTE,FORM,ISINDEX,HR,TABLE,FIELDSET,ADDRESS}); private static final HTMLElementNameSet INLINE=new HTMLElementNameSet(new String[] {TT,I,B,U,S,STRIKE,BIG,SMALL,EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,ABBR,ACRONYM,A,IMG,APPLET,OBJECT,FONT,BASEFONT,BR,SCRIPT,MAP,Q,SUB,SUP,SPAN,BDO,IFRAME,INPUT,SELECT,TEXTAREA,LABEL,BUTTON,INS,DEL}); private static final HTMLElementNameSet END_TAG_FORBIDDEN_SET=new HTMLElementNameSet(new String[] {AREA,BASE,BASEFONT,BR,COL,FRAME,HR,IMG,INPUT,ISINDEX,LINK,META,PARAM}); private static final HTMLElementNameSet _UL_OL=new HTMLElementNameSet(UL).union(OL); private static final HTMLElementNameSet _DD_DT=new HTMLElementNameSet(DD).union(DT); private static final HTMLElementNameSet _THEAD_TBODY_TFOOT_TR=new HTMLElementNameSet(THEAD).union(TBODY).union(TFOOT).union(TR); private static final HTMLElementNameSet _THEAD_TBODY_TFOOT_TR_TD_TH=new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TD).union(TH); private static final HTMLElementNameSet DEPRECATED=new HTMLElementNameSet().union(APPLET).union(BASEFONT).union(CENTER).union(DIR).union(FONT).union(ISINDEX).union(MENU).union(S).union(STRIKE).union(U); private static final HTMLElementNameSet START_TAG_OPTIONAL_SET=new 
HTMLElementNameSet().union(BODY).union(HEAD).union(HTML).union(TBODY); private static final HashMap CONSTANT_NAME_MAP=buildTagMap(); // contains a map of tag names to the tag constants, allowing standard tags to be tested using == operator instead of equals() private static final HashMap TERMINATING_TAG_NAME_SETS_MAP=buildTerminatingTagNameSetsMap(); // contains a map of tags having optional end tags to the HTMLElementTerminatingTagNameSets that can terminate the element if the end tag is not present private static final Set END_TAG_OPTIONAL_SET=TERMINATING_TAG_NAME_SETS_MAP.keySet(); private static final HTMLElementNameSet END_TAG_REQUIRED_SET=new HTMLElementNameSet().union(ALL).minus(END_TAG_FORBIDDEN_SET).minus(END_TAG_OPTIONAL_SET); private static final HTMLElementNameSet CLOSING_SLASH_IGNORED_SET=new HTMLElementNameSet().union(END_TAG_OPTIONAL_SET).union(END_TAG_REQUIRED_SET); static final HTMLElementNameSet END_TAG_REQUIRED_NESTING_FORBIDDEN_SET=new HTMLElementNameSet().union(A).union(ADDRESS).union(APPLET).union(BUTTON).union(CAPTION).union(FORM).union(IFRAME).union(LABEL).union(LEGEND).union(OPTGROUP).union(SCRIPT).union(SELECT).union(STYLE).union(TEXTAREA).union(TITLE); private static final HTMLElementNameSet END_TAG_OPTIONAL_NESTING_FORBIDDEN_SET=new HTMLElementNameSet().union(BODY).union(COLGROUP).union(HEAD).union(HTML).union(OPTION).union(P); private static final HTMLElementNameSet NESTING_FORBIDDEN_SET=new HTMLElementNameSet().union(END_TAG_REQUIRED_NESTING_FORBIDDEN_SET).union(END_TAG_OPTIONAL_NESTING_FORBIDDEN_SET).union(END_TAG_FORBIDDEN_SET); private HTMLElements() {} /** * Returns a list containing all of the {@linkplain HTMLElementName HTML element names}. *

* The returned list is in alphabetical order.
     *
     * @return a list containing all of the {@linkplain HTMLElementName HTML element names}.
     */
    public static final List<String> getElementNames() {
        // The shared list instance itself is returned, not a copy.
        return ALL;
    }

    /**
     * Returns a set containing the {@linkplain Element#getName() names} of all the
     * block-level elements.
     *

* The element names contained in this set are:
* {@link HTMLElementName#ADDRESS ADDRESS}, {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE}, {@link HTMLElementName#CENTER CENTER}, {@link HTMLElementName#DIR DIR}, {@link HTMLElementName#DIV DIV}, * {@link HTMLElementName#DL DL}, {@link HTMLElementName#FIELDSET FIELDSET}, {@link HTMLElementName#FORM FORM}, * {@link HTMLElementName#H1 H1}, {@link HTMLElementName#H2 H2}, {@link HTMLElementName#H3 H3}, {@link HTMLElementName#H4 H4}, {@link HTMLElementName#H5 H5}, {@link HTMLElementName#H6 H6}, * {@link HTMLElementName#HR HR}, {@link HTMLElementName#ISINDEX ISINDEX}, {@link HTMLElementName#MENU MENU}, {@link HTMLElementName#NOFRAMES NOFRAMES}, {@link HTMLElementName#NOSCRIPT NOSCRIPT}, * {@link HTMLElementName#OL OL}, {@link HTMLElementName#P P}, {@link HTMLElementName#PRE PRE}, {@link HTMLElementName#TABLE TABLE}, {@link HTMLElementName#UL UL} *

* This set is defined in the HTML 4.01 Transitional DTD, * but more detailed information can be found in the * HTML 4.01 specification section 7.5.3 - Block-level and inline elements * and the CSS2 specification section 9.2.1 - Block-level elements and block boxes. *

* The CSS2 display property can be used
     * to override the normal box type of an element.
     *
     * @return a set containing the {@linkplain Element#getName() names} of all the block-level elements.
     * @see #getInlineLevelElementNames()
     */
    public static Set<String> getBlockLevelElementNames() {
        // The live set is handed out without copying; the class documentation allows callers to modify it.
        return BLOCK;
    }

    /**
     * Returns a set containing the {@linkplain Element#getName() names} of all the
     * inline-level elements.
     *

* The element names contained in this set are:
* {@link HTMLElementName#A A}, {@link HTMLElementName#ABBR ABBR}, {@link HTMLElementName#ACRONYM ACRONYM}, {@link HTMLElementName#APPLET APPLET}, {@link HTMLElementName#B B}, {@link HTMLElementName#BASEFONT BASEFONT}, * {@link HTMLElementName#BDO BDO}, {@link HTMLElementName#BIG BIG}, {@link HTMLElementName#BR BR}, {@link HTMLElementName#BUTTON BUTTON}, {@link HTMLElementName#CITE CITE}, {@link HTMLElementName#CODE CODE}, * {@link HTMLElementName#DEL DEL}, {@link HTMLElementName#DFN DFN}, {@link HTMLElementName#EM EM}, {@link HTMLElementName#FONT FONT}, {@link HTMLElementName#I I}, {@link HTMLElementName#IFRAME IFRAME}, {@link HTMLElementName#IMG IMG}, * {@link HTMLElementName#INPUT INPUT}, {@link HTMLElementName#INS INS}, {@link HTMLElementName#KBD KBD}, {@link HTMLElementName#LABEL LABEL}, {@link HTMLElementName#MAP MAP}, {@link HTMLElementName#OBJECT OBJECT}, * {@link HTMLElementName#Q Q}, {@link HTMLElementName#S S}, {@link HTMLElementName#SAMP SAMP}, {@link HTMLElementName#SCRIPT SCRIPT}, {@link HTMLElementName#SELECT SELECT}, {@link HTMLElementName#SMALL SMALL}, * {@link HTMLElementName#SPAN SPAN}, {@link HTMLElementName#STRIKE STRIKE}, {@link HTMLElementName#STRONG STRONG}, {@link HTMLElementName#SUB SUB}, {@link HTMLElementName#SUP SUP}, {@link HTMLElementName#TEXTAREA TEXTAREA}, * {@link HTMLElementName#TT TT}, {@link HTMLElementName#U U}, {@link HTMLElementName#VAR VAR} *

* This set is defined in the HTML 4.01 Transitional DTD, * but more detailed information can be found in the * HTML 4.01 specification section 7.5.3 - Block-level and inline elements * and the CSS2 specification section 9.2.2 - Inline-level elements and inline boxes. *

* The CSS2 display property can be used * to override the normal box type of an element. *

* The HTML Document Type Definitions * forbid the presence of {@linkplain #getBlockLevelElementNames() block-level elements} inside inline-level elements, * but it is tolerated by all popular browsers in various situations, even in XHTML documents. * The most notorious example of this is the common inclusion of block-level elements inside {@link HTMLElementName#FONT FONT} elements. * * @return a set containing the {@linkplain Element#getName() names} of all the inline-level elements. * @see #getBlockLevelElementNames() */ public static Set getInlineLevelElementNames() { return INLINE; } /** * Returns a set containing the {@linkplain Element#getName() names} of all * deprecated elements in HTML 4.01. * @return a set containing the {@linkplain Element#getName() names} of all deprecated elements in HTML 4.01. */ public static Set getDeprecatedElementNames() { return DEPRECATED; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getEndTag() end tag} is forbidden. *

* See the element parsing rules for HTML elements with forbidden end tags * for more information. *

* The index of elements in the HTML 4.01 specification * includes the letter 'F' in the "End Tag" column for elements whose end tag is forbidden. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getEndTag() end tag} is forbidden. * @see #getEndTagOptionalElementNames() * @see #getEndTagRequiredElementNames() */ public static Set getEndTagForbiddenElementNames() { return END_TAG_FORBIDDEN_SET; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getEndTag() end tag} is optional. *

* Elements with these names may be implicitly terminated by a subsequent * {@linkplain #getTerminatingStartTagNames(String) terminating start tag} or * {@linkplain #getTerminatingEndTagNames(String) terminating end tag}. * A list of these terminating tags, and the names of {@linkplain #getNonterminatingElementNames(String) non-terminating elements} * that can be nested within the element, can be found in the documentation of each relevant element in the {@link HTMLElementName} class. *

* See the element parsing rules for HTML elements with optional end tags * for more information. *

* The index of elements in the HTML 4.01 specification * includes the letter 'O' in the "End Tag" column for elements whose end tag is optional. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getEndTag() end tag} is optional. * @see #getEndTagForbiddenElementNames() * @see #getEndTagRequiredElementNames() */ public static Set getEndTagOptionalElementNames() { return END_TAG_OPTIONAL_SET; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getEndTag() end tag} is required. *

* See the element parsing rules for HTML elements with required end tags * for more information. *

* The index of elements in the HTML 4.01 specification * leaves the "End Tag" column blank for elements whose end tag is required. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getEndTag() end tag} is required. * @see #getEndTagForbiddenElementNames() * @see #getEndTagOptionalElementNames() */ public static Set getEndTagRequiredElementNames() { return END_TAG_REQUIRED_SET; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getStartTag() start tag} is optional. *

* Elements with optional start tags must be present in the document object model (DOM) * in certain locations, either forming part of the structure of the HTML document as a whole * (e.g. the {@link HTMLElementName#HTML HTML}, {@link HTMLElementName#HEAD HEAD}, and {@link HTMLElementName#BODY BODY} elements), * or forming part of the structure of a {@link HTMLElementName#TABLE TABLE} element (e.g. the {@link HTMLElementName#TBODY TBODY} element). * The location of an omitted start tag * in the document's object model can be inferred from the surrounding elements. *

* This library does not use this property in any way when parsing documents, and does not construct a document object model from the * source, so no implied element is created where an optional start tag is omitted. *

* When the start tag has been omitted in the document text, the corresponding end tag should also be omitted. *

* The index of elements in the HTML 4.01 specification
     * includes the letter 'O' in the "Start Tag" column for elements whose start tag is optional.
     *
     * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getStartTag() start tag} is optional.
     */
    public static Set<String> getStartTagOptionalElementNames() {
        // The live set is handed out without copying; the class documentation allows callers to modify it.
        return START_TAG_OPTIONAL_SET;
    }

    /**
     * Returns the {@linkplain StartTag#getName() names} of start tags that implicitly terminate
     * an HTML element with the specified name.
     *

* This method is only relevant to HTML elements for which the * {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * It returns null if *
{@link #getEndTagOptionalElementNames()}.contains(endTagOptionalElementName.toLowerCase())==null. * * @param endTagOptionalElementName the {@linkplain Element#getName() name} of an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @return the {@linkplain StartTag#getName() names} of start tags that implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @see #getTerminatingEndTagNames(String endTagOptionalElementName) * @see #getNonterminatingElementNames(String endTagOptionalElementName) */ public static Set getTerminatingStartTagNames(final String endTagOptionalElementName) { final HTMLElementTerminatingTagNameSets terminatingTagNameSets=getTerminatingTagNameSets(endTagOptionalElementName); if (terminatingTagNameSets==null) return null; return terminatingTagNameSets.TerminatingStartTagNameSet; } /** * Returns the {@linkplain EndTag#getName() names} of end tags that implicitly terminate * an HTML element with the specified name. *

* This method is only relevant to HTML elements for which the * {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * It returns null if *
{@link #getEndTagOptionalElementNames()}.contains(endTagOptionalElementName.toLowerCase())==null. *

* Note that removing the tag name matching the specified element has no effect on the behaviour of the parser, * as it is always assumed that a start tag is terminated by an end tag with a matching name. * * @param endTagOptionalElementName the {@linkplain Element#getName() name} of an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @return the {@linkplain StartTag#getName() names} of end tags that implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @see #getTerminatingStartTagNames(String endTagOptionalElementName) * @see #getNonterminatingElementNames(String endTagOptionalElementName) */ public static Set getTerminatingEndTagNames(final String endTagOptionalElementName) { final HTMLElementTerminatingTagNameSets terminatingTagNameSets=getTerminatingTagNameSets(endTagOptionalElementName); if (terminatingTagNameSets==null) return null; return terminatingTagNameSets.TerminatingEndTagNameSet; } /** * Returns the {@linkplain Element#getName() names} of elements that do NOT implicitly terminate * an HTML element with the specified name. * Neither can any tag nested inside any of these elements implicitly terminate the specified element, * even if it is listed as one of the {@linkplain #getTerminatingStartTagNames(String) terminating start tags} or * {@linkplain #getTerminatingEndTagNames(String) terminating end tags}. *

* This method is only relevant to HTML elements for which the * {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * It returns null if *
{@link #getEndTagOptionalElementNames()}.contains(endTagOptionalElementName.toLowerCase())==null. * * @param endTagOptionalElementName the {@linkplain Element#getName() name} of an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @return the {@linkplain Element#getName() names} of elements that do NOT implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @see #getTerminatingStartTagNames(String endTagOptionalElementName) * @see #getTerminatingEndTagNames(String endTagOptionalElementName) */ public static Set getNonterminatingElementNames(final String endTagOptionalElementName) { final HTMLElementTerminatingTagNameSets terminatingTagNameSets=getTerminatingTagNameSets(endTagOptionalElementName); if (terminatingTagNameSets==null) return null; return terminatingTagNameSets.NonterminatingElementNameSet; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * which should never contain elements of the same name, either as direct or indirect descendants. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements which should never contain elements of the same name. */ public static Set getNestingForbiddenElementNames() { return NESTING_FORBIDDEN_SET; } static final String getConstantElementName(final String elementName) { final String elementNameConstant=CONSTANT_NAME_MAP.get(elementName); return elementNameConstant!=null ? 
elementNameConstant : elementName; } static final boolean isClosingSlashIgnored(final String elementName) { return CLOSING_SLASH_IGNORED_SET.contains(elementName); } static final HTMLElementTerminatingTagNameSets getTerminatingTagNameSets(final String endTagOptionalElementName) { return TERMINATING_TAG_NAME_SETS_MAP.get(endTagOptionalElementName); } private static HashMap buildTerminatingTagNameSetsMap() { // HTML is included in the NonterminatingElementNameSet of BODY and HTML in case the source contains (illegaly) nested HTML documents final HashMap map=new HashMap(20,1.0F); // 15 entries in total map.put(BODY,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(), new HTMLElementNameSet(HTML).union(BODY), new HTMLElementNameSet(HTML))); map.put(COLGROUP,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(COLGROUP), new HTMLElementNameSet(TABLE).union(COLGROUP), new HTMLElementNameSet(TABLE))); map.put(DD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_DD_DT), new HTMLElementNameSet(DL).union(DD), new HTMLElementNameSet(DL))); map.put(DT,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_DD_DT), new HTMLElementNameSet(DL).union(DT), new HTMLElementNameSet(DL))); map.put(HEAD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(BODY).union(FRAMESET), new HTMLElementNameSet(HTML).union(HEAD), new HTMLElementNameSet())); map.put(HTML,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(), new HTMLElementNameSet(HTML), new HTMLElementNameSet(HTML))); map.put(LI,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(LI), new HTMLElementNameSet(_UL_OL).union(LI), new HTMLElementNameSet(_UL_OL))); map.put(OPTION,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(OPTION).union(OPTGROUP), new HTMLElementNameSet(SELECT).union(OPTION), new HTMLElementNameSet())); map.put(P,new HTMLElementTerminatingTagNameSets(new 
HTMLElementNameSet(BLOCK).union(_DD_DT).union(TH).union(TD).union(LI), new HTMLElementNameSet(BLOCK).union(_DD_DT).union(BODY).union(HTML).union(_THEAD_TBODY_TFOOT_TR_TD_TH).union(CAPTION).union(LEGEND), new HTMLElementNameSet())); map.put(TBODY,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(TBODY).union(TFOOT).union(THEAD), new HTMLElementNameSet(TABLE).union(TBODY), new HTMLElementNameSet(TABLE))); map.put(TD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR_TD_TH), new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TABLE).union(TD), new HTMLElementNameSet(TABLE))); map.put(TFOOT,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(TBODY).union(TFOOT).union(THEAD), new HTMLElementNameSet(TABLE).union(TFOOT), new HTMLElementNameSet(TABLE))); map.put(TH,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR_TD_TH), new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TABLE).union(TH), new HTMLElementNameSet(TABLE))); map.put(THEAD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(TBODY).union(TFOOT).union(THEAD), new HTMLElementNameSet(TABLE).union(THEAD), new HTMLElementNameSet(TABLE))); map.put(TR,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR), new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TABLE), new HTMLElementNameSet(TABLE))); return map; } private static HashMap buildTagMap() { final HashMap map=new HashMap(132,1.0F); // 99 tags total for (String tagName : ALL) map.put(tagName,tagName); map.put(StartTagTypeMarkupDeclaration.ELEMENT,StartTagTypeMarkupDeclaration.ELEMENT); map.put(StartTagTypeMarkupDeclaration.ATTLIST,StartTagTypeMarkupDeclaration.ATTLIST); map.put(StartTagTypeMarkupDeclaration.ENTITY,StartTagTypeMarkupDeclaration.ENTITY); map.put(StartTagTypeMarkupDeclaration.NOTATION,StartTagTypeMarkupDeclaration.NOTATION); 
map.put(StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF,StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF); map.put(StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF,StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF); return map; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy