All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.googlecode.html.HTMLTagBalancer Maven / Gradle / Ivy

There is a newer version: 0.63
Show newest version
/*
 * Copyright 2002-2009 Andy Clark, Marc Guillemot
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.googlecode.html;

import com.googlecode.html.HTMLElements.Element;
import com.googlecode.html.filters.NamespaceBinder;
import com.googlecode.html.xercesbridge.XercesBridge;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.xni.*;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.apache.xerces.xni.parser.XMLDocumentSource;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Balances tags in an HTML document. This component receives document events and tries to correct
 * many common mistakes that human (and computer) HTML document authors make. This tag balancer can:
 * <ul>
 * <li>add missing parent elements;
 * <li>automatically close elements with optional end tags; and
 * <li>handle mis-matched inline element tags.
 * </ul>
 * <p>
 * This component recognizes the following features:
 * <ul>
 * <li>http://cyberneko.org/html/features/augmentations
 * <li>http://cyberneko.org/html/features/report-errors
 * <li>http://cyberneko.org/html/features/balance-tags/document-fragment
 * <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
 * </ul>
 * <p>
 * This component recognizes the following properties:
 * <ul>
 * <li>http://cyberneko.org/html/properties/names/elems
 * <li>http://cyberneko.org/html/properties/names/attrs
 * <li>http://cyberneko.org/html/properties/error-reporter
 * <li>http://cyberneko.org/html/properties/balance-tags/fragment-context-stack
 * </ul>
* * @author Andy Clark * @author Marc Guillemot * @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $ * @see HTMLElements */ public class HTMLTagBalancer implements XMLDocumentFilter, HTMLComponent { // // Constants // // features /** * Element info for each start element. This information is used when closing unbalanced inline * elements. For example: *

*

     * <i>unbalanced <b>HTML</i> content</b>
     * 
*

* It seems that it is a waste of processing and memory to copy the attributes for every start * element even if there are no unbalanced inline elements in the document. However, if the * attributes are not saved, then important attributes such as style information would * be lost. * * @author Andy Clark */ public static class Info { // // Data // /** * The element attributes. */ public XMLAttributes attributes; /** * The element. */ public HTMLElements.Element element; /** * The element qualified name. */ public QName qname; // // Constructors // /** * Creates an element information object. *

* Note: This constructor makes a copy of the element information. * * @param element The element qualified name. */ public Info(HTMLElements.Element element, QName qname) { this(element, qname, null); } // (HTMLElements.Element,QName) /** * Creates an element information object. *

* Note: This constructor makes a copy of the element information. * * @param element The element qualified name. * @param attributes The element attributes. */ public Info(HTMLElements.Element element, QName qname, XMLAttributes attributes) { this.element = element; this.qname = new QName(qname); if (attributes != null) { int length = attributes.getLength(); if (length > 0) { QName aqname = new QName(); XMLAttributes newattrs = new XMLAttributesImpl(); for (int i = 0; i < length; i++) { attributes.getName(i, aqname); String type = attributes.getType(i); String value = attributes.getValue(i); String nonNormalizedValue = attributes.getNonNormalizedValue(i); boolean specified = attributes.isSpecified(i); newattrs.addAttribute(aqname, type, value); newattrs.setNonNormalizedValue(i, nonNormalizedValue); newattrs.setSpecified(i, specified); } this.attributes = newattrs; } } } // (HTMLElements.Element,QName,XMLAttributes) /** * Simple representation to make debugging easier */ @Override public String toString() { return super.toString() + qname; } } // class Info /** * Unsynchronized stack of element information. */ public static class InfoStack { // // Data // /** * The stack data. */ public Info[] data = new Info[10]; /** * The top of the stack. */ public int top; // // Public methods // /** * Peeks at the top of the stack. */ public Info peek() { return data[top - 1]; } // peek():Info /** * Pops the top item off of the stack. */ public Info pop() { return data[--top]; } // pop():Info /** * Pushes element information onto the stack. 
*/ public void push(Info info) { if (top == data.length) { Info[] newarray = new Info[top + 10]; System.arraycopy(data, 0, newarray, 0, top); data = newarray; } data[top++] = info; } // push(Info) /** * Simple representation to make debugging easier */ @Override public String toString() { final StringBuffer sb = new StringBuffer("InfoStack("); for (int i = top - 1; i >= 0; --i) { sb.append(data[i]); if (i != 0) { sb.append(", "); } } sb.append(")"); return sb.toString(); } } // class InfoStack /** * Structure to hold information about an element placed in buffer to be comsumed later */ static class ElementEntry { private final Augmentations augs_; private final QName name_; ElementEntry(final QName element, final Augmentations augs) { name_ = new QName(element); augs_ = augs == null ? null : new HTMLAugmentations(augs); } } /** * EXPERIMENTAL: may change in next release
* Name of the property holding the stack of elements in which context a document fragment should * be parsed. **/ public static final String FRAGMENT_CONTEXT_STACK = "http://cyberneko.org/html/properties/balance-tags/fragment-context-stack"; /** * Include infoset augmentations. */ protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations"; /** * Document fragment balancing only. */ protected static final String DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment"; /** * Document fragment balancing only (deprecated). */ protected static final String DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment"; /** * Error reporter. */ protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter"; // properties /** * Ignore outside content. */ protected static final String IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content"; /** * Modify HTML attribute names: { "upper", "lower", "default" }. */ protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs"; /** * Modify HTML element names: { "upper", "lower", "default" }. */ protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems"; /** * Lowercase HTML names. */ protected static final short NAMES_LOWERCASE = 2; /** * Match HTML element names. */ protected static final short NAMES_MATCH = 0; /** * Don't modify HTML names. */ protected static final short NAMES_NO_CHANGE = 0; // modify HTML names /** * Uppercase HTML names. */ protected static final short NAMES_UPPERCASE = 1; /** * Namespaces. */ protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces"; /** * Report errors. */ protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors"; /** * Synthesized event info item. 
*/ protected static final HTMLEventInfo SYNTHESIZED_ITEM = new HTMLEventInfo.SynthesizedItem(); // static vars /** * Recognized features. */ private static final String[] RECOGNIZED_FEATURES = { NAMESPACES, AUGMENTATIONS, REPORT_ERRORS, DOCUMENT_FRAGMENT_DEPRECATED, DOCUMENT_FRAGMENT, IGNORE_OUTSIDE_CONTENT,}; // // Data // // features /** * Recognized features defaults. */ private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = { null, null, null, null, Boolean.FALSE, Boolean.FALSE,}; /** * Recognized properties. */ private static final String[] RECOGNIZED_PROPERTIES = { NAMES_ELEMS, NAMES_ATTRS, ERROR_REPORTER, FRAGMENT_CONTEXT_STACK,}; /** * Recognized properties defaults. */ private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {null, null, null, null,}; /** * Converts HTML names string value to constant value. * * @see #NAMES_NO_CHANGE * @see #NAMES_LOWERCASE * @see #NAMES_UPPERCASE */ protected static final short getNamesValue(String value) { if (value.equals("lower")) { return NAMES_LOWERCASE; } if (value.equals("upper")) { return NAMES_UPPERCASE; } return NAMES_NO_CHANGE; } // getNamesValue(String):short /** * Modifies the given name based on the specified mode. */ protected static final String modifyName(String name, short mode) { switch (mode) { case NAMES_UPPERCASE: return name.toUpperCase(); case NAMES_LOWERCASE: return name.toLowerCase(); } return name; } // modifyName(String,short):String /** * Allows self closing iframe tags. */ protected boolean fAllowSelfclosingIframe; // properties /** * Include infoset augmentations. */ protected boolean fAugmentations; /** * Document fragment balancing only. */ protected boolean fDocumentFragment; /** * The document handler. */ protected XMLDocumentHandler fDocumentHandler; // connections /** * The document source. */ protected XMLDocumentSource fDocumentSource; /** * The element stack. */ protected final InfoStack fElementStack = new InfoStack(); // state /** * Error reporter. 
*/ protected HTMLErrorReporter fErrorReporter; /** * Ignore outside content. */ protected boolean fIgnoreOutsideContent; /** * The inline stack. */ protected final InfoStack fInlineStack = new InfoStack(); /** * Modify HTML attribute names. */ protected short fNamesAttrs; /** * Modify HTML element names. */ protected short fNamesElems; /** * Namespaces. */ protected boolean fNamespaces; /** * True if a form is in the stack (allow to discard opening of nested forms) */ protected boolean fOpenedForm; /** * Report errors. */ protected boolean fReportErrors; /** * True if seen anything. Important for xml declaration. */ protected boolean fSeenAnything; // temp vars /** * True if seen <body< element. */ protected boolean fSeenBodyElement; /** * True if root element has been seen. */ protected boolean fSeenDoctype; /** * True if seen <head< element. */ protected boolean fSeenHeadElement; /** * True if root element has been seen. */ protected boolean fSeenRootElement; /** * True if seen the end of the document element. In other words, this variable is set to false * until the end </HTML> tag is seen (or synthesized). This is used to ensure that * extraneous events after the end of the document element do not make the document stream * ill-formed. */ protected boolean fSeenRootElementEnd; protected HTMLTagBalancingListener tagBalancingListener; private List/* ElementEntry */endElementsBuffer_ = new ArrayList(); /** * Empty attributes. */ private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl(); /** * Augmentations. */ private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations(); private boolean forcedEndElement_ = false; // // HTMLComponent methods // private boolean forcedStartElement_ = false; /** * A qualified name. 
*/ private final QName fQName = new QName(); // // XMLComponent methods // /** * Stack of elements determining the context in which a document fragment should be parsed */ private QName[] fragmentContextStack_ = null; private int fragmentContextStackSize_ = 0; // not 0 only when a fragment is // parsed and // fragmentContextStack_ is set private LostText lostText_ = new LostText(); /** * Characters. */ public void characters(final XMLString text, final Augmentations augs) throws XNIException { // check for end of document if (fSeenRootElementEnd) { return; } if (fElementStack.top == 0 && !fDocumentFragment) { // character before first opening tag lostText_.add(text, augs); return; } // is this text whitespace? boolean whitespace = true; for (int i = 0; i < text.length; i++) { if (!Character.isWhitespace(text.ch[text.offset + i])) { whitespace = false; break; } } if (!fDocumentFragment) { // handle bare characters if (!fSeenRootElement) { if (whitespace) { return; } forceStartBody(); } if (whitespace && (fElementStack.top < 2 || endElementsBuffer_.size() == 1)) { // ignore spaces directly within return; } // handle character content in head // NOTE: This frequently happens when the document looks like: // Title // And here's some text. else if (!whitespace) { Info info = fElementStack.peek(); if (info.element.code == HTMLElements.HEAD || info.element.code == HTMLElements.HTML) { String hname = modifyName("head", fNamesElems); String bname = modifyName("body", fNamesElems); if (fReportErrors) { fErrorReporter.reportWarning("HTML2009", new Object[]{hname, bname}); } forceStartBody(); } } } // call handler if (fDocumentHandler != null) { fDocumentHandler.characters(text, augs); } } // characters(XMLString,Augmentations) /** * Comment. 
*/ public void comment(XMLString text, Augmentations augs) throws XNIException { fSeenAnything = true; consumeEarlyTextIfNeeded(); if (fDocumentHandler != null) { fDocumentHandler.comment(text, augs); } } // comment(XMLString,Augmentations) // // XMLDocumentSource methods // /** * Doctype declaration. */ public void doctypeDecl(String rootElementName, String publicId, String systemId, Augmentations augs) throws XNIException { fSeenAnything = true; if (fReportErrors) { if (fSeenRootElement) { fErrorReporter.reportError("HTML2010", null); } else if (fSeenDoctype) { fErrorReporter.reportError("HTML2011", null); } } if (!fSeenRootElement && !fSeenDoctype) { fSeenDoctype = true; if (fDocumentHandler != null) { fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs); } } } // doctypeDecl(String,String,String,Augmentations) // @since Xerces 2.1.0 /** * Empty element. */ public void emptyElement(final QName element, XMLAttributes attrs, Augmentations augs) throws XNIException { startElement(element, attrs, augs); // browser ignore the closing indication for non empty tags like

but not for unknown element final HTMLElements.Element elem = getElement(element); if (elem.isEmpty() || elem.code == HTMLElements.UNKNOWN || elem.code == HTMLElements.IFRAME && fAllowSelfclosingIframe) { endElement(element, augs); } } // emptyElement(QName,XMLAttributes,Augmentations) // // XMLDocumentHandler methods // // since Xerces-J 2.2.0 /** * End CDATA section. */ public void endCDATA(Augmentations augs) throws XNIException { // check for end of document if (fSeenRootElementEnd) { return; } // call handler if (fDocumentHandler != null) { fDocumentHandler.endCDATA(augs); } } // endCDATA(Augmentations) // old methods /** * End document. */ public void endDocument(Augmentations augs) throws XNIException { // and have been buffered to consider outside content fIgnoreOutsideContent = true; // endElement should not ignore the elements // passed from buffer consumeBufferedEndElements(); // handle empty document if (!fSeenRootElement && !fDocumentFragment) { if (fReportErrors) { fErrorReporter.reportError("HTML2000", null); } if (fDocumentHandler != null) { fSeenRootElementEnd = false; forceStartBody(); // will force and final String body = modifyName("body", fNamesElems); fQName.setValues(null, body, body, null); callEndElement(fQName, synthesizedAugs()); final String ename = modifyName("html", fNamesElems); fQName.setValues(null, ename, ename, null); callEndElement(fQName, synthesizedAugs()); } } // pop all remaining elements else { int length = fElementStack.top - fragmentContextStackSize_; for (int i = 0; i < length; i++) { Info info = fElementStack.pop(); if (fReportErrors) { String ename = info.qname.rawname; fErrorReporter.reportWarning("HTML2001", new Object[]{ename}); } if (fDocumentHandler != null) { callEndElement(info.qname, synthesizedAugs()); } } } // call handler if (fDocumentHandler != null) { fDocumentHandler.endDocument(augs); } } // endDocument(Augmentations) /** * End element. 
*/ public void endElement(final QName element, final Augmentations augs) throws XNIException { final boolean forcedEndElement = forcedEndElement_; // is there anything to do? if (fSeenRootElementEnd) { notifyDiscardedEndElement(element, augs); return; } // get element information HTMLElements.Element elem = getElement(element); // if we consider outside content, just buffer and to // consider them at the very end if (!fIgnoreOutsideContent && (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) { endElementsBuffer_.add(new ElementEntry(element, augs)); return; } // check for end of document if (elem.code == HTMLElements.HTML) { fSeenRootElementEnd = true; } else if (elem.code == HTMLElements.FORM) { fOpenedForm = false; } else if (elem.code == HTMLElements.HEAD && !forcedEndElement) { // consume first when is reached to retrieve content // lost between and endElementsBuffer_.add(new ElementEntry(element, augs)); return; } // empty element int depth = getElementDepth(elem); if (depth == -1) { if (elem.code == HTMLElements.P) { forceStartElement(element, emptyAttributes(), synthesizedAugs()); endElement(element, augs); } else if (!elem.isEmpty()) { notifyDiscardedEndElement(element, augs); } return; } // find unbalanced inline elements if (depth > 1 && elem.isInline()) { final int size = fElementStack.top; fInlineStack.top = 0; for (int i = 0; i < depth - 1; i++) { final Info info = fElementStack.data[size - i - 1]; final HTMLElements.Element pelem = info.element; if (pelem.isInline() || pelem.code == HTMLElements.FONT) { // TODO: // investigate // if // only // FONT // NOTE: I don't have to make a copy of the info because // it will just be popped off of the element stack // as soon as we close it, anyway. 
fInlineStack.push(info); } } } // close children up to appropriate element for (int i = 0; i < depth; i++) { Info info = fElementStack.pop(); if (fReportErrors && i < depth - 1) { String ename = modifyName(element.rawname, fNamesElems); String iname = info.qname.rawname; fErrorReporter.reportWarning("HTML2007", new Object[]{ename, iname}); } if (fDocumentHandler != null) { // PATCH: Marc-André Morissette callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs); } } // re-open inline elements if (depth > 1) { int size = fInlineStack.top; for (int i = 0; i < size; i++) { Info info = fInlineStack.pop(); XMLAttributes attributes = info.attributes; if (fReportErrors) { String iname = info.qname.rawname; fErrorReporter.reportWarning("HTML2008", new Object[]{iname}); } forceStartElement(info.qname, attributes, synthesizedAugs()); } } } // endElement(QName,Augmentations) /** * End entity. */ public void endGeneralEntity(String name, Augmentations augs) throws XNIException { // check for end of document if (fSeenRootElementEnd) { return; } // call handler if (fDocumentHandler != null) { fDocumentHandler.endGeneralEntity(name, augs); } } // endGeneralEntity(String,Augmentations) /** * End prefix mapping. */ public void endPrefixMapping(String prefix, Augmentations augs) throws XNIException { // check for end of document if (fSeenRootElementEnd) { return; } // call handler if (fDocumentHandler != null) { XercesBridge.getInstance().XMLDocumentHandler_endPrefixMapping(fDocumentHandler, prefix, augs); } } // endPrefixMapping(String,Augmentations) /** * Returns the document handler. */ public XMLDocumentHandler getDocumentHandler() { return fDocumentHandler; } // getDocumentHandler():XMLDocumentHandler /** * Returns the document source. */ public XMLDocumentSource getDocumentSource() { return fDocumentSource; } // getDocumentSource():XMLDocumentSource /** * Returns the default state for a feature. 
*/ public Boolean getFeatureDefault(String featureId) { int length = RECOGNIZED_FEATURES != null ? RECOGNIZED_FEATURES.length : 0; for (int i = 0; i < length; i++) { if (RECOGNIZED_FEATURES[i].equals(featureId)) { return RECOGNIZED_FEATURES_DEFAULTS[i]; } } return null; } // getFeatureDefault(String):Boolean /** * Returns the default state for a property. */ public Object getPropertyDefault(String propertyId) { int length = RECOGNIZED_PROPERTIES != null ? RECOGNIZED_PROPERTIES.length : 0; for (int i = 0; i < length; i++) { if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { return RECOGNIZED_PROPERTIES_DEFAULTS[i]; } } return null; } // getPropertyDefault(String):Object /** * Returns recognized features. */ public String[] getRecognizedFeatures() { return RECOGNIZED_FEATURES; } // getRecognizedFeatures():String[] /** * Returns recognized properties. */ public String[] getRecognizedProperties() { return RECOGNIZED_PROPERTIES; } // getRecognizedProperties():String[] /** * Ignorable whitespace. */ public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException { characters(text, augs); } // ignorableWhitespace(XMLString,Augmentations) /** * Processing instruction. */ public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException { fSeenAnything = true; consumeEarlyTextIfNeeded(); if (fDocumentHandler != null) { fDocumentHandler.processingInstruction(target, data, augs); } } // processingInstruction(String,XMLString,Augmentations) /** * Resets the component. 
*/ public void reset(final XMLComponentManager manager) throws XMLConfigurationException { // get features fNamespaces = manager.getFeature(NAMESPACES); fAugmentations = manager.getFeature(AUGMENTATIONS); fReportErrors = manager.getFeature(REPORT_ERRORS); fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT) || manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED); fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT); fAllowSelfclosingIframe = manager.getFeature(HTMLScanner.ALLOW_SELFCLOSING_IFRAME); // get properties fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS))); fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS))); fErrorReporter = (HTMLErrorReporter) manager.getProperty(ERROR_REPORTER); fragmentContextStack_ = (QName[]) manager.getProperty(FRAGMENT_CONTEXT_STACK); } // reset(XMLComponentManager) /** * Sets the document handler. */ public void setDocumentHandler(XMLDocumentHandler handler) { fDocumentHandler = handler; } // setDocumentHandler(XMLDocumentHandler) /** * Sets the document source. */ public void setDocumentSource(XMLDocumentSource source) { fDocumentSource = source; } // setDocumentSource(XMLDocumentSource) /** * Sets a feature. */ public void setFeature(String featureId, boolean state) throws XMLConfigurationException { if (featureId.equals(AUGMENTATIONS)) { fAugmentations = state; return; } if (featureId.equals(REPORT_ERRORS)) { fReportErrors = state; return; } if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) { fIgnoreOutsideContent = state; return; } } // setFeature(String,boolean) /** * Sets a property. */ public void setProperty(String propertyId, Object value) throws XMLConfigurationException { if (propertyId.equals(NAMES_ELEMS)) { fNamesElems = getNamesValue(String.valueOf(value)); return; } if (propertyId.equals(NAMES_ATTRS)) { fNamesAttrs = getNamesValue(String.valueOf(value)); return; } } // setProperty(String,Object) /** * Start CDATA section. 
*/ public void startCDATA(Augmentations augs) throws XNIException { fSeenAnything = true; consumeEarlyTextIfNeeded(); // check for end of document if (fSeenRootElementEnd) { return; } // call handler if (fDocumentHandler != null) { fDocumentHandler.startCDATA(augs); } } // startCDATA(Augmentations) /** * Start document. */ public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException { startDocument(locator, encoding, null, augs); } // startDocument(XMLLocator,String,Augmentations) /** * Start document. */ public void startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext, Augmentations augs) throws XNIException { // reset state fElementStack.top = 0; if (fragmentContextStack_ != null) { fragmentContextStackSize_ = fragmentContextStack_.length; for (int i = 0; i < fragmentContextStack_.length; ++i) { final QName name = fragmentContextStack_[i]; final Element elt = HTMLElements.getElement(name.localpart); fElementStack.push(new Info(elt, name)); } } else { fragmentContextStackSize_ = 0; } fSeenAnything = false; fSeenDoctype = false; fSeenRootElement = false; fSeenRootElementEnd = false; fSeenHeadElement = false; fSeenBodyElement = false; // pass on event if (fDocumentHandler != null) { XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator, encoding, nscontext, augs); } } // startDocument(XMLLocator,String,Augmentations) // @since Xerces 2.1.0 /** * Start element. 
*/ public void startElement(final QName elem, XMLAttributes attrs, final Augmentations augs) throws XNIException { fSeenAnything = true; final boolean isForcedCreation = forcedStartElement_; forcedStartElement_ = false; // check for end of document if (fSeenRootElementEnd) { notifyDiscardedStartElement(elem, attrs, augs); return; } // get element information final HTMLElements.Element element = getElement(elem); final short elementCode = element.code; // the creation of some elements like TABLE or SELECT can't be forced. Any // others? if (isForcedCreation && (elementCode == HTMLElements.TABLE || elementCode == HTMLElements.SELECT)) { return; // don't accept creation } // ignore multiple html, head, body elements if (fSeenRootElement && elementCode == HTMLElements.HTML) { notifyDiscardedStartElement(elem, attrs, augs); return; } if (elementCode == HTMLElements.HEAD) { if (fSeenHeadElement) { notifyDiscardedStartElement(elem, attrs, augs); return; } fSeenHeadElement = true; } else if (elementCode == HTMLElements.FRAMESET) { consumeBufferedEndElements(); // (if any) has been buffered } else if (elementCode == HTMLElements.BODY) { // create if none was present if (!fSeenHeadElement) { final QName head = createQName("head"); forceStartElement(head, null, synthesizedAugs()); endElement(head, synthesizedAugs()); } consumeBufferedEndElements(); // (if any) has been buffered if (fSeenBodyElement) { notifyDiscardedStartElement(elem, attrs, augs); return; } fSeenBodyElement = true; } else if (elementCode == HTMLElements.FORM) { if (fOpenedForm) { notifyDiscardedStartElement(elem, attrs, augs); return; } fOpenedForm = true; } else if (elementCode == HTMLElements.UNKNOWN) { consumeBufferedEndElements(); } // if block element, save immediate parent inline elements int depth = 0; if (element.flags == 0) { int length = fElementStack.top; fInlineStack.top = 0; for (int i = length - 1; i >= 0; i--) { Info info = fElementStack.data[i]; if (!info.element.isInline()) { break; } 
fInlineStack.push(info); endElement(info.qname, synthesizedAugs()); } depth = fInlineStack.top; } // close previous elements // all elements close a