
nu.validator.saxtree.TreeBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of htmlparser Show documentation
Show all versions of htmlparser Show documentation
The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser.
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.saxtree;
import java.util.LinkedList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
/**
* Builds a SAX Tree representation of a document or a fragment
* streamed as <code>ContentHandler</code> and
* <code>LexicalHandler</code> events. The start/end event matching
* is expected to adhere to the SAX API contract. Things will
* simply break if this is not the case. Fragments are expected to
* omit <code>startDocument()</code> and <code>endDocument()</code>
* calls.
*
* @version $Id$
* @author hsivonen
*/
public class TreeBuilder implements ContentHandler, LexicalHandler {

    /**
     * The locator most recently supplied through
     * <code>setDocumentLocator</code>. It is passed to the constructor of
     * every tree node created by this builder and is <code>null</code>
     * until the event source supplies one.
     */
    private Locator locator;

    /**
     * The node that currently receives children. In fragment mode it is
     * set by the constructor; otherwise it is <code>null</code> until
     * <code>startDocument()</code> has been called.
     */
    private ParentNode current;

    /**
     * Whether to retain attribute objects. Forwarded unchanged to every
     * <code>Element</code> constructed in <code>startElement</code>.
     */
    private final boolean retainAttributes;

    /**
     * The prefix mappings for the next element to be inserted, or
     * <code>null</code> when none have been reported since the previous
     * element was started.
     */
    private List<PrefixMapping> prefixMappings;

    /**
     * Constructs a reusable <code>TreeBuilder</code> that builds
     * <code>Document</code>s and copies attributes.
     */
    public TreeBuilder() {
        this(false, false);
    }

    /**
     * The constructor. The instance will be reusable if building a full
     * document and not reusable if building a fragment.
     *
     * @param fragment whether this <code>TreeBuilder</code> should build
     * a <code>DocumentFragment</code> instead of a <code>Document</code>.
     * @param retainAttributes whether instances of the
     * <code>Attributes</code> interface passed to
     * <code>startElement</code> should be retained
     * (the alternative is copying).
     */
    public TreeBuilder(boolean fragment, boolean retainAttributes) {
        if (fragment) {
            // Fragments get no startDocument() call, so the root has to
            // exist before the first event arrives.
            current = new DocumentFragment();
        }
        this.retainAttributes = retainAttributes;
    }

    /**
     * Appends a <code>Characters</code> node to the current node.
     *
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        current.appendChild(new Characters(locator, ch, start, length));
    }

    /**
     * Records the end locator on the current node. The current node is
     * left in place so that <code>getRoot()</code> can return it.
     *
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
        current.setEndLocator(locator);
    }

    /**
     * Closes the current node and makes its parent current again. The
     * name arguments are not inspected.
     *
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        closeCurrent();
    }

    /**
     * Does nothing: the mappings were already handed to the element in
     * <code>startElement</code>.
     *
     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
     */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Appends an <code>IgnorableWhitespace</code> node to the current node.
     *
     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        current.appendChild(new IgnorableWhitespace(locator, ch, start, length));
    }

    /**
     * Appends a <code>ProcessingInstruction</code> node to the current node.
     *
     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
     */
    public void processingInstruction(String target, String data) throws SAXException {
        current.appendChild(new ProcessingInstruction(locator, target, data));
    }

    /**
     * Stores the locator that will be passed to subsequently created nodes.
     *
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    /**
     * Appends a <code>SkippedEntity</code> node to the current node.
     *
     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
     */
    public void skippedEntity(String name) throws SAXException {
        current.appendChild(new SkippedEntity(locator, name));
    }

    /**
     * Starts a new tree by replacing the current node with a fresh
     * <code>Document</code>.
     *
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
        // A reused builder may still hold mappings buffered by a parse that
        // was aborted before the matching startElement; they must not be
        // attached to the first element of the new document.
        prefixMappings = null;
        current = new Document(locator);
    }

    /**
     * Appends a new <code>Element</code> carrying the buffered prefix
     * mappings to the current node and descends into it.
     *
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        current = (ParentNode) current.appendChild(
                new Element(locator, uri, localName, qName, atts, retainAttributes, prefixMappings));
        // The buffered mappings now belong to the element just created.
        prefixMappings = null;
    }

    /**
     * Buffers a prefix mapping for the next element to be started.
     *
     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        if (prefixMappings == null) {
            // Allocated lazily; elements without mappings receive null.
            prefixMappings = new LinkedList<PrefixMapping>();
        }
        prefixMappings.add(new PrefixMapping(prefix, uri));
    }

    /**
     * Appends a <code>Comment</code> node to the current node.
     *
     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
     */
    public void comment(char[] ch, int start, int length) throws SAXException {
        current.appendChild(new Comment(locator, ch, start, length));
    }

    /**
     * Closes the current node and makes its parent current again.
     *
     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
     */
    public void endCDATA() throws SAXException {
        closeCurrent();
    }

    /**
     * Closes the current node and makes its parent current again.
     *
     * @see org.xml.sax.ext.LexicalHandler#endDTD()
     */
    public void endDTD() throws SAXException {
        closeCurrent();
    }

    /**
     * Closes the current node and makes its parent current again. The
     * entity name is not inspected.
     *
     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
     */
    public void endEntity(String name) throws SAXException {
        closeCurrent();
    }

    /**
     * Appends a new <code>CDATA</code> node to the current node and
     * descends into it.
     *
     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
     */
    public void startCDATA() throws SAXException {
        current = (ParentNode) current.appendChild(new CDATA(locator));
    }

    /**
     * Appends a new <code>DTD</code> node to the current node and
     * descends into it.
     *
     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
     */
    public void startDTD(String name, String publicId, String systemId) throws SAXException {
        current = (ParentNode) current.appendChild(new DTD(locator, name, publicId, systemId));
    }

    /**
     * Appends a new <code>Entity</code> node to the current node and
     * descends into it.
     *
     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
     */
    public void startEntity(String name) throws SAXException {
        current = (ParentNode) current.appendChild(new Entity(locator, name));
    }

    /**
     * Returns the root (<code>Document</code> if building a full document
     * or <code>DocumentFragment</code> if building a fragment).
     * What is returned is the builder's current node, so it is the root
     * only once every start event has been matched by its end event.
     *
     * @return the root
     */
    public ParentNode getRoot() {
        return current;
    }

    /**
     * Records the end locator on the current node, then makes its parent
     * the current node. Shared by all end-of-container events.
     */
    private void closeCurrent() {
        current.setEndLocator(locator);
        current = current.getParentNode();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy