org.w3c.tidy.Node Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jtidy Show documentation
JTidy is a Java port of HTML Tidy, a HTML syntax checker and pretty printer. Like its non-Java cousin, JTidy can be used as a tool for cleaning up malformed and faulty HTML. In addition, JTidy provides a DOM interface to the document that is being processed, which effectively makes you able to use JTidy as a DOM parser for real-world HTML.
There is a newer version: 1.0.5
Show newest version
/*
 *  Java HTML Tidy - JTidy
 *  HTML parser and pretty printer
 *
 *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 *  Institute of Technology, Institut National de Recherche en
 *  Informatique et en Automatique, Keio University). All Rights
 *  Reserved.
 *
 *  Contributing Author(s):
 *
 *     Dave Raggett 
 *     Andy Quick  (translation to Java)
 *     Gary L Peskin  (Java development)
 *     Sami Lempinen  (release management)
 *     Fabrizio Giustina 
 *
 *  The contributing author(s) would like to thank all those who
 *  helped with testing, bug fixes, and patience.  This wouldn't
 *  have been possible without all of you.
 *
 *  COPYRIGHT NOTICE:
 * 
 *  This software and documentation is provided "as is," and
 *  the copyright holders and contributing author(s) make no
 *  representations or warranties, express or implied, including
 *  but not limited to, warranties of merchantability or fitness
 *  for any particular purpose or that the use of the software or
 *  documentation will not infringe any third party patents,
 *  copyrights, trademarks or other rights. 
 *
 *  The copyright holders and contributing author(s) will not be
 *  liable for any direct, indirect, special or consequential damages
 *  arising out of any use of the software or documentation, even if
 *  advised of the possibility of such damage.
 *
 *  Permission is hereby granted to use, copy, modify, and distribute
 *  this source code, or portions hereof, documentation and executables,
 *  for any purpose, without fee, subject to the following restrictions:
 *
 *  1. The origin of this source code must not be misrepresented.
 *  2. Altered versions must be plainly marked as such and must
 *     not be misrepresented as being the original source.
 *  3. This Copyright notice may not be removed or altered from any
 *     source or altered source distribution.
 * 
 *  The copyright holders and contributing author(s) specifically
 *  permit, without fee, and encourage the use of this source code
 *  as a component for supporting the Hypertext Markup Language in
 *  commercial products. If you use this source code in a product,
 *  acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy;

/**
 * Used for elements and text nodes. The element name is null for text nodes. Start and end are offsets into lexbuf,
 * which contains the textual content of all elements in the parse tree. Parent and content allow traversal of the
 * parse tree in any direction. Attributes are represented as a linked list of AttVal nodes which hold the strings for
 * attribute/value pairs.
 * @author Dave Raggett [email protected] 
 * @author Andy Quick [email protected]  (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision$ ($Author$)
 */
public class Node
{

    /**
     * node type: root.
     */
    public static final short ROOT_NODE = 0;

    /**
     * node type: doctype.
     */
    public static final short DOCTYPE_TAG = 1;

    /**
     * node type: comment.
     */
    public static final short COMMENT_TAG = 2;

    /**
     * node type: processing instruction.
     */
    public static final short PROC_INS_TAG = 3;

    /**
     * node type: text.
     */
    public static final short TEXT_NODE = 4;

    /**
     * node type: start tag.
     */
    public static final short START_TAG = 5;

    /**
     * node type: end tag.
     */
    public static final short END_TAG = 6;

    /**
     * node type: combined start/end tag, i.e. a self-closing tag such as <code>&lt;name/&gt;</code>.
     */
    public static final short START_END_TAG = 7;

    /**
     * node type: CDATA.
     */
    public static final short CDATA_TAG = 8;

    /**
     * node type: section tag.
     */
    public static final short SECTION_TAG = 9;

    /**
     * node type: asp tag.
     */
    public static final short ASP_TAG = 10;

    /**
     * node type: jste tag.
     */
    public static final short JSTE_TAG = 11;

    /**
     * node type: php tag.
     */
    public static final short PHP_TAG = 12;

    /**
     * node type: XML declaration.
     */
    public static final short XML_DECL = 13;

    /**
     * Description for all the node types, indexed by the numeric value of the node type constants
     * (<code>ROOT_NODE</code> = 0 ... <code>XML_DECL</code> = 13). Used in toString.
     * The table must contain exactly one entry per node type constant, in the same order.
     */
    private static final String[] NODETYPE_STRING = {
        "RootNode",
        "DocTypeTag",
        "CommentTag",
        "ProcInsTag",
        "TextNode",
        "StartTag",
        "EndTag",
        "StartEndTag",
        "CDataTag",
        "SectionTag",
        "AspTag",
        "JsteTag",
        "PhpTag",
        "XmlDecl"};

    /**
     * parent node.
     */
    protected Node parent;

    /**
     * previous sibling node.
     */
    protected Node prev;

    /**
     * next sibling node.
     */
    protected Node next;

    /**
     * last child node (tail of the child list that starts at <code>content</code>).
     */
    protected Node last;

    /**
     * start of span onto text array.
     */
    protected int start;

    /**
     * end of span onto text array.
     */
    protected int end;

    /**
     * the text array.
     */
    protected byte[] textarray;

    /**
     * node type: one of the node type constants (TEXT_NODE, START_TAG, END_TAG etc.).
     */
    protected short type;

    /**
     * true if closed by explicit end tag.
     */
    protected boolean closed;

    /**
     * true if inferred.
     */
    protected boolean implicit;

    /**
     * true if followed by a line break.
     */
    protected boolean linebreak;

    /**
     * old tag when it was changed.
     */
    protected Dict was;

    /**
     * tag's dictionary definition.
     */
    protected Dict tag;

    /**
     * Tag name.
     */
    protected String element;

    /**
     * Attribute/Value linked list.
     */
    protected AttVal attributes;

    /**
     * first child node (head of the child list; siblings are chained through <code>next</code>).
     */
    protected Node content;

    /**
     * DOM adapter.
     */
    protected org.w3c.dom.Node adapter;

    /**
     * Instantiates a new, empty text node: the type is <code>TEXT_NODE</code>, there is no text array and both span
     * offsets are 0.
     */
    public Node()
    {
        this(TEXT_NODE, null, 0, 0);
    }

    /**
     * Instantiates a new, unlinked node without tag name, dictionary entry or attributes.
     * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE |
     * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node.ASP_TAG |
     * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
     * @param textarray array of bytes contained in the Node
     * @param start start position of the node's span in the text array
     * @param end end position of the node's span in the text array
     */
    public Node(short type, byte[] textarray, int start, int end)
    {
        // what kind of node this is and which slice of the buffer it covers
        this.type = type;
        this.textarray = textarray;
        this.start = start;
        this.end = end;

        // not linked into any tree yet
        this.parent = null;
        this.prev = null;
        this.next = null;
        this.content = null;
        this.last = null;

        // no tag information
        this.element = null;
        this.tag = null;
        this.was = null;
        this.attributes = null;

        // state flags
        this.closed = false;
        this.implicit = false;
        this.linebreak = false;
    }

    /**
     * Instantiates a new, unlinked node with a tag name. For start, end and start/end tags the tag table is asked to
     * look up the tag for the new node.
     * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE |
     * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node.ASP_TAG |
     * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
     * @param textarray array of bytes contained in the Node
     * @param start start position
     * @param end end position
     * @param element tag name
     * @param tt tag table instance
     */
    public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
    {
        // common initialisation is shared with the four-argument constructor
        this(type, textarray, start, end);

        this.element = element;

        switch (type)
        {
            case START_TAG :
            case START_END_TAG :
            case END_TAG :
                tt.findTag(this);
                break;
            default :
                // other node types have no dictionary entry
                break;
        }
    }

    /**
     * Looks up the first attribute of this node whose name equals the given name (case sensitive).
     * @param name attribute name.
     * @return AttVal instance or null if no attribute with the given name is found
     */
    public AttVal getAttrByName(String name)
    {
        AttVal candidate = this.attributes;

        while (candidate != null)
        {
            if (candidate.attribute != null && candidate.attribute.equals(name))
            {
                return candidate;
            }
            candidate = candidate.next;
        }

        return null;
    }

    /**
     * Default method for checking an element's attributes: every attribute in the list is asked to check itself
     * against this node.
     * @param lexer Lexer
     */
    public void checkAttributes(Lexer lexer)
    {
        AttVal item = this.attributes;

        while (item != null)
        {
            item.checkAttribute(lexer, this);
            // advance only after the check has run
            item = item.next;
        }
    }

    /**
     * The same attribute name can't be used more than once in each element. Discard or join attributes according to
     * configuration.
     * <p>
     * Duplicated <code>class</code> / <code>style</code> attributes are merged when <code>joinClasses</code> /
     * <code>joinStyles</code> is enabled and both duplicates carry a value. Any other duplicate (including a
     * <code>class</code> / <code>style</code> duplicate without a value) is dropped according to
     * <code>duplicateAttrs</code>. Attributes whose <code>asp</code> or <code>php</code> field is set are never
     * touched. Names are compared ignoring case.
     * </p>
     * @param lexer Lexer
     */
    public void repairDuplicateAttributes(Lexer lexer)
    {
        AttVal attval;

        for (attval = this.attributes; attval != null;)
        {
            if (attval.asp == null && attval.php == null)
            {
                AttVal current;

                for (current = attval.next; current != null;)
                {
                    if (current.asp == null
                        && current.php == null
                        && attval.attribute != null
                        && attval.attribute.equalsIgnoreCase(current.attribute))
                    {
                        AttVal temp;

                        // Joining is only meaningful when both duplicates actually have a value. Without this
                        // guard a missing value ended up as the literal text "null" in the joined class, or as a
                        // NullPointerException in the style branch.
                        boolean bothHaveValue = current.value != null && attval.value != null;

                        if (bothHaveValue
                            && "class".equalsIgnoreCase(current.attribute)
                            && lexer.configuration.joinClasses)
                        {
                            // concatenate classes
                            current.value = current.value + " " + attval.value;

                            temp = attval.next;

                            if (temp.next == null)
                            {
                                current = null;
                            }
                            else
                            {
                                current = current.next;
                            }

                            lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE);

                            removeAttribute(attval);
                            attval = temp;
                        }
                        else if (bothHaveValue
                            && "style".equalsIgnoreCase(current.attribute)
                            && lexer.configuration.joinStyles)
                        {
                            // concatenate styles

                            // this doesn't handle CSS comments and leading/trailing white-space very well see
                            // http://www.w3.org/TR/css-style-attr

                            if (current.value.length() == 0)
                            {
                                // nothing to append to (charAt(-1) used to throw here): take over the other value
                                current.value = attval.value;
                            }
                            else
                            {
                                int end = current.value.length() - 1;

                                if (current.value.charAt(end) == ';')
                                {
                                    // attribute ends with declaration separator
                                    current.value = current.value + " " + attval.value;
                                }
                                else if (current.value.charAt(end) == '}')
                                {
                                    // attribute ends with rule set
                                    current.value = current.value + " { " + attval.value + " }";
                                }
                                else
                                {
                                    // attribute ends with property value
                                    current.value = current.value + "; " + attval.value;
                                }
                            }

                            temp = attval.next;

                            if (temp.next == null)
                            {
                                current = null;
                            }
                            else
                            {
                                current = current.next;
                            }

                            lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE);

                            removeAttribute(attval);
                            attval = temp;

                        }
                        else if (lexer.configuration.duplicateAttrs == Configuration.KEEP_LAST)
                        {
                            // drop the duplicate found further down the list and keep scanning from its successor
                            temp = current.next;

                            lexer.report.attrError(lexer, this, current, Report.REPEATED_ATTRIBUTE);

                            removeAttribute(current);
                            current = temp;
                        }
                        else
                        {
                            // drop the attribute the outer loop is positioned on and continue with its successor
                            temp = attval.next;

                            if (attval.next == null)
                            {
                                current = null;
                            }
                            else
                            {
                                current = current.next;
                            }

                            lexer.report.attrError(lexer, this, attval, Report.REPEATED_ATTRIBUTE);

                            removeAttribute(attval);
                            attval = temp;
                        }
                    }
                    else
                    {
                        current = current.next;
                    }
                }
                attval = attval.next;
            }
            else
            {
                attval = attval.next;
            }
        }
    }

    /**
     * Creates a new double-quoted attribute and appends it to the end of this node's attribute list. The attribute's
     * dictionary entry is looked up in the default attribute table.
     * @param name attribute name
     * @param value attribute value
     */
    public void addAttribute(String name, String value)
    {
        AttVal added = new AttVal(null, null, null, null, '"', name, value);
        added.dict = AttributeTable.getDefaultAttributeTable().findAttribute(added);

        AttVal tail = this.attributes;

        if (tail == null)
        {
            // first attribute of this node
            this.attributes = added;
            return;
        }

        // walk to the current last attribute and hook the new one behind it
        while (tail.next != null)
        {
            tail = tail.next;
        }

        tail.next = added;
    }

    /**
     * Unlinks the given attribute instance from this node's attribute list. The attribute is matched by identity;
     * nothing happens if it is not part of the list.
     * @param attr attribute to remove
     */
    public void removeAttribute(AttVal attr)
    {
        AttVal before = null;
        AttVal cursor = this.attributes;

        while (cursor != null)
        {
            AttVal after = cursor.next;

            if (cursor != attr)
            {
                // cursor stays in the list and becomes the predecessor of whatever follows
                before = cursor;
            }
            else if (before == null)
            {
                // removing the head of the list
                this.attributes = after;
            }
            else
            {
                before.next = after;
            }

            cursor = after;
        }
    }

    /**
     * Searches the direct children of this node for the first doctype node.
     * @return doctype node or null if not found
     */
    public Node findDocType()
    {
        for (Node child = this.content; child != null; child = child.next)
        {
            if (child.type == DOCTYPE_TAG)
            {
                return child;
            }
        }

        return null;
    }

    /**
     * Discard the doctype node: the first doctype found among the direct children of this node is detached from the
     * tree. Does nothing when there is no doctype.
     */
    public void discardDocType()
    {
        Node doctype = findDocType();

        if (doctype != null)
        {
            // removeNode() also repairs the parent's "last" pointer when the doctype is the last child, and clears
            // the detached node's own links; the previous hand-written unlinking did neither.
            doctype.removeNode();
        }
    }

    /**
     * Remove node from markup tree and discard it.
     * @param element discarded node, may be null
     * @return the node that followed the discarded one, or null if there is none (or element was null)
     */
    public static Node discardElement(Node element)
    {
        if (element == null)
        {
            return null;
        }

        // remember the successor before removeNode() clears the links
        Node following = element.next;
        element.removeNode();
        return following;
    }

    /**
     * Inserts a node as the first child of this node.
     * @param node to insert
     */
    public void insertNodeAtStart(Node node)
    {
        Node formerFirst = this.content;

        node.parent = this;

        if (formerFirst != null)
        {
            formerFirst.prev = node; // AQ added 13 Apr 2000
        }
        else
        {
            // no children so far: the new node is also the last child
            this.last = node;
        }

        node.next = formerFirst;
        node.prev = null;
        this.content = node;
    }

    /**
     * Appends a node as the last child of this node.
     * @param node Node to insert
     */
    public void insertNodeAtEnd(Node node)
    {
        Node formerLast = this.last;

        node.parent = this;
        node.prev = formerLast;

        if (formerLast == null)
        {
            // first child of this node
            this.content = node;
        }
        else
        {
            formerLast.next = node;
        }

        this.last = node;

        // guard against a node whose successor is its new parent
        if (node.next == this)
        {
            node.next = null;
        }
    }

    /**
     * Insert node into markup tree in place of element, which is moved to become the only child of the node.
     * @param element child node. Will be inserted as the child of node
     * @param node new parent node; takes over element's position in the tree
     */
    public static void insertNodeAsParent(Node element, Node node)
    {
        // element becomes the single child of node
        node.content = element;
        node.last = element;
        node.parent = element.parent;
        element.parent = node;

        // the former parent of element now refers to node instead
        Node grandparent = node.parent;

        if (grandparent.content == element)
        {
            grandparent.content = node;
        }

        if (grandparent.last == element)
        {
            grandparent.last = node;
        }

        // node inherits element's previous sibling ...
        Node before = element.prev;
        node.prev = before;
        element.prev = null;

        if (before != null)
        {
            before.next = node;
        }

        // ... and its next sibling
        Node after = element.next;
        node.next = after;
        element.next = null;

        if (after != null)
        {
            after.prev = node;
        }
    }

    /**
     * Insert node into markup tree before element, as a sibling with the same parent.
     * @param element existing node; will follow the inserted node
     * @param node node to insert before element
     */
    public static void insertNodeBeforeElement(Node element, Node node)
    {
        Node owner = element.parent;
        Node before = element.prev;

        node.parent = owner;
        node.next = element;
        node.prev = before;
        element.prev = node;

        if (before != null)
        {
            before.next = node;
        }

        // element was the first child: node takes over that role
        if (owner != null && owner.content == element)
        {
            owner.content = node;
        }
    }

    /**
     * Insert node into markup tree directly after this node, as a sibling with the same parent.
     * @param node new node to insert
     */
    public void insertNodeAfterElement(Node node)
    {
        Node parent;

        parent = this.parent;
        node.parent = parent;

        // AQ - 13Jan2000 fix for parent == null
        if (parent != null && parent.last == this)
        {
            parent.last = node;
            // node is the new last child: make sure it does not keep a stale successor from a previous position
            node.next = null;
        }
        else
        {
            node.next = this.next;
            // AQ - 13Jan2000 fix for node.next == null
            if (node.next != null)
            {
                node.next.prev = node;
            }
        }

        this.next = node;
        node.prev = this;
    }

    /**
     * Trim an empty element.
     * @param lexer Lexer
     * @param element empty node to be removed
     */
    public static void trimEmptyElement(Lexer lexer, Node element)
    {
        // don't trim if user explicitely set trim-empty-elements to false
        // empty element can be needed in css sites
        if (lexer.configuration.trimEmpty)
        {
            TagTable tt = lexer.configuration.tt;

            if (lexer.canPrune(element))
            {
                if (element.type != TEXT_NODE)
                {
                    lexer.report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
                }

                discardElement(element);
            }
            else if (element.tag == tt.tagP && element.content == null)
            {
                // replace 
 by 

 to preserve formatting
                Node node = lexer.inferredTag("br");
                Node.coerceNode(lexer, element, tt.tagBr);
                element.insertNodeAfterElement(node);
            }
        }
    }

    /**
     * Trims one trailing space from the last text child of an element, e.g.
     * <code>&lt;em&gt;world &lt;/em&gt;</code> becomes <code>&lt;em&gt;world&lt;/em&gt;</code>. For inline elements
     * that are not form fields <code>lexer.insertspace</code> is set so that the space can be re-inserted after the
     * element's end tag. If the text node becomes (or already is) empty it is handed to
     * {@link #trimEmptyElement(Lexer, Node)}.
     * @param lexer Lexer
     * @param element node
     * @param last last child of element
     */
    public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
    {
        if (last == null || last.type != Node.TEXT_NODE)
        {
            return;
        }

        // Only a plain space is trimmed. The former "c == 160" test compared a signed byte (range -128..127) with
        // 160 and could never match, so the no-break space special case for td/th cells was unreachable. It is
        // removed rather than "repaired": matching a raw 0xA0 byte could cut a multi-byte character in half.
        if (last.end > last.start && lexer.lexbuf[last.end - 1] == (byte) ' ')
        {
            last.end -= 1;

            if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
                && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD))
            {
                lexer.insertspace = true;
            }
        }

        // if empty string then delete from parse tree
        if (last.start == last.end)
        {
            trimEmptyElement(lexer, last);
        }
    }

    /**
     * Escapes the given tag: the textual form of the tag (<code>&lt;name&gt;</code>, <code>&lt;/name&gt;</code>,
     * <code>&lt;name/&gt;</code> or <code>&lt;!DOCTYPE ...&gt;</code>) is appended to the lexer buffer and a new node
     * spanning that text is returned.
     * @param lexer Lexer
     * @param element node to be escaped
     * @return escaped node
     */
    protected static Node escapeTag(Lexer lexer, Node element)
    {
        Node escaped = lexer.newNode();
        escaped.start = lexer.lexsize;
        escaped.textarray = element.textarray; // TODO check it

        lexer.addByte('<');

        if (element.type == END_TAG)
        {
            lexer.addByte('/');
        }

        if (element.element != null)
        {
            lexer.addStringLiteral(element.element);
        }
        else if (element.type == DOCTYPE_TAG)
        {
            // doctype keyword followed by the original doctype text
            String keyword = "!DOCTYPE ";

            for (int k = 0; k < keyword.length(); k++)
            {
                lexer.addByte(keyword.charAt(k));
            }

            int pos = element.start;

            while (pos < element.end)
            {
                lexer.addByte(lexer.lexbuf[pos]);
                ++pos;
            }
        }

        if (element.type == START_END_TAG)
        {
            lexer.addByte('/');
        }

        lexer.addByte('>');
        escaped.end = lexer.lexsize;

        return escaped;
    }

    /**
     * Is the node content empty or blank? Only text nodes can be blank: a text node is blank when its span is empty
     * or consists of exactly one space.
     * @param lexer Lexer
     * @return true if the node content empty or blank
     */
    public boolean isBlank(Lexer lexer)
    {
        if (this.type != TEXT_NODE)
        {
            return false;
        }

        int length = this.end - this.start;

        if (length == 0)
        {
            return true;
        }

        return length == 1 && lexer.lexbuf[this.end - 1] == ' ';
    }

    /**
     * This maps <code>&lt;p&gt;hello&lt;em&gt; world&lt;/em&gt;</code> to
     * <code>&lt;p&gt;hello &lt;em&gt;world&lt;/em&gt;</code>.
     * Trims an initial space from the text node. For inline elements that are not form fields the space is moved
     * before the element: it is appended to a preceding text node, or a new text node is inserted before the
     * element. For all other elements the space is simply discarded.
     * @param lexer Lexer
     * @param element parent node
     * @param text text node
     */
    public static void trimInitialSpace(Lexer lexer, Node element, Node text)
    {
        Node prev, node;

        // check the span first: an empty text node must not touch the buffer at all (its start offset may lie at
        // the very end of the buffer)
        if (text.type == TEXT_NODE && text.start < text.end && lexer.lexbuf[text.start] == (byte) ' ')
        {
            if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
                && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD))
            {
                prev = element.prev;

                if (prev != null && prev.textarray != null && prev.type == TEXT_NODE)
                {
                    // extend the preceding text node by a space unless it already ends with one
                    if (prev.textarray[prev.end - 1] != (byte) ' ')
                    {
                        prev.textarray[prev.end++] = (byte) ' ';
                    }

                    ++element.start;
                }
                else
                {
                    // create new node
                    node = lexer.newNode();
                    node.start = element.start++;
                    node.end = element.start;
                    lexer.lexbuf[node.start] = (byte) ' ';
                    Node.insertNodeBeforeElement(element, node);
                }
            }

            // discard the space in current node
            ++text.start;
        }
    }

    /**
     * Move initial and trailing space out of an element. Leading space of the first text child is handled by
     * {@link #trimInitialSpace(Lexer, Node, Node)} (skipped for <code>pre</code> elements), trailing space of the
     * last text child by {@link #trimTrailingSpace(Lexer, Node, Node)}.
     * @param lexer Lexer
     * @param element Node
     */
    public static void trimSpaces(Lexer lexer, Node element)
    {
        Node firstChild = element.content;
        TagTable tags = lexer.configuration.tt;

        if (firstChild != null && firstChild.type == TEXT_NODE && element.tag != tags.tagPre)
        {
            trimInitialSpace(lexer, element, firstChild);
        }

        // read the last child only now, after the leading space has been dealt with
        Node lastChild = element.last;

        if (lastChild != null && lastChild.type == TEXT_NODE)
        {
            trimTrailingSpace(lexer, element, lastChild);
        }
    }

    /**
     * Is this node contained in a given tag? Walks up the parent chain and compares each ancestor's dictionary
     * entry with the given one (by identity).
     * @param tag tag of the ancestor to look for
     * @return true if node is contained in tag
     */
    public boolean isDescendantOf(Dict tag)
    {
        Node ancestor = this.parent;

        while (ancestor != null)
        {
            if (ancestor.tag == tag)
            {
                return true;
            }
            ancestor = ancestor.parent;
        }

        return false;
    }

    /**
     * The doctype has been found after other tags, and needs moving to before the html element. A warning is
     * reported, then the html element is located by walking up from element and the doctype is inserted before it.
     * @param lexer Lexer
     * @param element node at which the doctype was encountered; the html element itself or one of its descendants
     * @param doctype doctype node to insert before the html element
     */
    public static void insertDocType(Lexer lexer, Node element, Node doctype)
    {
        TagTable tags = lexer.configuration.tt;

        lexer.report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);

        // climb until the html element is reached
        Node html = element;

        while (html.tag != tags.tagHtml)
        {
            html = html.parent;
        }

        insertNodeBeforeElement(html, doctype);
    }

    /**
     * Find the body node. The html element is searched among the children of this node and the body among the
     * children of html. If a frameset is found before a body, the body inside the frameset's noframes element is
     * returned instead.
     * @param tt tag table
     * @return body node, or null if there is none
     */
    public Node findBody(TagTable tt)
    {
        Node html = this.content;

        while (html != null && html.tag != tt.tagHtml)
        {
            html = html.next;
        }

        if (html == null)
        {
            return null;
        }

        // first child of html that is either a body or a frameset
        Node found = html.content;

        while (found != null && found.tag != tt.tagBody && found.tag != tt.tagFrameset)
        {
            found = found.next;
        }

        if (found == null || found.tag != tt.tagFrameset)
        {
            return found;
        }

        // frameset document: look for frameset > noframes > body
        Node noframes = found.content;

        while (noframes != null && noframes.tag != tt.tagNoframes)
        {
            noframes = noframes.next;
        }

        if (noframes == null)
        {
            return null;
        }

        Node body = noframes.content;

        while (body != null && body.tag != tt.tagBody)
        {
            body = body.next;
        }

        return body;
    }

    /**
     * Is the node an element?
     * @return true if type is START_TAG or START_END_TAG
     */
    public boolean isElement()
    {
        switch (this.type)
        {
            case START_TAG :
            case START_END_TAG :
                return true;
            default :
                return false;
        }
    }

    /**
     * Unexpected content in table row is moved to just before the table in accordance with Netscape and IE. This code
     * assumes that node hasn't been inserted into the row. Nothing happens if the row has no table ancestor.
     * @param row Row node
     * @param node Node which should be moved before the table
     * @param tt tag table
     */
    public static void moveBeforeTable(Node row, Node node, TagTable tt)
    {
        // first find the enclosing table element
        Node table = row.parent;

        while (table != null && table.tag != tt.tagTable)
        {
            table = table.parent;
        }

        if (table == null)
        {
            return;
        }

        // then link node in as the table's previous sibling
        if (table.parent.content == table)
        {
            table.parent.content = node;
        }

        node.prev = table.prev;
        node.next = table;
        table.prev = node;
        node.parent = table.parent;

        if (node.prev != null)
        {
            node.prev.next = node;
        }
    }

    /**
     * If a table row is empty then insert an empty cell. This practice is consistent with browser behavior and
     * avoids potential problems with row spanning cells. A missing start tag warning is reported for the inferred
     * cell.
     * @param lexer Lexer
     * @param row row node
     */
    public static void fixEmptyRow(Lexer lexer, Node row)
    {
        if (row.content != null)
        {
            // row already has content, nothing to fix
            return;
        }

        Node inferredCell = lexer.inferredTag("td");
        row.insertNodeAtEnd(inferredCell);
        lexer.report.warning(lexer, row, inferredCell, Report.MISSING_STARTTAG);
    }

    /**
     * Coerce a node into an implicit start tag of another element. An obsolete element warning is reported, the
     * previous dictionary entry is remembered in <code>was</code>, and tag, name and type are replaced.
     * @param lexer Lexer
     * @param node Node
     * @param tag tag dictionary reference of the element the node is turned into
     */
    public static void coerceNode(Lexer lexer, Node node, Dict tag)
    {
        // the inferred node is only needed for the warning
        Node replacement = lexer.inferredTag(tag.name);
        lexer.report.warning(lexer, node, replacement, Report.OBSOLETE_ELEMENT);

        Dict formerTag = node.tag;

        node.tag = tag;
        node.was = formerTag;
        node.element = tag.name;
        node.type = START_TAG;
        node.implicit = true;
    }

    /**
     * Extract this node and its children from a markup tree. Siblings are linked to each other, the parent's first
     * and last child references are adjusted, and this node's parent and sibling links are cleared. The children
     * stay attached to this node.
     */
    public void removeNode()
    {
        Node before = this.prev;
        Node after = this.next;
        Node owner = this.parent;

        // close the gap in the sibling chain
        if (before != null)
        {
            before.next = after;
        }

        if (after != null)
        {
            after.prev = before;
        }

        // fix the parent's view of its children
        if (owner != null)
        {
            if (owner.content == this)
            {
                owner.content = after;
            }

            if (owner.last == this)
            {
                owner.last = before;
            }
        }

        this.parent = null;
        this.prev = null;
        this.next = null;
    }

    /**
     * Appends a miscellaneous node (comment, processing instruction, CDATA, section, ASP, JSTE, PHP or XML
     * declaration) at the end of element. Nodes of any other type are left alone.
     * @param element parent node
     * @param node will be inserted at the end of element
     * @return true if the node has been inserted
     */
    public static boolean insertMisc(Node element, Node node)
    {
        switch (node.type)
        {
            case COMMENT_TAG :
            case PROC_INS_TAG :
            case CDATA_TAG :
            case SECTION_TAG :
            case ASP_TAG :
            case JSTE_TAG :
            case PHP_TAG :
            case XML_DECL :
                element.insertNodeAtEnd(node);
                return true;

            default :
                return false;
        }
    }

    /**
     * Is this a new (user defined) node? Used to determine how attributes without values should be printed. This was
     * introduced to deal with user defined tags e.g. Cold Fusion. A node without a tag dictionary entry is always
     * reported as new; otherwise the Dict.CM_NEW bit of the tag's model decides.
     * @return true if this node represents a user-defined tag.
     */
    public boolean isNewNode()
    {
        return this.tag == null || TidyUtils.toBoolean(this.tag.model & Dict.CM_NEW);
    }

    /**
     * Does the node have one (and only one) child? This is the case when there is a first child and that child has no
     * next sibling.
     * @return true if the node has one child
     */
    public boolean hasOneChild()
    {
        Node firstChild = this.content;
        if (firstChild == null)
        {
            return false;
        }
        return firstChild.next == null;
    }

    /**
     * Find the "html" element among the direct children of this node.
     * @param tt tag table providing the "html" tag definition
     * @return html node, or null if no child uses the html tag
     */
    public Node findHTML(TagTable tt)
    {
        Node candidate = this.content;

        // walk the children until one carries the html tag or the list ends
        while (candidate != null && candidate.tag != tt.tagHtml)
        {
            candidate = candidate.next;
        }

        return candidate;
    }

    /**
     * Find the head tag. The html element is looked up first (see findHTML), then its direct children are searched.
     * @param tt tag table providing the "html" and "head" tag definitions
     * @return head node, or null if either the html element or the head element is missing
     */
    public Node findHEAD(TagTable tt)
    {
        Node html = this.findHTML(tt);
        if (html == null)
        {
            return null;
        }

        Node candidate = html.content;
        while (candidate != null && candidate.tag != tt.tagHead)
        {
            candidate = candidate.next;
        }

        return candidate;
    }
    
    /**
     * Find the title tag. The head element is looked up first (see findHEAD), then its direct children are searched.
     * @param tt tag table providing the tag definitions
     * @return title node, or null if either the head element or the title element is missing
     */
    public Node findTITLE(TagTable tt)
    {
        Node head = findHEAD(tt);
        if (head == null)
        {
            return null;
        }

        Node candidate = head.content;
        while (candidate != null && candidate.tag != tt.tagTitle)
        {
            candidate = candidate.next;
        }

        return candidate;
    }

    /**
     * Checks for node integrity. Verifies that the sibling links around this node are symmetric, that the parent's
     * first/last child references agree with this node's position, and recursively that every child points back to
     * this node and is itself consistent.
     * @return false if node is not consistent
     */
    public boolean checkNodeIntegrity()
    {
        // the previous sibling must link forward to this node
        if (this.prev != null && this.prev.next != this)
        {
            return false;
        }

        // the next sibling must be another node and must link back to this node
        if (this.next != null && (this.next == this || this.next.prev != this))
        {
            return false;
        }

        if (this.parent != null)
        {
            // without a previous sibling this node has to be the parent's first child
            boolean firstChildMismatch = this.prev == null && this.parent.content != this;
            // without a next sibling this node has to be the parent's last child
            boolean lastChildMismatch = this.next == null && this.parent.last != this;

            if (firstChildMismatch || lastChildMismatch)
            {
                return false;
            }
        }

        Node child = this.content;
        while (child != null)
        {
            if (child.parent != this || !child.checkNodeIntegrity())
            {
                return false;
            }
            child = child.next;
        }
        return true;
    }

    /**
     * Add a css class to the node. If a class attribute already exists the class name is appended to its value,
     * separated by a space. If the existing attribute has no value (null or empty) the class name becomes the value.
     * If there is no class attribute a new one is created.
     * @param classname css class name
     */
    public void addClass(String classname)
    {
        AttVal classattr = this.getAttrByName("class");

        if (classattr == null)
        {
            // create new class attribute
            this.addAttribute("class", classname);
        }
        else if (classattr.value == null || classattr.value.length() == 0)
        {
            // string concatenation would turn a null value into the literal text "null" and an empty value into a
            // leading space, so the class name is used as the whole value instead
            classattr.value = classname;
        }
        else
        {
            // there already is a usable class value: append class name after a space
            classattr.value = classattr.value + " " + classname;
        }
    }

    /**
     * Builds a textual dump of this node and of all the siblings that follow it, separated by commas. Each node is
     * printed with its type, its element name and its content (the content is dumped recursively); text, comment and
     * processing instruction nodes also show their text.
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString()
    {
        StringBuilder out = new StringBuilder();

        for (Node current = this; current != null; current = current.next)
        {
            out.append("[Node type=").append(NODETYPE_STRING[current.type]);
            out.append(",element=").append(current.element != null ? current.element : "null");

            if (current.type == TEXT_NODE || current.type == COMMENT_TAG || current.type == PROC_INS_TAG)
            {
                out.append(",text=");
                if (current.textarray == null || current.start > current.end)
                {
                    out.append("null");
                }
                else
                {
                    out.append("\"")
                        .append(TidyUtils.getString(current.textarray, current.start, current.end - current.start))
                        .append("\"");
                }
            }

            // the content dump covers the first child and, through this same loop, all of its siblings
            out.append(",content=").append(current.content != null ? current.content.toString() : "null");
            out.append("]");

            if (current.next != null)
            {
                out.append(",");
            }
        }

        return out.toString();
    }

    /**
     * Returns a DOM Node which wraps the current tidy Node. The wrapper is created on the first call, its class being
     * chosen from the node type, and the same instance is returned by later calls.
     * @return org.w3c.dom.Node instance
     */
    protected org.w3c.dom.Node getAdapter()
    {
        if (adapter != null)
        {
            // already created by an earlier call
            return adapter;
        }

        switch (this.type)
        {
            case ROOT_NODE :
                adapter = new DOMDocumentImpl(this);
                break;
            case DOCTYPE_TAG :
                adapter = new DOMDocumentTypeImpl(this);
                break;
            case START_TAG :
            case START_END_TAG :
                adapter = new DOMElementImpl(this);
                break;
            case TEXT_NODE :
                adapter = new DOMTextImpl(this);
                break;
            case CDATA_TAG :
                adapter = new DOMCDATASectionImpl(this);
                break;
            case COMMENT_TAG :
                adapter = new DOMCommentImpl(this);
                break;
            case PROC_INS_TAG :
                adapter = new DOMProcessingInstructionImpl(this);
                break;
            default :
                // every other node type gets the generic wrapper
                adapter = new DOMNodeImpl(this);
                break;
        }

        return adapter;
    }

    /**
     * Clone this node. The copy is built from this node's type, text array and start/end offsets and receives the
     * same parent, closed and implicit flags, tag and element name; attributes are copied through AttVal.clone().
     * @param deep if true deep clone the node (also clones all the contained nodes)
     * @return cloned node
     */
    protected Node cloneNode(boolean deep)
    {
        Node copy = new Node(type, textarray, start, end);
        copy.element = element;
        copy.tag = tag;
        copy.implicit = implicit;
        copy.closed = closed;
        // NOTE(review): the copy refers to the original's parent although this method does not link it into that
        // parent's child list - confirm that callers expect this
        copy.parent = parent;

        if (attributes != null)
        {
            copy.attributes = (AttVal) attributes.clone();
        }

        if (!deep)
        {
            return copy;
        }

        // deep clone: every child is cloned recursively and appended to the copy
        for (Node child = this.content; child != null; child = child.next)
        {
            copy.insertNodeAtEnd(child.cloneNode(true));
        }

        return copy;
    }

    /**
     * Setter for node type. The value is stored as is, without validation.
     * @param newType a valid node type constant
     */
    protected void setType(short newType)
    {
        type = newType;
    }

    /**
     * Used to check script node for script language. A node without any attribute is considered javascript.
     * Otherwise the node is javascript when a "language" or "type" attribute (names compared ignoring case) has a
     * value containing "javascript" in any letter case. Attributes without a value are ignored.
     * @return true if the script node contains javascript
     */
    public boolean isJavaScript()
    {
        if (this.attributes == null)
        {
            return true;
        }

        for (AttVal attr = this.attributes; attr != null; attr = attr.next)
        {
            boolean languageOrType = "language".equalsIgnoreCase(attr.attribute)
                || "type".equalsIgnoreCase(attr.attribute);

            // The value is null-checked because calling toLowerCase() on a missing value would throw. A fixed
            // locale is used because the default-locale conversion maps 'I' to a dotless i under a Turkish locale,
            // which would make "JAVASCRIPT" fail to match.
            if (languageOrType
                && attr.value != null
                && attr.value.toLowerCase(java.util.Locale.ENGLISH).contains("javascript"))
            {
                // one matching attribute is enough, no need to look at the remaining ones
                return true;
            }
        }

        return false;
    }

    /**
     * Does the node expect contents? Only start tags can expect content. A start tag without a tag dictionary entry
     * (unknown element) always does; a known one does unless the Dict.CM_EMPTY bit is set in its tag's model.
     * @return false if this node should be empty
     */
    public boolean expectsContent()
    {
        if (this.type != Node.START_TAG)
        {
            return false;
        }

        // an unknown element (no tag) never counts as having an empty content model
        boolean emptyModel = this.tag != null && TidyUtils.toBoolean(this.tag.model & Dict.CM_EMPTY);
        return !emptyModel;
    }
}