// org.w3c.tidy.Clean
/*
 *  Java HTML Tidy - JTidy
 *  HTML parser and pretty printer
 *
 *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 *  Institute of Technology, Institut National de Recherche en
 *  Informatique et en Automatique, Keio University). All Rights
 *  Reserved.
 *
 *  Contributing Author(s):
 *
 *     Dave Raggett 
 *     Andy Quick  (translation to Java)
 *     Gary L Peskin  (Java development)
 *     Sami Lempinen  (release management)
 *     Fabrizio Giustina 
 *
 *  The contributing author(s) would like to thank all those who
 *  helped with testing, bug fixes, and patience.  This wouldn't
 *  have been possible without all of you.
 *
 *  COPYRIGHT NOTICE:
 *
 *  This software and documentation is provided "as is," and
 *  the copyright holders and contributing author(s) make no
 *  representations or warranties, express or implied, including
 *  but not limited to, warranties of merchantability or fitness
 *  for any particular purpose or that the use of the software or
 *  documentation will not infringe any third party patents,
 *  copyrights, trademarks or other rights.
 *
 *  The copyright holders and contributing author(s) will not be
 *  liable for any direct, indirect, special or consequential damages
 *  arising out of any use of the software or documentation, even if
 *  advised of the possibility of such damage.
 *
 *  Permission is hereby granted to use, copy, modify, and distribute
 *  this source code, or portions hereof, documentation and executables,
 *  for any purpose, without fee, subject to the following restrictions:
 *
 *  1. The origin of this source code must not be misrepresented.
 *  2. Altered versions must be plainly marked as such and must
 *     not be misrepresented as being the original source.
 *  3. This Copyright notice may not be removed or altered from any
 *     source or altered source distribution.
 *
 *  The copyright holders and contributing author(s) specifically
 *  permit, without fee, and encourage the use of this source code
 *  as a component for supporting the Hypertext Markup Language in
 *  commercial products. If you use this source code in a product,
 *  acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy;

/**
 * Clean up misuse of presentation markup. Filters from other formats such as Microsoft Word often make excessive use of
 * presentation markup such as font tags, B, I, and the align attribute. By applying a set of production rules, it is
 * straight forward to transform this to use CSS. Some rules replace some of the children of an element by style
 * properties on the element, e.g.
 * <pre>{@code
 * <p><b>...</b></p> -> <p style="font-weight: bold">...</p>
 * }</pre>
 * 
 * Such rules are applied to the element's content and then to the element itself until none of the rules more apply.
 * Having applied all the rules to an element, it will have a style attribute with one or more properties. Other rules
 * strip the element they apply to, replacing it by style properties on the contents, e.g.
 * <pre>{@code
 * <dir><li><p>...</li></dir> -> <p style="margin-left: 1em">...
 * }</pre>
 * 
 * These rules are applied to an element before processing its content and replace the current element by the first
 * element in the exposed content. After applying both sets of rules, you can replace the style attribute by a class
 * value and style rule in the document head. To support this, an association of styles and class names is built. A
 * naive approach is to rely on string matching to test when two property lists are the same. A better approach would be
 * to first sort the properties before matching.
 *
 * @author Dave Raggett
 * @author Andy Quick (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision$ ($Author$)
 */
public class Clean
{

    /**
     * sequential number for generated css classes.
     */
    private int classNum; // pre-incremented by gensymClass, so the first generated class name ends in 1

    /**
     * Tag table used to recognise elements (p, div, li, dir, ul, ol, center) and to re-resolve a node's tag after
     * its element name has been changed.
     */
    private TagTable tt;

    /**
     * Instantiates a new Clean.
     *
     * @param tagTable tag table instance; stored as-is and consulted by the clean-up rules
     */
    public Clean(TagTable tagTable)
    {
        this.tt = tagTable;
    }

    /**
     * Insert a css style property.
     *
     * @param props StyleProp instance
     * @param name  property name
     * @param value property value
     * @return StyleProp containin the given property
     */
    private StyleProp insertProperty(StyleProp props, String name, String value)
    {
        // Walk the name-sorted list until we reach the first entry that does not sort before 'name'.
        StyleProp before = null;
        StyleProp cursor = props;

        while (cursor != null)
        {
            final int order = cursor.name.compareTo(name);

            if (order == 0)
            {
                // already defined: the existing value wins and the list is left untouched
                return props;
            }

            if (order > 0)
            {
                // cursor sorts after 'name', so the new entry belongs right in front of it
                break;
            }

            before = cursor;
            cursor = cursor.next;
        }

        // cursor is either the successor of the new entry or null when appending at the tail
        final StyleProp added = new StyleProp(name, value, cursor);

        if (before == null)
        {
            // inserted in front of everything: the new entry becomes the list head
            return added;
        }

        before.next = added;
        return props;
    }

    /**
     * Create sorted linked list of properties from style string.
     *
     * @param prop  StyleProp
     * @param style style string
     * @return StyleProp with given style
     */
    private StyleProp createProps(StyleProp prop, String style)
    {
        final int length = style.length();
        int cursor = 0;

        while (cursor < length)
        {
            // skip blanks in front of the property name
            while (cursor < length && style.charAt(cursor) == ' ')
            {
                cursor++;
            }

            // the name runs up to the next colon; without one there is no further property
            final int colon = style.indexOf(':', cursor);
            if (colon < 0)
            {
                break;
            }

            // skip blanks between the colon and the value
            int valueBegin = colon + 1;
            while (valueBegin < length && style.charAt(valueBegin) == ' ')
            {
                valueBegin++;
            }

            // the value runs up to the next semicolon, or to the end of the string
            final int semicolon = style.indexOf(';', valueBegin);
            final int valueFinish = (semicolon < 0) ? length : semicolon;

            prop = insertProperty(prop, style.substring(cursor, colon), style.substring(valueBegin, valueFinish));

            if (semicolon < 0)
            {
                // no delimiter: that was the last property
                break;
            }

            cursor = semicolon + 1;
        }

        return prop;
    }

    /**
     * Create a css property.
     *
     * @param props StyleProp
     * @return css property as String
     */
    private String createPropString(StyleProp props)
    {
        // Upper bound for the rendered length: each property contributes its name, ": ", its value and "; ".
        // Used to presize the buffer so that it never has to grow.
        int capacity = 0;
        for (StyleProp prop = props; prop != null; prop = prop.next)
        {
            capacity += prop.name.length() + 2;
            capacity += prop.value.length() + 2;
        }

        // Render "name: value" pairs separated by "; " (no trailing delimiter); an empty list yields "".
        final StringBuilder style = new StringBuilder(capacity);
        for (StyleProp prop = props; prop != null; prop = prop.next)
        {
            style.append(prop.name).append(": ").append(prop.value);

            if (prop.next != null)
            {
                style.append("; ");
            }
        }

        return style.toString();
    }

    /**
     * Creates a string with merged properties.
     *
     * @param style    css style
     * @param property css properties
     * @return merged string
     */
    private String addProperty(String style, String property)
    {
        // The properties of 'style' are inserted first; insertProperty keeps the first value seen for a name,
        // so an entry of 'property' only takes effect when 'style' does not define that name already.
        final StyleProp merged = createProps(createProps(null, style), property);
        return createPropString(merged);
    }

    /**
     * Generates a new css class name.
     *
     * @param lexer Lexer
     * @return generated css class
     */
    private String gensymClass(final Lexer lexer)
    {
        // fall back to "c" when no css prefix has been configured
        final String configured = lexer.configuration.cssPrefix;
        final String prefix = (configured != null) ? configured : "c";

        // advance the counter first, then use the new value as suffix
        classNum += 1;
        return prefix + classNum;
    }

    /**
     * Finds a css style.
     *
     * @param lexer      Lexer
     * @param tag        tag name
     * @param properties css properties
     * @return style string
     */
    private String findStyle(Lexer lexer, String tag, String properties)
    {
        // reuse the class of an existing dictionary entry with the same tag and the same property string
        Style known = lexer.styles;
        while (known != null)
        {
            if (known.tag.equals(tag) && known.properties.equals(properties))
            {
                return known.tagClass;
            }
            known = known.next;
        }

        // no match: register a new entry at the head of the dictionary under a freshly generated class name
        final Style created = new Style(tag, gensymClass(lexer), properties, lexer.styles);
        lexer.styles = created;
        return created.tagClass;
    }

    /**
     * Find style attribute in node, and replace it by corresponding class attribute. Search for class in style
     * dictionary otherwise gensym new class and add to dictionary. Assumes that node doesn't have a class attribute.
     *
     * @param lexer Lexer
     * @param node  node with a style attribute
     */
    private void style2Rule(Lexer lexer, Node node)
    {
        final AttVal styleAttribute = node.getAttrByName("style");
        if (styleAttribute == null)
        {
            // nothing to convert
            return;
        }

        final String generated = findStyle(lexer, node.element, styleAttribute.value);
        final AttVal existingClass = node.getAttrByName("class");

        if (existingClass == null)
        {
            // no class attribute yet: recycle the style attribute as the class attribute
            styleAttribute.attribute = "class";
            styleAttribute.value = generated;
            return;
        }

        // a class attribute exists: append the generated name after a space and drop the style attribute
        existingClass.value = existingClass.value + " " + generated;
        node.removeAttribute(styleAttribute);
    }

    /**
     * Adds a css rule for color.
     *
     * @param lexer    Lexer
     * @param selector css selector
     * @param color    color value
     */
    private void addColorRule(Lexer lexer, String selector, String color)
    {
        if (color == null)
        {
            // no color value, no rule
            return;
        }

        // emits "<selector> { color: <color> }\n" into the lexer buffer
        lexer.addStringLiteral(selector);
        lexer.addStringLiteral(" { color: ");
        lexer.addStringLiteral(color);
        lexer.addStringLiteral(" }\n");
    }

    /**
     * Move presentation attribs from body to style element.
     *
     * <pre>
     * background="foo" -&gt; body { background-image: url(foo) }
     * bgcolor="foo"    -&gt; body { background-color: foo }
     * text="foo"       -&gt; body { color: foo }
     * link="foo"       -&gt; :link { color: foo }
     * vlink="foo"      -&gt; :visited { color: foo }
     * alink="foo"      -&gt; :active { color: foo }
     * </pre>
     *
     * @param lexer Lexer
     * @param body  body node
     */
    private void cleanBodyAttrs(Lexer lexer, Node body)
    {
        // Attributes that are folded into one "body { ... }" rule, listed in emission order together with the
        // text written before and after each value.
        final String[] bodyAttrs = {"background", "bgcolor", "text"};
        final String[] declStart = {"  background-image: url(", "  background-color: ", "  color: "};
        final String[] declEnd = {");\n", ";\n", ";\n"};
        final String[] found = new String[bodyAttrs.length];
        boolean anyFound = false;

        // first pass: pick up the values and strip the attributes from the body element
        for (int i = 0; i < bodyAttrs.length; i++)
        {
            final AttVal attr = body.getAttrByName(bodyAttrs[i]);
            if (attr == null)
            {
                continue;
            }

            found[i] = attr.value;
            attr.value = null;
            body.removeAttribute(attr);

            if (found[i] != null)
            {
                anyFound = true;
            }
        }

        // second pass: write the body rule, but only when at least one value was present
        if (anyFound)
        {
            lexer.addStringLiteral(" body {\n");

            for (int i = 0; i < bodyAttrs.length; i++)
            {
                if (found[i] != null)
                {
                    lexer.addStringLiteral(declStart[i]);
                    lexer.addStringLiteral(found[i]);
                    lexer.addStringLiteral(declEnd[i]);
                }
            }

            lexer.addStringLiteral(" }\n");
        }

        // link colors become separate pseudo-class rules: {attribute name, selector}
        final String[][] linkRules = {{"link", " :link"}, {"vlink", " :visited"}, {"alink", " :active"}};

        for (String[] rule : linkRules)
        {
            final AttVal attr = body.getAttrByName(rule[0]);
            if (attr != null)
            {
                addColorRule(lexer, rule[1], attr.value);
                body.removeAttribute(attr);
            }
        }
    }

    /**
     * Check deprecated attributes in body tag.
     *
     * @param lexer Lexer
     * @param doc   document root node
     * @return true is the body doesn't contain deprecated attributes, false otherwise.
     */
    private boolean niceBody(Lexer lexer, Node doc)
    {
        final Node body = doc.findBody(lexer.configuration.tt);
        if (body == null)
        {
            // no body element, nothing to complain about
            return true;
        }

        // presentation attributes that should be expressed as style rules instead
        final String[] presentational = {"background", "bgcolor", "text", "link", "vlink", "alink"};

        for (String attrName : presentational)
        {
            if (body.getAttrByName(attrName) != null)
            {
                // record the layout problem on the lexer and stop at the first hit
                lexer.badLayout |= Report.USING_BODY;
                return false;
            }
        }

        return true;
    }

    /**
     * Create style element using rules from dictionary.
     *
     * @param lexer Lexer
     * @param doc   root node
     */
    private void createStyleElement(Lexer lexer, Node doc)
    {
        Node node, head, body;
        Style style;
        AttVal av;

        // Nothing to do when the style dictionary is empty and the body carries no presentation attributes.
        // niceBody is only consulted (and only flags lexer.badLayout) when the dictionary is empty.
        if (lexer.styles == null && niceBody(lexer, doc))
        {
            return;
        }

        // build an implicit <style> element
        node = lexer.newNode(Node.START_TAG, null, 0, 0, "style");
        node.implicit = true;

        // insert type attribute
        av = new AttVal(null, null, '"', "type", "text/css");
        av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
        node.attributes = av;

        body = doc.findBody(lexer.configuration.tt);

        // everything appended to the lexer buffer from here on becomes the text of the style element
        lexer.txtstart = lexer.lexsize;

        // rules derived from the body's presentation attributes come first
        if (body != null)
        {
            cleanBodyAttrs(lexer, body);
        }

        // then one " tag.class {properties}\n" rule per dictionary entry
        for (style = lexer.styles; style != null; style = style.next)
        {
            lexer.addCharToLexer(' ');
            lexer.addStringLiteral(style.tag);
            lexer.addCharToLexer('.');
            lexer.addStringLiteral(style.tagClass);
            lexer.addCharToLexer(' ');
            lexer.addCharToLexer('{');
            lexer.addStringLiteral(style.properties);
            lexer.addCharToLexer('}');
            lexer.addCharToLexer('\n');
        }

        lexer.txtend = lexer.lexsize;

        // wrap the buffer range written above in a text node and make it the content of the style element
        node.insertNodeAtEnd(lexer.newNode(Node.TEXT_NODE, lexer.lexbuf, lexer.txtstart, lexer.txtend));

        // now insert the style element into the document head; doc is the root node and findHEAD locates the
        // head element below it

        head = doc.findHEAD(lexer.configuration.tt);

        // NOTE(review): when no head is found the style element is not attached anywhere, although
        // cleanBodyAttrs has already removed the body attributes - confirm this cannot happen in practice.
        if (head != null)
        {
            head.insertNodeAtEnd(node);
        }
    }

    /**
     * Ensure bidirectional links are consistent.
     *
     * @param node root node
     */
    private void fixNodeLinks(Node node)
    {
        // backward neighbour: either the previous sibling or, for a first child, the parent's content pointer
        if (node.prev == null)
        {
            node.parent.content = node;
        }
        else
        {
            node.prev.next = node;
        }

        // forward neighbour: either the next sibling or, for a last child, the parent's last pointer
        if (node.next == null)
        {
            node.parent.last = node;
        }
        else
        {
            node.next.prev = node;
        }

        // every direct child must point back at this node
        Node child = node.content;
        while (child != null)
        {
            child.parent = node;
            child = child.next;
        }
    }

    /**
     * Used to strip child of node when the node has one and only one child.
     *
     * @param node parent node
     */
    private void stripOnlyChild(Node node)
    {
        // lift the grandchildren up one level, taking the place of the single child
        final Node onlyChild = node.content;
        node.content = onlyChild.content;
        node.last = onlyChild.last;
        onlyChild.content = null;

        // re-parent the lifted nodes
        Node lifted = node.content;
        while (lifted != null)
        {
            lifted.parent = node;
            lifted = lifted.next;
        }
    }

    /**
     * Used to strip font start and end tags.
     *
     * @param element original node
     * @param pnode   passed in as array to allow modification. pnode[0] will contain the final node
     *                TODO remove the pnode parameter and make it a return value
     */
    private void discardContainer(Node element, Node[] pnode)
    {
        final Node parent = element.parent;

        if (element.content == null)
        {
            // empty container: simply unlink it from its siblings / parent

            if (element.next == null)
            {
                parent.last = element.prev;
            }
            else
            {
                element.next.prev = element.prev;
            }

            if (element.prev == null)
            {
                parent.content = element.next;
            }
            else
            {
                element.prev.next = element.next;
            }

            // processing resumes at the former next sibling (may be null)
            pnode[0] = element.next;
        }
        else
        {
            // non-empty container: splice its children into the parent's child list in its place

            // tail of the children links to whatever followed the container
            element.last.next = element.next;

            if (element.next == null)
            {
                parent.last = element.last;
            }
            else
            {
                element.next.prev = element.last;
            }

            // head of the children links to whatever preceded the container
            if (element.prev == null)
            {
                parent.content = element.content;
            }
            else
            {
                element.content.prev = element.prev;
                element.prev.next = element.content;
            }

            // re-parent, starting at the first former child and following the (already re-linked) next chain
            Node moved = element.content;
            while (moved != null)
            {
                moved.parent = parent;
                moved = moved.next;
            }

            // processing resumes at the first former child
            pnode[0] = element.content;
        }

        // detach the discarded container from the nodes it used to reference
        element.next = null;
        element.content = null;
    }

    /**
     * Add style property to element, creating style attribute as needed and adding ; delimiter.
     *
     * @param node     node
     * @param property property added to node
     */
    private void addStyleProperty(Node node, String property)
    {
        // Locate an existing style attribute. The comparison is written constant-first so that an attribute
        // without a name cannot cause a NullPointerException (same form as used in mergeClasses).
        AttVal styleAttr = node.attributes;
        while (styleAttr != null && !"style".equals(styleAttr.attribute))
        {
            styleAttr = styleAttr.next;
        }

        if (styleAttr == null)
        {
            // no style attribute yet: create one in front of the existing attributes
            styleAttr = new AttVal(node.attributes, null, '"', "style", property);
            styleAttr.dict = AttributeTable.getDefaultAttributeTable().findAttribute(styleAttr);
            node.attributes = styleAttr;
        }
        else if (styleAttr.value == null)
        {
            // style attribute without a value: there is nothing to merge with, and parsing a null style
            // string would fail, so the property simply becomes the value
            styleAttr.value = property;
        }
        else
        {
            // merge the property into the existing declarations (existing declarations take precedence)
            styleAttr.value = addProperty(styleAttr.value, property);
        }
    }

    /**
     * Create new string that consists of the combined style properties in s1 and s2. To merge property lists, we build
     * a linked list of property/values and insert properties into the list in order, merging values for the same
     * property name.
     *
     * @param s1 first property
     * @param s2 second property
     * @return merged properties
     */
    private String mergeProperties(String s1, String s2)
    {
        // Same operation as addProperty: parse s1, then s2, into one name-sorted list and render it again.
        // When both define the same property name, the value from s1 is the one that is kept.
        return addProperty(s1, s2);
    }

    /**
     * Merge class attributes from 2 nodes.
     *
     * @param node  Node
     * @param child Child node
     */
    private void mergeClasses(Node node, Node child)
    {
        // class names carried by the child (value of its first class attribute, if any)
        String childClasses = null;
        for (AttVal av = child.attributes; av != null; av = av.next)
        {
            if ("class".equals(av.attribute))
            {
                childClasses = av.value;
                break;
            }
        }

        if (childClasses == null)
        {
            // the child contributes nothing, node stays as it is
            return;
        }

        // first class attribute of the node, if any
        AttVal nodeClass = node.attributes;
        while (nodeClass != null && !"class".equals(nodeClass.attribute))
        {
            nodeClass = nodeClass.next;
        }

        if (nodeClass == null)
        {
            // copy class names from child into a new attribute placed in front of the existing ones
            nodeClass = new AttVal(node.attributes, null, '"', "class", childClasses);
            nodeClass.dict = AttributeTable.getDefaultAttributeTable().findAttribute(nodeClass);
            node.attributes = nodeClass;
        }
        else if (nodeClass.value == null)
        {
            // node has a class attribute without a value: fill it in rather than adding a second
            // class attribute to the same element
            nodeClass.value = childClasses;
        }
        else
        {
            // merge class names from both, node's names first
            nodeClass.value = nodeClass.value + ' ' + childClasses;
        }
    }

    /**
     * Merge style from 2 nodes.
     *
     * @param node  Node
     * @param child Child node
     */
    private void mergeStyles(Node node, Node child)
    {
        // the child may have a class attribute used for attaching styles, if so the class name needs to be copied to
        // node's class
        mergeClasses(node, child);

        // style declarations carried by the child (value of its first style attribute, if any); the comparison
        // is constant-first so an attribute without a name cannot cause a NullPointerException
        String childStyle = null;
        for (AttVal av = child.attributes; av != null; av = av.next)
        {
            if ("style".equals(av.attribute))
            {
                childStyle = av.value;
                break;
            }
        }

        if (childStyle == null)
        {
            // the child contributes nothing, node stays as it is
            return;
        }

        // first style attribute of the node, if any
        AttVal nodeStyle = node.attributes;
        while (nodeStyle != null && !"style".equals(nodeStyle.attribute))
        {
            nodeStyle = nodeStyle.next;
        }

        if (nodeStyle == null)
        {
            // copy style of child into a new attribute placed in front of the existing ones
            nodeStyle = new AttVal(node.attributes, null, '"', "style", childStyle);
            nodeStyle.dict = AttributeTable.getDefaultAttributeTable().findAttribute(nodeStyle);
            node.attributes = nodeStyle;
        }
        else if (nodeStyle.value == null)
        {
            // node has a style attribute without a value: fill it in rather than adding a second
            // style attribute to the same element
            nodeStyle.value = childStyle;
        }
        else
        {
            // merge styles from both; for a property defined by both, node's value is kept
            nodeStyle.value = mergeProperties(nodeStyle.value, childStyle);
        }
    }

    /**
     * Map a % font size to a named font size.
     *
     * @param size size in %
     * @return font size name
     */
    private String fontSize2Name(String size)
    {
        // css equivalents of the absolute sizes 0..6; size 3 maps to null (no font-size is produced for it)
        final String[] absolute = {"60%", "70%", "80%", null, "120%", "150%", "200%"};
        final int length = size.length();

        // absolute size: a leading digit between 0 and 6
        if (length > 0 && size.charAt(0) >= '0' && size.charAt(0) <= '6')
        {
            return absolute[size.charAt(0) - '0'];
        }

        // relative size: a leading '-' shrinks, anything else grows
        final boolean shrink = length > 0 && size.charAt(0) == '-';

        if (length > 1 && size.charAt(1) >= '0' && size.charAt(1) <= '6')
        {
            // apply the step factor once per step, then express the result as a truncated percentage
            final double factor = shrink ? 0.8 : 1.2;
            double scale = 1.0;

            for (int steps = size.charAt(1) - '0'; steps > 0; steps--)
            {
                scale *= factor;
            }

            scale *= 100.0;
            return "" + (int) scale + "%";
        }

        // no usable step count
        return shrink ? "smaller" : "larger";
    }

    /**
     * Adds a font-family style.
     *
     * @param node Node
     * @param face font face
     */
    private void addFontFace(Node node, String face)
    {
        // a face attribute without a value carries no information; skipping it avoids emitting the literal
        // text "font-family: null" (addFontSize treats a missing value the same way)
        if (face == null)
        {
            return;
        }

        addStyleProperty(node, "font-family: " + face);
    }

    /**
     * Adds a font size style.
     *
     * @param node Node
     * @param size font size
     */
    private void addFontSize(Node node, String size)
    {
        if (size == null)
        {
            return;
        }

        // the three largest absolute sizes have a heading equivalent
        String heading = null;
        switch (size)
        {
            case "6":
                heading = "h1";
                break;
            case "5":
                heading = "h2";
                break;
            case "4":
                heading = "h3";
                break;
            default:
                break;
        }

        // a paragraph with such a size is turned into the heading instead of getting a font-size property
        if (heading != null && node.tag == this.tt.tagP)
        {
            node.element = heading;
            this.tt.findTag(node);
            return;
        }

        // everything else: translate to a css size, which may be absent (null) for the default size
        final String cssSize = fontSize2Name(size);
        if (cssSize == null)
        {
            return;
        }

        addStyleProperty(node, "font-size: " + cssSize);
    }

    /**
     * Adds a font color style.
     *
     * @param node  Node
     * @param color color value
     */
    private void addFontColor(Node node, String color)
    {
        // a color attribute without a value carries no information; skipping it avoids emitting the literal
        // text "color: null" (addFontSize and addColorRule treat a missing value the same way)
        if (color == null)
        {
            return;
        }

        addStyleProperty(node, "color: " + color);
    }

    /**
     * Adds an align style.
     *
     * @param node  Node
     * @param align align value
     */
    private void addAlign(Node node, String align)
    {
        // Force the alignment keyword to lower case. The conversion is done with the root locale so the result
        // does not depend on the JVM's default locale (under a Turkish locale "RIGHT" would otherwise become
        // "r\u0131ght", which is not a valid css keyword).
        addStyleProperty(node, "text-align: " + align.toLowerCase(java.util.Locale.ROOT));
    }

    /**
     * Add style properties to node corresponding to the font face, size and color attributes.
     *
     * @param node font tag
     * @param av   attribute list for node
     */
    private void addFontStyles(Node node, AttVal av)
    {
        for (AttVal current = av; current != null; current = current.next)
        {
            // switching on a null string throws a NullPointerException, so attributes without a name are
            // skipped explicitly
            if (current.attribute == null)
            {
                continue;
            }

            // only face, size and color have a css counterpart; all other attributes are ignored
            switch (current.attribute)
            {
                case "face":
                    addFontFace(node, current.value);
                    break;
                case "size":
                    addFontSize(node, current.value);
                    break;
                case "color":
                    addFontColor(node, current.value);
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Symptom: <p align=center>. Action: <p style="text-align: center">.
     *
     * @param lexer Lexer
     * @param node  node with center attribute. Will be modified to use css style.
     */
    private void textAlign(Lexer lexer, Node node)
    {
        AttVal previous = null;

        for (AttVal av = node.attributes; av != null; previous = av, av = av.next)
        {
            // constant-first comparison: an attribute without a name must not cause a NullPointerException
            // (same form as used in mergeClasses)
            if (!"align".equals(av.attribute))
            {
                continue;
            }

            // unlink the align attribute from the node's attribute list
            if (previous != null)
            {
                previous.next = av.next;
            }
            else
            {
                node.attributes = av.next;
            }

            // an align attribute without a value is just dropped; otherwise it becomes a text-align property
            if (av.value != null)
            {
                addAlign(node, av.value);
            }

            // only the first align attribute is handled
            return;
        }
    }

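    /**
     * Symptom: e.g. &lt;table bgcolor="red"&gt;. Action: &lt;table style="background-color: red"&gt;.
     *
     * @param node node whose bgcolor attribute is replaced by a background-color style property
     */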
    private void tableBgColor(final Node node)
    {
        final AttVal attr = node.getAttrByName("bgcolor");
        if (attr == null)
        {
            return;
        }

        // the presentational attribute is removed in any case
        node.removeAttribute(attr);

        // a bgcolor attribute without a value is just dropped (as textAlign does for align); otherwise the
        // literal text "background-color: null" would end up in the style attribute
        if (attr.value != null)
        {
            addStyleProperty(node, "background-color: " + attr.value);
        }
    }

    /**
     * Symptom: <dir><li> where <li> is only child. Action: coerce
     * <dir> <li> to <div> with indent. The clean up rules use the pnode argument
     * to return the next node when the original node has been deleted.
     *
     * @param lexer Lexer
     * @param node  dir tag
     * @return true if a dir tag has been coerced to a div
     */
    private boolean dir2Div(Lexer lexer, Node node)
    {
        // the rule applies to dir, ul and ol elements only
        final boolean listContainer = node.tag == this.tt.tagDir
            || node.tag == this.tt.tagUl
            || node.tag == this.tt.tagOl;

        if (!listContainer)
        {
            return false;
        }

        // it must contain exactly one child, and that child must be an implicit li
        final Node item = node.content;

        if (item == null || item.next != null || item.tag != this.tt.tagLi || !item.implicit)
        {
            return false;
        }

        // coerce the list to an indented div and drop the li wrapper
        node.tag = this.tt.tagDiv;
        node.element = "div";
        addStyleProperty(node, "margin-left: 2em");
        stripOnlyChild(node);
        return true;
    }

    /**
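     * Symptom: {@code <center>}. Action: replace {@code <center>} by {@code <div style="text-align: center">}.
     * When the dropFontTags option is set, the element is discarded instead and a {@code <br>} is inserted.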
     *
     * @param lexer Lexer
     * @param node  center tag
     * @param pnode pnode[0] is the same as node, passed in as an array to allow modification
     * @return true if a center tag has been replaced by a div
     */
    private boolean center2Div(Lexer lexer, Node node, Node[] pnode)
    {
        if (node.tag == this.tt.tagCenter)
        {
            // with dropFontTags set, the center element is removed altogether and a line break is inserted
            // instead of converting it to a styled div
            if (lexer.configuration.dropFontTags)
            {
                if (node.content != null)
                {
                    // remember the last child and the parent before the container is taken apart
                    Node last = node.last;
                    Node parent = node.parent;

                    // splices the children into the parent; pnode[0] becomes the first former child
                    discardContainer(node, pnode);

                    // from here on 'node' is the new br element, linked in right after the last former child
                    node = lexer.inferredTag("br");

                    if (last.next != null)
                    {
                        last.next.prev = node;
                    }

                    node.next = last.next;
                    last.next = node;
                    node.prev = last;

                    // keep the parent's last pointer valid when the br became the final child
                    if (parent.last == last)
                    {
                        parent.last = node;
                    }

                    node.parent = parent;
                }
                else
                {
                    // empty center element: remember its neighbours, since discardContainer clears node.next
                    Node prev = node.prev;
                    Node next = node.next;
                    Node parent = node.parent;
                    // unlinks the element; pnode[0] becomes its former next sibling
                    discardContainer(node, pnode);

                    // from here on 'node' is the new br element, placed where the center element used to be
                    node = lexer.inferredTag("br");
                    node.next = next;
                    node.prev = prev;
                    node.parent = parent;

                    if (next != null)
                    {
                        next.prev = node;
                    }
                    else
                    {
                        parent.last = node;
                    }

                    if (prev != null)
                    {
                        prev.next = node;
                    }
                    else
                    {
                        parent.content = node;
                    }
                }

                return true;
            }
            // default: keep the element, but turn it into a div with centered text
            node.tag = this.tt.tagDiv;
            node.element = "div";
            addStyleProperty(node, "text-align: center");
            return true;
        }

        return false;
    }

    /**
     * Symptom: <div><div>...</div></div> Action: merge the two divs. This is useful after
     * nested <dir>s used by Word for indenting have been converted to <div>s.
     *
     * @param lexer Lexer
     * @param node  first div
     * @return true if the divs have been merged
     */
    private boolean mergeDivs(Lexer lexer, Node node)
    {
        // the outer element must itself be a div
        if (node.tag != this.tt.tagDiv)
        {
            return false;
        }

        // ...and must wrap exactly one child, which is again a div
        Node inner = node.content;
        boolean wrapsSingleDiv = inner != null && inner.tag == this.tt.tagDiv && inner.next == null;

        if (!wrapsSingleDiv)
        {
            return false;
        }

        // fold the inner div's styles into the outer div, then drop the inner wrapper
        mergeStyles(node, inner);
        stripOnlyChild(node);
        return true;
    }

    /**
     * Symptom: {@code <ul><li><ul>...</ul></li></ul>}, i.e. a ul/ol whose only child starts with a list of the
     * same type. Action: discard outer list.
     *
     * @param lexer Lexer
     * @param node  Node
     * @param pnode passed in as array to allow modifications.
     * @return true if nested lists have been found and replaced
     */
    private boolean nestedList(Lexer lexer, Node node, Node[] pnode)
    {
        // child: the single item of the outer list; list: the inner list found as that item's first child
        Node child, list;

        if (node.tag == this.tt.tagUl || node.tag == this.tt.tagOl)
        {
            child = node.content;

            if (child == null)
            {
                return false;
            }

            // check child has no peers

            if (child.next != null)
            {
                return false;
            }

            list = child.content;

            if (list == null)
            {
                return false;
            }

            // the inner list must be of the same kind (ul in ul, ol in ol) as the outer one
            if (list.tag != node.tag)
            {
                return false;
            }

            pnode[0] = list; // Set node to resume iteration

            // move inner list node into position of outer node
            list.prev = node.prev;
            list.next = node.next;
            list.parent = node.parent;
            fixNodeLinks(list);

            // get rid of outer ul and its li
            // XXX: Are we leaking the child node? -creitzel 7 Jun, 01
            child.content = null;
            node.content = null;
            node.next = null;
            node = null;

            // If prev node was a list the chances are this node should be appended to that list. Word has no way of
            // recognizing nested lists and just uses indents
            if (list.prev != null)
            {
                if (list.prev.tag == this.tt.tagUl || list.prev.tag == this.tt.tagOl)
                {

                    // from here on "node" is the promoted inner list and "list" is the preceding sibling list
                    node = list;
                    list = node.prev;

                    // unlink the promoted list from the sibling chain
                    list.next = node.next;

                    if (list.next != null)
                    {
                        list.next.prev = list;
                    }

                    // NOTE(review): list.last is not checked for null; an empty preceding list would fail on the
                    // dereferences below - confirm whether that can occur
                    child = list.last; /* <li> */

                    // re-attach the promoted list as the last child of the preceding list's last item
                    node.parent = child;
                    node.next = null;
                    node.prev = child.last;
                    fixNodeLinks(node);
                    cleanNode(lexer, node);
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Symptom: the only child of a block-level element is a presentation element such as B, I or FONT. Action: add
     * style "font-weight: bold" to the block and strip the {@code <b>} element, leaving its children. Example:
     *
     * <pre>{@code
     * <p>
     * <b><font face="Arial" size="6">Draft Recommended Practice</font></b>
     * </p>
     * }</pre>
     *
     * becomes:
     *
     * <pre>{@code
     * <p style="font-weight: bold; font-family: Arial; font-size: 6">
     * Draft Recommended Practice
     * </p>
     * }</pre>
     *
     * This code also replaces the align attribute by a style attribute. However, to avoid CSS problems with Navigator
     * 4, this isn't done for the elements: caption, tr and table.
     *
     * @param lexer Lexer
     * @param node  parent node
     * @return true if the child node has been removed
     */
    private boolean blockStyle(Lexer lexer, Node node)
    {
        // An element without a dictionary entry has no content model to inspect. Other methods of this class
        // (cleanWord2000, list2BQ) guard against a null tag as well, so do the same here instead of failing.
        if (node.tag == null)
        {
            return false;
        }

        /* check for bgcolor */
        if (node.tag == tt.tagTable || node.tag == tt.tagTd || node.tag == tt.tagTh || node.tag == tt.tagTr)
        {
            tableBgColor(node);
        }

        // only block, list, definition list and table model elements are candidates
        if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) == 0)
        {
            return false;
        }

        // table, tr and li are deliberately left alone
        if (node.tag == this.tt.tagTable || node.tag == this.tt.tagTr || node.tag == this.tt.tagLi)
        {
            return false;
        }

        // check for align attribute (not converted for caption)
        if (node.tag != this.tt.tagCaption)
        {
            textAlign(lexer, node);
        }

        Node child = node.content;

        // the presentation element must be the one and only child
        if (child == null || child.next != null)
        {
            return false;
        }

        if (child.tag == this.tt.tagB)
        {
            mergeStyles(node, child);
            addStyleProperty(node, "font-weight: bold");
            stripOnlyChild(node);
            return true;
        }

        if (child.tag == this.tt.tagI)
        {
            mergeStyles(node, child);
            addStyleProperty(node, "font-style: italic");
            stripOnlyChild(node);
            return true;
        }

        if (child.tag == this.tt.tagFont)
        {
            mergeStyles(node, child);
            addFontStyles(node, child.attributes);
            stripOnlyChild(node);
            return true;
        }

        return false;
    }

    /**
     * If the node has only one b, i, or font child remove the child node and add the appropriate style attributes to
     * parent.
     *
     * @param lexer Lexer
     * @param node  parent node
     * @param pnode passed as an array to allow modifications
     * @return true if child node has been stripped, replaced by style attributes.
     */
    private boolean inlineStyle(Lexer lexer, Node node, Node[] pnode)
    {
        // An element without a dictionary entry has no content model to inspect. Other methods of this class
        // (cleanWord2000, list2BQ) guard against a null tag as well, so do the same here instead of failing.
        if (node.tag == null)
        {
            return false;
        }

        // font elements are handled by font2Span; everything else must be an inline or row model element
        if (node.tag == this.tt.tagFont || (node.tag.model & (Dict.CM_INLINE | Dict.CM_ROW)) == 0)
        {
            return false;
        }

        Node child = node.content;

        // the presentation element must be the one and only child
        if (child == null || child.next != null)
        {
            return false;
        }

        // b and i are only folded into style properties when logical emphasis is requested
        if (child.tag == this.tt.tagB && lexer.configuration.logicalEmphasis)
        {
            mergeStyles(node, child);
            addStyleProperty(node, "font-weight: bold");
            stripOnlyChild(node);
            return true;
        }

        if (child.tag == this.tt.tagI && lexer.configuration.logicalEmphasis)
        {
            mergeStyles(node, child);
            addStyleProperty(node, "font-style: italic");
            stripOnlyChild(node);
            return true;
        }

        if (child.tag == this.tt.tagFont)
        {
            mergeStyles(node, child);
            addFontStyles(node, child.attributes);
            stripOnlyChild(node);
            return true;
        }

        return false;
    }

    /**
     * Replace font elements by span elements, deleting the font element's attributes and replacing them by a single
     * style attribute.
     *
     * @param lexer Lexer
     * @param node  font tag
     * @param pnode passed as an array to allow modifications
     * @return true if a font tag has been dropped and replaced by style attributes
     */
    private boolean font2Span(Lexer lexer, Node node, Node[] pnode)
    {
        if (node.tag != this.tt.tagFont)
        {
            return false;
        }

        if (lexer.configuration.dropFontTags)
        {
            // the font element is removed altogether; pnode is handed to discardContainer so the caller can resume
            discardContainer(node, pnode);
            return false;
        }

        // if FONT is only child of parent element then leave alone
        if (node.parent.content == node && node.next == null)
        {
            return false;
        }

        addFontStyles(node, node.attributes);

        // extract style attribute and free the rest
        AttVal style = null;
        AttVal next;

        for (AttVal av = node.attributes; av != null; av = next)
        {
            next = av.next;

            // constant-first comparison: an attribute may carry no name (purgeWord2000Attributes checks for
            // that too), which must not abort the conversion
            if ("style".equals(av.attribute))
            {
                av.next = null;
                style = av;
            }
        }

        // the span keeps nothing but the (last) style attribute
        node.attributes = style;

        node.tag = this.tt.tagSpan;
        node.element = "span";

        return true;
    }

    /**
     * Applies all matching rules to a node.
     *
     * @param lexer Lexer
     * @param node  original node
     * @return cleaned up node
     */
    private Node cleanNode(Lexer lexer, Node node)
    {
        Node next = null;
        // one-element array used as an in/out parameter: rules that receive it may store the node to resume from
        Node[] o = new Node[1];
        boolean b = false;

        // Keep applying the rules, in this fixed order, for as long as one of them reports a change; the loop
        // also stops as soon as the current node is null or not an element.
        for (next = node; node != null && node.isElement(); node = next)
        {
            o[0] = next;

            // dir2Div is not given o, so next is unchanged by the assignment below
            b = dir2Div(lexer, node);
            next = o[0];
            if (b)
            {
                continue;
            }

            // Special case: true result means that arg node and its parent no longer exist.
            // So we must jump back up the CreateStyleProperties() call stack until we have a valid node reference.
            b = nestedList(lexer, node, o);
            next = o[0];
            if (b)
            {
                return next;
            }

            b = center2Div(lexer, node, o);
            next = o[0];
            if (b)
            {
                continue;
            }

            // mergeDivs and blockStyle are not given o either; they modify node in place
            b = mergeDivs(lexer, node);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = blockStyle(lexer, node);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = inlineStyle(lexer, node, o);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = font2Span(lexer, node, o);
            next = o[0];
            if (b)
            {
                continue;
            }

            // no rule matched: nothing more to do for this node
            break;
        }

        return next;
    }

    /**
     * Special case: if the current node is destroyed by CleanNode() lower in the tree, this node and its parent no
     * longer exist. So we must jump back up the CreateStyleProperties() call stack until we have a valid node
     * reference.
     *
     * @param lexer Lexer
     * @param node  Node
     * @param prepl passed in as array to allow modifications
     * @return cleaned Node
     */
    private Node createStyleProperties(Lexer lexer, Node node, Node[] prepl)
    {
        // holder compared against node after each child has been processed
        Node[] replacement = new Node[] {node};

        // depth first: clean every child subtree before the node itself
        for (Node child = node.content; child != null; child = child.next)
        {
            child = createStyleProperties(lexer, child, replacement);

            if (replacement[0] != node)
            {
                return replacement[0];
            }

            if (child == null)
            {
                // nothing left to iterate over
                break;
            }
        }

        return cleanNode(lexer, node);
    }

    /**
     * Find style attribute in node content, and replace it by corresponding class attribute.
     *
     * @param lexer Lexer
     * @param node  parent node
     */
    private void defineStyleRules(Lexer lexer, Node node)
    {
        // descend into the children first, then handle the node itself
        for (Node child = node.content; child != null; child = child.next)
        {
            defineStyleRules(lexer, child);
        }

        style2Rule(lexer, node);
    }

    /**
     * Clean an html tree.
     *
     * @param lexer Lexer
     * @param doc   root node
     */
    public void cleanTree(Lexer lexer, Node doc)
    {
        Node[] replacement = {doc};
        Node cleaned = createStyleProperties(lexer, doc, replacement);

        // style rules and the style element are only generated when a tree is left and makeClean is set
        if (cleaned == null || !lexer.configuration.makeClean)
        {
            return;
        }

        defineStyleRules(lexer, cleaned);
        createStyleElement(lexer, cleaned);
    }

    /**
     * Simplifies {@code <b><b> ... </b> ... </b>} etc.
     *
     * @param node root Node
     */
    public void nestedEmphasis(Node node)
    {
        // in/out holder for discardContainer, which stores the node to continue with
        Node[] resume = new Node[1];
        Node current = node;

        while (current != null)
        {
            Node following = current.next;
            boolean emphasis = current.tag == this.tt.tagB || current.tag == this.tt.tagI;

            if (emphasis && current.parent != null && current.parent.tag == current.tag)
            {
                // strip redundant inner element
                resume[0] = following;
                discardContainer(current, resume);
                current = resume[0];
                continue;
            }

            if (current.content != null)
            {
                nestedEmphasis(current.content);
            }

            current = following;
        }
    }

    /**
     * Replace i by em and b by strong.
     *
     * @param node root Node
     */
    public void emFromI(Node node)
    {
        for (Node current = node; current != null; current = current.next)
        {
            boolean italic = current.tag == this.tt.tagI;
            boolean bold = !italic && current.tag == this.tt.tagB;

            if (italic || bold)
            {
                // i becomes em, b becomes strong
                Dict logical = italic ? this.tt.tagEm : this.tt.tagStrong;
                current.element = logical.name;
                current.tag = logical;
            }

            if (current.content != null)
            {
                emFromI(current.content);
            }
        }
    }

    /**
     * Some people use dir or ul without an li to indent the content. The pattern to look for is a list with a single
     * implicit li. This is recursively replaced by an implicit blockquote.
     *
     * @param node root Node
     */
    public void list2BQ(Node node)
    {
        for (Node current = node; current != null; current = current.next)
        {
            // convert the descendants before looking at the element itself
            if (current.content != null)
            {
                list2BQ(current.content);
            }

            boolean listElement = current.tag != null && current.tag.getParser() == ParserImpl.LIST;

            // a list holding nothing but one implicit item is turned into an implicit blockquote
            if (listElement && current.hasOneChild() && current.content.implicit)
            {
                stripOnlyChild(current);
                current.element = this.tt.tagBlockquote.name;
                current.tag = this.tt.tagBlockquote;
                current.implicit = true;
            }
        }
    }

    /**
     * Replace implicit blockquote by div with an indent taking care to reduce nested blockquotes to a single div with
     * the indent set to match the nesting depth.
     *
     * @param node root Node
     */
    public void bQ2Div(Node node)
    {
        int indent;
        String indentBuf;
        AttVal attval;

        while (node != null)
        {
            if (node.tag == this.tt.tagBlockquote && node.implicit)
            {
                indent = 1;

                // collapse directly nested blockquotes, counting the nesting depth
                while (node.hasOneChild() && node.content.tag == this.tt.tagBlockquote && node.implicit)
                {
                    ++indent;
                    stripOnlyChild(node);
                }

                if (node.content != null)
                {
                    bQ2Div(node.content);
                }

                // 2em per nesting level; plain concatenation replaces the deprecated new Integer(..).toString()
                indentBuf = "margin-left: " + (2 * indent) + "em";

                node.element = this.tt.tagDiv.name;
                node.tag = this.tt.tagDiv;

                attval = node.getAttrByName("style");

                if (attval != null && attval.value != null)
                {
                    // keep any existing style, with the indent placed in front of it
                    attval.value = indentBuf + "; " + attval.value;
                }
                else
                {
                    node.addAttribute("style", indentBuf);
                }
            }
            else if (node.content != null)
            {
                bQ2Div(node.content);
            }

            node = node.next;
        }
    }

    /**
     * Find the enclosing table cell for the given node.
     *
     * @param node Node
     * @return enclosing cell node
     */
    Node findEnclosingCell(Node node)
    {
        // walk from the node itself up through its ancestors until a td is met
        Node candidate = node;

        while (candidate != null && candidate.tag != tt.tagTd)
        {
            candidate = candidate.parent;
        }

        return candidate;
    }

    /**
     * node is <![if ...]> prune up to <![endif]>.
     *
     * @param lexer Lexer
     * @param node  Node
     * @return cleaned up Node
     */
    public Node pruneSection(Lexer lexer, Node node)
    {
        // FG: the former special case that put a non-breaking space into the enclosing table cell for
        // "if !supportEmptyParas" sections is intentionally disabled; empty cells are left untouched.

        // discard the opening section tag itself and move on to what follows
        Node current = Node.discardElement(node);

        while (current != null)
        {
            if (current.type == Node.SECTION_TAG)
            {
                if ((TidyUtils.getString(current.textarray, current.start, 2)).equals("if"))
                {
                    // Nested section: prune it recursively. The node that comes back is the one following the
                    // nested endif, so it must be examined here (it may be this section's own endif or another
                    // nested if) rather than being discarded unseen. It may also be null, which ends the loop.
                    current = pruneSection(lexer, current);
                    continue;
                }

                if ((TidyUtils.getString(current.textarray, current.start, 5)).equals("endif"))
                {
                    // matching endif: discard it and hand back whatever follows
                    return Node.discardElement(current);
                }
            }

            // ordinary content of the section: discard node and continue with the next one
            current = Node.discardElement(current);
        }

        // ran out of siblings before the matching endif
        return null;
    }

    /**
     * Drop if/endif sections inserted by word2000.
     *
     * @param lexer Lexer
     * @param node  Node root node
     */
    public void dropSections(Lexer lexer, Node node)
    {
        Node current = node;

        while (current != null)
        {
            if (current.type != Node.SECTION_TAG)
            {
                // ordinary node: look for sections among its children, then move on
                if (current.content != null)
                {
                    dropSections(lexer, current.content);
                }

                current = current.next;
                continue;
            }

            // #444394 - fix 13 Sep 01: "if !vml" sections are not pruned, only the tag itself is dropped
            if ((TidyUtils.getString(current.textarray, current.start, 2)).equals("if")
                && !(TidyUtils.getString(current.textarray, current.start, 7)).equals("if !vml"))
            {
                // prune up to matching endif
                current = pruneSection(lexer, current);
            }
            else
            {
                // discard others as well
                current = Node.discardElement(current);
            }
        }
    }

    /**
     * Remove word2000 attributes from node.
     *
     * @param node node to cleanup
     */
    public void purgeWord2000Attributes(Node node)
    {
        // last attribute that stays in the list; null while nothing has been kept yet
        AttVal kept = null;
        AttVal following;

        for (AttVal current = node.attributes; current != null; current = following)
        {
            following = current.next;

            String name = current.attribute;
            boolean drop = false;

            if (name != null)
            {
                if (name.equals("class"))
                {
                    // special check for class="Code" denoting pre text
                    // Pass thru user defined styles as HTML class names
                    String value = current.value;
                    drop = value == null || (!value.equals("Code") && value.startsWith("Mso"));
                }
                else if (name.equals("style") || name.equals("lang") || name.startsWith("x:"))
                {
                    drop = true;
                }
                else if (name.equals("height") || name.equals("width"))
                {
                    // dimensions are only removed from table cells and rows
                    drop = node.tag == this.tt.tagTd || node.tag == this.tt.tagTr || node.tag == this.tt.tagTh;
                }
            }

            if (!drop)
            {
                kept = current;
                continue;
            }

            // unlink the attribute from the singly linked list
            if (kept != null)
            {
                kept.next = following;
            }
            else
            {
                node.attributes = following;
            }
        }
    }

    /**
     * Word2000 uses span excessively, so we strip span out.
     *
     * @param lexer Lexer
     * @param span  Node span
     * @return cleaned node
     */
    public Node stripSpan(Lexer lexer, Node span)
    {
        Node node;
        // node after which the next piece of content gets inserted
        Node prev = null;
        Node content;

        // deal with span elements that have content by splicing the content in place of the span after having
        // processed it

        cleanWord2000(lexer, span.content);
        content = span.content;

        if (span.prev != null)
        {
            // content will be appended after the span's previous sibling
            prev = span.prev;
        }
        else if (content != null)
        {
            // no previous sibling: move the first child in front of the span and use it as the insertion anchor
            node = content;
            content = content.next;
            node.removeNode();
            Node.insertNodeBeforeElement(span, node);
            prev = node;
        }

        // move the remaining children out of the span one by one, keeping their order
        while (content != null)
        {
            node = content;
            content = content.next;
            node.removeNode();
            prev.insertNodeAfterElement(node);
            prev = node;
        }

        // the span was the last child of its parent, so the parent's last pointer moves to the last spliced node
        // (prev is still null here when the span had neither a previous sibling nor content)
        if (span.next == null)
        {
            span.parent.last = prev;
        }

        // remember the follower before the now empty span is discarded
        node = span.next;
        span.content = null;
        Node.discardElement(span);
        return node;
    }

    /**
     * Map non-breaking spaces to regular spaces.
     *
     * @param lexer Lexer
     * @param node  Node
     */
    private void normalizeSpaces(Lexer lexer, Node node)
    {
        while (node != null)
        {
            if (node.content != null)
            {
                normalizeSpaces(lexer, node.content);
            }

            if (node.type == Node.TEXT_NODE)
            {
                int[] c = new int[1];
                // write position; it never overtakes the read position because a replacement only ever shrinks
                int p = node.start;

                for (int i = node.start; i < node.end; ++i)
                {
                    // Mask to an unsigned value: without it a lead byte stored in a signed buffer element is
                    // negative, and neither the multibyte test nor the 160 comparison below can ever match.
                    c[0] = node.textarray[i] & 0xFF;

                    // look for UTF-8 multibyte character
                    if (c[0] > 0x7F)
                    {
                        i += PPrint.getUTF8(node.textarray, i, c);
                    }

                    // non-breaking space becomes a regular space
                    if (c[0] == 160)
                    {
                        c[0] = ' ';
                    }

                    p = PPrint.putUTF8(node.textarray, p, c[0]);
                }

                // the rewritten text may be shorter than the original; drop the stale tail
                node.end = p;
            }

            node = node.next;
        }
    }

    /**
     * Used to hunt for hidden preformatted sections.
     *
     * @param node checked node
     * @return true if the node's style contains both "margin-top: 0" and "margin-bottom: 0"
     */
    boolean noMargins(Node node)
    {
        AttVal styleAttr = node.getAttrByName("style");
        String css = (styleAttr == null) ? null : styleAttr.value;

        // both substrings, "margin-top: 0" and "margin-bottom: 0", have to be present
        return css != null && css.contains("margin-top: 0") && css.contains("margin-bottom: 0");
    }

    /**
     * Does element have a single space as its content?
     *
     * @param lexer Lexer
     * @param node  checked node
     * @return true if the element has a single space as its content
     */
    boolean singleSpace(Lexer lexer, Node node)
    {
        Node only = node.content;

        // the element must hold exactly one child and that child must be text
        if (only == null || only.next != null || only.type != Node.TEXT_NODE)
        {
            return false;
        }

        int length = only.end - only.start;

        // a plain one byte space
        if (length == 1 && lexer.lexbuf[only.start] == ' ')
        {
            return true;
        }

        // a two byte sequence that decodes to character 160
        if (length == 2)
        {
            int[] decoded = new int[1];

            PPrint.getUTF8(lexer.lexbuf, only.start, decoded);

            return decoded[0] == 160;
        }

        return false;
    }

    /**
     * This is a major clean up to strip out all the extra stuff you get when you save as web page from Word 2000. It
     * doesn't yet know what to do with VML tags, but these will appear as errors unless you declare them as new tags,
     * such as o:p which needs to be declared as inline.
     *
     * @param lexer Lexer
     * @param node  node to clean up
     */
    public void cleanWord2000(Lexer lexer, Node node)
    {
        // used to a list from a sequence of bulletted p's
        Node list = null;

        while (node != null)
        {
            // get rid of Word's xmlns attributes
            if (node.tag == tt.tagHtml)
            {
                // check that it's a Word 2000 document
                if ((node.getAttrByName("xmlns:o") == null))
                {
                    return;
                }
                lexer.configuration.tt.freeAttrs(node);
            }
            // fix up preformatted sections by looking for a sequence of paragraphs with zero top/bottom margin
            if (node.tag == tt.tagP)
            {
                if (noMargins(node))
                {
                    Node pre;
                    Node next;
                    Node.coerceNode(lexer, node, tt.tagPre);

                    purgeWord2000Attributes(node);

                    if (node.content != null)
                    {
                        cleanWord2000(lexer, node.content);
                    }

                    pre = node;
                    node = node.next;

                    // continue to strip p's
                    while (node.tag == tt.tagP && noMargins(node))
                    {
                        next = node.next;
                        node.removeNode();
                        pre.insertNodeAtEnd(lexer.newLineNode());
                        pre.insertNodeAtEnd(node);
                        stripSpan(lexer, node);
                        node = next;
                    }
                }
            }

            if (node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_BLOCK) && singleSpace(lexer, node))
            {
                node = stripSpan(lexer, node);
                continue;
            }

            // discard Word's style verbiage
            if (node.tag == this.tt.tagStyle || node.tag == this.tt.tagMeta || node.type == Node.COMMENT_TAG)
            {
                node = Node.discardElement(node);
                continue;
            }

            // strip out all span and font tags Word scatters so liberally!
            if (node.tag == this.tt.tagSpan || node.tag == this.tt.tagFont)
            {
                node = stripSpan(lexer, node);
                continue;
            }

            if (node.tag == this.tt.tagLink)
            {
                AttVal attr = node.getAttrByName("rel");

                if (attr != null && attr.value != null && attr.value.equals("File-List"))
                {
                    node = Node.discardElement(node);
                    continue;
                }
            }

            // discard empty paragraphs
            if (node.content == null && node.tag == this.tt.tagP)
            {
                node = Node.discardElement(node);
                continue;
            }

            if (node.tag == this.tt.tagP)
            {
                AttVal attr = node.getAttrByName("class");
                AttVal atrStyle = node.getAttrByName("style");

                // (JES) Sometimes Word marks a list item with the following hokie syntax
                //  to 
 ... 
                // map  to 
...
                if (attr != null
                    && attr.value != null
                    && ((attr.value.equals("MsoListBullet") || attr.value.equals("MsoListNumber")) //
                    || (atrStyle != null && (atrStyle.value.contains("mso-list:"))))) // 463066 - fix by Joel
                // Shafer 19 Sep 01
                {
                    Dict listType = tt.tagUl;

                    if (attr.value.equals("MsoListNumber"))
                    {
                        listType = tt.tagOl;
                    }

                    Node.coerceNode(lexer, node, this.tt.tagLi);

                    if (list == null || list.tag != listType)
                    {
                        list = lexer.inferredTag(listType.name);
                        Node.insertNodeBeforeElement(node, list);
                    }

                    purgeWord2000Attributes(node);

                    if (node.content != null)
                    {
                        cleanWord2000(lexer, node.content);
                    }

                    // remove node and append to contents of list
                    node.removeNode();
                    list.insertNodeAtEnd(node);
                    node = list;
                }
                // map sequence of  to 
 ... 
                else if (attr != null && attr.value != null && attr.value.equals("Code"))
                {
                    Node br = lexer.newLineNode();
                    normalizeSpaces(lexer, node);

                    if (list == null || list.tag != this.tt.tagPre)
                    {
                        list = lexer.inferredTag("pre");
                        Node.insertNodeBeforeElement(node, list);
                    }

                    // remove node and append to contents of list
                    node.removeNode();
                    list.insertNodeAtEnd(node);
                    stripSpan(lexer, node);
                    list.insertNodeAtEnd(br);
                    node = list.next;
                }
                else
                {
                    list = null;
                }
            }
            else
            {
                list = null;
            }

            // strip out style and class attributes
            if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
            {
                purgeWord2000Attributes(node);
            }

            if (node.content != null)
            {
                cleanWord2000(lexer, node.content);
            }

            node = node.next;
        }
    }

    /**
     * Check if the current document is a converted Word document.
     *
     * @param root root Node
     * @return true if the document has been generated by Microsoft Word.
     */
    public boolean isWord2000(Node root)
    {
        // first indicator: the Office namespace declared on the html element
        Node html = root.findHTML(this.tt);

        if (html != null && html.getAttrByName("xmlns:o") != null)
        {
            return true;
        }

        Node head = root.findHEAD(tt);

        if (head == null)
        {
            // no HEAD, so no generator meta element to inspect
            return false;
        }

        // second indicator: a meta element named "generator" whose content mentions Microsoft
        for (Node meta = head.content; meta != null; meta = meta.next)
        {
            if (meta.tag != tt.tagMeta)
            {
                continue;
            }

            AttVal name = meta.getAttrByName("name");
            boolean generatorMeta = name != null && "generator".equals(name.value);

            if (!generatorMeta)
            {
                continue;
            }

            AttVal content = meta.getAttrByName("content");

            if (content != null && content.value != null && content.value.contains("Microsoft"))
            {
                return true;
            }
        }

        return false;
    }

    /**
     * Where appropriate move object elements from head to body.
     * <p>
     * An object element found in the head is removed from it and inserted at the start of the body as soon as it
     * has a child that is neither a param element nor a blank text node. Nothing happens when <code>html</code> is
     * null or when it has no head or no body child.
     *
     * @param lexer Lexer
     * @param html  html node
     */
    static void bumpObject(Lexer lexer, Node html)
    {
        if (html == null)
        {
            return;
        }

        Node node, next, head = null, body = null;
        TagTable tt = lexer.configuration.tt;

        // locate the head and body children of html (the last match of each wins)
        for (node = html.content; node != null; node = node.next)
        {
            if (node.tag == tt.tagHead)
            {
                head = node;
            }

            if (node.tag == tt.tagBody)
            {
                body = node;
            }
        }

        if (head != null && body != null)
        {
            for (node = head.content; node != null; node = next)
            {
                // remember the successor first: node may be unlinked from head below
                next = node.next;

                if (node.tag == tt.tagObject)
                {
                    Node child;
                    boolean bump = false;

                    for (child = node.content; child != null; child = child.next)
                    {
                        // bump to body unless content is param; blank text between params does not count.
                        // The blank test must be applied to the child itself, not to the enclosing object.
                        boolean ignorable;
                        if (child.type == Node.TEXT_NODE)
                        {
                            ignorable = child.isBlank(lexer);
                        }
                        else
                        {
                            ignorable = child.tag == tt.tagParam;
                        }

                        if (!ignorable)
                        {
                            bump = true;
                            break;
                        }
                    }

                    if (bump)
                    {
                        node.removeNode();
                        body.insertNodeAtStart(node);
                    }
                }
            }
        }
    }

}    

    

    
            
    
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api
                
            
        
    
    





    © 2015 - 2024 Weber Informatics LLC | Privacy Policy