com.codename1.ui.html.HTMLParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of codenameone-core Show documentation
There is a newer version: 7.0.161
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores
 * CA 94065 USA or visit www.oracle.com if you need additional information or
 * have any questions.
 */
package com.codename1.ui.html;

import com.codename1.xml.Element;
import com.codename1.xml.XMLParser;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;

/**
 * The HTMLParser class is used to parse an XHTML-MP 1.0 document into a DOM object (Element).
 * Unsupported tags and attributes as well as comments are dropped in the parsing process.
 * The parser is also makes use of CSSParser for external CSS files, embedded CSS segments and CSS within the 'style' attribute.
 *
 * @author Ofir Leitner
 */
public class HTMLParser extends XMLParser {

    HTMLComponent htmlC; // The HTMLComponent that uses this Parser

    /**
     * The list of empty tags (tags that naturally don't have any children).
     * This is used to enable empty tags to be closed also in a non-strict way (i.e. <br> instead of <br>/;)
     * some of these tags are not a part of the XHTML-MP 1.0 standard, but including them here allows a more smooth parsing if the document is not strictly XHTML-MP 1.0
     */
    private static String[] EMPTY_TAGS = {"br","link","meta","base","area","basefont","col","frame","hr","img","input","isindex","param"};

    /**
     * Constructs a new instance of HTMLParser
     */
    public HTMLParser() {
        // Add common char entities that are above the HTML 2.0 char entities range
        addCharEntity("bull", 8226);
        addCharEntity("euro", 8364);
        setIncludeWhitespacesBetweenTags(true);
    }

    /**
     * Pair this HTMLParser with the HTMLComponent that uses it.
     * This pairing is necessary to allow access to the htmlC in parseTagContent upon finding a CSS embedded segment
     * 
     * @param htmlC The HTMLComponent that uses this parser
     */
    void setHTMLComponent(HTMLComponent htmlC) {
        if ((htmlC!=null) && (this.htmlC!=null)) {
            throw new IllegalStateException("This HTMLParser is already paired with an HTMLComponent");
        }
        this.htmlC=htmlC;
    }
    
    /**
     * Overrides XMLParser.parseTagContent to enable embedded CSS segments (Style tags)
     * 
     * @param element The current parent element
     * @param is The reader containing the XML
     * @throws IOException if an I/O error in the stream is encountered
     */
    protected void parseTagContent(Element element, Reader is) throws IOException {
        if ((HTMLComponent.SUPPORT_CSS) && (htmlC.loadCSS) && (((HTMLElement)element).getTagId() == HTMLElement.TAG_STYLE)) { // We aren't strict and don't require text/css in a style tag // && "text/css".equals(element.getAttributeById(Element.ATTR_TYPE)))) {
            CSSElement addTo = CSSParser.getInstance().parseCSSSegment(is,null,htmlC,null);
            htmlC.addToEmebeddedCSS(addTo);
            return;
        }

        super.parseTagContent(element, is);
    }

    /**
     * Overrides XMLParser.createNewElement to return an HTMLElement instance
     * 
     * @param name The HTMLElement's name
     * @return a new instance of the names HTMLElement
     */
    protected Element createNewElement(String name) {
        HTMLElement elem=new HTMLElement(name);
        return elem;
    }

    /**
     * Overrides XMLParser.createNewTextElement to return an HTMLElement instance
     *
     * @param text The HTMLElement's text
     * @return a new instance of the HTMLElement
     */
    protected Element createNewTextElement(String text) {
        HTMLElement elem=new HTMLElement(text,true);
        return elem;
    }

    /**
     * Overrides XMLParser.convertCharEntity to add in HTML char entities
     *
     * @param charEntity The char entity to convert
     * @return A string containing a single char, or the original char entity string (with & and ;) if the char entity couldn't be resolved
     */
    protected String convertCharEntity(String charEntity) {
        try {
            return HTMLUtils.convertCharEntity(charEntity, true, null);
        } catch (IllegalArgumentException iae) {
            return super.convertCharEntity(charEntity);
        }
    }

    /**
     * This method translates between an HTML char entity string to the according char code.
     * It first tries to find it using its super method.
     * If not found, the search continues to a wider string array of char codes 160-255 which are supported in ISO-8859-1 / HTML 2.0
     *
     * @param symbol The symbol to lookup
     * @return The char code of the symbol, or -1 if none found
     *
    protected int getCharEntityCode(String symbol) {
        int val=super.getCharEntityCode(symbol);
        if (val==-1) {
            // Not one of the most popular char codes, proceed to check the ISO-8859-1 symbols array
            val=CSSElement.getStringVal(symbol, CHAR_ENTITY_STRINGS);
            if (val!=-1) {
                return val+160;
            }
        }
        return val;
    }*/



    /**
     * Checks whether the specified tag is an empty tag as defined in EMPTY_TAGS
     *
     * @param tagName The tag name to check
     * @return true if that tag is defined as an empty tag, false otherwise
     */
    protected boolean isEmptyTag(String tagName) {
        int i=0;
        boolean found=false;
        while ((i