All downloads are free. The search and download functionality uses the official Maven repository.

org.tinygroup.htmlparser.node.HtmlNode Maven / Gradle / Ivy

/**
 *  Copyright (c) 1997-2013, www.tinygroup.org ([email protected]).
 *
 *  Licensed under the GPL, Version 3.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.gnu.org/licenses/gpl.html
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.tinygroup.htmlparser.node;

import org.tinygroup.htmlparser.HtmlNodeType;
import org.tinygroup.parser.node.NodeImpl;

import java.util.HashSet;

/**
 *
 * @author luoguo
 */
public class HtmlNode extends NodeImpl {
    public static HashSet singleNodeNames = new HashSet();

    static {
        singleNodeNames.add("br");
        singleNodeNames.add("hr");
        singleNodeNames.add("img");
        singleNodeNames.add("input");
        singleNodeNames.add("param");
        singleNodeNames.add("meta");
        singleNodeNames.add("link");
    }

    /**
     * 构造方法
     *
     * @param nodeType
     */
    public HtmlNode(HtmlNodeType nodeType) {
        super(nodeType);
    }

    public HtmlNode getRoot() {
        HtmlNode root = super.getRoot();
        if (root.getNodeName() == null && root.getSubNodes().size() == 1) {
            return root.getSubNodes().get(0);
        }
        return root;
    }

    /**
     * 构造方法
     *
     * @param nodeName
     */
    public HtmlNode(String nodeName) {
        super(nodeName, HtmlNodeType.ELEMENT);
    }

    public boolean isSingleNode() {
        if (getNodeName() != null) {
            return singleNodeNames.contains(getCaseSensitiveName(getNodeName()));
        }
        return false;
    }

    /**
     * 构造方法
     *
     * @param nodeType
     * @param nodeName
     */
    public HtmlNode(HtmlNodeType nodeType, String nodeName) {
        super(nodeType, nodeName);
    }

    /**
     * 构造方法
     *
     * @param nodeName
     * @param nodeType
     */
    public HtmlNode(String nodeName, HtmlNodeType nodeType) {
        super(nodeType, nodeName);
    }

    public HtmlNode addContent(String content) {
        HtmlNode node = new HtmlNode(HtmlNodeType.TEXT);
        node.setContent(content);
        addNode(node);
        return this;
    }


    protected String encode(String string) {
        String str = string;
        str = str.replaceAll("&", "&");
        str = str.replaceAll("<", "<");
        str = str.replaceAll(">", ">");
        str = str.replaceAll("\"", """);
        str = str.replaceAll(" ", " ");
        str = str.replaceAll("'", "'");
        return str;
    }


    protected String decode(String string) {
        String str = string;
        str = str.replaceAll("&", "&");
        str = str.replaceAll("<", "<");
        str = str.replaceAll(" ", " ");
        str = str.replaceAll(">", ">");
        str = str.replaceAll(""", "\"");
        str = str.replaceAll("'", "'");
        return str;
    }

    public String getPureText() {
        StringBuffer sb = new StringBuffer();
        getPureText(this, sb);
        return sb.toString().trim();
    }

    void getPureText(HtmlNode node, StringBuffer sb) {
        if (node.getNodeType() == HtmlNodeType.CDATA || node.getNodeType() == HtmlNodeType.TEXT) {
            String content = node.getContent();
            if (content != null) {
                sb.append(content).append(" ");
            }
        } else {
            if (node.getNodeType().isHasHeader() && node.getSubNodes() != null) {
                for (HtmlNode n : node.getSubNodes()) {
                    getPureText(n, sb);
                }
            }
        }
    }

    public boolean isCaseSensitive() {
        return false;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy