All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.seimicrawler.xpath.util.Scanner Maven / Gradle / Ivy

Go to download

一个非常好用而且强大的基于xpath的html解析器。html的DOM树生成依赖Jsoup。Lexer 和 Parser基于Antlr4,支持完备的W3C XPATH 1.0标准语法,W3C规范:http://www.w3.org/TR/1999/REC-xpath-19991116。

There is a newer version: 2.5.3
Show newest version
package org.seimicrawler.xpath.util;

import com.google.common.reflect.ClassPath;
import com.google.common.reflect.Reflection;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.seimicrawler.xpath.core.AxisSelector;
import org.seimicrawler.xpath.core.Function;
import org.seimicrawler.xpath.core.NodeTest;
import org.seimicrawler.xpath.exception.NoSuchAxisException;
import org.seimicrawler.xpath.exception.NoSuchFunctionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Map;

/**
 * @author github.com/zhegexiaohuozi [email protected]
 * @since 2018/2/28.
 */
public class Scanner {
    private static Map axisSelectorMap = new HashMap<>();
    private static Map nodeTestMap = new HashMap<>();
    private static Map functionMap = new HashMap<>();
    private static Logger logger = LoggerFactory.getLogger(Scanner.class);

    static {
        try {
            ClassPath axisPkg = ClassPath.from(Scanner.class.getClassLoader());
            String axisPkgPath = "org.seimicrawler.xpath.core.axis";
            String nodePkgPath = "org.seimicrawler.xpath.core.node";
            String functionPkgPath = "org.seimicrawler.xpath.core.function";
            for (ClassPath.ClassInfo classInfo : axisPkg.getTopLevelClasses(axisPkgPath)) {
                Class selectorClass = classInfo.load();
                Reflection.initialize(selectorClass);
                if (AxisSelector.class.isAssignableFrom(selectorClass)){
                    AxisSelector selector = (AxisSelector) selectorClass.newInstance();
                    axisSelectorMap.put(selector.name(),selector);
                }
            }
            for (ClassPath.ClassInfo classInfo : axisPkg.getTopLevelClasses(nodePkgPath)) {
                Class nodeTestClass = classInfo.load();
                Reflection.initialize(nodeTestClass);
                if (NodeTest.class.isAssignableFrom(nodeTestClass)){
                    NodeTest nodeTest = (NodeTest) nodeTestClass.newInstance();
                    nodeTestMap.put(nodeTest.name(), nodeTest);
                }
            }
            for (ClassPath.ClassInfo classInfo : axisPkg.getTopLevelClasses(functionPkgPath)) {
                Class funcClass = classInfo.load();
                Reflection.initialize(funcClass);
                if (Function.class.isAssignableFrom(funcClass)){
                    Function function = (Function) funcClass.newInstance();
                    functionMap.put(function.name(), function);
                }
            }
        } catch (Exception e) {
            logger.error(ExceptionUtils.getRootCauseMessage(e));
        }
    }

    public static AxisSelector findSelectorByName(String selectorName){
        AxisSelector selector = axisSelectorMap.get(selectorName);
        if (selector  == null){
            throw new NoSuchAxisException("not support axis: "+selectorName);
        }
        return selector;
    }

    public static NodeTest findNodeTestByName(String nodeTestName){
        NodeTest nodeTest = nodeTestMap.get(nodeTestName);
        if (nodeTest == null){
            throw new NoSuchFunctionException("not support nodeTest: "+nodeTestName);
        }
        return nodeTest;
    }

    public static Function findFunctionByName(String funcName){
        Function function = functionMap.get(funcName);
        if (function == null){
            throw new NoSuchFunctionException("not support function: "+funcName);
        }
        return function;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy