org.seimicrawler.xpath.util.Scanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JsoupXpath Show documentation
Show all versions of JsoupXpath Show documentation
一个非常好用而且强大的基于xpath的html解析器。html的DOM树生成依赖Jsoup。Lexer 和 Parser基于Antlr4,支持完备的W3C XPATH 1.0标准语法,W3C规范:http://www.w3.org/TR/1999/REC-xpath-19991116。
package org.seimicrawler.xpath.util;
import com.google.common.reflect.ClassPath;
import com.google.common.reflect.Reflection;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.seimicrawler.xpath.core.AxisSelector;
import org.seimicrawler.xpath.core.Function;
import org.seimicrawler.xpath.core.NodeTest;
import org.seimicrawler.xpath.exception.NoSuchAxisException;
import org.seimicrawler.xpath.exception.NoSuchFunctionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
/**
* @author github.com/zhegexiaohuozi [email protected]
* @since 2018/2/28.
*/
public class Scanner {
private static Map axisSelectorMap = new HashMap<>();
private static Map nodeTestMap = new HashMap<>();
private static Map functionMap = new HashMap<>();
private static Logger logger = LoggerFactory.getLogger(Scanner.class);
static {
try {
ClassPath axisPkg = ClassPath.from(Scanner.class.getClassLoader());
String axisPkgPath = "org.seimicrawler.xpath.core.axis";
String nodePkgPath = "org.seimicrawler.xpath.core.node";
String functionPkgPath = "org.seimicrawler.xpath.core.function";
for (ClassPath.ClassInfo classInfo : axisPkg.getTopLevelClasses(axisPkgPath)) {
Class selectorClass = classInfo.load();
Reflection.initialize(selectorClass);
if (AxisSelector.class.isAssignableFrom(selectorClass)){
AxisSelector selector = (AxisSelector) selectorClass.newInstance();
axisSelectorMap.put(selector.name(),selector);
}
}
for (ClassPath.ClassInfo classInfo : axisPkg.getTopLevelClasses(nodePkgPath)) {
Class nodeTestClass = classInfo.load();
Reflection.initialize(nodeTestClass);
if (NodeTest.class.isAssignableFrom(nodeTestClass)){
NodeTest nodeTest = (NodeTest) nodeTestClass.newInstance();
nodeTestMap.put(nodeTest.name(), nodeTest);
}
}
for (ClassPath.ClassInfo classInfo : axisPkg.getTopLevelClasses(functionPkgPath)) {
Class funcClass = classInfo.load();
Reflection.initialize(funcClass);
if (Function.class.isAssignableFrom(funcClass)){
Function function = (Function) funcClass.newInstance();
functionMap.put(function.name(), function);
}
}
} catch (Exception e) {
logger.error(ExceptionUtils.getRootCauseMessage(e));
}
}
public static AxisSelector findSelectorByName(String selectorName){
AxisSelector selector = axisSelectorMap.get(selectorName);
if (selector == null){
throw new NoSuchAxisException("not support axis: "+selectorName);
}
return selector;
}
public static NodeTest findNodeTestByName(String nodeTestName){
NodeTest nodeTest = nodeTestMap.get(nodeTestName);
if (nodeTest == null){
throw new NoSuchFunctionException("not support nodeTest: "+nodeTestName);
}
return nodeTest;
}
public static Function findFunctionByName(String funcName){
Function function = functionMap.get(funcName);
if (function == null){
throw new NoSuchFunctionException("not support function: "+funcName);
}
return function;
}
}