cn.wanghaomiao.seimi.core.SeimiBeanResolver Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SeimiCrawler Show documentation
Show all versions of SeimiCrawler Show documentation
一个支持分布式的可以高效开发且可以高效运行的爬虫框架。设计思想上融合了spring与scrapy的优点。
package cn.wanghaomiao.seimi.core;
import cn.wanghaomiao.seimi.annotation.Xpath;
import cn.wanghaomiao.seimi.exception.SeimiBeanResolveException;
import cn.wanghaomiao.seimi.utils.GenericUtils;
import cn.wanghaomiao.xpath.model.JXDocument;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Element;
import org.springframework.util.ReflectionUtils;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
/**
* Automatically extracts data according to the XPath paths defined on the bean's fields.
* @author 汪浩淼 [email protected]
* @since 2015/6/17.
*/
@SuppressWarnings("uncheck")
public class SeimiBeanResolver {
public static T parse(Class target,String text) throws Exception {
T bean = target.newInstance();
final List props = new LinkedList<>();
ReflectionUtils.doWithFields(target, new ReflectionUtils.FieldCallback() {
@Override
public void doWith(Field field) throws IllegalArgumentException, IllegalAccessException {
props.add(field);
}
});
JXDocument jxDocument = new JXDocument(text);
for (Field f:props){
Xpath xpathInfo = f.getAnnotation(Xpath.class);
if (xpathInfo!=null){
String xpath = xpathInfo.value();
List
© 2015 - 2024 Weber Informatics LLC | Privacy Policy