cn.wanghaomiao.seimi.core.SeimiBeanResolver Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of SeimiCrawler Show documentation

一个支持分布式的可以高效开发且可以高效运行的爬虫框架。设计思想上融合了 Spring 与 Scrapy 的优点。

There is a newer version: 2.1.4

package cn.wanghaomiao.seimi.core;

import cn.wanghaomiao.seimi.annotation.Xpath;
import cn.wanghaomiao.seimi.exception.SeimiBeanResolveException;
import cn.wanghaomiao.seimi.utils.GenericUtils;
import cn.wanghaomiao.xpath.model.JXDocument;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Element;
import org.springframework.util.ReflectionUtils;

import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;

/**
 * Automatically extracts data according to the XPath paths defined on the fields of a bean.
 * @author 汪浩淼 [email protected]
 * @since 2015/6/17.
 */
@SuppressWarnings("uncheck")
public class SeimiBeanResolver {
    public static  T parse(Class target,String text) throws Exception {
        T bean = target.newInstance();
        final List props = new LinkedList<>();
        ReflectionUtils.doWithFields(target, new ReflectionUtils.FieldCallback() {
            @Override
            public void doWith(Field field) throws IllegalArgumentException, IllegalAccessException {
                props.add(field);
            }
        });
        JXDocument jxDocument = new JXDocument(text);
        for (Field f:props){
            Xpath xpathInfo = f.getAnnotation(Xpath.class);
            if (xpathInfo!=null){
                String xpath = xpathInfo.value();
                List