All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.zodiac.okhttp.DomMapper Maven / Gradle / Ivy

The newest version!
package org.zodiac.okhttp;

import org.jsoup.helper.DataUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;
import org.springframework.cglib.proxy.Enhancer;
import org.zodiac.sdk.toolkit.util.ExceptionUtil;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Crawler utility that maps XML/HTML to Java beans, based on jsoup.
 */
public class DomMapper {

    /**
     * Reads the body of a response as a jsoup {@link Document}.
     *
     * @param response response whose body, obtained through {@code asString()}, is parsed
     * @return the parsed Document
     */
    public static Document asDocument(ResponseSpec response) {
        return readDocument(response.asString());
    }

    /**
     * Reads a stream into a jsoup {@link Document}, passing UTF-8 as the charset name
     * and an empty base URI to {@code DataUtil.load}.
     *
     * @param inputStream stream that supplies the markup
     * @return the parsed Document
     */
    public static Document readDocument(InputStream inputStream) {
        try {
            return DataUtil.load(inputStream, StandardCharsets.UTF_8.name(), "");
        } catch (IOException e) {
            // Rethrown through ExceptionUtil.unchecked so callers do not have to declare IOException.
            throw ExceptionUtil.unchecked(e);
        }
    }

    /**
     * Parses an html string into a jsoup {@link Document} using {@code Parser.parse}
     * with an empty base URI.
     *
     * @param html markup to parse
     * @return the parsed Document
     */
    public static Document readDocument(String html) {
        return Parser.parse(html, "");
    }

    /**
     * Reads the markup supplied by a stream and maps it to a Java bean.
     *
     * @param inputStream stream that supplies the markup
     * @param clazz bean class
     * @param <T> bean type
     * @return bean instance backed by the parsed document
     */
    public static <T> T readValue(InputStream inputStream, final Class<T> clazz) {
        return readValue(readDocument(inputStream), clazz);
    }

    /**
     * Parses an html string and maps it to a Java bean.
     *
     * @param html markup to parse
     * @param clazz bean class
     * @param <T> bean type
     * @return bean instance backed by the parsed document
     */
    public static <T> T readValue(String html, final Class<T> clazz) {
        return readValue(readDocument(html), clazz);
    }

    /**
     * Maps an element to a Java bean.
     *
     * <p>The returned object is an instance of a cglib-generated subclass of {@code clazz};
     * a {@code CssQueryMethodInterceptor} constructed with {@code clazz} and {@code doc}
     * is installed as its callback.
     *
     * @param doc element the bean is bound to
     * @param clazz bean class, used as the superclass of the generated type
     * @param <T> bean type
     * @return instance of the generated subclass
     */
    public static <T> T readValue(final Element doc, final Class<T> clazz) {
        Enhancer enhancer = new Enhancer();
        enhancer.setSuperclass(clazz);
        enhancer.setUseCache(true);
        enhancer.setCallback(new CssQueryMethodInterceptor(clazz, doc));
        // The enhancer was given clazz as its superclass, so the created object is a T.
        @SuppressWarnings("unchecked")
        T bean = (T) enhancer.create();
        return bean;
    }

    /**
     * Reads the markup supplied by a stream and maps it to a list of Java beans.
     *
     * @param <T> bean type
     * @param inputStream stream that supplies the markup
     * @param clazz bean class; must be annotated with {@code @CssQuery}
     * @return list of beans, one per element matched by the class-level css query
     */
    public static <T> List<T> readList(InputStream inputStream, final Class<T> clazz) {
        return readList(readDocument(inputStream), clazz);
    }

    /**
     * Parses an html string and maps it to a list of Java beans.
     *
     * @param <T> bean type
     * @param html markup to parse
     * @param clazz bean class; must be annotated with {@code @CssQuery}
     * @return list of beans, one per element matched by the class-level css query
     */
    public static <T> List<T> readList(String html, final Class<T> clazz) {
        return readList(readDocument(html), clazz);
    }

    /**
     * Maps every element selected by the class-level {@code @CssQuery} to a Java bean.
     *
     * <p>The query is taken from the {@code value()} of the {@code @CssQuery} annotation on
     * {@code clazz} and evaluated against {@code doc}; each selected element is converted
     * with {@link #readValue(Element, Class)}.
     *
     * @param doc element the css query is evaluated against
     * @param clazz bean class; must be annotated with {@code @CssQuery}
     * @param <T> bean type
     * @return list of beans in selection order; empty when nothing is selected
     * @throws IllegalArgumentException if {@code clazz} has no {@code @CssQuery} annotation
     */
    public static <T> List<T> readList(Element doc, Class<T> clazz) {
        CssQuery annotation = clazz.getAnnotation(CssQuery.class);
        if (annotation == null) {
            throw new IllegalArgumentException("DomMapper readList " + clazz + " mast has annotation @CssQuery.");
        }
        String cssQueryValue = annotation.value();
        Elements elements = doc.select(cssQueryValue);
        // One bean is produced per selected element, so the final size is known up front.
        List<T> valueList = new ArrayList<>(elements.size());
        for (Element element : elements) {
            valueList.add(readValue(element, clazz));
        }
        return valueList;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy