All downloads are free. The search and download features use the official Maven repository.

us.codecraft.webmagic.pipeline.JsonFilePipeline Maven / Gradle / Ivy

The newest version!
package us.codecraft.webmagic.pipeline;

import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.utils.FilePersistentBase;

import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

/**
 * Store results to files in JSON format.
* * @author [email protected]
* @since 0.2.0 */ public class JsonFilePipeline extends FilePersistentBase implements Pipeline { private Logger logger = LoggerFactory.getLogger(getClass()); /** * new JsonFilePageModelPipeline with default path "/data/webmagic/" */ public JsonFilePipeline() { setPath("/data/webmagic"); } public JsonFilePipeline(String path) { setPath(path); } @Override public void process(ResultItems resultItems, Task task) { String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR; try { PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(path + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".json"))); printWriter.write(JSON.toJSONString(resultItems.getAll())); printWriter.close(); } catch (IOException e) { logger.warn("write file error", e); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy