// us.codecraft.webmagic.pipeline.JsonFilePipeline Maven / Gradle / Ivy
package us.codecraft.webmagic.pipeline;
import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.utils.FilePersistentBase;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
/**
 * Store results to files in JSON format.
 * <p>
 * For every processed page the result items are serialized with fastjson and
 * written to a file named: base path + separator + task UUID + separator +
 * MD5 hex of the request URL + ".json". The file is encoded as UTF-8.
 *
 * @author [email protected]
 * @since 0.2.0
 */
public class JsonFilePipeline extends FilePersistentBase implements Pipeline {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Creates a JsonFilePipeline with the default path "/data/webmagic".
     */
    public JsonFilePipeline() {
        setPath("/data/webmagic");
    }

    /**
     * Creates a JsonFilePipeline that stores its files under the given path.
     *
     * @param path base directory, handed to {@code setPath}
     */
    public JsonFilePipeline(String path) {
        setPath(path);
    }

    /**
     * Serializes all result items of one page to JSON and writes them to the
     * page's file. I/O failures are logged as warnings and not rethrown.
     *
     * @param resultItems extracted items; its request URL determines the file name
     * @param task        the running task; its UUID determines the sub-directory
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        String fileName = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR
                + DigestUtils.md5Hex(resultItems.getRequest().getUrl()) + ".json";
        try {
            // Explicit charset: FileWriter would use the platform default, which
            // can corrupt non-ASCII content in a JSON file.
            PrintWriter printWriter = new PrintWriter(getFile(fileName), "UTF-8");
            try {
                printWriter.write(JSON.toJSONString(resultItems.getAll()));
                // PrintWriter never throws on write; checkError() flushes and
                // reports whether any I/O error was swallowed.
                if (printWriter.checkError()) {
                    logger.warn("write file error: " + fileName);
                }
            } finally {
                // Always release the file handle, even if serialization fails.
                printWriter.close();
            }
        } catch (IOException e) {
            logger.warn("write file error", e);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy