All downloads are free. The search and download features use the official Maven repository.

us.codecraft.webmagic.configurable.ConfigurablePageProcessor Maven / Gradle / Ivy

There is a newer version: 1.0.2
Show newest version
package us.codecraft.webmagic.configurable;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.Experimental;

import java.util.List;

/**
 * A {@link PageProcessor} whose extraction logic is supplied as a list of {@link ExtractRule}s
 * instead of being hand-written.
 *
 * <p>For every rule, the page's HTML is queried with the rule's selector and the outcome is stored
 * in the page's result items under the rule's field name. When a rule is flagged as not-null and
 * its extraction yields nothing, the page is marked as skipped instead of storing a value.
 *
 * @author [email protected]
 */
@Experimental
public class ConfigurablePageProcessor implements PageProcessor {

    /** Site configuration handed back unchanged by {@link #getSite()}. */
    private final Site site;

    /** Rules applied, in list order, to every processed page. */
    private final List<ExtractRule> extractRules;

    /**
     * Creates a processor for the given site and rule set.
     *
     * @param site         the site configuration returned by {@link #getSite()}
     * @param extractRules the rules to apply to each page; the list is stored by reference,
     *                     not copied
     */
    public ConfigurablePageProcessor(Site site, List<ExtractRule> extractRules) {
        this.site = site;
        this.extractRules = extractRules;
    }

    /**
     * Applies every configured rule to {@code page}.
     *
     * <p>Multi-valued rules store a list of strings; single-valued rules store one string. A
     * not-null rule with an empty (or missing) result marks the page as skipped. Setting the skip
     * flag does not stop the loop: the remaining rules are still evaluated.
     *
     * @param page the page to extract fields from
     */
    @Override
    public void process(Page page) {
        for (ExtractRule extractRule : extractRules) {
            if (extractRule.isMulti()) {
                List<String> results =
                        page.getHtml().selectDocumentForList(extractRule.getSelector());
                // Treat a missing list like an empty one so a not-null rule skips the page
                // rather than failing with a NullPointerException.
                boolean nothingFound = results == null || results.isEmpty();
                if (extractRule.isNotNull() && nothingFound) {
                    page.setSkip(true);
                } else {
                    page.getResultItems().put(extractRule.getFieldName(), results);
                }
            } else {
                String result = page.getHtml().selectDocument(extractRule.getSelector());
                if (extractRule.isNotNull() && result == null) {
                    page.setSkip(true);
                } else {
                    page.getResultItems().put(extractRule.getFieldName(), result);
                }
            }
        }
    }

    /**
     * Returns the site configuration supplied at construction time.
     *
     * @return the site passed to the constructor
     */
    @Override
    public Site getSite() {
        return site;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy