// com.yishuifengxiao.common.crawler.extractor.ExtractorFactory (Maven / Gradle / Ivy)
package com.yishuifengxiao.common.crawler.extractor;
import com.yishuifengxiao.common.crawler.domain.model.ContentItem;
import com.yishuifengxiao.common.crawler.extractor.content.ContentExtractor;
import com.yishuifengxiao.common.crawler.extractor.content.SimpleContentExtractor;
import com.yishuifengxiao.common.crawler.extractor.links.LinkExtractor;
import com.yishuifengxiao.common.crawler.extractor.links.impl.SimpleLinkExtractor;
/**
 * Factory that produces extractors.
 * <p>
 * Given an extraction rule, it builds the corresponding extractor.
 *
 * @author yishui
 * @version 1.0.0
 * @date 2019-11-6
 */
public class ExtractorFactory extends AbstractExtractorFactory {

    /**
     * Creates a link extractor.
     *
     * @param regex the link extraction rule, expressed as a regular expression
     * @return a new {@link SimpleLinkExtractor} constructed with {@code regex}
     */
    @Override
    public LinkExtractor getLinkExtractor(String regex) {
        final LinkExtractor linkExtractor = new SimpleLinkExtractor(regex);
        return linkExtractor;
    }

    /**
     * Creates a content extractor from a content extraction rule.
     *
     * @param contentRule the content extraction rule
     * @return a new {@link SimpleContentExtractor} constructed with {@code contentRule}
     */
    @Override
    public ContentExtractor getContentExtractor(ContentItem contentRule) {
        final ContentExtractor contentExtractor = new SimpleContentExtractor(contentRule);
        return contentExtractor;
    }
}