cn.wanghaomiao.seimi.core.SeimiScanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SeimiCrawler Show documentation
Show all versions of SeimiCrawler Show documentation
一个支持分布式的可以高效开发且可以高效运行的爬虫框架。设计思想上融合了spring与scrapy的优点。
package cn.wanghaomiao.seimi.core;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.springframework.core.io.support.ResourcePatternResolver;
import org.springframework.core.type.classreading.CachingMetadataReaderFactory;
import org.springframework.core.type.classreading.MetadataReader;
import org.springframework.core.type.classreading.MetadataReaderFactory;
import org.springframework.core.type.filter.AnnotationTypeFilter;
import org.springframework.core.type.filter.TypeFilter;
import org.springframework.util.ClassUtils;
import org.springframework.util.CollectionUtils;
import java.io.IOException;
import java.lang.annotation.Annotation;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
/**
* 上下文加载器
* @author 汪浩淼 [email protected]
* @since 2015/6/17.
*/
public class SeimiScanner {
private final Logger logger = LoggerFactory.getLogger(getClass());
private static final String RESOURCE_PATTERN = "**/%s/**/*.class";
private ResourcePatternResolver resourcePatternResolver = new PathMatchingResourcePatternResolver();
private AnnotationConfigApplicationContext context = new AnnotationConfigApplicationContext(ScanConfig.class);
@SafeVarargs
public final Set> scan(String[] confPkgs, Class extends Annotation>... annotationTags){
Set> resClazzSet = new HashSet<>();
List typeFilters = new LinkedList<>();
if (ArrayUtils.isNotEmpty(annotationTags)){
for (Class extends Annotation> annotation : annotationTags) {
typeFilters.add(new AnnotationTypeFilter(annotation, false));
}
}
if (ArrayUtils.isNotEmpty(confPkgs)) {
for (String pkg : confPkgs) {
String pattern = ResourcePatternResolver.CLASSPATH_ALL_URL_PREFIX + String.format(RESOURCE_PATTERN,ClassUtils.convertClassNameToResourcePath(pkg));
try {
Resource[] resources = this.resourcePatternResolver.getResources(pattern);
MetadataReaderFactory readerFactory = new CachingMetadataReaderFactory(this.resourcePatternResolver);
for (Resource resource : resources) {
if (resource.isReadable()) {
MetadataReader reader = readerFactory.getMetadataReader(resource);
String className = reader.getClassMetadata().getClassName();
if (ifMatchesEntityType(reader, readerFactory,typeFilters)) {
//不使用class.forName()
Class> curClass = Thread.currentThread().getContextClassLoader().loadClass(className);
context.register(curClass);
resClazzSet.add(curClass);
}
}
}
} catch (Exception e) {
logger.error("扫描提取[{}]包路径下,标记了注解[{}]的类出现异常", pattern,StringUtils.join(typeFilters,","));
}
}
}
if (!context.isActive()){
context.refresh();
}
return resClazzSet;
}
/**
* 检查当前扫描到的类是否含有任何一个指定的注解标记
* @param reader
* @param readerFactory
* @return ture/false
*/
private boolean ifMatchesEntityType(MetadataReader reader, MetadataReaderFactory readerFactory,List typeFilters) {
if (!CollectionUtils.isEmpty(typeFilters)) {
for (TypeFilter filter : typeFilters) {
try {
if (filter.match(reader, readerFactory)) {
return true;
}
} catch (IOException e) {
logger.error("过滤匹配类型时出错 {}",e.getMessage());
}
}
}
return false;
}
public ApplicationContext getContext(){
return this.context;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy