us.codecraft.webmagic.model.annotation.HelpUrl Maven / Gradle / Ivy
The newest version!
package us.codecraft.webmagic.model.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* Define the 'help' url patterns for class.
* All urls matching the pattern will be crawled but not extracted for new objects.
*
* @author [email protected]
* @since 0.2.0
*/
@Target(ElementType.TYPE)
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
public @interface HelpUrl {

    /**
     * Patterns selecting the urls that should be crawled.
     * <p>
     * The syntax is regex with two adjustments:
     * <ul>
     * <li>"." is a literal dot rather than "any character";</li>
     * <li>"*" is any run (0-n) of characters legal in a url, i.e. ([^"'#]*),
     * rather than a plain "any length" quantifier.</li>
     * </ul>
     *
     * @return the url patterns declared for the annotated class
     */
    String[] value();

    /**
     * Region that url extraction is restricted to, given as an XPath
     * expression (XPath is the only supported syntax).
     * <p>
     * When set, urls are taken only from this region and not from the
     * entire content. The default is the empty string.
     *
     * @return the region used for url extracting
     */
    String sourceRegion() default "";
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy