All downloads are free. The search and download functionality uses the official Maven repository.

us.codecraft.webmagic.model.annotation.ExtractBy Maven / Gradle / Ivy

package us.codecraft.webmagic.model.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;

/**
 * Defines the extractor for a field or a class.
 *
 * <p>The annotation is retained at runtime and may be applied to fields and to types.
 *
 * @author [email protected]
 * @since 0.2.0
 */
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.TYPE})
public @interface ExtractBy {

    /**
     * Extractor expression, written in the syntax selected by {@link #type()}
     * (XPath, regex, CSS selector or JsonPath).
     *
     * @return extractor expression
     */
    String value();

    /**
     * Types of extractor expressions.
     */
    // Member types of an annotation type are implicitly public and static,
    // so the modifiers are not repeated here.
    enum Type {XPath, Regex, Css, JsonPath}

    /**
     * Extractor type; one of the {@link Type} constants. Defaults to {@link Type#XPath}.
     *
     * @return extractor type
     */
    Type type() default Type.XPath;

    /**
     * Defines whether the field is required to be non-null.
     * If set to 'true' and the extractor gets no result, the entire class will be discarded.
     *
     * @return {@code true} if the field must not be null; {@code false} (the default) if it may be null
     */
    boolean notNull() default false;

    /**
     * Types of source for extracting.
     */
    enum Source {
        /**
         * Extract from the content extracted by the class extractor.
         */
        SelectedHtml,
        /**
         * Extract from the raw html.
         */
        RawHtml
    }

    /**
     * The source for extracting. Defaults to {@link Source#SelectedHtml}.
     * It works only if you already added 'ExtractBy' to the class.
     *
     * @return the source for extracting
     */
    Source source() default Source.SelectedHtml;

    /**
     * Defines whether the extractor returns more than one result.
     * When set to 'true', the extractor returns a list of strings (so the field should be
     * declared as a List).
     *
     * @deprecated since 0.4.2; this option is determined automatically by the class of the field.
     * @return whether the extractor returns more than one result
     */
    @Deprecated
    boolean multi() default false;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy