crawler.transformer+fessXpathTransformer.xml Maven / Gradle / Ivy
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
    "http://dbflute.org/meta/lastadi10.dtd">
<!--
    Registers the "fessXpathTransformer" component: a singleton instance of
    CustomFessXpathTransformer configured with shared maps, a URL conversion
    map, and two XPath field rules.
-->
<components>
    <!-- NOTE(review): defaultFeatureMap, defaultPropertyMap and htmlUrlRuleMap
         are not defined in this file; presumably they come from this include.
         Confirm against crawler/transformer_basic.xml. -->
    <include path="crawler/transformer_basic.xml"/>

    <component name="fessXpathTransformer"
               class="org.codelibs.fess.crawler.transformer.CustomFessXpathTransformer"
               instance="singleton">
        <property name="name">"fessXpathTransformer"</property>
        <property name="featureMap">defaultFeatureMap</property>
        <property name="propertyMap">defaultPropertyMap</property>
        <property name="childUrlRuleMap">htmlUrlRuleMap</property>

        <!-- Disabled: explicit invalidUrlPattern override, kept for reference. -->
        <!--
        <property name="invalidUrlPattern">@java.util.regex.Pattern@compile("^\\s*javascript:|^\\s*mailto:|^\\s*irc:|^\\s*skype:|^\\s*callto:",@java.util.regex.Pattern@CASE_INSENSITIVE)</property>
        -->

        <!-- Single entry mapping "feed:" to "http:".
             NOTE(review): presumably applied as a URL prefix rewrite; verify
             in the transformer class. -->
        <property name="convertUrlMap"> {"feed:" : "http:"} </property>

        <!-- segment -->
        <!-- Each addFieldRule call passes a field name, an XPath expression and
             a boolean. NOTE(review): the meaning of the boolean is defined by
             addFieldRule itself and is not visible here; confirm before
             changing it. -->

        <!-- "title" is taken from the TITLE element. -->
        <postConstruct name="addFieldRule">
            <arg>"title"</arg>
            <arg>"//TITLE"</arg>
            <arg>true</arg>
        </postConstruct>

        <!-- "important_content" is taken from any H1, H2 or H3 element. -->
        <postConstruct name="addFieldRule">
            <arg>"important_content"</arg>
            <arg>"//*[self::H1 or self::H2 or self::H3]"</arg>
            <arg>true</arg>
        </postConstruct>
    </component>
</components>
© 2015 - 2025 Weber Informatics LLC | Privacy Policy