crawler.rule.xml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fess Show documentation
Show all versions of fess Show documentation
Fess is Full tExt Search System.
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN" "http://dbflute.org/meta/lastadi10.dtd"> <components namespace="fessCrawler"> <include path="crawler/container.xml" /> <include path="crawler/transformer.xml" /> <component name="ruleManager" class="org.codelibs.fess.crawler.rule.impl.RuleManagerImpl" instance="prototype"> <postConstruct name="addRule"> <arg>sitemapsRule</arg> </postConstruct> <postConstruct name="addRule"> <arg>webHtmlRule</arg> </postConstruct> <postConstruct name="addRule"> <arg>webFileRule</arg> </postConstruct> <postConstruct name="addRule"> <arg>fsFileRule</arg> </postConstruct> <postConstruct name="addRule"> <arg>defaultRule</arg> </postConstruct> </component> <component name="sitemapsRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" > <property name="ruleId">"sitemapsRule"</property> <property name="responseProcessor"> <component class="org.codelibs.fess.crawler.processor.impl.SitemapsResponseProcessor"> </component> </property> <postConstruct name="addRule"> <arg>"url"</arg> <arg>"http[s]?:.*sitemap[^/]*\.xml.*|http[s]?:.*sitemap[^/]*\.gz.*|http[s]?:.*sitemap[^/]*\.txt.*"</arg> </postConstruct> </component> <component name="webHtmlRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" > <property name="ruleId">"webHtmlRule"</property> <property name="responseProcessor"> <component class="org.codelibs.fess.crawler.processor.FessResponseProcessor"> <property name="transformer">fessXpathTransformer</property> <property name="successfulHttpCodes">(int[])[200]</property> <property name="notModifiedHttpCodes">(int[])[304]</property> </component> </property> <property name="allRequired">true</property> <postConstruct name="addRule"> <arg>"url"</arg> <arg>"http[s]?:.*"</arg> </postConstruct> <postConstruct name="addRule"> <arg>"mimeType"</arg> <!-- Supported MIME type --> <arg>"text/html"</arg> </postConstruct> </component> <component name="webFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" > <property name="ruleId">"webFileRule"</property> <property name="responseProcessor"> <component class="org.codelibs.fess.crawler.processor.FessResponseProcessor"> <property name="transformer">fessFileTransformer</property> <property name="successfulHttpCodes">(int[])[200]</property> <property name="notModifiedHttpCodes">(int[])[304]</property> </component> </property> <property name="allRequired">true</property> <postConstruct name="addRule"> <arg>"url"</arg> <arg>"http[s]?:.*"</arg> </postConstruct> <postConstruct name="addRule"> <arg>"mimeType"</arg> <!-- Supported MIME type --> <arg> "(application/xml" + "|application/xhtml+xml" + "|application/rdf+xml" + "|application/pdf" + "|application/x-freemind" + "|text/xml" + "|text/xml-external-parsed-entity)" </arg> </postConstruct> </component> <component name="fsFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" > <property name="ruleId">"fsFileRule"</property> <property name="responseProcessor"> <component class="org.codelibs.fess.crawler.processor.FessResponseProcessor"> <property name="transformer">fessFileTransformer</property> <property name="successfulHttpCodes">(int[])[200]</property> <property name="notModifiedHttpCodes">(int[])[304]</property> </component> </property> <property name="allRequired">true</property> <postConstruct name="addRule"> <arg>"url"</arg> <arg>"(file|smb|smb1|ftp):.*"</arg> </postConstruct> <postConstruct name="addRule"> <arg>"mimeType"</arg> <!-- Supported MIME type --> <arg> "(application/xml" + "|application/xhtml+xml" + "|application/rdf+xml" + "|application/pdf" + "|application/x-freemind" + "|application/lha" + "|application/x-lha" + "|application/x-lha-compressed" + "|text/xml" + "|text/xml-external-parsed-entity" + "|text/html)" </arg> </postConstruct> </component> <component name="defaultRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" > <property name="ruleId">"defaultRule"</property> <property name="responseProcessor"> <component class="org.codelibs.fess.crawler.processor.FessResponseProcessor"> <property name="transformer">fessStandardTransformer</property> <property name="successfulHttpCodes">(int[])[200]</property> <property name="notModifiedHttpCodes">(int[])[304]</property> </component> </property> <property name="allRequired">true</property> <postConstruct name="addRule"> <arg>"url"</arg> <arg>".*"</arg> </postConstruct> </component> </components>
© 2015 - 2025 Weber Informatics LLC | Privacy Policy