
com.jaeksoft.searchlib.template.file_crawler.config.xml Maven / Gradle / Ivy
<?xml version="1.0" encoding="UTF-8"?>
<!-- License Agreement for OpenSearchServer -->
<!-- -->
<!-- Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft -->
<!-- -->
<!-- This file is part of OpenSearchServer. -->
<!-- http://www.open-search-server.com -->
<!-- -->
<!-- OpenSearchServer is free software: you can -->
<!-- redistribute it and/or modify it under the terms of -->
<!-- the GNU General Public License as published by the -->
<!-- Free Software Foundation, either version 3 of the -->
<!-- License, or (at your option) any later version. -->
<!-- -->
<!-- OpenSearchServer is distributed in the -->
<!-- hope that it will be useful, but WITHOUT ANY -->
<!-- WARRANTY; without even the implied warranty of -->
<!-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -->
<!-- -->
<!-- See the GNU General Public License for more details. -->

<!-- Index template for the file crawler. It declares one index, the -->
<!-- analyzers and fields of its schema, and the statistics to collect. -->
<configuration>

  <indices>
    <!-- NOTE(review): "{remoteURI}" looks like a placeholder substituted -->
    <!-- when the template is instantiated; confirm against the loader. -->
    <index name="index"
           searchCache="100"
           filterCache="100"
           fieldCache="500"
           remoteURI="{remoteURI}" />
  </indices>

  <schema>

    <analyzers>

      <!-- Default standard analyzer -->
      <analyzer name="StandardAnalyzer" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
      </analyzer>

      <!-- Phonetic analyzer -->
      <analyzer name="PhoneticAnalyzer" tokenizer="StandardTokenizer">
        <filter class="LowerCaseFilter" />
        <filter class="PhoneticFilter" codec="Beider Morse" scope="QUERY_INDEX" />
      </analyzer>

      <!-- Default text analyzer (no lang attribute). The language-specific -->
      <!-- variants below reuse the name "TextAnalyzer" and differ only by -->
      <!-- their lang attribute, tokenizer and filter chain. -->
      <analyzer name="TextAnalyzer" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
      </analyzer>

      <!-- Keyword analyzer -->
      <analyzer name="KeywordLikeAnalyzer" tokenizer="KeywordTokenizer">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
      </analyzer>

      <!-- Arabic -->
      <analyzer name="TextAnalyzer" lang="ar" tokenizer="ArabicLetterTokenizer">
        <filter class="ArabicNormalizerFilter" />
        <filter class="ArabicStemFilter" />
      </analyzer>

      <!-- Chinese -->
      <analyzer name="TextAnalyzer" lang="zh" tokenizer="ChineseTokenizer">
        <filter class="ChineseFilter" />
      </analyzer>

      <!-- Czech -->
      <!-- NOTE(review): the ISO 639-1 code for Czech is "cs"; "cz" is kept -->
      <!-- because it is presumably the key the application expects. -->
      <analyzer name="TextAnalyzer" lang="cz" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="CzechStemFilter" />
      </analyzer>

      <!-- Danish -->
      <analyzer name="TextAnalyzer" lang="da" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballDanishFilter" />
      </analyzer>

      <!-- Dutch -->
      <analyzer name="TextAnalyzer" lang="nl" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="DutchStemFilter" />
      </analyzer>

      <!-- English -->
      <analyzer name="TextAnalyzer" lang="en" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="SnowballEnglishFilter" />
      </analyzer>

      <!-- Finnish -->
      <analyzer name="TextAnalyzer" lang="fi" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballFinnishFilter" />
      </analyzer>

      <!-- French -->
      <analyzer name="TextAnalyzer" lang="fr" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="FrenchStemFilter" />
      </analyzer>

      <!-- German -->
      <analyzer name="TextAnalyzer" lang="de" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballGermanFilter" />
      </analyzer>

      <!-- Italian -->
      <analyzer name="TextAnalyzer" lang="it" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballItalianFilter" />
      </analyzer>

      <!-- Japanese -->
      <analyzer name="TextAnalyzer" lang="ja" tokenizer="CJKTokenizer" />

      <!-- Korean -->
      <!-- NOTE(review): the ISO 639-1 code for Korean is "ko"; "kr" is kept -->
      <!-- because it is presumably the key the application expects. -->
      <analyzer name="TextAnalyzer" lang="kr" tokenizer="CJKTokenizer" />

      <!-- Norwegian -->
      <analyzer name="TextAnalyzer" lang="no" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballNorwegianFilter" />
      </analyzer>

      <!-- Polish -->
      <analyzer name="TextAnalyzer" lang="pl" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="PolishStemFilter" />
      </analyzer>

      <!-- Portuguese -->
      <analyzer name="TextAnalyzer" lang="pt" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballPortugueseFilter" />
      </analyzer>

      <!-- Romanian -->
      <analyzer name="TextAnalyzer" lang="ro" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballRomanianFilter" />
      </analyzer>

      <!-- Russian -->
      <analyzer name="TextAnalyzer" lang="ru" tokenizer="RussianLetterTokenizer">
        <filter class="RussianStemFilter" />
      </analyzer>

      <!-- Spanish -->
      <analyzer name="TextAnalyzer" lang="es" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballSpanishFilter" />
      </analyzer>

      <!-- Swedish -->
      <analyzer name="TextAnalyzer" lang="sv" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballSwedishFilter" />
      </analyzer>

      <!-- Turkish -->
      <analyzer name="TextAnalyzer" lang="tr" tokenizer="LetterOrDigitTokenizerFactory">
        <filter class="LowerCaseFilter" />
        <filter class="ISOLatin1AccentFilter" />
        <filter class="SnowballTurkishFilter" />
      </analyzer>

      <!-- Autocompletion: shingles of 1 to 5 tokens, followed by one -->
      <!-- prefix/suffix stop-word filter per listed language. -->
      <analyzer name="AutoCompletionAnalyzer" lang="">
        <tokenizer class="StandardTokenizer" />
        <filter class="ShingleFilter"
                max_shingle_size="5"
                min_shingle_size="1"
                scope="QUERY_INDEX"
                token_separator=" " />
        <filter class="PrefixSuffixStopFilter"
                ignore_case="true"
                prefixList="English stop words"
                scope="QUERY_INDEX"
                suffixList="English stop words"
                token_separator=" " />
        <filter class="PrefixSuffixStopFilter"
                ignore_case="true"
                prefixList="French stop words"
                scope="QUERY_INDEX"
                suffixList="French stop words"
                token_separator=" " />
        <filter class="PrefixSuffixStopFilter"
                ignore_case="true"
                prefixList="German stop words"
                scope="QUERY_INDEX"
                suffixList="German stop words"
                token_separator=" " />
      </analyzer>

      <!-- Default analyzer for suggestion on autocomplete -->
      <analyzer name="SuggestionAnalyzer" lang="">
        <tokenizer class="StandardTokenizer" />
        <filter class="LowerCaseFilter" scope="QUERY_INDEX" />
        <filter class="ISOLatin1AccentFilter" scope="QUERY_INDEX" />
        <filter class="EdgeNGramFilter"
                max_gram="100"
                min_gram="1"
                scope="QUERY_INDEX"
                side="front" />
      </analyzer>

      <!-- Numeric -->
      <!-- Each numberFormat value holds two ';'-separated patterns, one -->
      <!-- starting with a quoted '>' and one with a quoted '<'. The angle -->
      <!-- brackets are written as character entities because a literal '<' -->
      <!-- is not allowed inside an XML attribute value. -->
      <!-- NOTE(review): presumably a java.text.DecimalFormat pattern of the -->
      <!-- form positive;negative; confirm against NumberFormatFilter. -->
      <analyzer name="IntegerAnalyzer" lang="">
        <tokenizer class="KeywordTokenizer" />
        <filter class="NumberFormatFilter"
                numberFormat="'&gt;'0000000000;'&lt;'0000000000"
                scope="QUERY_INDEX" />
      </analyzer>
      <analyzer name="LongAnalyzer" lang="">
        <tokenizer class="KeywordTokenizer" />
        <filter class="NumberFormatFilter"
                numberFormat="'&gt;'00000000000000000000;'&lt;'00000000000000000000"
                scope="QUERY_INDEX" />
      </analyzer>
      <analyzer name="DecimalAnalyzer" lang="">
        <tokenizer class="KeywordTokenizer" />
        <filter class="NumberFormatFilter"
                numberFormat="'&gt;'0000000000.##########;'&lt;'0000000000.##########"
                scope="QUERY_INDEX" />
      </analyzer>

      <!-- Geospatial -->
      <analyzer name="GeoDegreesAnalyzer" lang="">
        <tokenizer class="KeywordTokenizer" />
        <filter class="DegreesRadiansFilter"
                degrees_radians_conversion="Degrees to Radians"
                fault_tolerant="true"
                scope="QUERY_INDEX" />
      </analyzer>
      <analyzer name="GeoRadianAnalyzer" lang="">
        <tokenizer class="KeywordTokenizer" />
        <filter class="DegreesRadiansFilter"
                degrees_radians_conversion="Check radians"
                fault_tolerant="true"
                scope="QUERY_INDEX" />
      </analyzer>

    </analyzers>

    <!-- "content" is declared as the default field and "url" as the unique -->
    <!-- field. Fields containing copyOf children name the fields they are -->
    <!-- copied from. -->
    <fields default="content" unique="url">

      <field name="lang" indexed="yes" stored="no" termVector="no" />

      <!-- Title and its exact / phonetic copies -->
      <field name="title" analyzer="TextAnalyzer" indexed="yes" stored="yes"
             termVector="positions_offsets" />
      <field name="titleExact" analyzer="StandardAnalyzer" indexed="yes" stored="no"
             termVector="yes">
        <copyOf field="title" />
      </field>
      <field name="titlePhonetic" analyzer="PhoneticAnalyzer" indexed="yes" stored="no"
             termVector="no">
        <copyOf field="title" />
      </field>

      <!-- Content and its exact / phonetic copies -->
      <field name="content" analyzer="TextAnalyzer" indexed="yes" stored="compress"
             termVector="positions_offsets" />
      <field name="contentExact" analyzer="StandardAnalyzer" indexed="yes" stored="no"
             termVector="yes">
        <copyOf field="content" />
      </field>
      <field name="contentPhonetic" analyzer="PhoneticAnalyzer" indexed="yes" stored="no"
             termVector="no">
        <copyOf field="content" />
      </field>

      <!-- URL and its analyzed copies -->
      <field name="url" indexed="yes" stored="no" termVector="no" />
      <field name="urlSplit" indexed="yes" stored="no" analyzer="TextAnalyzer"
             termVector="no">
        <copyOf field="url" />
      </field>
      <field name="urlExact" indexed="yes" stored="no" analyzer="StandardAnalyzer"
             termVector="no">
        <copyOf field="url" />
      </field>
      <field name="urlPhonetic" indexed="yes" stored="no" analyzer="PhoneticAnalyzer"
             termVector="no">
        <copyOf field="url" />
      </field>

      <!-- File name and its phonetic / exact copies -->
      <field name="fileName" analyzer="TextAnalyzer" indexed="yes" stored="yes"
             termVector="positions_offsets" />
      <field name="fileNamePhonetic" analyzer="PhoneticAnalyzer" indexed="yes" stored="no"
             termVector="no">
        <copyOf field="fileName" />
      </field>
      <!-- NOTE(review): unlike the other "Exact" fields, no termVector is -->
      <!-- set here, so the application default applies; confirm intended. -->
      <field name="fileNameExact" analyzer="StandardAnalyzer" indexed="yes" stored="no">
        <copyOf field="fileName" />
      </field>

      <!-- Aggregate fields combining title, content, fileName and url -->
      <field name="full" analyzer="TextAnalyzer" indexed="yes" stored="no" termVector="no">
        <copyOf field="title" />
        <copyOf field="content" />
        <copyOf field="fileName" />
        <copyOf field="url" />
      </field>
      <field name="fullExact" analyzer="StandardAnalyzer" indexed="yes" stored="no"
             termVector="no">
        <copyOf field="title" />
        <copyOf field="content" />
        <copyOf field="fileName" />
        <copyOf field="url" />
      </field>
      <field name="fullPhonetic" analyzer="PhoneticAnalyzer" indexed="yes" stored="no"
             termVector="no">
        <copyOf field="title" />
        <copyOf field="content" />
        <copyOf field="fileName" />
        <copyOf field="url" />
      </field>

      <!-- Autocompletion source, fed from title and fileName -->
      <field name="autocomplete" analyzer="AutoCompletionAnalyzer" indexed="yes" stored="no"
             termVector="no">
        <copyOf field="title" />
        <copyOf field="fileName" />
      </field>

      <!-- File metadata -->
      <field name="directory" indexed="yes" stored="no" />
      <field name="subDirectory" indexed="yes" stored="no" />
      <field name="crawlDate" indexed="yes" stored="no" />
      <field name="fileSystemDate" indexed="yes" stored="no" />
      <field name="fileExtension" indexed="yes" stored="no" />
      <field name="fileType" indexed="yes" stored="no" />
      <field name="fileSize" indexed="yes" stored="no" analyzer="LongAnalyzer" />

      <!-- User and group allow / deny lists -->
      <field name="userAllow" indexed="yes" stored="no" />
      <field name="userDeny" indexed="yes" stored="no" />
      <field name="groupAllow" indexed="yes" stored="no" />
      <field name="groupDeny" indexed="yes" stored="no" />

    </fields>

  </schema>

  <!-- One entry per operation type and aggregation period. -->
  <statistics>
    <statistic type="SEARCH" period="MINUTE" maxRetention="60" writeToLog="no" />
    <statistic type="SEARCH" period="HOUR" maxRetention="24" writeToLog="yes" />
    <statistic type="SEARCH" period="DAY" maxRetention="30" writeToLog="yes" />
    <statistic type="UPDATE" period="HOUR" maxRetention="24" writeToLog="yes" />
    <statistic type="UPDATE" period="DAY" maxRetention="30" writeToLog="yes" />
    <statistic type="DELETE" period="HOUR" maxRetention="24" writeToLog="yes" />
    <statistic type="DELETE" period="DAY" maxRetention="30" writeToLog="yes" />
    <statistic type="RELOAD" period="HOUR" maxRetention="24" writeToLog="yes" />
    <statistic type="RELOAD" period="DAY" maxRetention="30" writeToLog="yes" />
    <statistic type="OPTIMIZE" period="DAY" maxRetention="30" writeToLog="yes" />
  </statistics>

</configuration>
© 2015 - 2025 Weber Informatics LLC | Privacy Policy