
solr.Items.conf.schema.xml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mklab-framework-client Show documentation
Show all versions of mklab-framework-client Show documentation
The project contains a set of convenience methods on top of common data repositories.
The newest version!
<?xml version="1.0" encoding="utf-8"?> <schema name="items" version="1.5"> <fields> <!-- Item fields --> <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> <field name="source" type="string" indexed="true" stored="false" multiValued="false" /> <field name="original" type="boolean" indexed="true" stored="false" multiValued="false" /> <field name="title" type="text_en" indexed="true" stored="true" multiValued="false" /> <field name="description" type="text_en" indexed="true" stored="false" multiValued="false" /> <field name="tags" type="tags" indexed="true" stored="true" multiValued="true" /> <field name="uid" type="string" indexed="true" stored="false" multiValued="false" /> <field name="username" type="tags" indexed="true" stored="false" multiValued="false" /> <field name="reference" type="string" indexed="true" stored="false" multiValued="false" /> <field name="publicationTime" type="long" indexed="true" stored="false" multiValued="false" /> <field name="language" type="string" indexed="true" stored="false" multiValued="false" /> <field name="latitude" type="double" indexed="true" stored="false" multiValued="false" /> <field name="longitude" type="double" indexed="true" stored="false" multiValued="false" /> <field name="location" type="string" indexed="true" stored="false" multiValued="false" /> <field name="city" type="string" indexed="true" stored="false" multiValued="false" /> <field name="country" type="string" indexed="true" stored="false" multiValued="false" /> <field name="latlon" type="location_latlon" indexed="true" stored="true" multiValued="false" /> <field name="latlonRPT" type="location_rpt" indexed="true" stored="true" multiValued="false" /> <field name="labels" type="string" indexed="true" stored="false" multiValued="true" /> <field name="mediaIds" type="string" indexed="true" stored="false" multiValued="true" /> <field name="persons" type="tags" indexed="true" stored="false" multiValued="true" /> <field name="locations" type="tags" indexed="true" stored="false" multiValued="true" /> <field name="organizations" type="tags" indexed="true" stored="false" multiValued="true" /> <field name="mentions" type="string" indexed="true" stored="false" multiValued="true" /> <field name="text" type="text_ws" indexed="true" stored="false" multiValued="false" /> <field name="allText" type="text_general" indexed="true" stored="false" multiValued="true" /> <field name="text1" type="text_general" indexed="true" stored="false" multiValued="false" /> <field name="text2" type="text_en_splitting" indexed="true" stored="false" multiValued="false" /> <field name="text3" type="text_en_splitting_tight" indexed="true" stored="false" multiValued="false" /> <field name="referenceUserId" type="string" indexed="true" stored="false" multiValued="true" /> <field name="likes" type="long" indexed="true" stored="false" multiValued="false" /> <field name="comments" type="long" indexed="true" stored="false" multiValued="false" /> <field name="shares" type="long" indexed="true" stored="false" multiValued="false" /> <field name="followers" type="long" indexed="true" stored="false" multiValued="false" /> <field name="friends" type="long" indexed="true" stored="false" multiValued="false" /> <!-- Type used to index the lat and lon components for the "location" FieldType --> <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" /> <dynamicField name="*_s" type="string" indexed="true" stored="true" /> <field name="_version_" type="long" indexed="true" stored="true" /> </fields> <uniqueKey>id</uniqueKey> <copyField source="title" dest="text1" /> <copyField source="title" dest="text2" /> <copyField source="title" dest="text3" /> <copyField source="title" dest="allText" /> <copyField source="description" dest="allText" /> <copyField source="tags" dest="allText" /> <copyField source="persons" dest="allText" /> <copyField source="organizations" dest="allText" /> <types> <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" /> <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" /> <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" /> <fieldtype name="binary" class="solr.BinaryField" /> <fieldType name="random" class="solr.RandomSortField" indexed="true" /> <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/> <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/> <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/> <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/> <!-- A specialized field for geo-spatial search. If indexed, this fieldType must not be multi-valued. --> <fieldType name="location_latlon" class="solr.LatLonType" subFieldSuffix="_coordinate"/> <!-- A specialized field for geo-spatial search. --> <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> <!-- A text field that only splits on whitespace for exact matching of words --> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <!-- A text field that only splits on whitespace for exact matching of words --> <fieldType name="tags" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.KeywordTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <!-- A general text field that has reasonable, generic cross-language defaults: it tokenizes with StandardTokenizer, removes stop words from case-insensitive "lang/stopwords_en.txt" (empty by default), and down cases. --> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <!-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, removes English stop words (lang/stopwords_en.txt), down cases and finally applies Porter's stemming. --> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.EnglishPossessiveFilterFactory" /> <!-- <filter class="solr.EnglishMinimalStemFilterFactory"/> --> <filter class="solr.PorterStemFilterFactory" /> </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.EnglishPossessiveFilterFactory" /> <!-- <filter class="solr.EnglishMinimalStemFilterFactory"/> --> <filter class="solr.PorterStemFilterFactory" /> </analyzer> </fieldType> <!-- A text field with defaults appropriate for English, plus aggressive word-splitting and autophrase features enabled. This field is just like text_en, except it adds WordDelimiterFilter to enable splitting and matching of words on case-change, alpha numeric boundaries, and non-alphanumeric chars. This means certain compound word cases will work, for example query "wi fi" will match document "WiFi" or "wi-fi". --> <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <analyzer type="index"> <tokenizer class="solr.WhitespaceTokenizerFactory" /> <!-- Case insensitive stop word removal. add enablePositionIncrements=true in both the index and query analyzers to leave a 'gap' for more accurate phrase queries. --> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PorterStemFilterFactory" /> </analyzer> <analyzer type="query"> <tokenizer class="solr.WhitespaceTokenizerFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.PorterStemFilterFactory" /> </analyzer> </fieldType> <!-- Less flexible matching, but less false matches. Probably not ideal for product names, but may be good for SKUs. Can insert dashes in the wrong place and still match. --> <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.EnglishMinimalStemFilterFactory" /> <filter class="solr.RemoveDuplicatesTokenFilterFactory" /> </analyzer> </fieldType> <!-- Different languages (generally ordered by ISO code) --> <!-- Arabic --> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- for any non-arabic --> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> <!-- normalizes ﻯ to ﻱ, etc --> <filter class="solr.ArabicNormalizationFilterFactory" /> <filter class="solr.ArabicStemFilterFactory" /> </analyzer> </fieldType> <!-- Bulgarian --> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> <filter class="solr.BulgarianStemFilterFactory" /> </analyzer> </fieldType> <!-- Catalan --> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- removes l', etc --> <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> <filter class="solr.SnowballPorterFilterFactory" language="Catalan" /> </analyzer> </fieldType> <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- normalize width before bigram, as e.g. half-width dakuten combine --> <filter class="solr.CJKWidthFilterFactory" /> <!-- for any non-CJK --> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.CJKBigramFilterFactory" /> </analyzer> </fieldType> <!-- Czech --> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> <filter class="solr.CzechStemFilterFactory" /> </analyzer> </fieldType> <!-- Danish --> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> <filter class="solr.SnowballPorterFilterFactory" language="Danish" /> </analyzer> </fieldType> <!-- German --> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> <filter class="solr.GermanNormalizationFilterFactory" /> <filter class="solr.GermanLightStemFilterFactory" /> <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> </analyzer> </fieldType> <!-- Greek --> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- greek specific lowercase for sigma --> <filter class="solr.GreekLowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> <filter class="solr.GreekStemFilterFactory" /> </analyzer> </fieldType> <!-- Spanish --> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> <filter class="solr.SpanishLightStemFilterFactory" /> <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> </analyzer> </fieldType> <!-- Basque --> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter class="solr.SnowballPorterFilterFactory" language="Basque" /> </analyzer> </fieldType> <!-- Persian --> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <analyzer> <!-- for ZWNJ --> <charFilter class="solr.PersianCharFilterFactory" /> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.ArabicNormalizationFilterFactory" /> <filter class="solr.PersianNormalizationFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> </analyzer> </fieldType> <!-- Finnish --> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> <filter class="solr.SnowballPorterFilterFactory" language="Finnish" /> <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> </analyzer> </fieldType> <!-- French --> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- removes l', etc --> <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> <filter class="solr.FrenchLightStemFilterFactory" /> <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> </analyzer> </fieldType> <!-- Irish --> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- removes d', etc --> <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt" /> <!-- removes n-, etc. position increments is intentionally false! --> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" /> <filter class="solr.IrishLowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" /> <filter class="solr.SnowballPorterFilterFactory" language="Irish" /> </analyzer> </fieldType> <!-- Galician --> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter class="solr.GalicianStemFilterFactory" /> <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> </analyzer> </fieldType> <!-- Hindi --> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <!-- normalizes unicode representation --> <filter class="solr.IndicNormalizationFilterFactory" /> <!-- normalizes variation in spelling --> <filter class="solr.HindiNormalizationFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> <filter class="solr.HindiStemFilterFactory" /> </analyzer> </fieldType> <!-- Hungarian --> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> <filter class="solr.SnowballPorterFilterFactory" language="Hungarian" /> <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> </analyzer> </fieldType> <!-- Armenian --> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter class="solr.SnowballPorterFilterFactory" language="Armenian" /> </analyzer> </fieldType> <!-- Indonesian --> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true" /> </analyzer> </fieldType> <!-- Italian --> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <!-- removes l', etc --> <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> <filter class="solr.ItalianLightStemFilterFactory" /> <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> </analyzer> </fieldType> <!-- Japanese using morphological analysis --> <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> <analyzer> <tokenizer class="solr.JapaneseTokenizerFactory" mode="search" /> <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> <filter class="solr.JapaneseBaseFormFilterFactory" /> <!-- Removes tokens with certain part-of-speech tags --> <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> <filter class="solr.CJKWidthFilterFactory" /> <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4" /> <!-- Lower-cases romaji characters --> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <!-- Latvian --> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> <filter class="solr.LatvianStemFilterFactory" /> </analyzer> </fieldType> <!-- Dutch --> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false" /> <filter class="solr.SnowballPorterFilterFactory" language="Dutch" /> </analyzer> </fieldType> <!-- Norwegian --> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> <filter class="solr.SnowballPorterFilterFactory" language="Norwegian" /> <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> </analyzer> </fieldType> <!-- Portuguese --> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> <filter class="solr.PortugueseLightStemFilterFactory" /> <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> </analyzer> </fieldType> <!-- Romanian --> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter class="solr.SnowballPorterFilterFactory" language="Romanian" /> </analyzer> </fieldType> <!-- Russian --> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> <filter class="solr.SnowballPorterFilterFactory" language="Russian" /> <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> </analyzer> </fieldType> <!-- Swedish --> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> <filter class="solr.SnowballPorterFilterFactory" language="Swedish" /> <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> </analyzer> </fieldType> <!-- Turkish --> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.StandardTokenizerFactory" /> <filter class="solr.TurkishLowerCaseFilterFactory" /> <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> <filter class="solr.SnowballPorterFilterFactory" language="Turkish" /> </analyzer> </fieldType> </types> </schema>
© 2015 - 2025 Weber Informatics LLC | Privacy Policy