All downloads are free. The search and download functionality uses the official Maven repository.

solr.Items.conf.schema.xml Maven / Gradle / Ivy

Go to download

The project contains a set of convenience methods on top of common data repositories.

The newest version!
<?xml version="1.0" encoding="utf-8"?>
<schema name="items" version="1.5">

	<fields>
		<!-- Identity and provenance. "id" is required, single-valued and stored. -->
		<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
		<field name="source" type="string" indexed="true" stored="false" multiValued="false"/>
		<field name="original" type="boolean" indexed="true" stored="false" multiValued="false"/>

		<!-- Main content. Only "title" and "tags" are stored; "description" is index-only. -->
		<field name="title" type="text_en" indexed="true" stored="true" multiValued="false"/>
		<field name="description" type="text_en" indexed="true" stored="false" multiValued="false"/>
		<field name="tags" type="tags" indexed="true" stored="true" multiValued="true"/>

		<!-- Author and reference information (index-only). -->
		<field name="uid" type="string" indexed="true" stored="false" multiValued="false"/>
		<field name="username" type="tags" indexed="true" stored="false" multiValued="false"/>
		<field name="reference" type="string" indexed="true" stored="false" multiValued="false"/>

		<!-- Publication metadata (index-only). -->
		<field name="publicationTime" type="long" indexed="true" stored="false" multiValued="false"/>
		<field name="language" type="string" indexed="true" stored="false" multiValued="false"/>

		<!-- Geographic information as plain numbers and strings (index-only). -->
		<field name="latitude" type="double" indexed="true" stored="false" multiValued="false"/>
		<field name="longitude" type="double" indexed="true" stored="false" multiValued="false"/>
		<field name="location" type="string" indexed="true" stored="false" multiValued="false"/>
		<field name="city" type="string" indexed="true" stored="false" multiValued="false"/>
		<field name="country" type="string" indexed="true" stored="false" multiValued="false"/>

		<!-- Geographic information using the two spatial field types; both are stored. -->
		<field name="latlon" type="location_latlon" indexed="true" stored="true" multiValued="false"/>
		<field name="latlonRPT" type="location_rpt" indexed="true" stored="true" multiValued="false"/>

		<!-- Multi-valued identifiers attached to the item. -->
		<field name="labels" type="string" indexed="true" stored="false" multiValued="true"/>
		<field name="mediaIds" type="string" indexed="true" stored="false" multiValued="true"/>

		<!-- Multi-valued entity lists, analyzed with the "tags" type. -->
		<field name="persons" type="tags" indexed="true" stored="false" multiValued="true"/>
		<field name="locations" type="tags" indexed="true" stored="false" multiValued="true"/>
		<field name="organizations" type="tags" indexed="true" stored="false" multiValued="true"/>

		<field name="mentions" type="string" indexed="true" stored="false" multiValued="true"/>

		<!-- Free-text search fields. "allText", "text1", "text2" and "text3" are copyField
		     destinations declared further down in this schema. -->
		<field name="text" type="text_ws" indexed="true" stored="false" multiValued="false"/>
		<field name="allText" type="text_general" indexed="true" stored="false" multiValued="true"/>
		<field name="text1" type="text_general" indexed="true" stored="false" multiValued="false"/>
		<field name="text2" type="text_en_splitting" indexed="true" stored="false" multiValued="false"/>
		<field name="text3" type="text_en_splitting_tight" indexed="true" stored="false" multiValued="false"/>

		<field name="referenceUserId" type="string" indexed="true" stored="false" multiValued="true"/>

		<!-- Numeric counters for the item (index-only). -->
		<field name="likes" type="long" indexed="true" stored="false" multiValued="false"/>
		<field name="comments" type="long" indexed="true" stored="false" multiValued="false"/>
		<field name="shares" type="long" indexed="true" stored="false" multiValued="false"/>

		<!-- Numeric counters, presumably for the item's author (index-only). TODO confirm. -->
		<field name="followers" type="long" indexed="true" stored="false" multiValued="false"/>
		<field name="friends" type="long" indexed="true" stored="false" multiValued="false"/>

		<!-- Sub-fields holding the lat/lon components of the "location_latlon" type
		     (matches its subFieldSuffix="_coordinate"). -->
		<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
		<!-- Any field whose name ends in "_s" becomes an indexed, stored string. -->
		<dynamicField name="*_s" type="string" indexed="true" stored="true"/>

		<!-- Document version field (long, indexed and stored). -->
		<field name="_version_" type="long" indexed="true" stored="true"/>
	</fields>

   	<!-- "id" uniquely identifies each document. -->
   	<uniqueKey>id</uniqueKey>

   	<!-- The title is additionally indexed through three alternative analyzers. -->
   	<copyField source="title" dest="text1"/>
   	<copyField source="title" dest="text2"/>
   	<copyField source="title" dest="text3"/>

   	<!-- "allText" aggregates the fields listed below for catch-all searching.
   	     NOTE(review): "locations" is not copied although "persons" and "organizations" are;
   	     confirm whether that is intentional. -->
   	<copyField source="title" dest="allText"/>
   	<copyField source="description" dest="allText"/>
   	<copyField source="tags" dest="allText"/>
   	<copyField source="persons" dest="allText"/>
   	<copyField source="organizations" dest="allText"/>

   	<types>
    	<!-- Primitive, non-analyzed field types. The numeric and date types use the Trie
    	     classes with precisionStep="0". -->
    	<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
      	<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />
      	<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" />
      	<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" />
      	<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
      	<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" />
      	<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" />
      	<!-- Element name normalized to "fieldType" to match every other definition in this file. -->
      	<fieldType name="binary" class="solr.BinaryField" />
      	<fieldType name="random" class="solr.RandomSortField" indexed="true" />

		<!-- Trie numeric types configured with precisionStep="8".
		     "tdouble" backs the "*_coordinate" dynamic field. -->
		<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" />
		<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" />
		<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" />
		<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" />

		<!-- Geo-spatial point type. Its components are indexed in sub-fields whose names end in
		     "_coordinate". When indexed, fields of this type must not be multi-valued. -->
		<fieldType name="location_latlon"
		           class="solr.LatLonType"
		           subFieldSuffix="_coordinate"/>

		<!-- Geo-spatial type based on a recursive prefix tree; geodetic (geo="true"),
		     with distances expressed in kilometers. -->
		<fieldType name="location_rpt"
		           class="solr.SpatialRecursivePrefixTreeFieldType"
		           geo="true"
		           distErrPct="0.025"
		           maxDistErr="0.001"
		           distanceUnits="kilometers"/>

      	<!-- Whitespace-delimited text: tokens are split on whitespace only and then lowercased,
      	     so words match as written apart from letter case. -->
      	<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	  </analyzer>
      	</fieldType>

      	<!-- Keyword-style text: KeywordTokenizerFactory keeps the whole input value as a single
      	     token (no splitting on whitespace), and the token is then lowercased. This gives
      	     case-insensitive matching of complete values such as tags or names. -->
      	<fieldType name="tags" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.KeywordTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	  </analyzer>
      	</fieldType>

      	<!-- General-purpose text: StandardTokenizer, case-insensitive removal of the stop words
      	     listed in "lang/stopwords_en.txt", then lowercasing. The same chain is used at index
      	     time and at query time, so it is declared once as an untyped analyzer. -->
      	<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	  </analyzer>
      	</fieldType>

      	<!-- English text: StandardTokenizer, case-insensitive stop word removal
      	     (lang/stopwords_en.txt), lowercasing, possessive removal and Porter stemming.
      	     Index-time and query-time analysis are identical, so a single untyped analyzer
      	     covers both. -->
      	<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.EnglishPossessiveFilterFactory"/>
      	    <!-- Alternative stemmer, currently disabled: <filter class="solr.EnglishMinimalStemFilterFactory"/> -->
      	    <filter class="solr.PorterStemFilterFactory"/>
      	  </analyzer>
      	</fieldType>

      	<!-- English text with aggressive word splitting and automatic phrase queries.
      	     WordDelimiterFilter splits and matches words on case changes, letter/digit boundaries
      	     and non-alphanumeric characters, so compound words work: the query "wi fi" matches
      	     the documents "WiFi" and "wi-fi".
      	     The index analyzer also catenates word and number parts; the query analyzer does not. -->
      	<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      	  <analyzer type="index">
      	    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      	    <!-- Case-insensitive stop word removal.
      	         NOTE(review): the original note suggested adding enablePositionIncrements=true to
      	         both analyzers for more accurate phrase queries; it is not set here. -->
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
      	    <filter class="solr.WordDelimiterFilterFactory"
      	            generateWordParts="1"
      	            generateNumberParts="1"
      	            catenateWords="1"
      	            catenateNumbers="1"
      	            catenateAll="0"
      	            splitOnCaseChange="1"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.PorterStemFilterFactory"/>
      	  </analyzer>
      	  <analyzer type="query">
      	    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
      	    <filter class="solr.WordDelimiterFilterFactory"
      	            generateWordParts="1"
      	            generateNumberParts="1"
      	            catenateWords="0"
      	            catenateNumbers="0"
      	            catenateAll="0"
      	            splitOnCaseChange="1"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.PorterStemFilterFactory"/>
      	  </analyzer>
      	</fieldType>

      	<!-- Stricter variant of English splitting: less flexible matching but fewer false matches.
      	     Probably not ideal for product names, but may suit SKUs; a dash inserted in the wrong
      	     place still matches because parts are only catenated, never generated. -->
      	<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      	  <analyzer>
      	    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
      	    <filter class="solr.WordDelimiterFilterFactory"
      	            generateWordParts="0"
      	            generateNumberParts="0"
      	            catenateWords="1"
      	            catenateNumbers="1"
      	            catenateAll="0"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.EnglishMinimalStemFilterFactory"/>
      	    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      	  </analyzer>
      	</fieldType>
      
	  	<!-- Language-specific text types follow, generally ordered by ISO code. -->
	  	<!-- Arabic: lowercasing (for non-Arabic text), stop words, normalization, stemming. -->
	  	<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
	  	  <analyzer>
	  	    <tokenizer class="solr.StandardTokenizerFactory"/>
	  	    <!-- lowercasing only affects non-Arabic tokens -->
	  	    <filter class="solr.LowerCaseFilterFactory"/>
	  	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt"/>
	  	    <!-- normalizes ﻯ to ﻱ, etc -->
	  	    <filter class="solr.ArabicNormalizationFilterFactory"/>
	  	    <filter class="solr.ArabicStemFilterFactory"/>
	  	  </analyzer>
	  	</fieldType>
      <!-- Bulgarian: standard tokenization, lowercasing, stop words, Bulgarian stemmer. -->
      <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt"/>
          <filter class="solr.BulgarianStemFilterFactory"/>
        </analyzer>
      </fieldType>
      <!-- Catalan: elision removal, lowercasing, stop words, Snowball (Catalan) stemming. -->
      <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- strips elided articles such as l' -->
          <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
        </analyzer>
      </fieldType>
      <!-- CJK text indexed as bigrams (text_ja offers a Japanese configuration based on
           morphological analysis instead). -->
      <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- width is normalized before bigramming, since e.g. half-width dakuten combine -->
          <filter class="solr.CJKWidthFilterFactory"/>
          <!-- lowercasing only affects non-CJK tokens -->
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.CJKBigramFilterFactory"/>
        </analyzer>
      </fieldType>
      <!-- Czech: standard tokenization, lowercasing, stop words, Czech stemmer. -->
      <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt"/>
          <filter class="solr.CzechStemFilterFactory"/>
        </analyzer>
      </fieldType>
      <!-- Danish: lowercasing, Snowball-format stop word list, Snowball (Danish) stemming. -->
      <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
        </analyzer>
      </fieldType>
      <!-- German: lowercasing, Snowball-format stop word list, German normalization, light stemming. -->
      <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball"/>
          <filter class="solr.GermanNormalizationFilterFactory"/>
          <filter class="solr.GermanLightStemFilterFactory"/>
          <!-- Stemmer alternatives (disabled):
               less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/>
               more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
        </analyzer>
      </fieldType>
      <!-- Greek: Greek-aware lowercasing, case-sensitive stop word match, Greek stemmer. -->
      <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- Greek-specific lowercasing that handles sigma -->
          <filter class="solr.GreekLowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt"/>
          <filter class="solr.GreekStemFilterFactory"/>
        </analyzer>
      </fieldType>
      <!-- Spanish: lowercasing, Snowball-format stop word list, light stemming. -->
      <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball"/>
          <filter class="solr.SpanishLightStemFilterFactory"/>
          <!-- Stemmer alternative (disabled), more aggressive:
               <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
        </analyzer>
      </fieldType>
      <!-- Basque: lowercasing, stop words, Snowball (Basque) stemming. -->
      <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
        </analyzer>
      </fieldType>
      <!-- Persian: character filtering, lowercasing, Arabic and Persian normalization, stop words.
           No stemmer is configured. -->
      <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <!-- character filter that handles ZWNJ before tokenization -->
          <charFilter class="solr.PersianCharFilterFactory"/>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.ArabicNormalizationFilterFactory"/>
          <filter class="solr.PersianNormalizationFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt"/>
        </analyzer>
      </fieldType>
      <!-- Finnish: lowercasing, Snowball-format stop word list, Snowball (Finnish) stemming. -->
      <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
          <!-- Stemmer alternative (disabled), less aggressive:
               <filter class="solr.FinnishLightStemFilterFactory"/> -->
        </analyzer>
      </fieldType>
      <!-- French: elision removal, lowercasing, Snowball-format stop word list, light stemming. -->
      <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- strips elided articles such as l' -->
          <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball"/>
          <filter class="solr.FrenchLightStemFilterFactory"/>
          <!-- Stemmer alternatives (disabled):
               less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/>
               more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
        </analyzer>
      </fieldType>
      <!-- Irish: elision removal, hyphenation-prefix removal, Irish-aware lowercasing,
           stop words, Snowball (Irish) stemming. -->
      <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- strips elided prefixes such as d' -->
          <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
          <!-- Removes hyphenation prefixes (n-, etc.) listed in lang/hyphenations_ga.txt.
               NOTE(review): an earlier comment said position increments are intentionally
               disabled for this filter, but no attribute here sets that, so the filter's
               default position handling applies. Confirm this is intended for the Solr
               version in use. -->
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
          <filter class="solr.IrishLowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
        </analyzer>
      </fieldType>
      <!-- Galician: lowercasing, stop words, Galician stemmer. -->
      <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt"/>
          <filter class="solr.GalicianStemFilterFactory"/>
          <!-- Stemmer alternative (disabled), less aggressive:
               <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
        </analyzer>
      </fieldType>
      <!-- Hindi: lowercasing, Indic and Hindi normalization, stop words, Hindi stemmer. -->
      <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <!-- unifies the Unicode representation of the text -->
          <filter class="solr.IndicNormalizationFilterFactory"/>
          <!-- unifies spelling variations -->
          <filter class="solr.HindiNormalizationFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt"/>
          <filter class="solr.HindiStemFilterFactory"/>
        </analyzer>
      </fieldType>
      <!-- Hungarian: lowercasing, Snowball-format stop word list, Snowball (Hungarian) stemming. -->
      <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
          <!-- Stemmer alternative (disabled), less aggressive:
               <filter class="solr.HungarianLightStemFilterFactory"/> -->
        </analyzer>
      </fieldType>
      <!-- Armenian: lowercasing, stop words, Snowball (Armenian) stemming. -->
      <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt"/>
          <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
        </analyzer>
      </fieldType>
      <!-- Indonesian: lowercasing, stop words, Indonesian stemmer with derivational stemming on. -->
      <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt"/>
          <!-- set stemDerivational to false for a less aggressive stemmer (inflectional suffixes only) -->
          <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
        </analyzer>
      </fieldType>
      <!-- Italian: elision removal, lowercasing, Snowball-format stop word list, light stemming. -->
      <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- strips elided articles such as l' -->
          <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball"/>
          <filter class="solr.ItalianLightStemFilterFactory"/>
          <!-- Stemmer alternative (disabled), more aggressive:
               <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
        </analyzer>
      </fieldType>
      <!-- Japanese, tokenized by morphological analysis in "search" mode.
           Automatic phrase queries are turned off for this type. -->
      <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
        <analyzer>
          <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
          <!-- Variant with a user dictionary (disabled):
               <tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/> -->
          <!-- inflected verbs and adjectives are reduced to their base/dictionary form (辞書形) -->
          <filter class="solr.JapaneseBaseFormFilterFactory"/>
          <!-- tokens carrying the part-of-speech tags listed in the tags file are dropped -->
          <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/>
          <!-- full-width romaji becomes half-width, half-width kana becomes full-width (Unicode NFKC subset) -->
          <filter class="solr.CJKWidthFilterFactory"/>
          <!-- common tokens that are not useful for search and hurt ranking are dropped -->
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt"/>
          <!-- katakana spelling variations are unified by removing a trailing long sound character (U+30FC) -->
          <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
          <!-- romaji characters are lowercased -->
          <filter class="solr.LowerCaseFilterFactory"/>
        </analyzer>
      </fieldType>
      <!-- Latvian: lowercasing, stop words, Latvian stemmer. -->
      <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt"/>
          <filter class="solr.LatvianStemFilterFactory"/>
        </analyzer>
      </fieldType>
      	<!-- Dutch: lowercasing, Snowball-format stop word list, stemmer override dictionary,
      	     Snowball (Dutch) stemming. -->
      	<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball"/>
      	    <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
      	    <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
      	  </analyzer>
      	</fieldType>
      	<!-- Norwegian: lowercasing, Snowball-format stop word list, Snowball (Norwegian) stemming. -->
      	<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball"/>
      	    <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
      	    <!-- Stemmer alternatives (disabled):
      	         less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/>
      	         singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
      	  </analyzer>
      	</fieldType>
      	<!-- Portuguese: lowercasing, Snowball-format stop word list, light stemming. -->
      	<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball"/>
      	    <filter class="solr.PortugueseLightStemFilterFactory"/>
      	    <!-- Stemmer alternatives (disabled):
      	         less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/>
      	         more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/>
      	         most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
      	  </analyzer>
      	</fieldType>
      	<!-- Romanian: lowercasing, stop words, Snowball (Romanian) stemming. -->
      	<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt"/>
      	    <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
      	  </analyzer>
      	</fieldType>
      	<!-- Russian: lowercasing, Snowball-format stop word list, Snowball (Russian) stemming. -->
      	<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball"/>
      	    <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
      	    <!-- Stemmer alternative (disabled), less aggressive:
      	         <filter class="solr.RussianLightStemFilterFactory"/> -->
      	  </analyzer>
      	</fieldType>
      	<!-- Swedish: lowercasing, Snowball-format stop word list, Snowball (Swedish) stemming. -->
      	<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.LowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball"/>
      	    <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
      	    <!-- Stemmer alternative (disabled), less aggressive:
      	         <filter class="solr.SwedishLightStemFilterFactory"/> -->
      	  </analyzer>
      	</fieldType>
      	<!-- Turkish: Turkish-aware lowercasing, case-sensitive stop word match,
      	     Snowball (Turkish) stemming. -->
      	<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
      	  <analyzer>
      	    <tokenizer class="solr.StandardTokenizerFactory"/>
      	    <filter class="solr.TurkishLowerCaseFilterFactory"/>
      	    <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt"/>
      	    <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
      	  </analyzer>
      	</fieldType>
	</types>
</schema>





© 2015 - 2025 Weber Informatics LLC | Privacy Policy