default-topic-trainingset.conf.solrconfig.xml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.stanbol.enhancer.engine.topic Show documentation
Show all versions of org.apache.stanbol.enhancer.engine.topic Show documentation
Implementation of an annotation engine that links the content item
to a set of possible categories from a dedicated Solr index using
MoreLikeThis queries.
The classification can be either applied to a complete document
(text in a given language) which is the default behavior or to a
specific portion of the text (using a TextAnnotation).
The newest version!
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!--
  For more details about configuration options that may appear in
  this file, see http://wiki.apache.org/solr/SolrConfigXml.
-->
<config>

  <!--
    In all configuration below, a prefix of "solr." for class names
    is an alias that causes solr to search appropriate packages,
    including org.apache.solr.(search|update|request|core|analysis)

    You may also specify a fully qualified Java classname if you
    have your own custom plugins.
  -->

  <!--
    Set this to 'false' if you want solr to continue working after
    it has encountered a severe configuration error. In a
    production environment, you may want solr to keep working even
    if one handler is mis-configured.

    You may also set this to false by setting the system property:
      -Dsolr.abortOnConfigurationError=false
  -->
  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

  <!--
    Controls what version of Lucene various components of Solr
    adhere to. Generally, you want to use the latest version to
    get all bug fixes and improvements. It is highly recommended
    that you fully re-index after changing this setting as it can
    affect both how text is indexed and queried.
  -->
  <luceneMatchVersion>LUCENE_42</luceneMatchVersion>

  <dataDir>${solr.data.dir:}</dataDir>

  <directoryFactory name="DirectoryFactory"
                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

  <indexConfig>
    <!--
      maxFieldLength was removed in 4.0. To get similar behavior,
      include a LimitTokenCountFilterFactory in your fieldType
      definition. E.g.
        <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
    -->

    <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
    <writeLockTimeout>5000</writeLockTimeout>
  </indexConfig>

  <!-- The default high-performance update handler -->
  <updateHandler class="solr.DirectUpdateHandler2">

    <!-- Deactivate transaction log
      <updateLog>
        <str name="dir">${solr.ulog.dir:}</str>
      </updateLog>
    -->

    <!-- no auto commit
      <autoCommit>
        <maxTime>15000</maxTime>
        <openSearcher>false</openSearcher>
      </autoCommit>
    -->

    <!--
      <autoSoftCommit>
        <maxTime>1000</maxTime>
      </autoSoftCommit>
    -->

  </updateHandler>

  <query>
    <maxBooleanClauses>1024</maxBooleanClauses>

    <filterCache class="solr.FastLRUCache"
                 size="2048"
                 initialSize="1024"
                 autowarmCount="512"/>

    <queryResultCache class="solr.LRUCache"
                      size="2048"
                      initialSize="1024"
                      autowarmCount="512"/>

    <documentCache class="solr.LRUCache"
                   size="4096"
                   initialSize="1024"
                   autowarmCount="0"/>

    <!--
      <fieldValueCache class="solr.FastLRUCache"
                       size="512"
                       autowarmCount="128"
                       showItems="32" />
    -->

    <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <!--
      Result Window Size

      An optimization for use with the queryResultCache. When a search
      is requested, a superset of the requested number of document ids
      are collected. For example, if a search for a particular query
      requests matching documents 10 through 19, and
      queryResultWindowSize is 50, then documents 0 through 49 will be
      collected and cached. Any further requests in that range can be
      satisfied via the cache.
    -->
    <queryResultWindowSize>20</queryResultWindowSize>

    <!-- Maximum number of documents to cache for any entry in the queryResultCache. -->
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!--
      QuerySenderListener takes an array of NamedList and executes a
      local query request for each NamedList in sequence.
    -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <!--
          <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
          <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
        -->
      </arr>
    </listener>
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">static firstSearcher warming in solrconfig.xml</str>
        </lst>
      </arr>
    </listener>

    <!--
      Use Cold Searcher

      If a search request comes in and there is no current registered
      searcher, then immediately register the still warming searcher
      and use it. If "false" then all requests will block until the
      first searcher is done warming.
    -->
    <useColdSearcher>false</useColdSearcher>

    <!--
      Max Warming Searchers

      Maximum number of searchers that may be warming in the
      background concurrently. An error is returned if this limit is
      exceeded.

      Recommend values of 1-2 for read-only slaves, higher for
      masters w/o cache warming.
    -->
    <maxWarmingSearchers>2</maxWarmingSearchers>
  </query>

  <!--
    Request Dispatcher

    NOTE(review): enableRemoteStreaming="true" is understood to let a
    request name content to be fetched on its behalf (stream.url /
    stream.file style parameters). Confirm that this is required here
    and that access to this core is restricted.
  -->
  <requestDispatcher handleSelect="false" >
    <requestParsers enableRemoteStreaming="true"
                    multipartUploadLimitInKB="2048000"
                    formdataUploadLimitInKB="2048"/>
    <httpCaching never304="true" />
  </requestDispatcher>

  <!--
    Request Handlers
    http://wiki.apache.org/solr/SolrRequestHandler
  -->

  <!--
    SearchHandler
    http://wiki.apache.org/solr/SearchHandler
  -->
  <requestHandler name="/select" class="solr.SearchHandler">
    <!--
      default values for query parameters can be specified, these
      will be overridden by parameters in the request
    -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <int name="rows">10</int>
    </lst>
  </requestHandler>

  <!-- Request Handler for similarity queries and topic classification -->
  <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" startup="lazy" />

  <!-- A request handler that returns indented JSON by default -->
  <requestHandler name="/query" class="solr.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="wt">json</str>
      <str name="indent">true</str>
      <str name="df">text</str>
    </lst>
  </requestHandler>

  <!--
    realtime get handler, guaranteed to return the latest stored fields
    of any document, without the need to commit or open a new searcher.
    The current implementation relies on the updateLog feature being
    enabled.

    NOTE(review): the updateLog element is commented out in the
    updateHandler section of this file; confirm whether this handler
    behaves as described with this configuration.
  -->
  <requestHandler name="/get" class="solr.RealTimeGetHandler">
    <lst name="defaults">
      <str name="omitHeader">true</str>
      <str name="wt">json</str>
      <str name="indent">true</str>
    </lst>
  </requestHandler>

  <!--
    Update Request Handler.
    http://wiki.apache.org/solr/UpdateXmlMessages
  -->
  <requestHandler name="/update" class="solr.UpdateRequestHandler" />

  <!-- for back compat with clients using /update/json and /update/csv -->
  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
    <lst name="defaults">
      <str name="stream.contentType">application/json</str>
    </lst>
  </requestHandler>
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler">
    <lst name="defaults">
      <str name="stream.contentType">application/csv</str>
    </lst>
  </requestHandler>

  <!--
    Solr Cell Update Request Handler
    http://wiki.apache.org/solr/ExtractingRequestHandler
  -->
  <requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler" >
    <lst name="defaults">
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>

      <!-- capture link hrefs but ignore div attributes -->
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>

  <!--
    Field Analysis Request Handler

    RequestHandler that provides much the same functionality as
    analysis.jsp. Provides the ability to specify multiple field
    types and field names in the same request and outputs
    index-time and query-time analysis for each of them.

    Request parameters are:
      analysis.fieldname  - field name whose analyzers are to be used
      analysis.fieldtype  - field type whose analyzers are to be used
      analysis.fieldvalue - text for index-time analysis
      q (or analysis.q)   - text for query time analysis
      analysis.showmatch (true|false) - When set to true and when
          query analysis is performed, the produced tokens of the
          field value analysis will be marked as "matched" for every
          token that is produced by the query analysis
  -->
  <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />

  <!--
    Document Analysis Handler
    http://wiki.apache.org/solr/AnalysisRequestHandler

    An analysis handler that provides a breakdown of the analysis
    process of provided documents. This handler expects a (single)
    content stream with the following format:

      <docs>
        <doc>
          <field name="id">1</field>
          <field name="name">The Name</field>
          <field name="text">The Text Value</field>
        </doc>
        <doc>...</doc>
        <doc>...</doc>
        ...
      </docs>

    Note: Each document must contain a field which serves as the
    unique key. This key is used in the returned response to associate
    an analysis breakdown to the analyzed document.

    Like the FieldAnalysisRequestHandler, this handler also supports
    query analysis by sending either an "analysis.query" or "q"
    request parameter that holds the query text to be analyzed. It
    also supports the "analysis.showmatch" parameter which when set to
    true, all field tokens that match the query tokens will be marked
    as a "match".
  -->
  <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" startup="lazy" />

  <!--
    Admin Handlers

    Admin Handlers - This will register all the standard admin
    RequestHandlers.
  -->
  <requestHandler name="/admin/" class="solr.admin.AdminHandlers" />

  <!-- ping/healthcheck -->
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
    <!--
      An optional feature of the PingRequestHandler is to configure the
      handler with a "healthcheckFile" which can be used to enable/disable
      the PingRequestHandler.
      relative paths are resolved against the data dir
    -->
    <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
  </requestHandler>

  <!-- Echo the request contents back to the client -->
  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="echoHandler">true</str>
    </lst>
  </requestHandler>

  <!--
    Solr Replication

    The SolrReplicationHandler supports replicating indexes from a
    "master" used for indexing and "slaves" used for queries.

    http://wiki.apache.org/solr/SolrReplication

    It is also necessary for SolrCloud to function (in Cloud mode, the
    replication handler is used to bulk transfer segments when nodes
    are added or need to recover).

    https://wiki.apache.org/solr/SolrCloud/
  -->
  <requestHandler name="/replication" class="solr.ReplicationHandler" >
    <!--
      To enable simple master/slave replication, uncomment one of the
      sections below, depending on whether this solr instance should be
      the "master" or a "slave". If this instance is a "slave" you will
      also need to fill in the masterUrl to point to a real machine.
    -->
    <!--
      <lst name="master">
        <str name="replicateAfter">commit</str>
        <str name="replicateAfter">startup</str>
        <str name="confFiles">schema.xml,stopwords.txt</str>
      </lst>
    -->
    <!--
      <lst name="slave">
        <str name="masterUrl">http://your-master-hostname:8983/solr</str>
        <str name="pollInterval">00:00:60</str>
      </lst>
    -->
  </requestHandler>

  <!--
    Spell Check

    The spell check component can return a list of alternative spelling
    suggestions.

    http://wiki.apache.org/solr/SpellCheckComponent
  -->
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

    <str name="queryAnalyzerFieldType">textSpell</str>

    <!-- Multiple "Spell Checkers" can be declared and used by this component -->

    <!-- a spellchecker built from a field of the main index -->
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="classname">solr.DirectSolrSpellChecker</str>
      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
      <str name="distanceMeasure">internal</str>
      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
      <float name="accuracy">0.5</float>
      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
      <int name="maxEdits">2</int>
      <!-- the minimum shared prefix when enumerating terms -->
      <int name="minPrefix">1</int>
      <!-- maximum number of inspections per result. -->
      <int name="maxInspections">5</int>
      <!-- minimum length of a query term to be considered for correction -->
      <int name="minQueryLength">4</int>
      <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
      <float name="maxQueryFrequency">0.01</float>
      <!-- uncomment this to require suggestions to occur in 1% of the documents
        <float name="thresholdTokenFrequency">.01</float>
      -->
    </lst>

    <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
    <lst name="spellchecker">
      <str name="name">wordbreak</str>
      <str name="classname">solr.WordBreakSolrSpellChecker</str>
      <str name="field">name</str>
      <str name="combineWords">true</str>
      <str name="breakWords">true</str>
      <int name="maxChanges">10</int>
    </lst>

    <!-- a spellchecker that uses a different distance measure -->
    <!--
      <lst name="spellchecker">
        <str name="name">jarowinkler</str>
        <str name="field">spell</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <str name="distanceMeasure">
          org.apache.lucene.search.spell.JaroWinklerDistance
        </str>
      </lst>
    -->

    <!--
      a spellchecker that uses an alternate comparator.

      comparatorClass can be one of:
        1. score (default)
        2. freq (Frequency first, then score)
        3. A fully qualified class name
    -->
    <!--
      <lst name="spellchecker">
        <str name="name">freq</str>
        <str name="field">lowerfilt</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <str name="comparatorClass">freq</str>
      </lst>
    -->

    <!-- A spellchecker that reads the list of words from a file -->
    <!--
      <lst name="spellchecker">
        <str name="classname">solr.FileBasedSpellChecker</str>
        <str name="name">file</str>
        <str name="sourceLocation">spellings.txt</str>
        <str name="characterEncoding">UTF-8</str>
        <str name="spellcheckIndexDir">spellcheckerFile</str>
      </lst>
    -->
  </searchComponent>

  <!--
    A request handler for demonstrating the spellcheck component.

    NOTE: This is purely as an example. The whole purpose of the
    SpellCheckComponent is to hook it into the request handler that
    handles your normal user queries so that a separate request is
    not needed to get suggestions.

    IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
    NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!

    See http://wiki.apache.org/solr/SpellCheckComponent for details
    on the request parameters.
  -->
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <!--
        Solr will use suggestions from both the 'default' spellchecker
        and from the 'wordbreak' spellchecker and combine them.
        collations (re-written queries) can include a combination of
        corrections from both spellcheckers
      -->
      <str name="spellcheck.dictionary">default</str>
      <str name="spellcheck.dictionary">wordbreak</str>
      <str name="spellcheck">on</str>
      <str name="spellcheck.extendedResults">true</str>
      <str name="spellcheck.count">10</str>
      <str name="spellcheck.alternativeTermCount">5</str>
      <str name="spellcheck.maxResultsForSuggest">5</str>
      <str name="spellcheck.collate">true</str>
      <str name="spellcheck.collateExtendedResults">true</str>
      <str name="spellcheck.maxCollationTries">10</str>
      <str name="spellcheck.maxCollations">5</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  <!--
    Term Vector Component
    http://wiki.apache.org/solr/TermVectorComponent
  -->
  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>

  <!--
    Clustering Component
    http://wiki.apache.org/solr/ClusteringComponent

    You'll need to set the solr.clustering.enabled system property
    when running solr to run with clustering enabled:

      java -Dsolr.clustering.enabled=true -jar start.jar
  -->
  <searchComponent name="clustering"
                   enable="${solr.clustering.enabled:false}"
                   class="solr.clustering.ClusteringComponent" >
    <!-- Declare an engine -->
    <lst name="engine">
      <!-- The name, only one can be named "default" -->
      <str name="name">default</str>

      <!--
        Class name of Carrot2 clustering algorithm.

        Currently available algorithms are:
          * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
          * org.carrot2.clustering.stc.STCClusteringAlgorithm
          * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm

        See http://project.carrot2.org/algorithms.html for the
        algorithm's characteristics.
      -->
      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>

      <!--
        Overriding values for Carrot2 default algorithm attributes.

        For a description of all available attributes, see:
        http://download.carrot2.org/stable/manual/#chapter.components.
        Use attribute key as name attribute of str elements below.
        These can be further overridden for individual requests by
        specifying attribute key as request parameter name and
        attribute value as parameter value.
      -->
      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>

      <!--
        Location of Carrot2 lexical resources.

        A directory from which to load Carrot2-specific stop words
        and stop labels. Absolute or relative to Solr config directory.
        If a specific resource (e.g. stopwords.en) is present in the
        specified dir, it will completely override the corresponding
        default one that ships with Carrot2.

        For an overview of Carrot2 lexical resources, see:
        http://download.carrot2.org/head/manual/#chapter.lexical-resources
      -->
      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>

      <!--
        The language to assume for the documents.

        For a list of allowed values, see:
        http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
      -->
      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
    </lst>
    <lst name="engine">
      <str name="name">stc</str>
      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
  </searchComponent>

  <!--
    A request handler for demonstrating the clustering component

    This is purely as an example.

    In reality you will likely want to add the component to your
    already specified request handlers.
  -->
  <requestHandler name="/clustering"
                  startup="lazy"
                  enable="${solr.clustering.enabled:false}"
                  class="solr.SearchHandler">
    <lst name="defaults">
      <bool name="clustering">true</bool>
      <str name="clustering.engine">default</str>
      <bool name="clustering.results">true</bool>
      <!-- The title field -->
      <str name="carrot.title">name</str>
      <str name="carrot.url">id</str>
      <!-- The field to cluster on -->
      <str name="carrot.snippet">features</str>
      <!-- produce summaries -->
      <bool name="carrot.produceSummary">true</bool>
      <!-- the maximum number of labels per cluster -->
      <!--<int name="carrot.numDescriptions">5</int>-->
      <!-- produce sub clusters -->
      <bool name="carrot.outputSubClusters">false</bool>

      <str name="defType">edismax</str>
      <str name="qf"> text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 </str>
      <str name="q.alt">*:*</str>
      <str name="rows">10</str>
      <str name="fl">*,score</str>
    </lst>
    <arr name="last-components">
      <str>clustering</str>
    </arr>
  </requestHandler>

  <!--
    Terms Component
    http://wiki.apache.org/solr/TermsComponent

    A component to return terms and document frequency of those terms
  -->
  <searchComponent name="terms" class="solr.TermsComponent"/>

  <!-- A request handler for demonstrating the terms component -->
  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <bool name="terms">true</bool>
      <bool name="distrib">false</bool>
    </lst>
    <arr name="components">
      <str>terms</str>
    </arr>
  </requestHandler>

  <!--
    Update Processors

    Chains of Update Processor Factories for dealing with Update
    Requests can be declared, and then used by name in Update
    Request Processors

    http://wiki.apache.org/solr/UpdateRequestProcessor

    No update processor chain is declared in this file.
  -->

  <!-- Response Writers -->
  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
    <!--
      For the purposes of the tutorial, JSON responses are written as
      plain text so that they are easy to read in *any* browser.
      If you expect a MIME type of "application/json" just remove this override.
    -->
    <str name="content-type">text/plain; charset=UTF-8</str>
  </queryResponseWriter>

  <!-- Custom response writers can be declared as needed... -->
  <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>

  <!--
    XSLT response writer transforms the XML output by any xslt file found
    in Solr's conf/xslt directory. Changes to xslt files are checked for
    every xsltCacheLifetimeSeconds.
  -->
  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>

  <!--
    Query Parsers
    http://wiki.apache.org/solr/SolrQuerySyntax

    Multiple QParserPlugins can be registered by name, and then
    used in either the "defType" param for the QueryComponent (used
    by SearchHandler) or in LocalParams

    No additional query parser is registered in this file.
  -->

  <!-- Legacy config for the admin interface -->
  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

</config>