All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.elasticsearch.indices.analysis.PreBuiltAnalyzers Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.indices.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.Version;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.analysis.PatternAnalyzer;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer;
import org.elasticsearch.index.analysis.SnowballAnalyzer;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.util.Locale;

/**
 * Analyzers that are available out of the box and can be looked up by name.
 *
 * <p>Every constant supplies its analyzer through {@link #create(Version)} and keeps the
 * instances it has handed out in a {@link PreBuiltCacheFactory.PreBuiltCache} obtained from
 * {@link PreBuiltCacheFactory#getCache} for the constant's {@link CachingStrategy}. Constants
 * declared without an explicit strategy use {@link CachingStrategy#LUCENE}.
 *
 * <p>NOTE(review): how each strategy keys its cache is defined in {@link PreBuiltCacheFactory},
 * not here - consult that class before changing a constant's strategy.
 */
public enum PreBuiltAnalyzers {

    STANDARD(CachingStrategy.ELASTICSEARCH) {
        @Override
        protected Analyzer create(Version version) {
            // we don't do stopwords anymore from 1.0Beta on
            final Analyzer analyzer = version.onOrAfter(Version.V_1_0_0_Beta1)
                    ? new StandardAnalyzer(CharArraySet.EMPTY_SET)
                    : new StandardAnalyzer();
            return withLuceneVersion(analyzer, version);
        }
    },

    DEFAULT(CachingStrategy.ELASTICSEARCH) {
        @Override
        protected Analyzer create(Version version) {
            // by calling get analyzer we are ensuring reuse of the same STANDARD analyzer for DEFAULT!
            // this call does not create a new instance
            return STANDARD.getAnalyzer(version);
        }
    },

    KEYWORD(CachingStrategy.ONE) {
        @Override
        protected Analyzer create(Version version) {
            return new KeywordAnalyzer();
        }
    },

    STOP {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new StopAnalyzer(), version);
        }
    },

    WHITESPACE {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new WhitespaceAnalyzer(), version);
        }
    },

    SIMPLE {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new SimpleAnalyzer(), version);
        }
    },

    CLASSIC {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new ClassicAnalyzer(), version);
        }
    },

    SNOWBALL {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET), version);
        }
    },

    PATTERN(CachingStrategy.ELASTICSEARCH) {
        @Override
        protected Analyzer create(Version version) {
            // English stop words are only applied for versions before 1.0.0.RC1
            final CharArraySet stopWords = version.onOrAfter(Version.V_1_0_0_RC1)
                    ? CharArraySet.EMPTY_SET
                    : StopAnalyzer.ENGLISH_STOP_WORDS_SET;
            // no Lucene version is set on this analyzer (unchanged from the original behaviour)
            return new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, stopWords);
        }
    },

    STANDARD_HTML_STRIP(CachingStrategy.ELASTICSEARCH) {
        @Override
        protected Analyzer create(Version version) {
            // same stop word cut-over as PATTERN: none from 1.0.0.RC1 on
            final Analyzer analyzer = version.onOrAfter(Version.V_1_0_0_RC1)
                    ? new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)
                    : new StandardHtmlStripAnalyzer();
            return withLuceneVersion(analyzer, version);
        }
    },

    ARABIC {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new ArabicAnalyzer(), version);
        }
    },

    ARMENIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new ArmenianAnalyzer(), version);
        }
    },

    BASQUE {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new BasqueAnalyzer(), version);
        }
    },

    BRAZILIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new BrazilianAnalyzer(), version);
        }
    },

    BULGARIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new BulgarianAnalyzer(), version);
        }
    },

    CATALAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new CatalanAnalyzer(), version);
        }
    },

    // NOTE(review): this constant is cached with CachingStrategy.ONE but still stamps the Lucene
    // version of whichever request created it - confirm against PreBuiltCacheFactory that this
    // combination is intended.
    CHINESE(CachingStrategy.ONE) {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new StandardAnalyzer(), version);
        }
    },

    CJK {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new CJKAnalyzer(), version);
        }
    },

    CZECH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new CzechAnalyzer(), version);
        }
    },

    DUTCH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new DutchAnalyzer(), version);
        }
    },

    DANISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new DanishAnalyzer(), version);
        }
    },

    ENGLISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new EnglishAnalyzer(), version);
        }
    },

    FINNISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new FinnishAnalyzer(), version);
        }
    },

    FRENCH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new FrenchAnalyzer(), version);
        }
    },

    GALICIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new GalicianAnalyzer(), version);
        }
    },

    GERMAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new GermanAnalyzer(), version);
        }
    },

    GREEK {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new GreekAnalyzer(), version);
        }
    },

    HINDI {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new HindiAnalyzer(), version);
        }
    },

    HUNGARIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new HungarianAnalyzer(), version);
        }
    },

    INDONESIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new IndonesianAnalyzer(), version);
        }
    },

    IRISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new IrishAnalyzer(), version);
        }
    },

    ITALIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new ItalianAnalyzer(), version);
        }
    },

    LATVIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new LatvianAnalyzer(), version);
        }
    },

    LITHUANIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new LithuanianAnalyzer(), version);
        }
    },

    NORWEGIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new NorwegianAnalyzer(), version);
        }
    },

    PERSIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new PersianAnalyzer(), version);
        }
    },

    PORTUGUESE {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new PortugueseAnalyzer(), version);
        }
    },

    ROMANIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new RomanianAnalyzer(), version);
        }
    },

    RUSSIAN {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new RussianAnalyzer(), version);
        }
    },

    SORANI {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new SoraniAnalyzer(), version);
        }
    },

    SPANISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new SpanishAnalyzer(), version);
        }
    },

    SWEDISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new SwedishAnalyzer(), version);
        }
    },

    TURKISH {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new TurkishAnalyzer(), version);
        }
    },

    THAI {
        @Override
        protected Analyzer create(Version version) {
            return withLuceneVersion(new ThaiAnalyzer(), version);
        }
    };

    /**
     * Supplies the analyzer for the given version. Called by {@link #getAnalyzer(Version)}
     * when the cache holds no entry for {@code version}.
     *
     * @param version Elasticsearch version the analyzer is requested for
     * @return the analyzer to cache and hand out
     */
    protected abstract Analyzer create(Version version);

    /** Cache of analyzers already handed out by this constant. */
    protected final PreBuiltCacheFactory.PreBuiltCache cache;

    /** Creates a constant that uses {@link CachingStrategy#LUCENE}. */
    PreBuiltAnalyzers() {
        this(CachingStrategy.LUCENE);
    }

    /**
     * Creates a constant whose cache is obtained from {@link PreBuiltCacheFactory#getCache}
     * for the given strategy.
     *
     * @param cachingStrategy strategy passed to the cache factory
     */
    PreBuiltAnalyzers(CachingStrategy cachingStrategy) {
        cache = PreBuiltCacheFactory.getCache(cachingStrategy);
    }

    /**
     * @return the cache backing this constant
     */
    PreBuiltCacheFactory.PreBuiltCache getCache() {
        return cache;
    }

    /**
     * Returns the cached analyzer for {@code version}, creating and caching it first if the
     * cache has none. The method is {@code synchronized} on this constant, so lookup,
     * creation and insertion happen as one step per constant.
     *
     * @param version Elasticsearch version the analyzer is requested for
     * @return the cached or newly created analyzer
     */
    public synchronized Analyzer getAnalyzer(Version version) {
        Analyzer analyzer = cache.get(version);
        if (analyzer == null) {
            analyzer = this.create(version);
            cache.put(version, analyzer);
        }

        return analyzer;
    }

    /**
     * Get a pre built Analyzer by its name or fallback to the default one.
     * The lookup is case-insensitive (the name is upper-cased with {@link Locale#ROOT}).
     *
     * @param name Analyzer name; {@code null} yields {@code defaultAnalyzer}
     * @param defaultAnalyzer default Analyzer if name not found
     * @return the constant matching {@code name}, or {@code defaultAnalyzer} if there is none
     */
    public static PreBuiltAnalyzers getOrDefault(String name, PreBuiltAnalyzers defaultAnalyzer) {
        // a missing name cannot match any constant; fall back instead of throwing an NPE
        if (name == null) {
            return defaultAnalyzer;
        }
        try {
            return valueOf(name.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException e) {
            // valueOf signals "no such constant" this way; that is the documented fallback case
            return defaultAnalyzer;
        }
    }

    /**
     * Sets the Lucene version carried by {@code version} on {@code analyzer} and returns the
     * same instance, so constant bodies can build and stamp an analyzer in one expression.
     *
     * @param analyzer analyzer to stamp
     * @param version Elasticsearch version whose {@code luceneVersion} is applied
     * @return {@code analyzer}
     */
    private static Analyzer withLuceneVersion(Analyzer analyzer, Version version) {
        analyzer.setVersion(version.luceneVersion);
        return analyzer;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy