org.janusgraph.diskstorage.lucene.LuceneCustomAnalyzer Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of janusgraph-lucene Show documentation
There is a newer version: 1.1.0-20240822-065119.1a7f5ab
// Copyright 2018 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.janusgraph.diskstorage.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.janusgraph.core.schema.Mapping;
import org.janusgraph.core.schema.Parameter;
import org.janusgraph.diskstorage.indexing.KeyInformation;
import org.janusgraph.graphdb.types.ParameterType;

import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

/**
 * An Analyzer that allows delegating analysis to custom analyzers. The class names for the custom analyzers are
 * provided with Parameters passed to an IndexBuilder when declaring a lucene backed mixed index:
 * <pre>{@code
 * addKey(someStringProperty, Mapping.TEXT.asParameter(), Parameter.of(ParameterType.TEXT_ANALYZER.getName(),
 *        "org.apache.lucene.analysis.fr.FrenchAnalyzer"))
 * }</pre>
 *
 * Custom analyzers are instantiated lazily, through their no-argument constructor, and then cached by class name
 * for reuse. The cache is a concurrent map, so looking up and creating analyzers may happen from several threads.
 *
 * @author Olivier binda ([email protected])
 */
public class LuceneCustomAnalyzer extends DelegatingAnalyzerWrapper {
    private static final String KEYWORD_ANALYZER = KeywordAnalyzer.class.getName();
    private static final String STANDARD_ANALYZER = StandardAnalyzer.class.getName();

    /** Store name handed to {@link #informations} when looking up the key behind a field. */
    private final String store;
    /** Source of the data type and index parameters of each indexed key. */
    private final KeyInformation.IndexRetriever informations;

    /** Analyzer instances keyed by analyzer class name; pre-seeded with the two default analyzers. */
    private final Map<String, Analyzer> analyzers = new ConcurrentHashMap<>();

    /**
     * Creates the delegating analyzer and pre-registers a {@link KeywordAnalyzer} and a {@link StandardAnalyzer}.
     *
     * @param store            store name used for every key-information lookup
     * @param informations     retriever queried for the key information of a field
     * @param fallbackStrategy reuse strategy passed on to {@link DelegatingAnalyzerWrapper}
     */
    public LuceneCustomAnalyzer(String store, KeyInformation.IndexRetriever informations, ReuseStrategy fallbackStrategy) {
        super(fallbackStrategy);
        this.store = store;
        this.informations = informations;
        analyzers.put(KEYWORD_ANALYZER, new KeywordAnalyzer());
        analyzers.put(STANDARD_ANALYZER, new StandardAnalyzer());
    }

    /**
     * Selects the analyzer for a field:
     * <ul>
     *   <li>the {@code LuceneIndex.DOCID} field and keys of type {@link UUID} use the keyword analyzer;</li>
     *   <li>fields without key information, or whose data type is not a {@link String}, use the standard
     *       analyzer;</li>
     *   <li>string keys use the analyzer named by their STRING_ANALYZER / TEXT_ANALYZER parameter, depending on
     *       the mapping, falling back to the keyword / standard analyzer respectively.</li>
     * </ul>
     *
     * @param fieldName name of the Lucene field being analyzed
     * @return the analyzer to delegate to for that field
     * @throws RuntimeException if the mapping is not supported or the configured analyzer cannot be created
     */
    @Override
    protected final Analyzer getWrappedAnalyzer(String fieldName) {
        if (LuceneIndex.DOCID.equals(fieldName)) {
            return analyzerFor(KEYWORD_ANALYZER);
        }
        final KeyInformation keyInformation = informations.get(store, LuceneIndex.getOrigFieldName(fieldName));
        if (keyInformation != null && keyInformation.getDataType().equals(UUID.class)) {
            return analyzerFor(KEYWORD_ANALYZER);
        }
        // at the moment, we only try to support custom analyzers for string data.
        // everything else falls through a StandardAnalyzer as was the case before
        if (keyInformation == null || !String.class.isAssignableFrom(keyInformation.getDataType())) {
            return analyzerFor(STANDARD_ANALYZER);
        }
        final Parameter[] parameters = keyInformation.getParameters();
        final Mapping mapping;
        if (LuceneIndex.isDualFieldName(fieldName)) {
            mapping = LuceneIndex.getDualMapping(keyInformation);
        } else {
            // if mapping isn't present in parameters, we use Mapping.DEFAULT
            mapping = ParameterType.MAPPING.findParameter(parameters, Mapping.DEFAULT);
        }
        return analyzerFor(analyzerNameFor(parameters, mapping, KEYWORD_ANALYZER, STANDARD_ANALYZER));
    }

    /**
     * Resolves the analyzer class name configured for a string key.
     *
     * @param parameters            index parameters of the key
     * @param mapping               mapping in effect for the field
     * @param defaultStringAnalyzer class name used for STRING mapping when no STRING_ANALYZER parameter is set
     * @param defaultTextAnalyzer   class name used for TEXT, TEXTSTRING and DEFAULT mappings when no
     *                              TEXT_ANALYZER parameter is set
     * @return the analyzer class name to use
     * @throws RuntimeException for PREFIX_TREE and any other mapping not listed above
     */
    private static String analyzerNameFor(final Parameter[] parameters, final Mapping mapping, final String defaultStringAnalyzer,
                                          final String defaultTextAnalyzer) {
        switch (mapping) {
            case PREFIX_TREE:
                throw new RuntimeException("Prefix-tree is an unsupported mapping for string data & custom analyzers");
            case STRING:
                return ParameterType.STRING_ANALYZER.findParameter(parameters, defaultStringAnalyzer);
            case TEXT:
            case TEXTSTRING:
            case DEFAULT:// TEXT
                return ParameterType.TEXT_ANALYZER.findParameter(parameters, defaultTextAnalyzer);
            default:
                throw new RuntimeException("Not supported");
        }
    }

    /**
     * Returns the cached analyzer for the given class name, creating and caching it on first use.
     *
     * @param analyzerName fully qualified class name of the analyzer
     * @return the shared analyzer instance for that class name
     * @throws RuntimeException if the class cannot be loaded or instantiated as an {@link Analyzer}
     */
    private Analyzer analyzerFor(final String analyzerName) {
        if (analyzerName == null) {
            // ConcurrentHashMap rejects null keys; report the failure the same way as any other bad class name.
            throw new RuntimeException("Analyzer cannot be instanciated for class " + analyzerName);
        }
        // computeIfAbsent makes "check, create, store" a single atomic step per class name.
        return analyzers.computeIfAbsent(analyzerName, LuceneCustomAnalyzer::newAnalyzer);
    }

    /**
     * Loads the named class and instantiates it through its no-argument constructor.
     *
     * @param analyzerName fully qualified class name of the analyzer
     * @return a new analyzer instance
     * @throws RuntimeException wrapping any failure to load, cast or instantiate the class
     */
    private static Analyzer newAnalyzer(final String analyzerName) {
        try {
            final Class<? extends Analyzer> classDefinition = Class.forName(analyzerName).asSubclass(Analyzer.class);
            return classDefinition.getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new RuntimeException("Analyzer cannot be instanciated for class " + analyzerName, e);
        }
    }
}