All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.codelibs.fess.suggest.settings.AnalyzerSettings Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012-2024 CodeLibs Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.suggest.settings;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.codelibs.core.io.ResourceUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.suggest.analysis.SuggestAnalyzer;
import org.codelibs.fess.suggest.constants.FieldNames;
import org.codelibs.fess.suggest.exception.SuggestSettingsException;
import org.codelibs.fess.suggest.util.SuggestUtil;
import org.opensearch.action.admin.indices.analyze.AnalyzeAction;
import org.opensearch.action.admin.indices.analyze.AnalyzeAction.AnalyzeToken;
import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.Client;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.search.SearchHit;

public class AnalyzerSettings {
    public static final String READING_ANALYZER = "reading_analyzer";
    public static final String READING_TERM_ANALYZER = "reading_term_analyzer";
    public static final String NORMALIZE_ANALYZER = "normalize_analyzer";
    public static final String CONTENTS_ANALYZER = "contents_analyzer";
    public static final String CONTENTS_READING_ANALYZER = "contents_reading_analyzer";
    public static final String FIELD_ANALYZER_MAPPING = "fieldAnalyzerMapping";

    public static final String DOC_TYPE_NAME = "_doc";

    protected final Client client;
    protected final String analyzerSettingsIndexName;
    private final SuggestSettings settings;

    protected static Map> analyzerMap = new ConcurrentHashMap<>();
    protected static Map> fieldAnalyzerMappingMap = new ConcurrentHashMap<>();

    protected static final String[] SUPPORTED_LANGUAGES = { "ar", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es", "et", "fa", "fi",
            "fr", "gu", "he", "hi", "hr", "hu", "id", "it", "ja", "ko", "lt", "lv", "mk", "ml", "nl", "no", "pa", "pl", "pt", "ro", "ru",
            "si", "sq", "sv", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw" };

    public AnalyzerSettings(final Client client, final SuggestSettings settings, final String settingsIndexName) {
        this.client = client;
        this.settings = settings;
        analyzerSettingsIndexName = createAnalyzerSettingsIndexName(settingsIndexName);
    }

    public synchronized void init() {
        try {
            final IndicesExistsResponse response =
                    client.admin().indices().prepareExists(analyzerSettingsIndexName).execute().actionGet(settings.getIndicesTimeout());
            if (!response.isExists()) {
                createAnalyzerSettings(loadIndexSettings(), loadIndexMapping());
            }
            analyzerMap.put(analyzerSettingsIndexName, getAnalyzerNames());
            fieldAnalyzerMappingMap.put(analyzerSettingsIndexName, getFieldAnalyzerMapping());
        } catch (final IOException e) {
            throw new SuggestSettingsException("Failed to create mappings.");
        }
    }

    public String getAnalyzerSettingsIndexName() {
        return analyzerSettingsIndexName;
    }

    public String getReadingAnalyzerName(final String field, final String lang) {
        final Map fieldAnalyzerMapping = fieldAnalyzerMappingMap.get(analyzerSettingsIndexName);
        final Set analyzerNames = analyzerMap.get(analyzerSettingsIndexName);
        final String analyzerName;
        if (StringUtil.isNotBlank(field) && fieldAnalyzerMapping.containsKey(field)
                && fieldAnalyzerMapping.get(field).readingAnalyzer != null) {
            analyzerName = fieldAnalyzerMapping.get(field).readingAnalyzer;
        } else {
            analyzerName = READING_ANALYZER;
        }
        final String analyzerNameWithlang = isSupportedLanguage(lang) ? analyzerName + '_' + lang : analyzerName;
        if (analyzerNames.contains(analyzerNameWithlang)) {
            return analyzerNameWithlang;
        }
        return analyzerName;
    }

    public String getReadingTermAnalyzerName(final String field, final String lang) {
        final Map fieldAnalyzerMapping = fieldAnalyzerMappingMap.get(analyzerSettingsIndexName);
        final Set analyzerNames = analyzerMap.get(analyzerSettingsIndexName);
        final String analyzerName;
        if (StringUtil.isNotBlank(field) && fieldAnalyzerMapping.containsKey(field)
                && fieldAnalyzerMapping.get(field).readingTermAnalyzer != null) {
            analyzerName = fieldAnalyzerMapping.get(field).readingTermAnalyzer;
        } else {
            analyzerName = READING_TERM_ANALYZER;
        }
        final String analyzerNameWithlang = isSupportedLanguage(lang) ? analyzerName + '_' + lang : analyzerName;
        if (analyzerNames.contains(analyzerNameWithlang)) {
            return analyzerNameWithlang;
        }
        return analyzerName;
    }

    public String getNormalizeAnalyzerName(final String field, final String lang) {
        final Map fieldAnalyzerMapping = fieldAnalyzerMappingMap.get(analyzerSettingsIndexName);
        final Set analyzerNames = analyzerMap.get(analyzerSettingsIndexName);
        final String analyzerName;
        if (StringUtil.isNotBlank(field) && fieldAnalyzerMapping.containsKey(field)
                && fieldAnalyzerMapping.get(field).normalizeAnalyzer != null) {
            analyzerName = fieldAnalyzerMapping.get(field).normalizeAnalyzer;
        } else {
            analyzerName = NORMALIZE_ANALYZER;
        }
        final String analyzerNameWithlang = isSupportedLanguage(lang) ? analyzerName + '_' + lang : analyzerName;
        if (analyzerNames.contains(analyzerNameWithlang)) {
            return analyzerNameWithlang;
        }
        return analyzerName;
    }

    public String getContentsAnalyzerName(final String field, final String lang) {
        final Map fieldAnalyzerMapping = fieldAnalyzerMappingMap.get(analyzerSettingsIndexName);
        final Set analyzerNames = analyzerMap.get(analyzerSettingsIndexName);
        final String analyzerName;
        if (StringUtil.isNotBlank(field) && fieldAnalyzerMapping.containsKey(field)
                && fieldAnalyzerMapping.get(field).contentsAnalyzer != null) {
            analyzerName = fieldAnalyzerMapping.get(field).contentsAnalyzer;
        } else {
            analyzerName = CONTENTS_ANALYZER;
        }
        final String analyzerNameWithlang = isSupportedLanguage(lang) ? analyzerName + '_' + lang : analyzerName;
        if (analyzerNames.contains(analyzerNameWithlang)) {
            return analyzerNameWithlang;
        }
        return analyzerName;
    }

    public String getContentsReadingAnalyzerName(final String field, final String lang) {
        final Map fieldAnalyzerMapping = fieldAnalyzerMappingMap.get(analyzerSettingsIndexName);
        final Set analyzerNames = analyzerMap.get(analyzerSettingsIndexName);
        final String analyzerName;
        if (StringUtil.isNotBlank(field) && fieldAnalyzerMapping.containsKey(field)
                && fieldAnalyzerMapping.get(field).contentsReadingAnalyzer != null) {
            analyzerName = fieldAnalyzerMapping.get(field).contentsReadingAnalyzer;
        } else {
            analyzerName = CONTENTS_READING_ANALYZER;
        }
        final String analyzerNameWithlang = isSupportedLanguage(lang) ? analyzerName + '_' + lang : analyzerName;
        if (analyzerNames.contains(analyzerNameWithlang)) {
            return analyzerNameWithlang;
        }
        return analyzerName;
    }

    public void updateAnalyzer(final Map settings) {
        client.admin().indices().prepareCreate(analyzerSettingsIndexName).setSettings(settings).execute()
                .actionGet(this.settings.getIndicesTimeout());
    }

    protected void deleteAnalyzerSettings() {
        client.admin().indices().prepareDelete(analyzerSettingsIndexName).execute().actionGet(settings.getIndicesTimeout());
    }

    protected void createAnalyzerSettings(final String settings, final String mappings) {
        client.admin().indices().prepareCreate(analyzerSettingsIndexName).setSettings(settings, XContentType.JSON).setMapping(mappings)
                .execute().actionGet(this.settings.getIndicesTimeout());
    }

    protected void createAnalyzerSettings(final Map settings, final Map mappings) {
        client.admin().indices().prepareCreate(analyzerSettingsIndexName).setSettings(settings).execute()
                .actionGet(this.settings.getIndicesTimeout());
    }

    protected String createAnalyzerSettingsIndexName(final String settingsIndexName) {
        return settingsIndexName + "_analyzer";
    }

    protected String loadIndexSettings() throws IOException {
        final String dictionaryPath = System.getProperty("fess.dictionary.path", StringUtil.EMPTY);
        final StringBuilder sb = new StringBuilder();
        try (BufferedReader br =
                new BufferedReader(new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream(getSuggestAnalyzerPath())))) {

            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString().replaceAll(Pattern.quote("${fess.dictionary.path}"), dictionaryPath);
    }

    protected String getSuggestAnalyzerPath() {
        final Object typeObj = settings.get("search_engine.type");
        if (typeObj != null) {
            final String path = "suggest_indices/_" + typeObj.toString() + "/suggest_analyzer.json";
            if (ResourceUtil.getResourceNoException(path) != null) {
                return path;
            }
        }
        return "suggest_indices/suggest_analyzer.json";
    }

    protected String loadIndexMapping() throws IOException {
        final StringBuilder sb = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                this.getClass().getClassLoader().getResourceAsStream("suggest_indices/analyzer/mapping-default.json")));) {

            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

    public class DefaultContentsAnalyzer implements SuggestAnalyzer {

        private final int maxContentLenth = settings.getAsInt(SuggestSettings.DefaultKeys.MAX_CONTENT_LENGTH, 50000);

        @Override
        public List analyze(final String text, final String field, final String lang) {
            if (text == null || text.length() > maxContentLenth) {
                return Collections.emptyList();
            }
            final AnalyzeAction.Response analyzeResponse = client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text)
                    .setAnalyzer(getContentsAnalyzerName(field, lang)).execute().actionGet(settings.getIndicesTimeout());
            return analyzeResponse.getTokens();
        }

        @Override
        public List analyzeAndReading(final String text, final String field, final String lang) {
            try {
                final String contentsReadingAnalyzerName = getContentsReadingAnalyzerName(field, lang);
                if (StringUtil.isBlank(contentsReadingAnalyzerName)) {
                    return null;
                }
                final AnalyzeAction.Response analyzeResponse = client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text)
                        .setAnalyzer(contentsReadingAnalyzerName).execute().actionGet(settings.getIndicesTimeout());
                return analyzeResponse.getTokens();
            } catch (final IllegalArgumentException e) {
                return analyze(text, field, lang);
            }
        }
    }

    public static boolean isSupportedLanguage(final String lang) {
        return StringUtil.isNotBlank(lang) && Stream.of(SUPPORTED_LANGUAGES).anyMatch(lang::equals);
    }

    protected Set getAnalyzerNames() {
        final GetSettingsResponse response =
                client.admin().indices().prepareGetSettings().setIndices(analyzerSettingsIndexName).execute().actionGet();
        final Settings settings = response.getIndexToSettings().get(analyzerSettingsIndexName);
        final Settings analyzerSettings = settings.getAsSettings("index.analysis.analyzer");
        return analyzerSettings.getAsGroups().keySet();
    }

    protected Map getFieldAnalyzerMapping() {
        final Map mappingMap = new HashMap<>();
        SearchResponse response = client.prepareSearch(analyzerSettingsIndexName)
                .setQuery(QueryBuilders.termQuery(FieldNames.ANALYZER_SETTINGS_TYPE, FIELD_ANALYZER_MAPPING))
                .setScroll(settings.getScrollTimeout()).execute().actionGet(settings.getSearchTimeout());
        String scrollId = response.getScrollId();
        try {
            while (scrollId != null) {
                final SearchHit[] hits = response.getHits().getHits();
                if (hits.length == 0) {
                    break;
                }
                for (final SearchHit hit : hits) {
                    final Map source = hit.getSourceAsMap();
                    final String fieldReadingAnalyzer = source.get(FieldNames.ANALYZER_SETTINGS_READING_ANALYZER) == null ? null
                            : source.get(FieldNames.ANALYZER_SETTINGS_READING_ANALYZER).toString();
                    final String fieldReadingTermAnalyzer = source.get(FieldNames.ANALYZER_SETTINGS_READING_TERM_ANALYZER) == null ? null
                            : source.get(FieldNames.ANALYZER_SETTINGS_READING_TERM_ANALYZER).toString();
                    final String fieldNormalizeAnalyzer = source.get(FieldNames.ANALYZER_SETTINGS_NORMALIZE_ANALYZER) == null ? null
                            : source.get(FieldNames.ANALYZER_SETTINGS_NORMALIZE_ANALYZER).toString();
                    final String fieldContentsAnalyzer = source.get(FieldNames.ANALYZER_SETTINGS_CONTENTS_ANALYZER) == null ? null
                            : source.get(FieldNames.ANALYZER_SETTINGS_CONTENTS_ANALYZER).toString();
                    final String fieldContentsReadingAnalyzer =
                            source.get(FieldNames.ANALYZER_SETTINGS_CONTENTS_READING_ANALYZER) == null ? null
                                    : source.get(FieldNames.ANALYZER_SETTINGS_CONTENTS_READING_ANALYZER).toString();

                    mappingMap.put(source.get(FieldNames.ANALYZER_SETTINGS_FIELD_NAME).toString(),
                            new FieldAnalyzerMapping(fieldReadingAnalyzer, fieldReadingTermAnalyzer, fieldNormalizeAnalyzer,
                                    fieldContentsAnalyzer, fieldContentsReadingAnalyzer));
                }
                response = client.prepareSearchScroll(scrollId).setScroll(settings.getScrollTimeout()).execute()
                        .actionGet(settings.getSearchTimeout());
                if (!scrollId.equals(response.getScrollId())) {
                    SuggestUtil.deleteScrollContext(client, scrollId);
                }
                scrollId = response.getScrollId();
            }
        } finally {
            SuggestUtil.deleteScrollContext(client, scrollId);
        }
        return mappingMap;
    }

    public Set checkAnalyzer() {
        final String text = "text";
        final Set undefinedAnalyzerSet = new HashSet<>();
        for (final String lang : SUPPORTED_LANGUAGES) {
            final String readingAnalyzer = getReadingAnalyzerName("", lang);
            try {
                client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text).setAnalyzer(readingAnalyzer).execute()
                        .actionGet(settings.getIndicesTimeout());
            } catch (final IllegalArgumentException e) {
                undefinedAnalyzerSet.add(readingAnalyzer);
            }

            final String readingTermAnalyzer = getReadingTermAnalyzerName("", lang);
            try {
                client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text).setAnalyzer(readingTermAnalyzer).execute()
                        .actionGet(settings.getIndicesTimeout());
            } catch (final IllegalArgumentException e) {
                undefinedAnalyzerSet.add(readingTermAnalyzer);
            }

            final String normalizeAnalyzer = getNormalizeAnalyzerName("", lang);
            try {
                client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text).setAnalyzer(normalizeAnalyzer).execute()
                        .actionGet(settings.getIndicesTimeout());
            } catch (final IllegalArgumentException e) {
                undefinedAnalyzerSet.add(normalizeAnalyzer);
            }

            final String contentsAnalyzer = getContentsAnalyzerName("", lang);
            try {
                client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text).setAnalyzer(contentsAnalyzer).execute()
                        .actionGet(settings.getIndicesTimeout());
            } catch (final IllegalArgumentException e) {
                undefinedAnalyzerSet.add(contentsAnalyzer);
            }

            final String contentsReadingAnalyzer = getContentsReadingAnalyzerName("", lang);
            try {
                client.admin().indices().prepareAnalyze(analyzerSettingsIndexName, text).setAnalyzer(contentsReadingAnalyzer).execute()
                        .actionGet(settings.getIndicesTimeout());
            } catch (final IllegalArgumentException e) {
                undefinedAnalyzerSet.add(contentsReadingAnalyzer);
            }
        }

        return undefinedAnalyzerSet;
    }

    protected static class FieldAnalyzerMapping {
        protected final String readingAnalyzer;
        protected final String readingTermAnalyzer;
        protected final String normalizeAnalyzer;
        protected final String contentsAnalyzer;
        protected final String contentsReadingAnalyzer;

        public FieldAnalyzerMapping(final String readingAnalyzer, final String readingTermAnalyzer, final String normalizeAnalyzer,
                final String contentsAnalyzer, final String contentsReadingAnalyzer) {
            this.readingAnalyzer = readingAnalyzer;
            this.readingTermAnalyzer = readingTermAnalyzer;
            this.normalizeAnalyzer = normalizeAnalyzer;
            this.contentsAnalyzer = contentsAnalyzer;
            this.contentsReadingAnalyzer = contentsReadingAnalyzer;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy