All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.mirror.index.IndexSearch Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.mirror.index;

import com.metaeffekt.artifact.analysis.utils.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

public class IndexSearch {

    private final static Logger LOG = LoggerFactory.getLogger(IndexSearch.class);

    private final Map> fieldContains = new HashMap<>();
    private final Map> fieldNotContains = new HashMap<>();
    private final Map> fieldEquals = new HashMap<>();
    private final Map> fieldContainsUnquoted = new HashMap<>();
    private final List withoutField = new ArrayList<>();
    private final Map>> fieldNumericBetween = new HashMap<>();

    private Analyzer analyzer = Analyzers.getStandardAnalyzer();
    private int maxDocs = Integer.MAX_VALUE;
    private String joinOperator = "AND";

    /**
     * Registers a value for the given field that is added to the query string as a quoted,
     * non-negated clause.
     *
     * @param field the field to search in; must contain text
     * @param value the value to search for
     * @return this
     * @throws IllegalArgumentException if no field is specified
     */
    public IndexSearch fieldContains(String field, String value) {
        validateFieldSpecified(field);
        final List<String> valuesForField = fieldContains.computeIfAbsent(field, key -> new ArrayList<>());
        valuesForField.add(value);
        return this;
    }

    /**
     * Registers a value for the given field that is added to the query string as a quoted
     * clause negated with NOT.
     *
     * @param field the field to search in; must contain text
     * @param value the value that must not match
     * @return this
     * @throws IllegalArgumentException if no field is specified
     */
    public IndexSearch fieldNotContains(String field, String value) {
        validateFieldSpecified(field);
        final List<String> excludedValues = fieldNotContains.computeIfAbsent(field, key -> new ArrayList<>());
        excludedValues.add(value);
        return this;
    }

    /**
     * Registers a value for the given field that is added to the query string as a
     * non-negated clause without surrounding quotes.
     *
     * @param field the field to search in; must contain text
     * @param value the value to search for, written to the query string unquoted
     * @return this
     * @throws IllegalArgumentException if no field is specified
     */
    public IndexSearch fieldContainsUnquoted(String field, String value) {
        validateFieldSpecified(field);
        final List<String> unquotedValues = fieldContainsUnquoted.computeIfAbsent(field, key -> new ArrayList<>());
        unquotedValues.add(value);
        return this;
    }

    /**
     * Registers a value the given field has to equal. The value is added to the query string
     * as a quoted clause, and {@link #search} afterwards drops every document whose stored
     * field value is not exactly equal to it.
     *
     * @param field the field to compare; must contain text
     * @param value the exact value expected in the field
     * @return this
     * @throws IllegalArgumentException if no field is specified
     */
    public IndexSearch fieldEquals(String field, String value) {
        validateFieldSpecified(field);
        final List<String> expectedValues = fieldEquals.computeIfAbsent(field, key -> new ArrayList<>());
        expectedValues.add(value);
        return this;
    }

    /**
     * Registers a range for the given field that is added to the query string in the form
     * {@code field:[from TO to]}.
     *
     * @param field the field to search in; must contain text
     * @param from  the left bound written into the range clause
     * @param to    the right bound written into the range clause
     * @return this
     * @throws IllegalArgumentException if no field is specified
     */
    public IndexSearch fieldNumericBetween(String field, long from, long to) {
        validateFieldSpecified(field);
        final Pair<Long, Long> range = Pair.of(from, to);
        fieldNumericBetween.computeIfAbsent(field, key -> new ArrayList<>()).add(range);
        return this;
    }

    /**
     * Shortcut for {@link #fieldEquals(String, String)} on the field
     * {@code Index.UNIQUE_LUCENE_DOCUMENT_ID}.
     *
     * @param value the value the unique document id field has to equal
     * @return this
     */
    public IndexSearch uniqueDocumentIdEquals(String value) {
        return fieldEquals(Index.UNIQUE_LUCENE_DOCUMENT_ID, value);
    }

    /**
     * Registers a field that is added to the query string as a negated wildcard clause
     * ({@code NOT field:*}).
     *
     * @param field the field to exclude; must contain text
     * @return this
     * @throws IllegalArgumentException if no field is specified
     */
    public IndexSearch withoutField(String field) {
        validateFieldSpecified(field);
        withoutField.add(field);
        return this;
    }

    /**
     * Sets the analyzer that is handed to the query parser in {@link #buildQuery()}.
     * Unless overwritten, {@code Analyzers.getStandardAnalyzer()} is used.
     *
     * @param analyzer the analyzer to parse the query string with
     * @return this
     */
    public IndexSearch analyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
        return this;
    }

    /**
     * Sets the number of hits requested from the searcher in {@link #search}.
     * Unless overwritten, {@link Integer#MAX_VALUE} is used.
     *
     * @param maxDocs the hit limit passed on to the searcher
     * @return this
     */
    public IndexSearch maxDocs(int maxDocs) {
        this.maxDocs = maxDocs;
        return this;
    }

    /**
     * Sets the operator that is written between the individual query parts when the query
     * string is built. Can be either AND or OR; the value is upper-cased before it is stored.
     * Unless overwritten, AND is used.
     *
     * @param joinOperator the join operator to use when joining the query parts.
     * @return this
     * @throws NullPointerException if {@code joinOperator} is {@code null}
     */
    public IndexSearch joinOperator(String joinOperator) {
        Objects.requireNonNull(joinOperator, "joinOperator must not be null");
        // Locale.ROOT keeps the upper-casing independent of the JVM's default locale
        this.joinOperator = joinOperator.toUpperCase(Locale.ROOT);
        return this;
    }

    /**
     * Rejects field names for which {@code StringUtils.hasText} returns {@code false}.
     *
     * @param field the field name to check
     * @throws IllegalArgumentException if the field name has no text
     */
    private static void validateFieldSpecified(String field) {
        final boolean fieldPresent = StringUtils.hasText(field);
        if (fieldPresent) {
            return;
        }
        throw new IllegalArgumentException("Field must be specified");
    }

    public List search(org.apache.lucene.search.IndexSearcher searcher) throws ParseException, IOException {
        final Query query = buildQuery();

        if (LOG.isTraceEnabled()) {
            LOG.trace("Searching index for: {}", query);
        }

        final TopDocs topDocs = searcher.search(query, maxDocs);

        final List documents = new ArrayList<>((int) topDocs.totalHits.value);
        for (int i = 0; i < topDocs.totalHits.value; i++) {
            documents.add(searcher.doc(topDocs.scoreDocs[i].doc));
        }

        if (fieldEquals.size() > 0) {
            documents.removeIf(e -> {
                for (Map.Entry> entry : fieldEquals.entrySet()) {
                    final String field = entry.getKey();
                    final List values = entry.getValue();

                    for (String value : values) {
                        if (!Objects.equals(e.get(field), value)) {
                            return true;
                        }
                    }
                }

                return false;
            });
        }

        return documents;
    }

    /**
     * Renders the registered criteria into a query string and parses it with a
     * {@link QueryParser} that uses {@code Index.UNIQUE_LUCENE_DOCUMENT_ID} as default field,
     * the configured analyzer and has leading wildcards enabled.
     *
     * @return the parsed query
     * @throws ParseException if the generated query string cannot be parsed
     */
    public Query buildQuery() throws ParseException {
        final String queryString = buildQueryString().toString();

        final QueryParser queryParser = new QueryParser(Index.UNIQUE_LUCENE_DOCUMENT_ID, analyzer);
        queryParser.setAllowLeadingWildcard(true);
        return queryParser.parse(queryString);
    }

    private StringBuilder buildQueryString() {
        final StringBuilder queryBuilder = new StringBuilder();

        appendFieldQuerySearchParameters(queryBuilder, fieldContains, true, true);
        appendFieldQuerySearchParameters(queryBuilder, fieldContainsUnquoted, false, true);
        appendFieldQuerySearchParameters(queryBuilder, fieldEquals, true, true);

        appendFieldQuerySearchParameters(queryBuilder, fieldNotContains, true, false);
        appendFieldQuerySearchParameters(queryBuilder, withoutField.stream().collect(Collectors.toMap(e -> e, e -> Collections.singletonList("*"))), false, false);

        for (Map.Entry>> entry : fieldNumericBetween.entrySet()) {
            final String field = entry.getKey();
            final List> values = entry.getValue();

            for (Pair value : values) {
                if (queryBuilder.length() > 0) {
                    queryBuilder.append(" ").append(joinOperator).append(" ");
                }

                queryBuilder.append(field).append(":[").append(value.getLeft()).append(" TO ").append(value.getRight()).append("]");
            }
        }

        return queryBuilder;
    }

    private void appendFieldQuerySearchParameters(StringBuilder queryBuilder, Map> fields, boolean quoted, boolean contains) {
        for (Map.Entry> entry : fields.entrySet()) {
            final String field = entry.getKey();
            final List values = entry.getValue();

            for (String queryPart : values) {
                if (queryBuilder.length() > 0) {
                    queryBuilder.append(" ").append(joinOperator).append(" ");
                }

                if (!contains) {
                    if (queryBuilder.length() == 0) {
                        queryBuilder.append("*:* NOT ");
                    } else {
                        queryBuilder.append("NOT ");
                    }
                }

                if (StringUtils.hasText(field)) {
                    queryBuilder.append(field).append(":");
                }

                if (quoted) {
                    final String effectiveQueryPart;

                    if (queryPart.contains("\"")) {
                        // quotes are not allowed unescaped in the query string, if the query string is already quoted
                        // " without \" must be escaped
                        final Matcher matcher = Pattern.compile("(?




© 2015 - 2025 Weber Informatics LLC | Privacy Policy