com.metaeffekt.mirror.index.IndexSearch Maven / Gradle / Ivy
The newest version!
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.mirror.index;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class IndexSearch {
private final static Logger LOG = LoggerFactory.getLogger(IndexSearch.class);
private final Map> fieldContains = new HashMap<>();
private final Map> fieldNotContains = new HashMap<>();
private final Map> fieldEquals = new HashMap<>();
private final Map> fieldContainsUnquoted = new HashMap<>();
private final List withoutField = new ArrayList<>();
private final Map>> fieldNumericBetween = new HashMap<>();
private Analyzer analyzer = Analyzers.getStandardAnalyzer();
private int maxDocs = Integer.MAX_VALUE;
private String joinOperator = "AND";
/**
 * Registers a value that the given field has to contain. The value is later appended to the
 * query string as a quoted, positive term. Repeated calls for the same field accumulate.
 *
 * @param field the name of the field to search in, must contain text.
 * @param value the value to search for.
 * @return this
 * @throws IllegalArgumentException if the field is not specified.
 */
public IndexSearch fieldContains(String field, String value) {
    validateFieldSpecified(field);

    final List<String> valuesForField = fieldContains.computeIfAbsent(field, key -> new ArrayList<>());
    valuesForField.add(value);

    return this;
}
/**
 * Registers a value that the given field must not contain. The value is later appended to the
 * query string as a quoted term prefixed with <code>NOT</code>. Repeated calls for the same
 * field accumulate.
 *
 * @param field the name of the field to search in, must contain text.
 * @param value the value that must not match.
 * @return this
 * @throws IllegalArgumentException if the field is not specified.
 */
public IndexSearch fieldNotContains(String field, String value) {
    validateFieldSpecified(field);

    final List<String> excludedValues = fieldNotContains.computeIfAbsent(field, key -> new ArrayList<>());
    excludedValues.add(value);

    return this;
}
/**
 * Registers a value that the given field has to contain. In contrast to
 * {@link #fieldContains(String, String)}, the value is later appended to the query string
 * without surrounding quotes. Repeated calls for the same field accumulate.
 *
 * @param field the name of the field to search in, must contain text.
 * @param value the unquoted query part to search for.
 * @return this
 * @throws IllegalArgumentException if the field is not specified.
 */
public IndexSearch fieldContainsUnquoted(String field, String value) {
    validateFieldSpecified(field);

    final List<String> unquotedValues = fieldContainsUnquoted.computeIfAbsent(field, key -> new ArrayList<>());
    unquotedValues.add(value);

    return this;
}
/**
 * Registers a value that the given field has to be equal to. The value is appended to the query
 * string as a quoted, positive term; in addition, the search removes every retrieved document
 * whose value for the field is not exactly equal to the registered value.
 *
 * @param field the name of the field to compare, must contain text.
 * @param value the value the field has to be equal to.
 * @return this
 * @throws IllegalArgumentException if the field is not specified.
 */
public IndexSearch fieldEquals(String field, String value) {
    validateFieldSpecified(field);

    final List<String> expectedValues = fieldEquals.computeIfAbsent(field, key -> new ArrayList<>());
    expectedValues.add(value);

    return this;
}
/**
 * Registers a range for the given field. The range is later appended to the query string in the
 * form <code>field:[from TO to]</code>. Repeated calls for the same field accumulate.
 *
 * @param field the name of the field to search in, must contain text.
 * @param from  the left bound written into the range term.
 * @param to    the right bound written into the range term.
 * @return this
 * @throws IllegalArgumentException if the field is not specified.
 */
public IndexSearch fieldNumericBetween(String field, long from, long to) {
    validateFieldSpecified(field);

    final Pair<Long, Long> range = Pair.of(from, to);
    fieldNumericBetween.computeIfAbsent(field, key -> new ArrayList<>()).add(range);

    return this;
}
/**
 * Shortcut for {@link #fieldEquals(String, String)} on the field named by
 * {@link Index#UNIQUE_LUCENE_DOCUMENT_ID}.
 *
 * @param value the value the unique document id field has to be equal to.
 * @return this
 */
public IndexSearch uniqueDocumentIdEquals(String value) {
    final String idField = Index.UNIQUE_LUCENE_DOCUMENT_ID;
    return fieldEquals(idField, value);
}
/**
 * Registers a field that is excluded from the search. For each registered field, a negated,
 * unquoted <code>field:*</code> term is later appended to the query string.
 *
 * @param field the name of the field to exclude, must contain text.
 * @return this
 * @throws IllegalArgumentException if the field is not specified.
 */
public IndexSearch withoutField(String field) {
    validateFieldSpecified(field);

    this.withoutField.add(field);

    return this;
}
/**
 * Replaces the analyzer that is handed to the query parser when the query is built.
 *
 * @param analyzer the analyzer to use for parsing the query string.
 * @return this
 */
public IndexSearch analyzer(Analyzer analyzer) {
    this.analyzer = analyzer;

    return this;
}
/**
 * Sets the maximum number of hits that is requested from the searcher.
 *
 * @param maxDocs the number of hits passed to the searcher.
 * @return this
 */
public IndexSearch maxDocs(int maxDocs) {
    this.maxDocs = maxDocs;

    return this;
}
/**
 * Sets the operator that is inserted between the individual query parts.
 * Can be either <code>AND</code> or <code>OR</code>. The value is converted to upper case
 * before it is stored; it is not validated any further.
 *
 * @param joinOperator the join operator to use when joining the query parts.
 * @return this
 * @throws NullPointerException if the join operator is <code>null</code>.
 */
public IndexSearch joinOperator(String joinOperator) {
    // Locale.ROOT keeps the conversion independent of the JVM default locale
    this.joinOperator = joinOperator.toUpperCase(Locale.ROOT);
    return this;
}
/**
 * Rejects field names for which {@link StringUtils#hasText} returns <code>false</code>.
 *
 * @param field the field name to check.
 * @throws IllegalArgumentException if the field name is rejected.
 */
private static void validateFieldSpecified(String field) {
    final boolean specified = StringUtils.hasText(field);
    if (specified) {
        return;
    }

    throw new IllegalArgumentException("Field must be specified");
}
public List search(org.apache.lucene.search.IndexSearcher searcher) throws ParseException, IOException {
final Query query = buildQuery();
if (LOG.isTraceEnabled()) {
LOG.trace("Searching index for: {}", query);
}
final TopDocs topDocs = searcher.search(query, maxDocs);
final List documents = new ArrayList<>((int) topDocs.totalHits.value);
for (int i = 0; i < topDocs.totalHits.value; i++) {
documents.add(searcher.doc(topDocs.scoreDocs[i].doc));
}
if (fieldEquals.size() > 0) {
documents.removeIf(e -> {
for (Map.Entry> entry : fieldEquals.entrySet()) {
final String field = entry.getKey();
final List values = entry.getValue();
for (String value : values) {
if (!Objects.equals(e.get(field), value)) {
return true;
}
}
}
return false;
});
}
return documents;
}
/**
 * Converts the registered conditions into a query. The generated query string is parsed with
 * the configured analyzer, using {@link Index#UNIQUE_LUCENE_DOCUMENT_ID} as the default field
 * and with leading wildcards allowed.
 *
 * @return the parsed query.
 * @throws ParseException if the generated query string cannot be parsed.
 */
public Query buildQuery() throws ParseException {
    final String queryString = buildQueryString().toString();

    final QueryParser queryParser = new QueryParser(Index.UNIQUE_LUCENE_DOCUMENT_ID, analyzer);
    queryParser.setAllowLeadingWildcard(true);

    return queryParser.parse(queryString);
}
private StringBuilder buildQueryString() {
final StringBuilder queryBuilder = new StringBuilder();
appendFieldQuerySearchParameters(queryBuilder, fieldContains, true, true);
appendFieldQuerySearchParameters(queryBuilder, fieldContainsUnquoted, false, true);
appendFieldQuerySearchParameters(queryBuilder, fieldEquals, true, true);
appendFieldQuerySearchParameters(queryBuilder, fieldNotContains, true, false);
appendFieldQuerySearchParameters(queryBuilder, withoutField.stream().collect(Collectors.toMap(e -> e, e -> Collections.singletonList("*"))), false, false);
for (Map.Entry>> entry : fieldNumericBetween.entrySet()) {
final String field = entry.getKey();
final List> values = entry.getValue();
for (Pair value : values) {
if (queryBuilder.length() > 0) {
queryBuilder.append(" ").append(joinOperator).append(" ");
}
queryBuilder.append(field).append(":[").append(value.getLeft()).append(" TO ").append(value.getRight()).append("]");
}
}
return queryBuilder;
}
private void appendFieldQuerySearchParameters(StringBuilder queryBuilder, Map> fields, boolean quoted, boolean contains) {
for (Map.Entry> entry : fields.entrySet()) {
final String field = entry.getKey();
final List values = entry.getValue();
for (String queryPart : values) {
if (queryBuilder.length() > 0) {
queryBuilder.append(" ").append(joinOperator).append(" ");
}
if (!contains) {
if (queryBuilder.length() == 0) {
queryBuilder.append("*:* NOT ");
} else {
queryBuilder.append("NOT ");
}
}
if (StringUtils.hasText(field)) {
queryBuilder.append(field).append(":");
}
if (quoted) {
final String effectiveQueryPart;
if (queryPart.contains("\"")) {
// quotes are not allowed unescaped in the query string, if the query string is already quoted
// " without \" must be escaped
final Matcher matcher = Pattern.compile("(?
© 2015 - 2025 Weber Informatics LLC | Privacy Policy