org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.plugins.index.lucene;

import javax.jcr.PropertyType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Predicate;

import org.apache.jackrabbit.guava.common.base.Joiner;
import org.apache.jackrabbit.guava.common.collect.AbstractIterator;
import org.apache.jackrabbit.guava.common.collect.FluentIterable;
import org.apache.jackrabbit.guava.common.collect.ImmutableList;
import org.apache.jackrabbit.guava.common.collect.Iterables;
import org.apache.jackrabbit.guava.common.collect.Iterators;
import org.apache.jackrabbit.guava.common.collect.Maps;
import org.apache.jackrabbit.guava.common.collect.Queues;
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.PerfLogger;
import org.apache.jackrabbit.oak.commons.collections.CollectionUtils;
import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.SimSearchUtils;
import org.apache.jackrabbit.oak.plugins.index.lucene.writer.LuceneIndexWriter;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration;
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.lucene.property.HybridPropertyIndexLookup;
import org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReader;
import org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.FacetHelper;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.PathStoredFieldVisitor;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
import org.apache.jackrabbit.oak.plugins.index.search.IndexNode;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.SizeEstimator;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.PlanResult;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.PropertyIndexResult;
import org.apache.jackrabbit.oak.plugins.index.search.util.QueryUtils;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextAnd;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor;
import org.apache.jackrabbit.oak.spi.query.Cursor;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;
import org.apache.jackrabbit.oak.spi.query.QueryConstants;
import org.apache.jackrabbit.oak.spi.query.QueryLimits;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.util.Objects.requireNonNull;
import static org.apache.jackrabbit.guava.common.base.Preconditions.checkState;

import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
import static org.apache.jackrabbit.oak.api.Type.LONG;
import static org.apache.jackrabbit.oak.api.Type.STRING;
import static org.apache.jackrabbit.oak.api.Type.DOUBLE;
import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexLookupUtil.LUCENE_INDEX_DEFINITION_PREDICATE;
import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.DYNAMIC_BOOST_WEIGHT;
import static org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.NATIVE_SORT_ORDER;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
import static org.apache.jackrabbit.oak.plugins.memory.PropertyValues.newName;
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.JCR_PATH;
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.REP_EXCERPT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;

/**
 *
 * Used to query new (compatVersion 2) Lucene indexes.
 *
 * Provides a QueryIndex that does lookups against a Lucene-based index
 *
 * <p>
 * To define a lucene index on a subtree you have to add an
 * <code>oak:index</code> node.
 *
 * Under it follows the index definition node that:
 * <ul>
 * <li>must be of type <code>oak:QueryIndexDefinition</code></li>
 * <li>must have the <code>type</code> property set to <code>lucene</code></li>
 * <li>must have the <code>async</code> property set to <code>async</code></li>
 * </ul>
 * </p>
 * <p>
 * Optionally you can add
 * <ul>
 * <li>what subset of property types to be included in the index via the <code>includePropertyTypes</code> property</li>
 * <li>a blacklist of property names: what property to be excluded from the index via the <code>excludePropertyNames</code> property</li>
 * <li>the <code>reindex</code> flag which when set to <code>true</code>, triggers a full content re-index.</li>
 * </ul>
 * </p>
 * <pre>{@code
 * {
 *     NodeBuilder index = root.child("oak:index");
 *     index.child("lucene")
 *         .setProperty("jcr:primaryType", "oak:QueryIndexDefinition", Type.NAME)
 *         .setProperty("type", "lucene")
 *         .setProperty("async", "async")
 *         .setProperty("reindex", "true");
 * }
 * }</pre>
 *
 * @see org.apache.jackrabbit.oak.spi.query.QueryIndex
 */
public class LucenePropertyIndex extends FulltextIndex {

    private static final Logger LOG = LoggerFactory
            .getLogger(LucenePropertyIndex.class);
    private static final PerfLogger PERF_LOGGER = new PerfLogger(
            LoggerFactory.getLogger(LucenePropertyIndex.class.getName() + ".perf"));

    private final static long LOAD_DOCS_WARN = Long.getLong("oak.lucene.loadDocsWarn", 30 * 1000L);
    private final static long LOAD_DOCS_STOP = Long.getLong("oak.lucene.loadDocsStop", 3 * 60 * 1000L);

    private final static boolean NON_LAZY = SystemPropertySupplier.create("oak.lucene.nonLazyIndex", true).loggingTo(LOG).get();

    public final static String OLD_FACET_PROVIDER_CONFIG_NAME = "oak.lucene.oldFacetProvider";
    private final static boolean OLD_FACET_PROVIDER = Boolean.getBoolean(OLD_FACET_PROVIDER_CONFIG_NAME);

    public final static String CACHE_FACET_RESULTS_NAME = "oak.lucene.cacheFacetResults";
    private final boolean CACHE_FACET_RESULTS =
            Boolean.parseBoolean(System.getProperty(CACHE_FACET_RESULTS_NAME, "true"));

    public final static String EAGER_FACET_CACHE_FILL_NAME = "oak.lucene.cacheFacetEagerFill";
    private final static boolean EAGER_FACET_CACHE_FILL =
            Boolean.parseBoolean(System.getProperty(EAGER_FACET_CACHE_FILL_NAME, "true"));

    private static boolean FLAG_CACHE_FACET_RESULTS_CHANGE = true;

    /**
     * Batch size for fetching results from Lucene queries.
     */
    public static final int LUCENE_QUERY_BATCH_SIZE = 50;

    protected final IndexTracker tracker;

    private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"),
            new SimpleHTMLEncoder(), null);

    private final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();

    private final IndexAugmentorFactory augmentorFactory;

    static {
        if (!NON_LAZY) {
            LOG.warn("Lazy index download is enabled explicitly; this is not recommended, see OAK-10102.");
        }
    }

    public LucenePropertyIndex(IndexTracker tracker) {
        this(tracker, null);
    }

    public LucenePropertyIndex(IndexTracker tracker, IndexAugmentorFactory augmentorFactory) {
        this.tracker = tracker;
        this.augmentorFactory = augmentorFactory;
        logConfigsOnce();
    }

    private void logConfigsOnce() {
        if (FLAG_CACHE_FACET_RESULTS_CHANGE) {
            LOG.info(OLD_FACET_PROVIDER_CONFIG_NAME + " = " + OLD_FACET_PROVIDER);
            LOG.info(CACHE_FACET_RESULTS_NAME + " = " + CACHE_FACET_RESULTS);
            FLAG_CACHE_FACET_RESULTS_CHANGE = false;
        }
    }

    @Override
    public String getIndexName() {
        return "lucene-property";
    }

    @Override
    public Cursor query(final IndexPlan plan, NodeState rootState) {
        if (plan.isDeprecated()) {
            LOG.warn("This index is deprecated: {}; it is used for query {}. 
" + "Please change the query or the index definitions.", plan.getPlanName(), plan.getFilter()); } final Filter filter = plan.getFilter(); final Sort sort = getSort(plan); final PlanResult pr = getPlanResult(plan); QueryLimits settings = filter.getQueryLimits(); LuceneResultRowIterator rItr = new LuceneResultRowIterator() { private final Deque queue = Queues.newArrayDeque(); private final Set seenPaths = new HashSet<>(); private ScoreDoc lastDoc; private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE; private boolean noDocs = false; private IndexSearcher indexSearcher; private int indexNodeId = -1; private FacetProvider facetProvider; private int rewoundCount = 0; @Override protected FulltextResultRow computeNext() { while (!queue.isEmpty() || loadDocs()) { return queue.remove(); } releaseSearcher(); return endOfData(); } @Override public int rewoundCount() { return rewoundCount; } private FulltextResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, Map excerpts, FacetProvider facetProvider, String explanation) throws IOException { IndexReader reader = searcher.getIndexReader(); //TODO Look into usage of field cache for retrieving the path //instead of reading via reader if no of docs in index are limited PathStoredFieldVisitor visitor = new PathStoredFieldVisitor(); reader.document(doc.doc, visitor); String path = visitor.getPath(); if (path != null) { if ("".equals(path)) { path = "/"; } if (pr.isPathTransformed()) { String originalPath = path; path = pr.transformPath(path); if (path == null) { LOG.trace("Ignoring path {} : Transformation returned null", originalPath); return null; } // avoid duplicate entries if (seenPaths.contains(path)) { LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath); return null; } seenPaths.add(path); } boolean shouldIncludeForHierarchy = shouldInclude(path, plan); LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path, shouldIncludeForHierarchy); return shouldIncludeForHierarchy ? 
new FulltextResultRow(path, doc.score, excerpts, facetProvider, explanation) : null; } return null; } /** * Loads the lucene documents in batches * @return true if any document is loaded */ private boolean loadDocs() { if (noDocs) { return false; } ScoreDoc lastDocToRecord = null; final LuceneIndexNode indexNode = acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = getCurrentSearcher(indexNode); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); TopDocs docs; long start = PERF_LOGGER.start(); long startLoop = System.currentTimeMillis(); for (int repeated = 0;; repeated++) { if (repeated > 0) { long now = System.currentTimeMillis(); if (now > startLoop + LOAD_DOCS_WARN) { LOG.warn("loadDocs lastDoc {} repeated {} times for query {}", lastDoc, repeated, query); if (repeated > 1 && now > startLoop + LOAD_DOCS_STOP) { LOG.error("loadDocs stops", new Exception()); break; } } } if (lastDoc != null) { LOG.debug("loading the next {} entries for query {}", nextBatchSize, query); if (sort == null) { docs = searcher.searchAfter(lastDoc, query, nextBatchSize); } else { docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort); } } else { LOG.debug("loading the first {} entries for query {}", nextBatchSize, query); if (sort == null) { docs = searcher.search(query, nextBatchSize); } else { docs = searcher.search(query, nextBatchSize, sort); } } PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length); nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); if (facetProvider == null) { long f = PERF_LOGGER.start(); if (OLD_FACET_PROVIDER) { // here the current searcher gets referenced for later // but the searcher might get closed in the meantime facetProvider = new LuceneFacetProvider( FacetHelper.getFacets(searcher, query, plan, indexNode.getDefinition().getSecureFacetConfiguration()) ); } else { // a new searcher is opened and closed when needed facetProvider = new DelayedLuceneFacetProvider(LucenePropertyIndex.this, query, plan, indexNode.getDefinition().getSecureFacetConfiguration()); } PERF_LOGGER.end(f, -1, "facets retrieved"); } Set excerptFields = new HashSet<>(); for (PropertyRestriction pr : filter.getPropertyRestrictions()) { if (QueryConstants.REP_EXCERPT.equals(pr.propertyName)) { String value = pr.first.getValue(Type.STRING); excerptFields.add(value); } } boolean addExcerpt = excerptFields.size() > 0; PropertyRestriction restriction = filter.getPropertyRestriction(QueryConstants.OAK_SCORE_EXPLANATION); boolean addExplain = restriction != null && restriction.isNotNullRestriction(); Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); FieldInfos mergedFieldInfos = null; if (addExcerpt) { // setup highlighter QueryScorer scorer = new QueryScorer(query); scorer.setExpandMultiTermQuery(true); highlighter.setFragmentScorer(scorer); mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader()); } boolean earlyStop = false; if (docs.scoreDocs.length > 1) { // reranking step for fv sim search PropertyRestriction pr = null; LuceneIndexDefinition defn = indexNode.getDefinition(); if (defn.hasFunctionDefined()) { pr = filter.getPropertyRestriction(defn.getFunctionName()); } if (pr != null) { String queryString = String.valueOf(pr.first.getValue(pr.first.getType())); if (queryString.startsWith("mlt?")) { List sp = new LinkedList<>(); for 
(IndexingRule r : defn.getDefinedRules()) { List similarityProperties = r.getSimilarityProperties(); for (PropertyDefinition pd : similarityProperties) { if (pd.similarityRerank) { sp.add(pd); } } } if (!sp.isEmpty()) { long fvs = PERF_LOGGER.start(); SimSearchUtils.bruteForceFVRerank(sp, docs, indexSearcher); PERF_LOGGER.end(fvs, -1, "fv reranking done"); earlyStop = true; } } } } for (ScoreDoc doc : docs.scoreDocs) { Map excerpts = null; if (addExcerpt) { excerpts = getExcerpt(query, excerptFields, analyzer, searcher, doc, mergedFieldInfos); } String explanation = null; if (addExplain) { explanation = searcher.explain(query, doc.doc).toString(); } FulltextResultRow row = convertToRow(doc, searcher, excerpts, facetProvider, explanation); if (row != null) { queue.add(row); } lastDocToRecord = doc; } if (earlyStop) { noDocs = true; break; } if (queue.isEmpty() && docs.scoreDocs.length > 0) { //queue is still empty but more results can be fetched //from Lucene so still continue lastDoc = lastDocToRecord; } else { break; } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) { String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK; noDocs = true; SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest(); SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery); // ACL filter spellchecks QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer()); for (SuggestWord suggestion : suggestWords) { Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string)); query = addDescendantClauseIfRequired(query, plan); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); String prefix = filter.getPath(); if (prefix.length() == 1) { prefix = ""; } if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { queue.add(new FulltextResultRow(suggestion.string)); break; } } } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) { SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest(); noDocs = true; List lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery); QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? 
indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer()); // ACL filter suggestions for (Lookup.LookupResult suggestion : lookupResults) { Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\""); query = addDescendantClauseIfRequired(query, plan); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); String prefix = filter.getPath(); if (prefix.length() == 1) { prefix = ""; } if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { queue.add(new FulltextResultRow(suggestion.key.toString(), suggestion.value)); break; } } } } } } catch (Exception e) { LOG.warn("query [{}] via {} failed.", plan.getFilter(), LucenePropertyIndex.this.getClass().getCanonicalName(), e); } finally { indexNode.release(); } if (lastDocToRecord != null) { this.lastDoc = lastDocToRecord; } return !queue.isEmpty(); } private IndexSearcher getCurrentSearcher(LuceneIndexNode indexNode) { //The searcher once obtained is held till either cursor is finished //or if the index gets updated. It needs to be ensured that //searcher is obtained via this method only in this iterator //Refresh the searcher if change in indexNode is detected //For NRT case its fine to keep a reference to searcher i.e. not //acquire it for every loadDocs call otherwise with frequent change //the reset of lastDoc would happen very frequently. //Upon LuceneIndexNode change i.e. when new async index update is detected //the searcher would be refreshed as done earlier if (indexNodeId != indexNode.getIndexNodeId()) { //if already initialized then log about change if (indexNodeId > 0) { LOG.info("Change in index version detected. Query would be performed without offset"); rewoundCount++; } indexSearcher = indexNode.getSearcher(); indexNodeId = indexNode.getIndexNodeId(); lastDoc = null; } return indexSearcher; } private void releaseSearcher() { //For now nullifying it. indexSearcher = null; } }; Iterator itr = rItr; SizeEstimator sizeEstimator = getSizeEstimator(plan); if (pr.hasPropertyIndexResult() || pr.evaluateSyncNodeTypeRestriction()) { itr = mergePropertyIndexResult(plan, rootState, itr); } return new FulltextPathCursor(itr, rItr, plan, settings, sizeEstimator); } private static Query addDescendantClauseIfRequired(Query query, IndexPlan plan) { Filter filter = plan.getFilter(); if (filter.getPathRestriction() == Filter.PathRestriction.ALL_CHILDREN) { String path = getPathRestriction(plan); if (!PathUtils.denotesRoot(path)) { if (getPlanResult(plan).indexDefinition.evaluatePathRestrictions()) { BooleanQuery compositeQuery = new BooleanQuery(); compositeQuery.add(query, BooleanClause.Occur.MUST); Query pathQuery = new TermQuery(newAncestorTerm(path)); compositeQuery.add(pathQuery, BooleanClause.Occur.MUST); query = compositeQuery; } else { LOG.warn("Descendant clause could not be added without path restrictions enabled. 
Plan: {}", plan); } } } return query; } private Map getExcerpt(Query query, Set excerptFields, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc, FieldInfos fieldInfos) throws IOException { Set excerptFieldNames = new HashSet<>(); Map fieldNameToColumnNameMap = Maps.newHashMap(); Map columnNameToExcerpts = Maps.newHashMap(); Set nodeExcerptColumns = new HashSet<>(); excerptFields.forEach(columnName -> { String fieldName; if (REP_EXCERPT.equals(columnName)) { fieldName = FulltextIndexConstants.EXCERPT_NODE_FIELD_NAME; } else { fieldName = columnName.substring(REP_EXCERPT.length() + 1, columnName.length() - 1); } if (!FulltextIndexConstants.EXCERPT_NODE_FIELD_NAME.equals(fieldName)) { excerptFieldNames.add(fieldName); fieldNameToColumnNameMap.put(fieldName, columnName); } else { nodeExcerptColumns.add(columnName); } }); final boolean requireNodeLevelExcerpt = nodeExcerptColumns.size() > 0; int docID = doc.doc; List names = new LinkedList<>(); for (IndexableField field : searcher.getIndexReader().document(docID).getFields()) { String name = field.name(); // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored. if (name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) { names.add(name); } } if (!requireNodeLevelExcerpt) { names.retainAll(excerptFieldNames); } if (names.size() > 0) { int[] maxPassages = new int[names.size()]; Arrays.fill(maxPassages, 1); try { Map stringMap = postingsHighlighter.highlightFields(names.toArray(new String[names.size()]), query, searcher, new int[]{docID}, maxPassages); for (Map.Entry entry : stringMap.entrySet()) { String value = Arrays.toString(entry.getValue()); if (value.contains("")) { String fieldName = entry.getKey(); String columnName = fieldNameToColumnNameMap.get(fieldName); columnNameToExcerpts.put(columnName, value); } } } catch (Exception e) { LOG.debug("postings highlighting failed", e); } } // fallback if no excerpt could be retrieved using postings highlighter if (columnNameToExcerpts.size() == 0) { for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) { String name = field.name(); // only full text or analyzed fields if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { String text = field.stringValue(); TokenStream tokenStream = analyzer.tokenStream(name, text); try { TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1); if (textFragments != null && textFragments.length > 0) { for (TextFragment fragment : textFragments) { String columnName = null; if (name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { columnName = fieldNameToColumnNameMap.get(name.substring(FieldNames.ANALYZED_FIELD_PREFIX.length())); } if (columnName == null && requireNodeLevelExcerpt) { columnName = name; } if (columnName != null) { columnNameToExcerpts.put(columnName, fragment.toString()); } } if (excerptFieldNames.size() == 0) { break; } } } catch (InvalidTokenOffsetsException e) { LOG.error("highlighting failed", e); } } } } if (requireNodeLevelExcerpt) { String nodeExcerpt = Joiner.on("...").join(columnNameToExcerpts.values()); nodeExcerptColumns.forEach(nodeExcerptColumnName -> columnNameToExcerpts.put(nodeExcerptColumnName, nodeExcerpt)); } columnNameToExcerpts.keySet().retainAll(excerptFields); return columnNameToExcerpts; } @Override protected LuceneIndexNode acquireIndexNode(String indexPath) { if (NON_LAZY) { return tracker.acquireIndexNode(indexPath); } 
return new LazyLuceneIndexNode(tracker, indexPath); } @Override protected LuceneIndexNode acquireIndexNode(IndexPlan plan) { return (LuceneIndexNode) super.acquireIndexNode(plan); } @Override protected String getType() { return TYPE_LUCENE; } @Override protected boolean filterReplacedIndexes() { return tracker.getMountInfoProvider().hasNonDefaultMounts(); } @Override protected boolean runIsActiveIndexCheck() { return filterReplacedIndexes(); } @Override protected SizeEstimator getSizeEstimator(IndexPlan plan) { return () -> { LuceneIndexNode indexNode = acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); int totalHits = collector.getTotalHits(); LOG.debug("Estimated size for query {} is {}", query, totalHits); return totalHits; } LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest()); } catch (IOException e) { LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); } finally { indexNode.release(); } return -1; }; } @Override protected Predicate getIndexDefinitionPredicate() { return LUCENE_INDEX_DEFINITION_PREDICATE; } @Override protected String getFulltextRequestString(IndexPlan plan, IndexNode indexNode, NodeState root) { return getLuceneRequest(plan, augmentorFactory, null).toString(); } private static Sort getSort(IndexPlan plan) { List sortOrder = plan.getSortOrder(); if (sortOrder == null || sortOrder.isEmpty()) { return null; } sortOrder = removeNativeSort(sortOrder); List fieldsList = new ArrayList<>(sortOrder.size()); PlanResult planResult = getPlanResult(plan); for (int i = 0; i < sortOrder.size(); i++) { OrderEntry oe = sortOrder.get(i); PropertyDefinition pd = planResult.getOrderedProperty(i); boolean reverse = oe.getOrder() != OrderEntry.Order.ASCENDING; String propName = oe.getPropertyName(); propName = FieldNames.createDocValFieldName(propName); fieldsList.add(new SortField(propName, toLuceneSortType(oe, pd), reverse)); } if (fieldsList.isEmpty()) { return null; } else { return new Sort(fieldsList.toArray(new SortField[0])); } } /** * Remove all "jcr:score" entries. 
* * @param original the original list (is not modified) * @return the list with the entries removed */ private static List removeNativeSort(List original) { if (original == null || original.isEmpty()) { return original; } ArrayList result = new ArrayList<>(); for (OrderEntry oe : original) { if (!isNativeSort(oe)) { result.add(oe); } } return result; } /** * Identifies the default sort order used by the index (@jcr:score descending) * * @param oe order entry * @return */ private static boolean isNativeSort(OrderEntry oe) { return oe.getPropertyName().equals(NATIVE_SORT_ORDER.getPropertyName()); } private static SortField.Type toLuceneSortType(OrderEntry oe, PropertyDefinition defn) { Type t = oe.getPropertyType(); checkState(t != null, "Type cannot be null"); checkState(!t.isArray(), "Array types are not supported"); int type = getPropertyType(defn, oe.getPropertyName(), t.tag()); switch (type) { case PropertyType.LONG: case PropertyType.DATE: return SortField.Type.LONG; case PropertyType.DOUBLE: return SortField.Type.DOUBLE; default: //TODO Check about SortField.Type.STRING_VAL return SortField.Type.STRING; } } /** * Get the Lucene query for the given filter. * * @param plan index plan containing filter details * @param reader the Lucene reader * @return the Lucene query */ private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) { FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory); List qs = new ArrayList<>(); Filter filter = plan.getFilter(); FullTextExpression ft = filter.getFullTextConstraint(); PlanResult planResult = getPlanResult(plan); LuceneIndexDefinition defn = (LuceneIndexDefinition) planResult.indexDefinition; Analyzer analyzer = defn.getAnalyzer(); if (ft == null) { // there might be no full-text constraint // when using the LowCostLuceneIndexProvider // which is used for testing } else { qs.add(getFullTextQuery(plan, ft, analyzer, augmentor)); } //Check if native function is supported PropertyRestriction pr = null; if (defn.hasFunctionDefined()) { pr = filter.getPropertyRestriction(defn.getFunctionName()); } if (pr != null) { String query = String.valueOf(pr.first.getValue(pr.first.getType())); QueryParser queryParser = new QueryParser(VERSION, "", analyzer); if (query.startsWith("mlt?")) { String mltQueryString = query.replace("mlt?", ""); if (reader != null) { List sp = new LinkedList<>(); for (IndexingRule r : defn.getDefinedRules()) { sp.addAll(r.getSimilarityProperties()); } if (sp.isEmpty()) { Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString); if (moreLikeThis != null) { qs.add(moreLikeThis); } } else { Query similarityQuery = SimSearchUtils.getSimilarityQuery(sp, reader, mltQueryString); if (similarityQuery != null) { qs.add(similarityQuery); } } } } else if (query.startsWith("spellcheck?")) { String spellcheckQueryString = query.replace("spellcheck?", ""); if (reader != null) { return new LuceneRequestFacade<>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader)); } } else if (query.startsWith("suggest?")) { String suggestQueryString = query.replace("suggest?", ""); if (reader != null) { return new LuceneRequestFacade<>(SuggestHelper.getSuggestQuery(suggestQueryString)); } } else { try { qs.add(queryParser.parse(query)); } catch (ParseException e) { throw new RuntimeException(e); } } } else if (planResult.evaluateNonFullTextConstraints()) { addNonFullTextConstraints(qs, plan, reader); } if (qs.size() == 0 && 
plan.getSortOrder() != null) { //This case indicates that query just had order by and no //property restriction defined. In this case property //existence queries for each sort entry List orders = removeNativeSort(plan.getSortOrder()); for (int i = 0; i < orders.size(); i++) { OrderEntry oe = orders.get(i); PropertyDefinition pd = planResult.getOrderedProperty(i); PropertyRestriction orderRest = new PropertyRestriction(); orderRest.propertyName = oe.getPropertyName(); Query q = createQuery(oe.getPropertyName(), orderRest, pd); if (q != null) { qs.add(q); } } } if (qs.size() == 0) { if (reader == null) { //When called in planning mode then some queries like rep:similar //cannot create query as reader is not provided. In such case we //just return match all queries return new LuceneRequestFacade(new MatchAllDocsQuery()); } //For purely nodeType based queries all the documents would have to //be returned (if the index definition has a single rule) if (planResult.evaluateNodeTypeRestriction()) { return new LuceneRequestFacade(new MatchAllDocsQuery()); } throw new IllegalStateException("No query created for filter " + filter); } return performAdditionalWraps(qs); } /** * Perform additional wraps on the list of queries to allow, for example, the NOT CONTAINS to * play properly when sent to lucene. * * @param qs the list of queries. Cannot be null. * @return the request facade */ @NotNull public static LuceneRequestFacade performAdditionalWraps(@NotNull List qs) { if (qs.size() == 1) { Query q = qs.get(0); if (q instanceof BooleanQuery) { BooleanQuery ibq = (BooleanQuery) q; boolean onlyNotClauses = true; for (BooleanClause c : ibq.getClauses()) { if (c.getOccur() != BooleanClause.Occur.MUST_NOT) { onlyNotClauses = false; break; } } if (onlyNotClauses) { // if we have only NOT CLAUSES we have to add a match all docs (*.*) for the // query to work // This check is needed now for Older version of lucene(Implementation in LuceneIndex.java) ibq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); } } return new LuceneRequestFacade<>(qs.get(0)); } BooleanQuery bq = new BooleanQuery(); for (Query q : qs) { boolean unwrapped = false; if (q instanceof BooleanQuery) { unwrapped = unwrapMustNot((BooleanQuery) q, bq); } if (!unwrapped) { bq.add(q, MUST); } } return new LuceneRequestFacade<>(bq); } /** * unwraps any NOT clauses from the provided boolean query into another boolean query. * * @param input the query to be analysed for the existence of NOT clauses. Cannot be null. * @param output the query where the unwrapped NOTs will be saved into. Cannot be null. * @return true if there where at least one unwrapped NOT. false otherwise. 
*/ private static boolean unwrapMustNot(@NotNull BooleanQuery input, @NotNull BooleanQuery output) { requireNonNull(input); requireNonNull(output); boolean unwrapped = false; for (BooleanClause bc : input.getClauses()) { if (bc.getOccur() == BooleanClause.Occur.MUST_NOT) { output.add(bc); unwrapped = true; } } if (unwrapped) { // if we have unwrapped "must not" conditions, // then we need to unwrap "must" conditions as well for (BooleanClause bc : input.getClauses()) { if (bc.getOccur() == BooleanClause.Occur.MUST) { output.add(bc); } } } return unwrapped; } private static FulltextQueryTermsProvider getIndexAgumentor(IndexPlan plan, IndexAugmentorFactory augmentorFactory) { PlanResult planResult = getPlanResult(plan); if (augmentorFactory != null) { return augmentorFactory.getFulltextQueryTermsProvider(planResult.indexingRule.getNodeTypeName()); } return null; } private static void addNonFullTextConstraints(List qs, IndexPlan plan, IndexReader reader) { Filter filter = plan.getFilter(); PlanResult planResult = getPlanResult(plan); IndexDefinition defn = planResult.indexDefinition; if (!filter.matchesAllTypes()) { addNodeTypeConstraints(planResult.indexingRule, qs, filter); } String path = getPathRestriction(plan); switch (filter.getPathRestriction()) { case ALL_CHILDREN: if (defn.evaluatePathRestrictions()) { if ("/".equals(path)) { break; } qs.add(new TermQuery(newAncestorTerm(path))); } break; case DIRECT_CHILDREN: if (defn.evaluatePathRestrictions()) { BooleanQuery bq = new BooleanQuery(); bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST)); bq.add(new BooleanClause(newDepthQuery(path, planResult), BooleanClause.Occur.MUST)); qs.add(bq); } break; case EXACT: // For transformed paths, we can only add path restriction if absolute path to property can be // deduced if (planResult.isPathTransformed()) { String parentPathSegment = planResult.getParentPathSegment(); if (!CollectionUtils.toStream(PathUtils.elements(parentPathSegment)).anyMatch("*"::equals)) { qs.add(new TermQuery(newPathTerm(path + parentPathSegment))); } } else { qs.add(new TermQuery(newPathTerm(path))); } break; case PARENT: if (denotesRoot(path)) { // there's no parent of the root node // we add a path that can not possibly occur because there // is no way to say "match no documents" in Lucene qs.add(new TermQuery(new Term(FieldNames.PATH, "///"))); } else { // For transformed paths, we can only add path restriction if absolute path to property can be // deduced if (planResult.isPathTransformed()) { String parentPathSegment = planResult.getParentPathSegment(); if (!CollectionUtils.toStream(PathUtils.elements(parentPathSegment)).anyMatch("*"::equals)) { qs.add(new TermQuery(newPathTerm(getParentPath(path) + parentPathSegment))); } } else { qs.add(new TermQuery(newPathTerm(getParentPath(path)))); } } break; case NO_RESTRICTION: break; } for (PropertyRestriction pr : filter.getPropertyRestrictions()) { String name = pr.propertyName; if (QueryConstants.REP_EXCERPT.equals(name) || QueryConstants.OAK_SCORE_EXPLANATION.equals(name) || QueryConstants.REP_FACET.equals(name)) { continue; } if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) { if (planResult.evaluateNodeNameRestriction()) { Query q = createNodeNameQuery(pr); if (q != null) { qs.add(q); } } continue; } if (IndexConstants.INDEX_TAG_OPTION.equals(name) || IndexConstants.INDEX_NAME_OPTION.equals(name)) { continue; } if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { String first = 
pr.first.getValue(STRING); first = first.replace("\\", ""); if (JCR_PATH.equals(name)) { qs.add(new TermQuery(newPathTerm(first))); continue; } else if ("*".equals(name)) { //TODO Revisit reference constraint. For performant impl //references need to be indexed in a different manner addReferenceConstraint(first, qs, reader); continue; } } PropertyDefinition pd = planResult.getPropDefn(pr); if (pd == null) { continue; } Query q = createQuery(planResult.getPropertyName(pr), pr, pd); if (q != null) { qs.add(q); } } } private static Query createLikeQuery(String name, String first) { first = QueryUtils.sqlLikeToLuceneWildcardQuery(first); int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING); int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR); int len = first.length(); if (indexOfWS == len || indexOfWC == len) { // remove trailing "*" for prefix query first = first.substring(0, first.length() - 1); if (JCR_PATH.equals(name)) { return new PrefixQuery(newPathTerm(first)); } else { return new PrefixQuery(new Term(name, first)); } } else { if (JCR_PATH.equals(name)) { return new WildcardQuery(newPathTerm(first)); } else { return new WildcardQuery(new Term(name, first)); } } } @Nullable private static Query createQuery(String propertyName, PropertyRestriction pr, PropertyDefinition defn) { int propType = determinePropertyType(defn, pr); if (pr.isNullRestriction()) { return new TermQuery(new Term(FieldNames.NULL_PROPS, defn.name)); } //If notNullCheckEnabled explicitly enabled use the simple TermQuery //otherwise later fallback to range query if (pr.isNotNullRestriction() && defn.notNullCheckEnabled) { return new TermQuery(new Term(FieldNames.NOT_NULL_PROPS, defn.name)); } switch (propType) { case PropertyType.DATE: { Long first = pr.first != null ? FieldFactory.dateToLong(pr.first.getValue(Type.DATE)) : null; Long last = pr.last != null ? FieldFactory.dateToLong(pr.last.getValue(Type.DATE)) : null; Long not = pr.not != null ? FieldFactory.dateToLong(pr.not.getValue(Type.DATE)) : null; if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { // [property]=[value] return NumericRangeQuery.newLongRange(propertyName, first, first, true, true); } else if (pr.first != null && pr.last != null) { return NumericRangeQuery.newLongRange(propertyName, first, last, pr.firstIncluding, pr.lastIncluding); } else if (pr.first != null && pr.last == null) { // '>' & '>=' use cases return NumericRangeQuery.newLongRange(propertyName, first, null, pr.firstIncluding, true); } else if (pr.last != null && !pr.last.equals(pr.first)) { // '<' & '<=' return NumericRangeQuery.newLongRange(propertyName, null, last, true, pr.lastIncluding); } else if (pr.list != null) { BooleanQuery in = new BooleanQuery(); for (PropertyValue value : pr.list) { Long dateVal = FieldFactory.dateToLong(value.getValue(Type.DATE)); in.add(NumericRangeQuery.newLongRange(propertyName, dateVal, dateVal, true, true), BooleanClause.Occur.SHOULD); } return in; } else if (pr.isNotNullRestriction()) { // not null. As we are indexing generic dates which can be beyond epoch. 
So using complete numeric range return NumericRangeQuery.newLongRange(propertyName, Long.MIN_VALUE, Long.MAX_VALUE, true, true); } else if (pr.isNot && pr.not != null) { // -[property]=[value] BooleanQuery bool = new BooleanQuery(); // This will exclude entries with [property]=[value] bool.add(NumericRangeQuery.newLongRange(propertyName, not, not, true, true), MUST_NOT); return bool; } break; } case PropertyType.DOUBLE: { Double first = pr.first != null ? pr.first.getValue(DOUBLE) : null; Double last = pr.last != null ? pr.last.getValue(DOUBLE) : null; if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { // [property]=[value] return NumericRangeQuery.newDoubleRange(propertyName, first, first, true, true); } else if (pr.first != null && pr.last != null) { return NumericRangeQuery.newDoubleRange(propertyName, first, last, pr.firstIncluding, pr.lastIncluding); } else if (pr.first != null && pr.last == null) { // '>' & '>=' use cases return NumericRangeQuery.newDoubleRange(propertyName, first, null, pr.firstIncluding, true); } else if (pr.last != null && !pr.last.equals(pr.first)) { // '<' & '<=' return NumericRangeQuery.newDoubleRange(propertyName, null, last, true, pr.lastIncluding); } else if (pr.list != null) { BooleanQuery in = new BooleanQuery(); for (PropertyValue value : pr.list) { Double doubleVal = value.getValue(DOUBLE); in.add(NumericRangeQuery.newDoubleRange(propertyName, doubleVal, doubleVal, true, true), BooleanClause.Occur.SHOULD); } return in; } else if (pr.isNotNullRestriction()) { // not null. return NumericRangeQuery.newDoubleRange(propertyName, Double.MIN_VALUE, Double.MAX_VALUE, true, true); } else if (pr.isNot && pr.not != null) { // -[property]=[value] BooleanQuery bool = new BooleanQuery(); // This will exclude entries with [property]=[value] bool.add(NumericRangeQuery.newDoubleRange(propertyName, pr.not.getValue(DOUBLE), pr.not.getValue(DOUBLE), true, true), MUST_NOT); return bool; } break; } case PropertyType.LONG: { Long first = pr.first != null ? pr.first.getValue(LONG) : null; Long last = pr.last != null ? pr.last.getValue(LONG) : null; if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { // [property]=[value] return NumericRangeQuery.newLongRange(propertyName, first, first, true, true); } else if (pr.first != null && pr.last != null) { return NumericRangeQuery.newLongRange(propertyName, first, last, pr.firstIncluding, pr.lastIncluding); } else if (pr.first != null && pr.last == null) { // '>' & '>=' use cases return NumericRangeQuery.newLongRange(propertyName, first, null, pr.firstIncluding, true); } else if (pr.last != null && !pr.last.equals(pr.first)) { // '<' & '<=' return NumericRangeQuery.newLongRange(propertyName, null, last, true, pr.lastIncluding); } else if (pr.list != null) { BooleanQuery in = new BooleanQuery(); for (PropertyValue value : pr.list) { Long longVal = value.getValue(LONG); in.add(NumericRangeQuery.newLongRange(propertyName, longVal, longVal, true, true), BooleanClause.Occur.SHOULD); } return in; } else if (pr.isNotNullRestriction()) { // not null. 
return NumericRangeQuery.newLongRange(propertyName, Long.MIN_VALUE, Long.MAX_VALUE, true, true); } else if (pr.isNot && pr.not != null) { // -[property]=[value] BooleanQuery bool = new BooleanQuery(); // This will exclude entries with [property]=[value] bool.add(NumericRangeQuery.newLongRange(propertyName, pr.not.getValue(LONG), pr.not.getValue(LONG), true, true), MUST_NOT); return bool; } break; } default: { if (pr.isLike) { return createLikeQuery(propertyName, pr.first.getValue(STRING)); } //TODO Confirm that all other types can be treated as string String first = pr.first != null ? pr.first.getValue(STRING) : null; String last = pr.last != null ? pr.last.getValue(STRING) : null; if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { // [property]=[value] return new TermQuery(new Term(propertyName, first)); } else if (pr.first != null && pr.last != null) { return TermRangeQuery.newStringRange(propertyName, first, last, pr.firstIncluding, pr.lastIncluding); } else if (pr.first != null && pr.last == null) { // '>' & '>=' use cases return TermRangeQuery.newStringRange(propertyName, first, null, pr.firstIncluding, true); } else if (pr.last != null && !pr.last.equals(pr.first)) { // '<' & '<=' return TermRangeQuery.newStringRange(propertyName, null, last, true, pr.lastIncluding); } else if (pr.list != null) { BooleanQuery in = new BooleanQuery(); for (PropertyValue value : pr.list) { String strVal = value.getValue(STRING); in.add(new TermQuery(new Term(propertyName, strVal)), BooleanClause.Occur.SHOULD); } return in; } else if (pr.isNotNullRestriction()) { return new TermRangeQuery(propertyName, null, null, true, true); } else if (pr.isNot && pr.not != null) { // -[property]=[value] BooleanQuery bool = new BooleanQuery(); // This will exclude entries with [property]=[value] bool.add(new TermQuery(new Term(propertyName, pr.not.getValue(STRING))), MUST_NOT); return bool; } } } throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn); } static long getVersion(IndexSearcher indexSearcher) { IndexReader reader = indexSearcher.getIndexReader(); if (reader instanceof DirectoryReader) { return ((DirectoryReader) reader).getVersion(); } return -1; } private static Query createNodeNameQuery(PropertyRestriction pr) { String first = pr.first != null ? 
pr.first.getValue(STRING) : null; if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { // [property]=[value] return new TermQuery(new Term(FieldNames.NODE_NAME, first)); } if (pr.isLike) { return createLikeQuery(FieldNames.NODE_NAME, first); } throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported " + pr); } private static void addReferenceConstraint(String uuid, List qs, IndexReader reader) { if (reader == null) { // getPlan call qs.add(new TermQuery(new Term("*", uuid))); return; } // reference query BooleanQuery bq = new BooleanQuery(); Collection fields = MultiFields.getIndexedFields(reader); for (String f : fields) { bq.add(new TermQuery(new Term(f, uuid)), SHOULD); } qs.add(bq); } private static void addNodeTypeConstraints(IndexingRule defn, List qs, Filter filter) { BooleanQuery bq = new BooleanQuery(); PropertyDefinition primaryType = defn.getConfig(JCR_PRIMARYTYPE); //TODO OAK-2198 Add proper nodeType query support if (primaryType != null && primaryType.propertyIndex) { for (String type : filter.getPrimaryTypes()) { bq.add(new TermQuery(new Term(JCR_PRIMARYTYPE, type)), SHOULD); } } PropertyDefinition mixinType = defn.getConfig(JCR_MIXINTYPES); if (mixinType != null && mixinType.propertyIndex) { for (String type : filter.getMixinTypes()) { bq.add(new TermQuery(new Term(JCR_MIXINTYPES, type)), SHOULD); } } if (bq.clauses().size() != 0) { qs.add(bq); } } static Query getFullTextQuery(final IndexPlan plan, FullTextExpression ft, final Analyzer analyzer, final FulltextQueryTermsProvider augmentor) { final PlanResult pr = getPlanResult(plan); // a reference to the query, so it can be set in the visitor // (a "non-local return") final AtomicReference result = new AtomicReference<>(); ft.accept(new FullTextVisitor() { @Override public boolean visit(FullTextContains contains) { visitTerm(contains.getPropertyName(), contains.getRawText(), null, contains.isNot()); return true; } @Override public boolean visit(FullTextOr or) { BooleanQuery q = new BooleanQuery(); for (FullTextExpression e : or.list) { Query x = getFullTextQuery(plan, e, analyzer, augmentor); q.add(x, SHOULD); } result.set(q); return true; } @Override public boolean visit(FullTextAnd and) { BooleanQuery q = new BooleanQuery(); for (FullTextExpression e : and.list) { Query x = getFullTextQuery(plan, e, analyzer, augmentor); /* Only unwrap the clause if MUST_NOT(x) */ boolean hasMustNot = false; if (x instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) x; if ((bq.getClauses().length == 1) && (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) { hasMustNot = true; q.add(bq.getClauses()[0]); } } if (!hasMustNot) { q.add(x, MUST); } } result.set(q); return true; } @Override public boolean visit(FullTextTerm term) { return visitTerm(term.getPropertyName(), term.getText(), term.getBoost(), term.isNot()); } private boolean visitTerm(String propertyName, String text, String boost, boolean not) { String p = getLuceneFieldName(propertyName, pr); Query q = tokenToQuery(text, p, pr, analyzer, augmentor); if (q == null) { return false; } if (boost != null) { q.setBoost(Float.parseFloat(boost)); } if (not) { BooleanQuery bq = new BooleanQuery(); bq.add(q, MUST_NOT); result.set(bq); } else { result.set(q); } return true; } }); return result.get(); } static String getLuceneFieldName(@Nullable String p, PlanResult pr) { if (p == null) { return FieldNames.FULLTEXT; } if (isNodePath(p)) { if (pr.isPathTransformed()) { p = PathUtils.getName(p); } else { 
//Get rid of /* as aggregated fulltext field name is the //node relative path p = FieldNames.createFulltextFieldName(PathUtils.getParentPath(p)); } } else { if (pr.isPathTransformed()) { p = PathUtils.getName(p); } p = FieldNames.createAnalyzedFieldName(p); } if ("*".equals(p)) { p = FieldNames.FULLTEXT; } return p; } private static Query tokenToQuery(String text, String fieldName, PlanResult pr, Analyzer analyzer, FulltextQueryTermsProvider augmentor) { Query ret; IndexingRule indexingRule = pr.indexingRule; //Expand the query on fulltext field if (FieldNames.FULLTEXT.equals(fieldName) && !indexingRule.getNodeScopeAnalyzedProps().isEmpty()) { BooleanQuery in = new BooleanQuery(); for (PropertyDefinition pd : indexingRule.getNodeScopeAnalyzedProps()) { Query q = tokenToQuery(text, FieldNames.createAnalyzedFieldName(pd.name), analyzer); q.setBoost(pd.boost); in.add(q, BooleanClause.Occur.SHOULD); } //Add the query for actual fulltext field also. That query would //not be boosted in.add(tokenToQuery(text, fieldName, analyzer), BooleanClause.Occur.SHOULD); ret = in; } else { ret = tokenToQuery(text, fieldName, analyzer); } //Augment query terms if available (as a 'SHOULD' clause) if (FieldNames.FULLTEXT.equals(fieldName)) { Query subQuery = new BooleanQuery(); if (pr.indexDefinition.isDynamicBoostLiteEnabled()) { subQuery = tokenToQuery(text, FieldNames.SIMILARITY_TAGS, analyzer); // De-boosting dynamic boost based query so other clauses will have more relevance subQuery.setBoost(DYNAMIC_BOOST_WEIGHT); } else if (augmentor != null) { subQuery = augmentor.getQueryTerm(text, analyzer, pr.indexDefinition.getDefinitionNodeState()); } if (subQuery != null) { BooleanQuery query = new BooleanQuery(); query.add(ret, BooleanClause.Occur.SHOULD); query.add(subQuery, BooleanClause.Occur.SHOULD); ret = query; } } return ret; } static Query tokenToQuery(String text, String fieldName, Analyzer analyzer) { if (analyzer == null) { return null; } StandardQueryParser parserHelper = new StandardQueryParser(analyzer); parserHelper.setAllowLeadingWildcard(true); parserHelper.setDefaultOperator(StandardQueryConfigHandler.Operator.AND); text = rewriteQueryText(text); try { return parserHelper.parse(text, fieldName); } catch (QueryNodeException e) { throw new RuntimeException(e); } } private static Query newDepthQuery(String path, PlanResult planResult) { int depth = PathUtils.getDepth(path) + planResult.getParentDepth() + 1; return NumericRangeQuery.newIntRange(FieldNames.PATH_DEPTH, depth, depth, true, true); } @SuppressWarnings("Guava") private static Iterator mergePropertyIndexResult(IndexPlan plan, NodeState rootState, Iterator itr) { PlanResult pr = getPlanResult(plan); HybridPropertyIndexLookup lookup = new HybridPropertyIndexLookup(pr.indexPath, NodeStateUtils.getNode(rootState, pr.indexPath), plan.getPathPrefix(), false); PropertyIndexResult pir = pr.getPropertyIndexResult(); FluentIterable paths; if (pir != null) { Iterable queryResult = lookup.query(plan.getFilter(), pir.propertyName, pir.pr); paths = FluentIterable.from(queryResult) .transform(path -> pr.isPathTransformed() ? 
pr.transformPath(path) : path) .filter(x -> x != null); } else { checkState(pr.evaluateSyncNodeTypeRestriction()); //Either of property or nodetype should not be null Filter filter = plan.getFilter(); paths = FluentIterable.from(Iterables.concat( lookup.query(filter, JCR_PRIMARYTYPE, newName(filter.getPrimaryTypes())), lookup.query(filter, JCR_MIXINTYPES, newName(filter.getMixinTypes())))); } //No need for path restriction evaluation as thats taken care by PropertyIndex impl itself //via content mirror strategy FluentIterable propIndex = paths .transform(path -> new FulltextResultRow(path, 0, null, null, null)); //Property index itr should come first return Iterators.concat(propIndex.iterator(), itr); } class DelayedLuceneFacetProvider implements FacetProvider { private final LucenePropertyIndex index; private final Query query; private final IndexPlan plan; private final SecureFacetConfiguration config; private final Map> cachedResults = new HashMap<>(); DelayedLuceneFacetProvider(LucenePropertyIndex index, Query query, IndexPlan plan, SecureFacetConfiguration config) { this.index = index; this.query = query; this.plan = plan; this.config = config; } @Override public List getFacets(int numberOfFacets, String columnName) throws IOException { if (!CACHE_FACET_RESULTS) { LOG.trace("{} = {} getting uncached results for columnName = {}", CACHE_FACET_RESULTS_NAME, CACHE_FACET_RESULTS, columnName); return getFacetsUncached(numberOfFacets, columnName); } String cacheKey = columnName + "/" + numberOfFacets; if (cachedResults.containsKey(cacheKey)) { LOG.trace("columnName = {} returning Facet Data from cache.", columnName); return cachedResults.get(cacheKey); } LOG.trace("columnName = {} facet Data not present in cache...", columnName); if (EAGER_FACET_CACHE_FILL) { fillFacetCache(numberOfFacets); if (cachedResults.containsKey(cacheKey)) { LOG.trace("columnName = {} now found", cacheKey); return cachedResults.get(cacheKey); } LOG.warn("Facet data for {} not found: read using query", cacheKey); } List result = getFacetsUncached(numberOfFacets, columnName); cachedResults.put(cacheKey, result); return result; } private List fillFacetCache(int numberOfFacets) throws IOException { List result = null; LuceneIndexNode indexNode = index.acquireIndexNode(plan); try { IndexSearcher searcher = indexNode.getSearcher(); Facets facets = FacetHelper.getFacets(searcher, query, plan, config); if (facets != null) { List allColumnNames = FacetHelper.getFacetColumnNamesFromPlan(plan); for (String column : allColumnNames) { result = getFacetsUncached(facets, numberOfFacets, column); String cc = column + "/" + numberOfFacets; cachedResults.put(cc, result); } } } finally { indexNode.release(); } return result; } private List getFacetsUncached(int numberOfFacets, String columnName) throws IOException { LuceneIndexNode indexNode = index.acquireIndexNode(plan); try { IndexSearcher searcher = indexNode.getSearcher(); String facetFieldName = FulltextIndex.parseFacetField(columnName); Facets facets = FacetHelper.getFacets(searcher, query, plan, config); if (facets != null) { try { ImmutableList.Builder res = new ImmutableList.Builder<>(); FacetResult topChildren = facets.getTopChildren(numberOfFacets, facetFieldName); if (topChildren != null) { for (LabelAndValue lav : topChildren.labelValues) { res.add(new Facet( lav.label, lav.value.intValue() )); } return res.build(); } } catch (IllegalArgumentException iae) { LOG.debug(iae.getMessage(), iae); LOG.warn("facets for {} not yet indexed: " + iae, facetFieldName); } } return 
null; } finally { indexNode.release(); } } private List getFacetsUncached(Facets facets, int numberOfFacets, String columnName) throws IOException { String facetFieldName = FulltextIndex.parseFacetField(columnName); try { ImmutableList.Builder res = new ImmutableList.Builder<>(); FacetResult topChildren = facets.getTopChildren(numberOfFacets, facetFieldName); if (topChildren == null) { return null; } for (LabelAndValue lav : topChildren.labelValues) { res.add(new Facet( lav.label, lav.value.intValue() )); } return res.build(); } catch (IllegalArgumentException iae) { LOG.debug(iae.getMessage(), iae); LOG.warn("facets for {} not yet indexed: {}", facetFieldName, iae); return null; } } } static class LuceneFacetProvider implements FacetProvider { private final Facets facets; LuceneFacetProvider(Facets facets) { this.facets = facets; } @Override public List getFacets(int numberOfFacets, String columnName) throws IOException { String facetFieldName = FulltextIndex.parseFacetField(columnName); if (facets != null) { try { ImmutableList.Builder res = new ImmutableList.Builder<>(); FacetResult topChildren = facets.getTopChildren(numberOfFacets, facetFieldName); if (topChildren != null) { for (LabelAndValue lav : topChildren.labelValues) { res.add(new Facet( lav.label, lav.value.intValue() )); } return res.build(); } } catch (IllegalArgumentException iae) { LOG.debug(iae.getMessage(), iae); LOG.warn("facets for {} not yet indexed: " + iae, facetFieldName); } } return null; } } /** * A index node implementation that acquires the underlying index only if * actually needed. This is to avoid downloading the index for the planning * phase, if there is no chance that the index is actually used. */ static class LazyLuceneIndexNode implements LuceneIndexNode { private AtomicBoolean released = new AtomicBoolean(); private IndexTracker tracker; private String indexPath; private volatile LuceneIndexNode indexNode; LazyLuceneIndexNode(IndexTracker tracker, String indexPath) { this.tracker = tracker; this.indexPath = indexPath; } @Override public void release() { if (released.getAndSet(true)) { // already released return; } if (indexNode != null) { indexNode.release(); } // to ensure it is not used after releasing indexNode = null; tracker = null; indexPath = null; } private void checkNotReleased() { if (released.get()) { throw new IllegalStateException("Already released"); } } @Override public LuceneIndexDefinition getDefinition() { checkNotReleased(); return tracker.getIndexDefinition(indexPath); } private LuceneIndexNode getIndexNode() { LuceneIndexNode n = findIndexNode(); if (n == null) { String message = "No index node, corrupt index? 
" + indexPath; LOG.warn(message); throw new IllegalStateException(message); } return n; } @Nullable private LuceneIndexNode findIndexNode() { checkNotReleased(); LuceneIndexNode n = indexNode; // double checked locking implemented in the correct way for Java 5 // and newer (actually I don't think this is ever called // concurrently right now, but better be save) if (n == null) { synchronized (this) { n = indexNode; if (n == null) { n = indexNode = tracker.acquireIndexNode(indexPath); } } } return n; } @Override public int getIndexNodeId() { return getIndexNode().getIndexNodeId(); } @Nullable @Override public LuceneIndexStatistics getIndexStatistics() { LuceneIndexNode n = findIndexNode(); if (n == null) { return null; } return n.getIndexStatistics(); } @Override public IndexSearcher getSearcher() { return getIndexNode().getSearcher(); } @Override public List getPrimaryReaders() { return getIndexNode().getPrimaryReaders(); } @Override public @Nullable Directory getSuggestDirectory() { return getIndexNode().getSuggestDirectory(); } @Override public List getNRTReaders() { return getIndexNode().getNRTReaders(); } @Override public @Nullable AnalyzingInfixSuggester getLookup() { return getIndexNode().getLookup(); } @Override public @Nullable LuceneIndexWriter getLocalWriter() throws IOException { return getIndexNode().getLocalWriter(); } @Override public void refreshReadersOnWriteIfRequired() { getIndexNode().refreshReadersOnWriteIfRequired(); } } static abstract class LuceneResultRowIterator extends AbstractIterator implements IteratorRewoundStateProvider { } }




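The class above reads its tuning knobs from JVM system properties (oak.lucene.loadDocsWarn, oak.lucene.loadDocsStop, oak.lucene.nonLazyIndex, oak.lucene.oldFacetProvider, oak.lucene.cacheFacetResults and oak.lucene.cacheFacetEagerFill). The sketch below is illustrative only: the class name is hypothetical, the values simply mirror the defaults visible in the source, and the same flags could equally be passed as -D options on the JVM command line.

// Illustrative sketch only; values mirror the defaults used in LucenePropertyIndex.
// Most of these flags are read in static initializers, so they have to be set
// (or supplied as -D JVM options) before the class is loaded by the repository.
public class LucenePropertyIndexTuningExample {
    public static void main(String[] args) {
        System.setProperty("oak.lucene.loadDocsWarn", String.valueOf(30 * 1000L));    // warn after ~30s of batch loading
        System.setProperty("oak.lucene.loadDocsStop", String.valueOf(3 * 60 * 1000L)); // stop batch loading after ~3min
        System.setProperty("oak.lucene.nonLazyIndex", "true");         // acquire index nodes eagerly
        System.setProperty("oak.lucene.oldFacetProvider", "false");    // use the delayed facet provider
        System.setProperty("oak.lucene.cacheFacetResults", "true");    // cache facet results per query
        System.setProperty("oak.lucene.cacheFacetEagerFill", "true");  // eagerly fill the facet cache
    }
}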