org.elasticsearch.search.fetch.FetchPhase Maven / Gradle / Ivy
Elasticsearch subproject :distribution:archives:integ-test-zip
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.fetch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.TotalHits;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.LeafNestedDocuments;
import org.elasticsearch.search.NestedDocuments;
import org.elasticsearch.search.SearchContextSourcePrinter;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.search.profile.ProfileResult;
import org.elasticsearch.tasks.TaskCancelledException;
import org.elasticsearch.xcontent.XContentType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import static java.util.Collections.emptyMap;
/**
* Fetch phase of a search request, used to fetch the actual top matching documents to be returned to the client, identified
* after reducing all of the matches returned by the query phase
*/
public class FetchPhase {
private static final Logger LOGGER = LogManager.getLogger(FetchPhase.class);
private final FetchSubPhase[] fetchSubPhases;
public FetchPhase(List<FetchSubPhase> fetchSubPhases) {
this.fetchSubPhases = fetchSubPhases.toArray(new FetchSubPhase[fetchSubPhases.size() + 1]);
this.fetchSubPhases[fetchSubPhases.size()] = new InnerHitsPhase(this);
}
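// Note, not part of the original source: the constructor above sizes the backing array one larger
// than the supplied list so that an InnerHitsPhase, built with a back-reference to this FetchPhase,
// always runs as the final sub-phase; inner hits are then fetched by re-entering this same fetch phase.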
public void execute(SearchContext context) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("{}", new SearchContextSourcePrinter(context));
}
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled");
}
if (context.docIdsToLoadSize() == 0) {
// no individual hits to process, so we shortcut
SearchHits hits = new SearchHits(new SearchHit[0], context.queryResult().getTotalHits(), context.queryResult().getMaxScore());
context.fetchResult().shardResult(hits, null);
return;
}
Profiler profiler = context.getProfilers() == null ? Profiler.NOOP : context.getProfilers().startProfilingFetchPhase();
SearchHits hits = null;
try {
hits = buildSearchHits(context, profiler);
} finally {
// Always finish profiling
ProfileResult profileResult = profiler.finish();
// Only set the shardResults if building search hits was successful
if (hits != null) {
context.fetchResult().shardResult(hits, profileResult);
}
}
}
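// Overview, not part of the original source: buildSearchHits() below walks the requested doc ids in
// doc-id order, switches leaf readers when it crosses a segment boundary, builds a HitContext per
// document via prepareHitContext(), and then runs every FetchSubPhaseProcessor returned by
// getProcessors() against that hit before the assembled SearchHits are handed to the fetch result.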
private SearchHits buildSearchHits(SearchContext context, Profiler profiler) {
DocIdToIndex[] docs = new DocIdToIndex[context.docIdsToLoadSize()];
for (int index = 0; index < context.docIdsToLoadSize(); index++) {
docs[index] = new DocIdToIndex(context.docIdsToLoad()[index], index);
}
// make sure that we iterate in doc id order
Arrays.sort(docs);
Map<String, Set<String>> storedToRequestedFields = new HashMap<>();
FieldsVisitor fieldsVisitor = createStoredFieldsVisitor(context, storedToRequestedFields);
profiler.visitor(fieldsVisitor);
FetchContext fetchContext = new FetchContext(context);
SearchHit[] hits = new SearchHit[context.docIdsToLoadSize()];
List<FetchSubPhaseProcessor> processors = getProcessors(context.shardTarget(), fetchContext, profiler);
NestedDocuments nestedDocuments = context.getSearchExecutionContext().getNestedDocuments();
int currentReaderIndex = -1;
LeafReaderContext currentReaderContext = null;
LeafNestedDocuments leafNestedDocuments = null;
CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader = null;
boolean hasSequentialDocs = hasSequentialDocs(docs);
for (int index = 0; index < context.docIdsToLoadSize(); index++) {
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled");
}
int docId = docs[index].docId;
try {
int readerIndex = ReaderUtil.subIndex(docId, context.searcher().getIndexReader().leaves());
if (currentReaderIndex != readerIndex) {
profiler.startNextReader();
try {
currentReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
currentReaderIndex = readerIndex;
if (currentReaderContext.reader() instanceof SequentialStoredFieldsLeafReader
&& hasSequentialDocs
&& docs.length >= 10) {
// All the docs to fetch are adjacent but Lucene stored fields are optimized
// for random access and don't optimize for sequential access - except for merging.
// So we do a little hack here and pretend we're going to do merges in order to
// get better sequential access.
SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) currentReaderContext.reader();
fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
} else {
fieldReader = currentReaderContext.reader()::document;
}
for (FetchSubPhaseProcessor processor : processors) {
processor.setNextReader(currentReaderContext);
}
leafNestedDocuments = nestedDocuments.getLeafNestedDocuments(currentReaderContext);
} finally {
profiler.stopNextReader();
}
}
assert currentReaderContext != null;
HitContext hit = prepareHitContext(
context,
profiler,
leafNestedDocuments,
fieldsVisitor,
docId,
storedToRequestedFields,
currentReaderContext,
fieldReader
);
for (FetchSubPhaseProcessor processor : processors) {
processor.process(hit);
}
hits[docs[index].index] = hit.hit();
} catch (Exception e) {
throw new FetchPhaseExecutionException(context.shardTarget(), "Error running fetch phase for doc [" + docId + "]", e);
}
}
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled");
}
TotalHits totalHits = context.queryResult().getTotalHits();
return new SearchHits(hits, totalHits, context.queryResult().getMaxScore());
}
List<FetchSubPhaseProcessor> getProcessors(SearchShardTarget target, FetchContext context, Profiler profiler) {
try {
List<FetchSubPhaseProcessor> processors = new ArrayList<>();
for (FetchSubPhase fsp : fetchSubPhases) {
FetchSubPhaseProcessor processor = fsp.getProcessor(context);
if (processor != null) {
processors.add(profiler.profile(fsp.getClass().getSimpleName(), "", processor));
}
}
return processors;
} catch (Exception e) {
throw new FetchPhaseExecutionException(target, "Error building fetch sub-phases", e);
}
}
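// Illustrative sketch, not part of the original source: a minimal FetchSubPhase wired the way
// getProcessors() and buildSearchHits() above expect. getProcessor() may return null to opt out for
// a given request; setNextReader() is invoked whenever the leaf reader changes; process() is invoked
// once per hit. The class name and the "example_field" value are invented, and the exact interface
// signatures (and SearchHit#setDocumentField) are assumed from the calls made in this file.
static class ExampleTagSubPhase implements FetchSubPhase {
@Override
public FetchSubPhaseProcessor getProcessor(FetchContext fetchContext) {
return new FetchSubPhaseProcessor() {
@Override
public void setNextReader(LeafReaderContext readerContext) {
// per-segment setup (e.g. opening doc values) would go here
}
@Override
public void process(HitContext hitContext) {
// hitContext.hit() exposes the SearchHit under construction, just as
// processor.process(hit) uses it in buildSearchHits() above
hitContext.hit().setDocumentField(
"example_field",
new DocumentField("example_field", Collections.singletonList("example"))
);
}
};
}
}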
static class DocIdToIndex implements Comparable<DocIdToIndex> {
final int docId;
final int index;
DocIdToIndex(int docId, int index) {
this.docId = docId;
this.index = index;
}
@Override
public int compareTo(DocIdToIndex o) {
return Integer.compare(docId, o.docId);
}
}
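// Illustrative sketch, not part of the original source: buildSearchHits() above calls
// hasSequentialDocs(docs), whose definition falls outside this excerpt. Given that the array has
// just been sorted by doc id, a contiguity check of roughly this shape is what that usage implies;
// the helper name and body below are an assumption, not the verbatim implementation.
private static boolean docsFormOneContiguousRun(DocIdToIndex[] docs) {
// sorted by doc id, so the run is contiguous exactly when last - first spans the whole array
return docs.length > 0 && docs[docs.length - 1].docId - docs[0].docId == docs.length - 1;
}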
private FieldsVisitor createStoredFieldsVisitor(SearchContext context, Map<String, Set<String>> storedToRequestedFields) {
StoredFieldsContext storedFieldsContext = context.storedFieldsContext();
if (storedFieldsContext == null) {
// no fields specified, default to return source if no explicit indication
if (context.hasScriptFields() == false && context.hasFetchSourceContext() == false) {
context.fetchSourceContext(new FetchSourceContext(true));
}
boolean loadSource = sourceRequired(context);
return new FieldsVisitor(loadSource);
} else if (storedFieldsContext.fetchFields() == false) {
// disable stored fields entirely
return null;
} else {
for (String fieldNameOrPattern : context.storedFieldsContext().fieldNames()) {
if (fieldNameOrPattern.equals(SourceFieldMapper.NAME)) {
FetchSourceContext fetchSourceContext = context.hasFetchSourceContext()
? context.fetchSourceContext()
: FetchSourceContext.FETCH_SOURCE;
context.fetchSourceContext(new FetchSourceContext(true, fetchSourceContext.includes(), fetchSourceContext.excludes()));
continue;
}
SearchExecutionContext searchExecutionContext = context.getSearchExecutionContext();
Collection<String> fieldNames = searchExecutionContext.getMatchingFieldNames(fieldNameOrPattern);
for (String fieldName : fieldNames) {
MappedFieldType fieldType = searchExecutionContext.getFieldType(fieldName);
String storedField = fieldType.name();
Set<String> requestedFields = storedToRequestedFields.computeIfAbsent(storedField, key -> new HashSet<>());
requestedFields.add(fieldName);
}
}
boolean loadSource = sourceRequired(context);
if (storedToRequestedFields.isEmpty()) {
// empty list specified, default to disable _source if no explicit indication
return new FieldsVisitor(loadSource);
} else {
return new CustomFieldsVisitor(storedToRequestedFields.keySet(), loadSource);
}
}
}
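// Illustrative example, not part of the original source: for a request such as
// "stored_fields": ["title", "user.*"] (field names invented), the loop above expands the pattern
// via getMatchingFieldNames() and fills storedToRequestedFields roughly as
//   "title"      -> {"title"}
//   "user.first" -> {"user.first"}
//   "user.last"  -> {"user.last"}
// so that CustomFieldsVisitor loads exactly those stored fields while the requested names are kept
// for the response.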
private boolean sourceRequired(SearchContext context) {
return context.sourceRequested() || context.fetchFieldsContext() != null;
}
private HitContext prepareHitContext(
SearchContext context,
Profiler profiler,
LeafNestedDocuments nestedDocuments,
FieldsVisitor fieldsVisitor,
int docId,
Map<String, Set<String>> storedToRequestedFields,
LeafReaderContext subReaderContext,
CheckedBiConsumer<Integer, FieldsVisitor, IOException> storedFieldReader
) throws IOException {
if (nestedDocuments.advance(docId - subReaderContext.docBase) == null) {
return prepareNonNestedHitContext(
context,
profiler,
fieldsVisitor,
docId,
storedToRequestedFields,
subReaderContext,
storedFieldReader
);
} else {
return prepareNestedHitContext(
context,
profiler,
docId,
nestedDocuments,
storedToRequestedFields,
subReaderContext,
storedFieldReader
);
}
}
/**
* Resets the provided {@link HitContext} with information on the current
* document. This includes the following:
* - Adding an initial {@link SearchHit} instance.
* - Loading the document source and setting it on {@link HitContext#sourceLookup()}. This
* allows fetch subphases that use the hit context to access the preloaded source.
*/
private HitContext prepareNonNestedHitContext(
SearchContext context,
Profiler profiler,
FieldsVisitor fieldsVisitor,
int docId,
Map<String, Set<String>> storedToRequestedFields,
LeafReaderContext subReaderContext,
CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader
) throws IOException {
int subDocId = docId - subReaderContext.docBase;
SearchExecutionContext searchExecutionContext = context.getSearchExecutionContext();
if (fieldsVisitor == null) {
SearchHit hit = new SearchHit(docId, null, new Text(searchExecutionContext.getType()), null, null);
return new HitContext(hit, subReaderContext, subDocId);
} else {
SearchHit hit;
loadStoredFields(
context.getSearchExecutionContext()::getFieldType,
profiler,
searchExecutionContext.getType(),
fieldReader,
fieldsVisitor,
subDocId
);
Uid uid = fieldsVisitor.uid();
if (fieldsVisitor.fields().isEmpty() == false) {
Map<String, DocumentField> docFields = new HashMap<>();
Map<String, DocumentField> metaFields = new HashMap<>();
fillDocAndMetaFields(context, fieldsVisitor, storedToRequestedFields, docFields, metaFields);
hit = new SearchHit(docId, uid.id(), new Text(searchExecutionContext.getType()), docFields, metaFields);
} else {
hit = new SearchHit(docId, uid.id(), new Text(searchExecutionContext.getType()), emptyMap(), emptyMap());
}
HitContext hitContext = new HitContext(hit, subReaderContext, subDocId);
if (fieldsVisitor.source() != null) {
// Store the loaded source on the hit context so that fetch subphases can access it.
// Also make it available to scripts by storing it on the shared SearchLookup instance.
hitContext.sourceLookup().setSource(fieldsVisitor.source());
SourceLookup scriptSourceLookup = context.getSearchExecutionContext().lookup().source();
scriptSourceLookup.setSegmentAndDocument(subReaderContext, subDocId);
scriptSourceLookup.setSource(fieldsVisitor.source());
}
return hitContext;
}
}
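// Illustrative sketch, not part of the original source: because the raw source bytes are set on
// hitContext.sourceLookup() above, a downstream FetchSubPhaseProcessor can read parsed source values
// in process() without re-reading stored fields, along the lines of (path invented, extractValue is
// assumed to be available on SourceLookup):
//   Object value = hitContext.sourceLookup().extractValue("example.path", null);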
/**
* Resets the provided {@link HitContext} with information on the current
* nested document. This includes the following:
* - Adding an initial {@link SearchHit} instance.
* - Loading the document source, filtering it based on the nested document ID, then
* setting it on {@link HitContext#sourceLookup()}. This allows fetch subphases that
* use the hit context to access the preloaded source.
*/
@SuppressWarnings("unchecked")
private HitContext prepareNestedHitContext(
SearchContext context,
Profiler profiler,
int topDocId,
LeafNestedDocuments nestedInfo,
Map<String, Set<String>> storedToRequestedFields,
LeafReaderContext subReaderContext,
CheckedBiConsumer<Integer, FieldsVisitor, IOException> storedFieldReader
) throws IOException {
// If highlighting is requested on nested documents we need to fetch the _source from the root document,
// otherwise highlighting will attempt to fetch the _source from the nested doc, which will fail,
// because the entire _source is only stored with the root document.
boolean needSource = sourceRequired(context) || context.highlight() != null;
Uid rootId;
Map<String, Object> rootSourceAsMap = null;
XContentType rootSourceContentType = null;
SearchExecutionContext searchExecutionContext = context.getSearchExecutionContext();
if (context instanceof InnerHitsContext.InnerHitSubContext) {
InnerHitsContext.InnerHitSubContext innerHitsContext = (InnerHitsContext.InnerHitSubContext) context;
rootId = innerHitsContext.getRootId();
if (needSource) {
SourceLookup rootLookup = innerHitsContext.getRootLookup();
rootSourceAsMap = rootLookup.source();
rootSourceContentType = rootLookup.sourceContentType();
}
} else {
FieldsVisitor rootFieldsVisitor = new FieldsVisitor(needSource);
loadStoredFields(
searchExecutionContext::getFieldType,
profiler,
searchExecutionContext.getType(),
storedFieldReader,
rootFieldsVisitor,
nestedInfo.rootDoc()
);
rootFieldsVisitor.postProcess(searchExecutionContext::getFieldType, searchExecutionContext.getType());
rootId = rootFieldsVisitor.uid();
if (needSource) {
if (rootFieldsVisitor.source() != null) {
Tuple<XContentType, Map<String, Object>> tuple = XContentHelper.convertToMap(rootFieldsVisitor.source(), false);
rootSourceAsMap = tuple.v2();
rootSourceContentType = tuple.v1();
} else {
rootSourceAsMap = Collections.emptyMap();
}
}
}
Map<String, DocumentField> docFields = emptyMap();
Map<String, DocumentField> metaFields = emptyMap();
if (context.hasStoredFields() && context.storedFieldsContext().fieldNames().isEmpty() == false) {
FieldsVisitor nestedFieldsVisitor = new CustomFieldsVisitor(storedToRequestedFields.keySet(), false);
loadStoredFields(
searchExecutionContext::getFieldType,
profiler,
searchExecutionContext.getType(),
storedFieldReader,
nestedFieldsVisitor,
nestedInfo.doc()
);
if (nestedFieldsVisitor.fields().isEmpty() == false) {
docFields = new HashMap<>();
metaFields = new HashMap<>();
fillDocAndMetaFields(context, nestedFieldsVisitor, storedToRequestedFields, docFields, metaFields);
}
}
SearchHit.NestedIdentity nestedIdentity = nestedInfo.nestedIdentity();
SearchHit hit = new SearchHit(
topDocId,
rootId.id(),
new Text(searchExecutionContext.getType()),
nestedIdentity,
docFields,
metaFields
);
HitContext hitContext = new HitContext(hit, subReaderContext, nestedInfo.doc());
if (rootSourceAsMap != null && rootSourceAsMap.isEmpty() == false) {
// Isolate the nested json array object that matches the nested hit and wrap it back into the
// same json structure, with the nested json array object as the actual content. The latter is
// important so that features like source filtering and highlighting work consistently
// regardless of whether the field points to a single json object or an array of json objects.
Map<String, Object> nestedSourceAsMap = new HashMap<>();
Map<String, Object> current = nestedSourceAsMap;
for (SearchHit.NestedIdentity nested = nestedIdentity; nested != null; nested = nested.getChild()) {
String nestedPath = nested.getField().string();
current.put(nestedPath, new HashMap<>());
List