/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.percolator;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.memory.ExtendedMemoryIndex;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.percolate.PercolateResponse;
import org.elasticsearch.action.percolate.PercolateShardRequest;
import org.elasticsearch.action.percolate.PercolateShardResponse;
import org.elasticsearch.cache.recycler.PageCacheRecycler;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.common.HasContextAndHeaders;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.Mapping;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.percolator.stats.ShardPercolateService;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.percolator.QueryCollector.Count;
import org.elasticsearch.percolator.QueryCollector.Match;
import org.elasticsearch.percolator.QueryCollector.MatchAndScore;
import org.elasticsearch.percolator.QueryCollector.MatchAndSort;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.AggregationPhase;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.pipeline.SiblingPipelineAggregator;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.highlight.HighlightPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.sort.SortParseElement;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.common.util.CollectionUtils.eagerTransform;
import static org.elasticsearch.index.mapper.SourceToParse.source;
import static org.elasticsearch.percolator.QueryCollector.count;
import static org.elasticsearch.percolator.QueryCollector.match;
import static org.elasticsearch.percolator.QueryCollector.matchAndScore;
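/**
 * Shard-level percolation service: given the document(s) in a percolate request, it
 * builds an in-memory Lucene index for them and evaluates the percolator queries
 * registered under the internal ".percolator" type against that index, collecting
 * the ids of the queries that match.
 *
 * Illustrative REST usage (a sketch of the percolate API this service backs; the
 * exact endpoints belong to the REST layer, not to this class):
 *
 *   PUT /my-index/.percolator/1      { "query": { "match": { "body": "quick fox" } } }
 *   GET /my-index/my-type/_percolate { "doc": { "body": "a quick brown fox" } }
 */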
public class PercolatorService extends AbstractComponent {
public final static float NO_SCORE = Float.NEGATIVE_INFINITY;
public final static String TYPE_NAME = ".percolator";
private final IndexNameExpressionResolver indexNameExpressionResolver;
private final IndicesService indicesService;
private final IntObjectHashMap<PercolatorType> percolatorTypes;
private final PageCacheRecycler pageCacheRecycler;
private final BigArrays bigArrays;
private final ClusterService clusterService;
private final PercolatorIndex single;
private final PercolatorIndex multi;
private final HighlightPhase highlightPhase;
private final AggregationPhase aggregationPhase;
private final SortParseElement sortParseElement;
private final ScriptService scriptService;
private final MappingUpdatedAction mappingUpdatedAction;
private final CloseableThreadLocal<MemoryIndex> cache;
private final ParseFieldMatcher parseFieldMatcher;
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
HighlightPhase highlightPhase, ClusterService clusterService,
AggregationPhase aggregationPhase, ScriptService scriptService,
MappingUpdatedAction mappingUpdatedAction) {
super(settings);
this.indexNameExpressionResolver = indexNameExpressionResolver;
this.parseFieldMatcher = new ParseFieldMatcher(settings);
this.indicesService = indicesService;
this.pageCacheRecycler = pageCacheRecycler;
this.bigArrays = bigArrays;
this.clusterService = clusterService;
this.highlightPhase = highlightPhase;
this.aggregationPhase = aggregationPhase;
this.scriptService = scriptService;
this.mappingUpdatedAction = mappingUpdatedAction;
this.sortParseElement = new SortParseElement();
final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
cache = new CloseableThreadLocal<MemoryIndex>() {
@Override
protected MemoryIndex initialValue() {
// TODO: should we expose payloads as an option? should offsets be turned on always?
return new ExtendedMemoryIndex(true, false, maxReuseBytes);
}
};
single = new SingleDocumentPercolatorIndex(cache);
multi = new MultiDocumentPercolatorIndex(cache);
percolatorTypes = new IntObjectHashMap<>(6);
percolatorTypes.put(countPercolator.id(), countPercolator);
percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
percolatorTypes.put(matchPercolator.id(), matchPercolator);
percolatorTypes.put(queryPercolator.id(), queryPercolator);
percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
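/**
 * Reduces the shard-level responses into a single result, delegating to the
 * {@link PercolatorType} strategy identified by the given type id.
 */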
public ReduceResult reduce(byte percolatorTypeId, List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
PercolatorType percolatorType = percolatorTypes.get(percolatorTypeId);
return percolatorType.reduce(shardResults, headersContext);
}
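/**
 * Executes a percolate request on this shard: resolves the target shard and alias
 * filters, parses the request body (document, query/filter, size, sort, highlighting,
 * aggregations), indexes the document(s) into an in-memory index, and runs the
 * selected {@link PercolatorType} strategy against the registered percolator queries.
 */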
public PercolateShardResponse percolate(PercolateShardRequest request) {
IndexService percolateIndexService = indicesService.indexServiceSafe(request.shardId().getIndex());
IndexShard indexShard = percolateIndexService.shardSafe(request.shardId().id());
indexShard.readAllowed(); // check if we can read the shard...
ShardPercolateService shardPercolateService = indexShard.shardPercolateService();
shardPercolateService.prePercolate();
long startTime = System.nanoTime();
// TODO: The filteringAliases should be looked up at the coordinating node and serialized with all shard requests,
// just as is done in other APIs.
String[] filteringAliases = indexNameExpressionResolver.filteringAliases(
clusterService.state(),
indexShard.shardId().index().name(),
request.indices()
);
Query aliasFilter = percolateIndexService.aliasesService().aliasFilter(filteringAliases);
SearchShardTarget searchShardTarget = new SearchShardTarget(clusterService.localNode().id(), request.shardId().getIndex(), request.shardId().id());
final PercolateContext context = new PercolateContext(
request, searchShardTarget, indexShard, percolateIndexService, pageCacheRecycler, bigArrays, scriptService, aliasFilter, parseFieldMatcher
);
// Some queries (e.g. the function_score query with decay functions) rely on a SearchContext being set:
// We switch types because this context needs to be in the context of the percolate queries in the shard,
// not the in-memory percolate doc
String[] previousTypes = context.types();
context.types(new String[]{TYPE_NAME});
SearchContext.setCurrent(context);
try {
ParsedDocument parsedDocument = parseRequest(percolateIndexService, request, context, request.shardId().getIndex());
if (context.percolateQueries().isEmpty()) {
return new PercolateShardResponse(context, request.shardId());
}
if (request.docSource() != null && request.docSource().length() != 0) {
parsedDocument = parseFetchedDoc(context, request.docSource(), percolateIndexService, request.shardId().getIndex(), request.documentType());
} else if (parsedDocument == null) {
throw new IllegalArgumentException("Nothing to percolate");
}
if (context.percolateQuery() == null && (context.trackScores() || context.doSort || context.aggregations() != null) || context.aliasFilter() != null) {
context.percolateQuery(new MatchAllDocsQuery());
}
if (context.doSort && !context.limit) {
throw new IllegalArgumentException("Can't sort if size isn't specified");
}
if (context.highlight() != null && !context.limit) {
throw new IllegalArgumentException("Can't highlight if size isn't specified");
}
if (context.size() < 0) {
context.size(0);
}
// parse the source into a single MemoryIndex if it is one document, or index multiple docs when the mapping has nested objects
PercolatorIndex percolatorIndex;
boolean isNested = indexShard.mapperService().documentMapper(request.documentType()).hasNestedObjects();
if (parsedDocument.docs().size() > 1) {
assert isNested;
percolatorIndex = multi;
} else {
percolatorIndex = single;
}
PercolatorType action;
if (request.onlyCount()) {
action = context.percolateQuery() != null ? queryCountPercolator : countPercolator;
} else {
if (context.doSort) {
action = topMatchingPercolator;
} else if (context.percolateQuery() != null) {
action = context.trackScores() ? scoringPercolator : queryPercolator;
} else {
action = matchPercolator;
}
}
context.percolatorTypeId = action.id();
percolatorIndex.prepare(context, parsedDocument);
return action.doPercolate(request, context, isNested);
} finally {
context.types(previousTypes);
SearchContext.removeCurrent();
context.close();
shardPercolateService.postPercolate(System.nanoTime() - startTime);
}
}
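/**
 * Parses the percolate request body. Recognizes the "doc", "query", "filter",
 * "sort", "size" and "track_scores" elements, plus the registered highlight and
 * aggregation parse elements; returns the parsed document, or null if the source
 * is empty.
 */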
private ParsedDocument parseRequest(IndexService documentIndexService, PercolateShardRequest request, PercolateContext context, String index) {
BytesReference source = request.source();
if (source == null || source.length() == 0) {
return null;
}
// TODO: combine all feature parse elements into one map
Map<String, ? extends SearchParseElement> hlElements = highlightPhase.parseElements();
Map<String, ? extends SearchParseElement> aggregationElements = aggregationPhase.parseElements();
ParsedDocument doc = null;
XContentParser parser = null;
try {
parser = XContentFactory.xContent(source).createParser(source);
String currentFieldName = null;
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
// we need to check for the "doc" field name here, because the next token will be START_OBJECT,
// which is the start of the actual document
if ("doc".equals(currentFieldName)) {
if (doc != null) {
throw new ElasticsearchParseException("Either specify doc or get, not both");
}
MapperService mapperService = documentIndexService.mapperService();
DocumentMapperForType docMapper = mapperService.documentMapperWithAutoCreate(request.documentType());
doc = docMapper.getDocumentMapper().parse(source(parser).index(index).type(request.documentType()).flyweight(true));
if (docMapper.getMapping() != null) {
doc.addDynamicMappingsUpdate(docMapper.getMapping());
}
if (doc.dynamicMappingsUpdate() != null) {
mappingUpdatedAction.updateMappingOnMasterSynchronously(request.shardId().getIndex(), request.documentType(), doc.dynamicMappingsUpdate());
}
// the document parsing exits the "doc" object, so we need to set the new current field name.
currentFieldName = parser.currentName();
}
} else if (token == XContentParser.Token.START_OBJECT) {
SearchParseElement element = hlElements.get(currentFieldName);
if (element == null) {
element = aggregationElements.get(currentFieldName);
}
if ("query".equals(currentFieldName)) {
if (context.percolateQuery() != null) {
throw new ElasticsearchParseException("Either specify query or filter, not both");
}
context.percolateQuery(documentIndexService.queryParserService().parse(parser).query());
} else if ("filter".equals(currentFieldName)) {
if (context.percolateQuery() != null) {
throw new ElasticsearchParseException("Either specify query or filter, not both");
}
ParsedQuery parsedQuery = documentIndexService.queryParserService().parseInnerFilter(parser);
if (parsedQuery != null) {
context.percolateQuery(new ConstantScoreQuery(parsedQuery.query()));
}
} else if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
} else if (element != null) {
element.parse(parser, context);
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
}
} else if (token == null) {
break;
} else if (token.isValue()) {
if ("size".equals(currentFieldName)) {
context.size(parser.intValue());
if (context.size() < 0) {
throw new ElasticsearchParseException("size is set to [{}] and is expected to be higher or equal to 0", context.size());
}
} else if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
} else if ("track_scores".equals(currentFieldName) || "trackScores".equals(currentFieldName)) {
context.trackScores(parser.booleanValue());
}
}
}
// We need to get the actual source from the request body for highlighting, so parse the request body again
// and only get the doc source.
if (context.highlight() != null) {
parser.close();
currentFieldName = null;
parser = XContentFactory.xContent(source).createParser(source);
token = parser.nextToken();
assert token == XContentParser.Token.START_OBJECT;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_OBJECT) {
if ("doc".equals(currentFieldName)) {
BytesStreamOutput bStream = new BytesStreamOutput();
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.SMILE, bStream);
builder.copyCurrentStructure(parser);
builder.close();
doc.setSource(bStream.bytes());
break;
} else {
parser.skipChildren();
}
} else if (token == null) {
break;
}
}
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
return doc;
}
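/**
 * Parses the "sort" element. Only the default sort by relevancy (_score desc) is
 * supported; any explicit sort parsed into the context is rejected.
 */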
private void parseSort(XContentParser parser, PercolateContext context) throws Exception {
sortParseElement.parse(parser, context);
// a null sort means the default: sorting by relevancy
if (context.sort() == null) {
context.doSort = true;
} else {
throw new ElasticsearchParseException("Only _score desc is supported");
}
}
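/**
 * Parses a document that was fetched via the get API (when percolating an existing
 * document) into a {@link ParsedDocument}, keeping the raw source around when
 * highlighting was requested.
 */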
private ParsedDocument parseFetchedDoc(PercolateContext context, BytesReference fetchedDoc, IndexService documentIndexService, String index, String type) {
ParsedDocument doc = null;
XContentParser parser = null;
try {
parser = XContentFactory.xContent(fetchedDoc).createParser(fetchedDoc);
MapperService mapperService = documentIndexService.mapperService();
DocumentMapperForType docMapper = mapperService.documentMapperWithAutoCreate(type);
doc = docMapper.getDocumentMapper().parse(source(parser).index(index).type(type).flyweight(true));
if (context.highlight() != null) {
doc.setSource(fetchedDoc);
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
if (doc == null) {
throw new ElasticsearchParseException("No doc to percolate in the request");
}
return doc;
}
public void close() {
cache.close();
}
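/**
 * Strategy for a percolator mode (count, match, scoring, top matching): each
 * implementation knows how to execute on a shard and how to reduce the per-shard
 * responses into a single result.
 */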
interface PercolatorType {
// 0x00 is reserved for empty type.
byte id();
ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext);
PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested);
}
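// Count-only percolation without a percolate query: runs every registered
// percolator query against the in-memory document index and counts the matches.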
private final PercolatorType countPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x01;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long finalCount = 0;
for (PercolateShardResponse shardResponse : shardResults) {
finalCount += shardResponse.count();
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(finalCount, reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
try {
Query existsQuery = entry.getValue();
if (isNested) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
if (Lucene.exists(context.docSearcher(), existsQuery)) {
count++;
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
}
}
return new PercolateShardResponse(count, context, request.shardId());
}
};
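// Count-only percolation with a percolate query: instead of iterating all
// registered queries, it searches the shard's percolator index with the percolate
// query and counts matches via a counting collector.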
private final PercolatorType queryCountPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x02;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
return countPercolator.reduce(shardResults, headersContext);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
Count countCollector = count(logger, context, isNested);
queryBasedPercolating(percolatorSearcher, context, countCollector);
count = countCollector.counter();
} catch (Throwable e) {
logger.warn("failed to execute", e);
} finally {
percolatorSearcher.close();
}
return new PercolateShardResponse(count, context, request.shardId());
}
};
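// Match percolation without a percolate query: collects the ids of the matching
// queries (up to the requested size); reduce merges the per-shard matches.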
private final PercolatorType matchPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x03;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long foundMatches = 0;
int numMatches = 0;
for (PercolateShardResponse response : shardResults) {
foundMatches += response.count();
numMatches += response.matches().length;
}
int requestedSize = shardResults.get(0).requestedSize();
// Use a custom impl of AbstractBigArray for Object[]?
List<PercolateResponse.Match> finalMatches = new ArrayList<>(requestedSize == 0 ? numMatches : requestedSize);
outer:
for (PercolateShardResponse response : shardResults) {
Text index = new Text(response.getIndex());
for (int i = 0; i < response.matches().length; i++) {
float score = response.scores().length == 0 ? NO_SCORE : response.scores()[i];
Text match = new Text(new BytesArray(response.matches()[i]));
Map<String, HighlightField> hl = response.hls().isEmpty() ? null : response.hls().get(i);
finalMatches.add(new PercolateResponse.Match(index, match, score, hl));
if (requestedSize != 0 && finalMatches.size() == requestedSize) {
break outer;
}
}
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(foundMatches, finalMatches.toArray(new PercolateResponse.Match[finalMatches.size()]), reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
List<BytesRef> matches = new ArrayList<>();
List