/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.percolator;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.memory.ExtendedMemoryIndex;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.percolate.PercolateResponse;
import org.elasticsearch.action.percolate.PercolateShardRequest;
import org.elasticsearch.action.percolate.PercolateShardResponse;
import org.elasticsearch.cache.recycler.PageCacheRecycler;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.common.HasContextAndHeaders;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.Mapping;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.percolator.stats.ShardPercolateService;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.percolator.QueryCollector.Count;
import org.elasticsearch.percolator.QueryCollector.Match;
import org.elasticsearch.percolator.QueryCollector.MatchAndScore;
import org.elasticsearch.percolator.QueryCollector.MatchAndSort;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.AggregationPhase;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.pipeline.SiblingPipelineAggregator;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.highlight.HighlightPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.sort.SortParseElement;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.common.util.CollectionUtils.eagerTransform;
import static org.elasticsearch.index.mapper.SourceToParse.source;
import static org.elasticsearch.percolator.QueryCollector.count;
import static org.elasticsearch.percolator.QueryCollector.match;
import static org.elasticsearch.percolator.QueryCollector.matchAndScore;
public class PercolatorService extends AbstractComponent {
public final static float NO_SCORE = Float.NEGATIVE_INFINITY;
public final static String TYPE_NAME = ".percolator";
private final IndexNameExpressionResolver indexNameExpressionResolver;
private final IndicesService indicesService;
private final IntObjectHashMap<PercolatorType> percolatorTypes;
private final PageCacheRecycler pageCacheRecycler;
private final BigArrays bigArrays;
private final ClusterService clusterService;
private final PercolatorIndex single;
private final PercolatorIndex multi;
private final HighlightPhase highlightPhase;
private final AggregationPhase aggregationPhase;
private final SortParseElement sortParseElement;
private final ScriptService scriptService;
private final MappingUpdatedAction mappingUpdatedAction;
private final CloseableThreadLocal<MemoryIndex> cache;
private final ParseFieldMatcher parseFieldMatcher;
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
HighlightPhase highlightPhase, ClusterService clusterService,
AggregationPhase aggregationPhase, ScriptService scriptService,
MappingUpdatedAction mappingUpdatedAction) {
super(settings);
this.indexNameExpressionResolver = indexNameExpressionResolver;
this.parseFieldMatcher = new ParseFieldMatcher(settings);
this.indicesService = indicesService;
this.pageCacheRecycler = pageCacheRecycler;
this.bigArrays = bigArrays;
this.clusterService = clusterService;
this.highlightPhase = highlightPhase;
this.aggregationPhase = aggregationPhase;
this.scriptService = scriptService;
this.mappingUpdatedAction = mappingUpdatedAction;
this.sortParseElement = new SortParseElement();
final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
cache = new CloseableThreadLocal<MemoryIndex>() {
@Override
protected MemoryIndex initialValue() {
// TODO: should we expose payloads as an option? should offsets be turned on always?
return new ExtendedMemoryIndex(true, false, maxReuseBytes);
}
};
single = new SingleDocumentPercolatorIndex(cache);
multi = new MultiDocumentPercolatorIndex(cache);
percolatorTypes = new IntObjectHashMap<>(6);
percolatorTypes.put(countPercolator.id(), countPercolator);
percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
percolatorTypes.put(matchPercolator.id(), matchPercolator);
percolatorTypes.put(queryPercolator.id(), queryPercolator);
percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
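// --- Illustrative sketch, not part of the original class ---
// The per-thread MemoryIndex created above is the reusable buffer into which the document to
// percolate is indexed before the registered percolator queries are run against it. The
// hypothetical helper below shows that reuse pattern in minimal form (field name, text,
// analyzer and query are placeholders); the real work happens in the PercolatorIndex
// implementations (single/multi).
private void memoryIndexUsageSketch(org.apache.lucene.analysis.Analyzer analyzer, Query percolatorQuery) throws IOException {
    MemoryIndex memoryIndex = cache.get();                           // reuse this thread's buffer
    memoryIndex.addField("message", "a new bonsai tree in the office", analyzer);
    try {
        org.apache.lucene.search.IndexSearcher searcher = memoryIndex.createSearcher();
        boolean matched = Lucene.exists(searcher, percolatorQuery);  // does this percolator query match the doc?
        logger.trace("percolator query matched: {}", matched);
    } finally {
        memoryIndex.reset();                                         // make the buffer reusable for the next doc
    }
}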
public ReduceResult reduce(byte percolatorTypeId, List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
PercolatorType percolatorType = percolatorTypes.get(percolatorTypeId);
return percolatorType.reduce(shardResults, headersContext);
}
public PercolateShardResponse percolate(PercolateShardRequest request) {
IndexService percolateIndexService = indicesService.indexServiceSafe(request.shardId().getIndex());
IndexShard indexShard = percolateIndexService.shardSafe(request.shardId().id());
indexShard.readAllowed(); // check if we can read the shard...
ShardPercolateService shardPercolateService = indexShard.shardPercolateService();
shardPercolateService.prePercolate();
long startTime = System.nanoTime();
// TODO: The filteringAliases should be looked up at the coordinating node and serialized with the shard requests,
// just as is done in other APIs.
String[] filteringAliases = indexNameExpressionResolver.filteringAliases(
clusterService.state(),
indexShard.shardId().index().name(),
request.indices()
);
Query aliasFilter = percolateIndexService.aliasesService().aliasFilter(filteringAliases);
SearchShardTarget searchShardTarget = new SearchShardTarget(clusterService.localNode().id(), request.shardId().getIndex(), request.shardId().id());
final PercolateContext context = new PercolateContext(
request, searchShardTarget, indexShard, percolateIndexService, pageCacheRecycler, bigArrays, scriptService, aliasFilter, parseFieldMatcher
);
// Some queries (e.g. the function_score query with decay functions) rely on a SearchContext being set:
// We switch types because this context needs to be in the context of the percolate queries in the shard and
// not the in-memory percolate doc
String[] previousTypes = context.types();
context.types(new String[]{TYPE_NAME});
SearchContext.setCurrent(context);
try {
ParsedDocument parsedDocument = parseRequest(percolateIndexService, request, context, request.shardId().getIndex());
if (context.percolateQueries().isEmpty()) {
return new PercolateShardResponse(context, request.shardId());
}
if (request.docSource() != null && request.docSource().length() != 0) {
parsedDocument = parseFetchedDoc(context, request.docSource(), percolateIndexService, request.shardId().getIndex(), request.documentType());
} else if (parsedDocument == null) {
throw new IllegalArgumentException("Nothing to percolate");
}
if (context.percolateQuery() == null && (context.trackScores() || context.doSort || context.aggregations() != null) || context.aliasFilter() != null) {
context.percolateQuery(new MatchAllDocsQuery());
}
if (context.doSort && !context.limit) {
throw new IllegalArgumentException("Can't sort if size isn't specified");
}
if (context.highlight() != null && !context.limit) {
throw new IllegalArgumentException("Can't highlight if size isn't specified");
}
if (context.size() < 0) {
context.size(0);
}
// Parse the source either into a single MemoryIndex, if it is one document, or into multiple in-memory indices if the document contains nested docs
PercolatorIndex percolatorIndex;
boolean isNested = indexShard.mapperService().documentMapper(request.documentType()).hasNestedObjects();
if (parsedDocument.docs().size() > 1) {
assert isNested;
percolatorIndex = multi;
} else {
percolatorIndex = single;
}
PercolatorType action;
if (request.onlyCount()) {
action = context.percolateQuery() != null ? queryCountPercolator : countPercolator;
} else {
if (context.doSort) {
action = topMatchingPercolator;
} else if (context.percolateQuery() != null) {
action = context.trackScores() ? scoringPercolator : queryPercolator;
} else {
action = matchPercolator;
}
}
context.percolatorTypeId = action.id();
percolatorIndex.prepare(context, parsedDocument);
return action.doPercolate(request, context, isNested);
} finally {
context.types(previousTypes);
SearchContext.removeCurrent();
context.close();
shardPercolateService.postPercolate(System.nanoTime() - startTime);
}
}
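// --- Illustrative sketch, not part of the original class ---
// percolate() above is the shard-level entry point; requests normally reach it through the
// transport percolate action. The hypothetical helper below shows a minimal client-side
// invocation (index name, type and field values are placeholders, and the builder calls assume
// the 2.x Java client API).
private void percolateClientSketch(org.elasticsearch.client.Client client) throws IOException {
    PercolateResponse response = client.preparePercolate()
            .setIndices("my-index")                                   // index holding the registered .percolator queries
            .setDocumentType("my-type")                               // type used to parse the in-memory document
            .setPercolateDoc(org.elasticsearch.action.percolate.PercolateSourceBuilder.docBuilder()
                    .setDoc(XContentFactory.jsonBuilder()
                            .startObject().field("message", "a new bonsai tree in the office").endObject()))
            .get();
    for (PercolateResponse.Match match : response.getMatches()) {
        // each match is the id of a percolator query that matched the document
        logger.info("matched percolator query [{}/{}]", match.getIndex(), match.getId());
    }
}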
private ParsedDocument parseRequest(IndexService documentIndexService, PercolateShardRequest request, PercolateContext context, String index) {
BytesReference source = request.source();
if (source == null || source.length() == 0) {
return null;
}
// TODO: combine all feature parse elements into one map
Map<String, ? extends SearchParseElement> hlElements = highlightPhase.parseElements();
Map<String, ? extends SearchParseElement> aggregationElements = aggregationPhase.parseElements();
ParsedDocument doc = null;
XContentParser parser = null;
try {
parser = XContentFactory.xContent(source).createParser(source);
String currentFieldName = null;
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
// we need to check the "doc" here, so the next token will be START_OBJECT which is
// the actual document starting
if ("doc".equals(currentFieldName)) {
if (doc != null) {
throw new ElasticsearchParseException("Either specify doc or get, not both");
}
MapperService mapperService = documentIndexService.mapperService();
DocumentMapperForType docMapper = mapperService.documentMapperWithAutoCreate(request.documentType());
doc = docMapper.getDocumentMapper().parse(source(parser).index(index).type(request.documentType()).flyweight(true));
if (docMapper.getMapping() != null) {
doc.addDynamicMappingsUpdate(docMapper.getMapping());
}
if (doc.dynamicMappingsUpdate() != null) {
mappingUpdatedAction.updateMappingOnMasterSynchronously(request.shardId().getIndex(), request.documentType(), doc.dynamicMappingsUpdate());
}
// the document parsing exits the "doc" object, so we need to set the new current field.
currentFieldName = parser.currentName();
}
} else if (token == XContentParser.Token.START_OBJECT) {
SearchParseElement element = hlElements.get(currentFieldName);
if (element == null) {
element = aggregationElements.get(currentFieldName);
}
if ("query".equals(currentFieldName)) {
if (context.percolateQuery() != null) {
throw new ElasticsearchParseException("Either specify query or filter, not both");
}
context.percolateQuery(documentIndexService.queryParserService().parse(parser).query());
} else if ("filter".equals(currentFieldName)) {
if (context.percolateQuery() != null) {
throw new ElasticsearchParseException("Either specify query or filter, not both");
}
ParsedQuery parsedQuery = documentIndexService.queryParserService().parseInnerFilter(parser);
if(parsedQuery != null) {
context.percolateQuery(new ConstantScoreQuery(parsedQuery.query()));
}
} else if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
} else if (element != null) {
element.parse(parser, context);
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
}
} else if (token == null) {
break;
} else if (token.isValue()) {
if ("size".equals(currentFieldName)) {
context.size(parser.intValue());
if (context.size() < 0) {
throw new ElasticsearchParseException("size is set to [{}] and is expected to be higher or equal to 0", context.size());
}
} else if ("sort".equals(currentFieldName)) {
parseSort(parser, context);
} else if ("track_scores".equals(currentFieldName) || "trackScores".equals(currentFieldName)) {
context.trackScores(parser.booleanValue());
}
}
}
// We need to get the actual source from the request body for highlighting, so parse the request body again
// and only get the doc source.
if (context.highlight() != null) {
parser.close();
currentFieldName = null;
parser = XContentFactory.xContent(source).createParser(source);
token = parser.nextToken();
assert token == XContentParser.Token.START_OBJECT;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_OBJECT) {
if ("doc".equals(currentFieldName)) {
BytesStreamOutput bStream = new BytesStreamOutput();
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.SMILE, bStream);
builder.copyCurrentStructure(parser);
builder.close();
doc.setSource(bStream.bytes());
break;
} else {
parser.skipChildren();
}
} else if (token == null) {
break;
}
}
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
return doc;
}
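// --- Illustrative sketch, not part of the original class ---
// parseRequest() above walks the percolate request body and recognises the "doc", "query",
// "filter", "sort", "size" and "track_scores" elements (plus highlight/aggregation elements).
// The hypothetical helper below builds one such body with XContentBuilder; the document field
// and the term query values are placeholders.
private static XContentBuilder examplePercolateSource() throws IOException {
    return XContentFactory.jsonBuilder()
            .startObject()
                .startObject("doc")                                   // the in-memory document to percolate
                    .field("message", "a new bonsai tree in the office")
                .endObject()
                .startObject("query")                                 // optional: restrict which percolator queries are run
                    .startObject("term").field("priority", "high").endObject()
                .endObject()
                .field("size", 10)                                    // number of matching query ids to return
                .field("track_scores", true)                          // score each percolator query against the doc
            .endObject();
}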
private void parseSort(XContentParser parser, PercolateContext context) throws Exception {
sortParseElement.parse(parser, context);
// null means the default sort by relevancy (_score desc), which is the only supported sort
if (context.sort() == null) {
context.doSort = true;
} else {
throw new ElasticsearchParseException("Only _score desc is supported");
}
}
private ParsedDocument parseFetchedDoc(PercolateContext context, BytesReference fetchedDoc, IndexService documentIndexService, String index, String type) {
ParsedDocument doc = null;
XContentParser parser = null;
try {
parser = XContentFactory.xContent(fetchedDoc).createParser(fetchedDoc);
MapperService mapperService = documentIndexService.mapperService();
DocumentMapperForType docMapper = mapperService.documentMapperWithAutoCreate(type);
doc = docMapper.getDocumentMapper().parse(source(parser).index(index).type(type).flyweight(true));
if (context.highlight() != null) {
doc.setSource(fetchedDoc);
}
} catch (Throwable e) {
throw new ElasticsearchParseException("failed to parse request", e);
} finally {
if (parser != null) {
parser.close();
}
}
if (doc == null) {
throw new ElasticsearchParseException("No doc to percolate in the request");
}
return doc;
}
public void close() {
cache.close();
}
interface PercolatorType {
// 0x00 is reserved for empty type.
byte id();
ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext);
PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested);
}
private final PercolatorType countPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x01;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long finalCount = 0;
for (PercolateShardResponse shardResponse : shardResults) {
finalCount += shardResponse.count();
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(finalCount, reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
try {
Query existsQuery = entry.getValue();
if (isNested) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
if (Lucene.exists(context.docSearcher(), existsQuery)) {
count++;
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
}
}
return new PercolateShardResponse(count, context, request.shardId());
}
};
private final PercolatorType queryCountPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x02;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
return countPercolator.reduce(shardResults, headersContext);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
Count countCollector = count(logger, context, isNested);
queryBasedPercolating(percolatorSearcher, context, countCollector);
count = countCollector.counter();
} catch (Throwable e) {
logger.warn("failed to execute", e);
} finally {
percolatorSearcher.close();
}
return new PercolateShardResponse(count, context, request.shardId());
}
};
private final PercolatorType matchPercolator = new PercolatorType() {
@Override
public byte id() {
return 0x03;
}
@Override
public ReduceResult reduce(List<PercolateShardResponse> shardResults, HasContextAndHeaders headersContext) {
long foundMatches = 0;
int numMatches = 0;
for (PercolateShardResponse response : shardResults) {
foundMatches += response.count();
numMatches += response.matches().length;
}
int requestedSize = shardResults.get(0).requestedSize();
// Use a custom impl of AbstractBigArray for Object[]?
List<PercolateResponse.Match> finalMatches = new ArrayList<>(requestedSize == 0 ? numMatches : requestedSize);
outer:
for (PercolateShardResponse response : shardResults) {
Text index = new Text(response.getIndex());
for (int i = 0; i < response.matches().length; i++) {
float score = response.scores().length == 0 ? NO_SCORE : response.scores()[i];
Text match = new Text(new BytesArray(response.matches()[i]));
Map<String, HighlightField> hl = response.hls().isEmpty() ? null : response.hls().get(i);
finalMatches.add(new PercolateResponse.Match(index, match, score, hl));
if (requestedSize != 0 && finalMatches.size() == requestedSize) {
break outer;
}
}
}
assert !shardResults.isEmpty();
InternalAggregations reducedAggregations = reduceAggregations(shardResults, headersContext);
return new ReduceResult(foundMatches, finalMatches.toArray(new PercolateResponse.Match[finalMatches.size()]), reducedAggregations);
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
List<BytesRef> matches = new ArrayList<>();
List