/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.timeseries;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.search.aggregations.AggregationExecutionContext;
import org.elasticsearch.search.aggregations.BucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.sort.SortOrder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.IntSupplier;
import static org.elasticsearch.index.IndexSortConfig.TIME_SERIES_SORT;
/**
* An IndexSearcher wrapper that executes searches against time-series indices by traversing them in tsid and timestamp order.
* TODO: Convert it to use index sort instead of hard-coded tsid and timestamp values
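*
* A minimal usage sketch (caller-side names are hypothetical; in practice the aggregation
* framework constructs and drives this class):
* <pre>{@code
* TimeSeriesIndexSearcher tsSearcher = new TimeSeriesIndexSearcher(contextIndexSearcher, cancellationChecks);
* tsSearcher.search(new MatchAllDocsQuery(), bucketCollector); // docs arrive grouped by _tsid, ordered by @timestamp
* }</pre>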
*/
public class TimeSeriesIndexSearcher {
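// 1 << 11 == 2048: cancellation callbacks are only invoked once every 2048 iterations
// (leaves or documents seen), amortizing the cost of the check across the traversal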
private static final int CHECK_CANCELLED_SCORER_INTERVAL = 1 << 11;
// We need to delegate to the other searcher here as opposed to extending IndexSearcher and inheriting default implementations as the
// IndexSearcher would most of the time be a ContextIndexSearcher that has important logic related to e.g. document-level security.
private final IndexSearcher searcher;
private final List<Runnable> cancellations;
private final boolean tsidReverse;
private final boolean timestampReverse;
public TimeSeriesIndexSearcher(IndexSearcher searcher, List<Runnable> cancellations) {
this.searcher = searcher;
this.cancellations = cancellations;
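// TIME_SERIES_SORT is the fixed two-field index sort (_tsid, then @timestamp) that
// time-series indices are created with; the asserts below pin this class to that layout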
assert TIME_SERIES_SORT.length == 2;
assert TIME_SERIES_SORT[0].getField().equals(TimeSeriesIdFieldMapper.NAME);
assert TIME_SERIES_SORT[1].getField().equals(DataStreamTimestampFieldMapper.DEFAULT_PATH);
this.tsidReverse = TIME_SERIES_SORT[0].getOrder() == SortOrder.DESC;
this.timestampReverse = TIME_SERIES_SORT[1].getOrder() == SortOrder.DESC;
}
public void search(Query query, BucketCollector bucketCollector) throws IOException {
int seen = 0;
query = searcher.rewrite(query);
Weight weight = searcher.createWeight(query, bucketCollector.scoreMode(), 1);
int[] tsidOrd = new int[1];
// Create LeafWalker for each subreader
List<LeafWalker> leafWalkers = new ArrayList<>();
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
if (++seen % CHECK_CANCELLED_SCORER_INTERVAL == 0) {
checkCancelled();
}
Scorer scorer = weight.scorer(leaf);
if (scorer != null) {
LeafWalker leafWalker = new LeafWalker(leaf, scorer, bucketCollector, () -> tsidOrd[0]);
if (leafWalker.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
leafWalkers.add(leafWalker);
}
} else {
// Even though we will not walk through this aggregation as a part of normal processing
// this is needed to trigger actions in some bucketCollectors that bypass the normal iteration logic
// for example, global aggregator triggers a separate iterator that ignores the query but still needs
// to know all leaves
bucketCollector.getLeafCollector(new AggregationExecutionContext(leaf, null, null, null));
}
}
PriorityQueue<LeafWalker> queue = new PriorityQueue<>(searcher.getIndexReader().leaves().size()) {
@Override
protected boolean lessThan(LeafWalker a, LeafWalker b) {
if (timestampReverse) {
return a.timestamp > b.timestamp;
} else {
return a.timestamp < b.timestamp;
}
}
};
// The priority queue is filled for each TSID in order. When a walker moves
// to the next TSID it is removed from the queue. Once the queue is empty,
// we refill it with walkers positioned on the next TSID. Within the queue
// walkers are ordered by timestamp.
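// Illustrative walk-through (hypothetical data, ascending sort): given walkers
// W1 = [(tsidA, t=1), (tsidA, t=3), (tsidB, t=2)] and W2 = [(tsidA, t=2), (tsidB, t=1)],
// the queue first holds both walkers on tsidA and emits t=1, t=2, t=3; once both
// walkers have advanced to tsidB the queue is refilled and emits t=1, t=2 for tsidB.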
while (populateQueue(leafWalkers, queue)) {
do {
if (++seen % CHECK_CANCELLED_SCORER_INTERVAL == 0) {
checkCancelled();
}
LeafWalker walker = queue.top();
walker.collectCurrent();
if (walker.nextDoc() == DocIdSetIterator.NO_MORE_DOCS || walker.shouldPop()) {
queue.pop();
} else {
queue.updateTop();
}
} while (queue.size() > 0);
tsidOrd[0]++;
}
}
// Re-populate the queue with walkers on the same TSID.
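// For example (hypothetical, ascending sort): if the walkers are positioned on tsids
// [B, A, A], the walker on B is added first, then discarded once a walker on A is seen
// (A sorts earlier), leaving the two walkers on A in the queue.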
private boolean populateQueue(List<LeafWalker> leafWalkers, PriorityQueue<LeafWalker> queue) throws IOException {
BytesRef currentTsid = null;
assert queue.size() == 0;
Iterator<LeafWalker> it = leafWalkers.iterator();
while (it.hasNext()) {
LeafWalker leafWalker = it.next();
if (leafWalker.docId == DocIdSetIterator.NO_MORE_DOCS) {
// If a walker is exhausted then we can remove it from consideration
// entirely
it.remove();
continue;
}
BytesRef tsid = leafWalker.getTsid();
if (currentTsid == null) {
currentTsid = tsid;
}
int comp = tsid.compareTo(currentTsid);
if (comp == 0) {
queue.add(leafWalker);
} else if ((tsidReverse && comp > 0) || (false == tsidReverse && comp < 0)) {
// We've found a walker on a TSID that sorts earlier in the index sort
// order, so we remove all walkers collected so far from the queue and
// reset our comparison TSID to this earlier value
queue.clear();
queue.add(leafWalker);
currentTsid = tsid;
}
}
assert queueAllHaveTsid(queue, currentTsid);
// If all walkers are exhausted then nothing will have been added to the queue
// and we're done
return queue.size() > 0;
}
private static boolean queueAllHaveTsid(PriorityQueue<LeafWalker> queue, BytesRef tsid) throws IOException {
for (LeafWalker leafWalker : queue) {
BytesRef walkerId = leafWalker.tsids.lookupOrd(leafWalker.tsids.ordValue());
assert walkerId.equals(tsid) : tsid.utf8ToString() + " != " + walkerId.utf8ToString();
}
return true;
}
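// Each cancellation callback is expected to throw an exception (rather than return a flag)
// when the search has been cancelled; this method simply runs them all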
private void checkCancelled() {
for (Runnable r : cancellations) {
r.run();
}
}
private static class LeafWalker {
private final LeafBucketCollector collector;
private final Bits liveDocs;
private final DocIdSetIterator iterator;
private final SortedDocValues tsids;
private final SortedNumericDocValues timestamps; // TODO can we have this just a NumericDocValues?
private final BytesRefBuilder scratch = new BytesRefBuilder();
int docId = -1;
int tsidOrd;
long timestamp;
LeafWalker(LeafReaderContext context, Scorer scorer, BucketCollector bucketCollector, IntSupplier tsidOrdSupplier)
throws IOException {
AggregationExecutionContext aggCtx = new AggregationExecutionContext(context, scratch::get, () -> timestamp, tsidOrdSupplier);
this.collector = bucketCollector.getLeafCollector(aggCtx);
liveDocs = context.reader().getLiveDocs();
this.collector.setScorer(scorer);
iterator = scorer.iterator();
tsids = DocValues.getSorted(context.reader(), TimeSeriesIdFieldMapper.NAME);
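// FIXED_TIMESTAMP_FIELD is "@timestamp", the per-document time value of time-series indices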
timestamps = DocValues.getSortedNumeric(context.reader(), DataStream.TimestampField.FIXED_TIMESTAMP_FIELD);
}
void collectCurrent() throws IOException {
assert tsids.docID() == docId;
assert timestamps.docID() == docId;
collector.collect(docId);
}
int nextDoc() throws IOException {
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
return DocIdSetIterator.NO_MORE_DOCS;
}
do {
docId = iterator.nextDoc();
} while (docId != DocIdSetIterator.NO_MORE_DOCS && isInvalidDoc(docId));
if (docId != DocIdSetIterator.NO_MORE_DOCS) {
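// isInvalidDoc has already positioned timestamps on this doc via advanceExact, so
// nextValue() reads its first (for time series data, typically only) timestamp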
timestamp = timestamps.nextValue();
}
return docId;
}
BytesRef getTsid() throws IOException {
tsidOrd = tsids.ordValue();
scratch.copyBytes(tsids.lookupOrd(tsidOrd));
return scratch.get();
}
// invalid if the doc is deleted or if it doesn't have a tsid or timestamp entry
private boolean isInvalidDoc(int docId) throws IOException {
return (liveDocs != null && liveDocs.get(docId) == false)
|| tsids.advanceExact(docId) == false
|| timestamps.advanceExact(docId) == false;
}
// true if the TSID ord has changed since the last time we checked
boolean shouldPop() throws IOException {
return tsidOrd != tsids.ordValue();
}
}
}