/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.index.engine;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.util.ArrayUtil;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.transport.Transports;
import java.io.Closeable;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* A {@link Translog.Snapshot} from changes in a Lucene index
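*
* <p>An illustrative usage sketch (the {@code engineSearcher}, {@code mapperService}, and
* {@code maxSeqNo} values are assumed to come from the owning engine):
* <pre>{@code
* try (Translog.Snapshot snapshot = new LuceneChangesSnapshot(
*         engineSearcher, mapperService, LuceneChangesSnapshot.DEFAULT_BATCH_SIZE,
*         0, maxSeqNo, true, true, true)) {
*     Translog.Operation op;
*     while ((op = snapshot.next()) != null) {
*         // replay or ship the operation
*     }
* }
* }</pre>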
*/
final class LuceneChangesSnapshot implements Translog.Snapshot {
static final int DEFAULT_BATCH_SIZE = 1024;
private final int searchBatchSize;
private final long fromSeqNo, toSeqNo;
private long lastSeenSeqNo;
private int skippedOperations;
private final boolean requiredFullRange;
private final boolean singleConsumer;
private final IndexSearcher indexSearcher;
private final MapperService mapperService;
private int docIndex = 0;
private final boolean accessStats;
private final int totalHits;
private ScoreDoc[] scoreDocs;
private final ParallelArray parallelArray;
private final Closeable onClose;
private int storedFieldsReaderOrd = -1;
private StoredFieldsReader storedFieldsReader = null;
private final Thread creationThread; // for assertion
/**
* Creates a new "translog" snapshot from Lucene for reading operations whose seq# is in the specified range.
*
* @param engineSearcher the internal engine searcher which will be taken over if the snapshot is opened successfully
* @param mapperService the mapper service which will be mainly used to resolve the document's type and uid
* @param searchBatchSize the number of documents to be returned by each search
* @param fromSeqNo the minimum requested seq#, inclusive
* @param toSeqNo the maximum requested seq#, inclusive
* @param requiredFullRange if true, the snapshot will strictly check for the existence of operations between fromSeqNo and toSeqNo
* @param singleConsumer true if the snapshot is accessed by a single thread that creates the snapshot
* @param accessStats true if the stats of the snapshot can be accessed via {@link #totalOperations()}
*/
LuceneChangesSnapshot(
Engine.Searcher engineSearcher,
MapperService mapperService,
int searchBatchSize,
long fromSeqNo,
long toSeqNo,
boolean requiredFullRange,
boolean singleConsumer,
boolean accessStats
) throws IOException {
if (fromSeqNo < 0 || toSeqNo < 0 || fromSeqNo > toSeqNo) {
throw new IllegalArgumentException("Invalid range; from_seqno [" + fromSeqNo + "], to_seqno [" + toSeqNo + "]");
}
if (searchBatchSize <= 0) {
throw new IllegalArgumentException("Search_batch_size must be positive [" + searchBatchSize + "]");
}
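// Release the engine searcher exactly once; close() may be called multiple times.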
final AtomicBoolean closed = new AtomicBoolean();
this.onClose = () -> {
if (closed.compareAndSet(false, true)) {
IOUtils.close(engineSearcher);
}
};
this.mapperService = mapperService;
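// Guard against long overflow when the requested range spans the whole seq# space.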
final long requestingSize = (toSeqNo - fromSeqNo) == Long.MAX_VALUE ? Long.MAX_VALUE : (toSeqNo - fromSeqNo + 1L);
this.creationThread = Thread.currentThread();
this.searchBatchSize = requestingSize < searchBatchSize ? Math.toIntExact(requestingSize) : searchBatchSize;
this.fromSeqNo = fromSeqNo;
this.toSeqNo = toSeqNo;
this.lastSeenSeqNo = fromSeqNo - 1;
this.requiredFullRange = requiredFullRange;
this.singleConsumer = singleConsumer;
this.indexSearcher = newIndexSearcher(engineSearcher);
this.indexSearcher.setQueryCache(null);
this.accessStats = accessStats;
this.parallelArray = new ParallelArray(this.searchBatchSize);
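// Issue the first search eagerly so that an invalid snapshot fails in the constructor
// and the (optionally accurate) total hit count is available up front.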
final TopDocs topDocs = searchOperations(null, accessStats);
this.totalHits = Math.toIntExact(topDocs.totalHits.value);
this.scoreDocs = topDocs.scoreDocs;
fillParallelArray(scoreDocs, parallelArray);
}
@Override
public void close() throws IOException {
assert assertAccessingThread();
onClose.close();
}
@Override
public int totalOperations() {
assert assertAccessingThread();
if (accessStats == false) {
throw new IllegalStateException("Access stats of a snapshot created with [access_stats] is false");
}
return totalHits;
}
@Override
public int skippedOperations() {
assert assertAccessingThread();
return skippedOperations;
}
@Override
public Translog.Operation next() throws IOException {
assert assertAccessingThread();
Translog.Operation op = null;
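// Iterate over the batch (refilling it as it is exhausted) until an operation is found;
// readDocAsOp returns null for docs that must be skipped, e.g. duplicate seq#s.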
for (int idx = nextDocIndex(); idx != -1; idx = nextDocIndex()) {
op = readDocAsOp(idx);
if (op != null) {
break;
}
}
if (requiredFullRange) {
rangeCheck(op);
}
if (op != null) {
lastSeenSeqNo = op.seqNo();
}
return op;
}
private boolean assertAccessingThread() {
assert singleConsumer == false || creationThread == Thread.currentThread()
: "created by [" + creationThread + "] != current thread [" + Thread.currentThread() + "]";
assert Transports.assertNotTransportThread("reading changes snapshot may involve slow IO");
return true;
}
private void rangeCheck(Translog.Operation op) {
if (op == null) {
if (lastSeenSeqNo < toSeqNo) {
throw new MissingHistoryOperationsException(
"Not all operations between from_seqno ["
+ fromSeqNo
+ "] "
+ "and to_seqno ["
+ toSeqNo
+ "] found; prematurely terminated last_seen_seqno ["
+ lastSeenSeqNo
+ "]"
);
}
} else {
final long expectedSeqNo = lastSeenSeqNo + 1;
if (op.seqNo() != expectedSeqNo) {
throw new MissingHistoryOperationsException(
"Not all operations between from_seqno ["
+ fromSeqNo
+ "] "
+ "and to_seqno ["
+ toSeqNo
+ "] found; expected seqno ["
+ expectedSeqNo
+ "]; found ["
+ op
+ "]"
);
}
}
}
private int nextDocIndex() throws IOException {
// we have processed all docs in the current search - fetch the next batch
if (docIndex == scoreDocs.length && docIndex > 0) {
final ScoreDoc prev = scoreDocs[scoreDocs.length - 1];
scoreDocs = searchOperations((FieldDoc) prev, false).scoreDocs;
fillParallelArray(scoreDocs, parallelArray);
docIndex = 0;
}
if (docIndex < scoreDocs.length) {
int idx = docIndex;
docIndex++;
return idx;
}
return -1;
}
private void fillParallelArray(ScoreDoc[] scoreDocs, ParallelArray parallelArray) throws IOException {
if (scoreDocs.length > 0) {
for (int i = 0; i < scoreDocs.length; i++) {
scoreDocs[i].shardIndex = i;
}
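// Use the sequential stored fields reader only when a single thread consumes the snapshot
// and the batch is reasonably large and strictly contiguous in docID order; otherwise
// fall back to random-access stored field loading.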
parallelArray.useSequentialStoredFieldsReader = singleConsumer && scoreDocs.length >= 10 && hasSequentialAccess(scoreDocs);
if (parallelArray.useSequentialStoredFieldsReader == false) {
storedFieldsReaderOrd = -1;
storedFieldsReader = null;
}
// for better loading performance we sort the array by docID and
// then visit all leaves in order.
if (parallelArray.useSequentialStoredFieldsReader == false) {
ArrayUtil.introSort(scoreDocs, Comparator.comparingInt(i -> i.doc));
}
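// Walk the leaves in docID order, resolving each hit to its segment-local docID
// and reading the per-document metadata from doc values in a single pass.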
int docBase = -1;
int maxDoc = 0;
List<LeafReaderContext> leaves = indexSearcher.getIndexReader().leaves();
int readerIndex = 0;
CombinedDocValues combinedDocValues = null;
LeafReaderContext leaf = null;
for (ScoreDoc scoreDoc : scoreDocs) {
if (scoreDoc.doc >= docBase + maxDoc) {
do {
leaf = leaves.get(readerIndex++);
docBase = leaf.docBase;
maxDoc = leaf.reader().maxDoc();
} while (scoreDoc.doc >= docBase + maxDoc);
combinedDocValues = new CombinedDocValues(leaf.reader());
}
final int segmentDocID = scoreDoc.doc - docBase;
final int index = scoreDoc.shardIndex;
parallelArray.leafReaderContexts[index] = leaf;
parallelArray.seqNo[index] = combinedDocValues.docSeqNo(segmentDocID);
parallelArray.primaryTerm[index] = combinedDocValues.docPrimaryTerm(segmentDocID);
parallelArray.version[index] = combinedDocValues.docVersion(segmentDocID);
parallelArray.isTombStone[index] = combinedDocValues.isTombstone(segmentDocID);
parallelArray.hasRecoverySource[index] = combinedDocValues.hasRecoverySource(segmentDocID);
}
// now sort back by shardIndex, which we used above to remember each doc's original position
if (parallelArray.useSequentialStoredFieldsReader == false) {
ArrayUtil.introSort(scoreDocs, Comparator.comparingInt(i -> i.shardIndex));
}
}
}
private static boolean hasSequentialAccess(ScoreDoc[] scoreDocs) {
for (int i = 0; i < scoreDocs.length - 1; i++) {
if (scoreDocs[i].doc + 1 != scoreDocs[i + 1].doc) {
return false;
}
}
return true;
}
private static IndexSearcher newIndexSearcher(Engine.Searcher engineSearcher) throws IOException {
return new IndexSearcher(Lucene.wrapAllDocsLive(engineSearcher.getDirectoryReader()));
}
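// Matches all docs whose seq# falls in [fromSeqNo, toSeqNo]; the primary term doc-values
// existence clause removes nested (non-root) documents, which carry no primary term.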
private static Query rangeQuery(long fromSeqNo, long toSeqNo) {
return new BooleanQuery.Builder().add(LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, fromSeqNo, toSeqNo), BooleanClause.Occur.MUST)
// exclude non-root nested documents
.add(new DocValuesFieldExistsQuery(SeqNoFieldMapper.PRIMARY_TERM_NAME), BooleanClause.Occur.MUST)
.build();
}
static int countOperations(Engine.Searcher engineSearcher, long fromSeqNo, long toSeqNo) throws IOException {
if (fromSeqNo < 0 || toSeqNo < 0 || fromSeqNo > toSeqNo) {
throw new IllegalArgumentException("Invalid range; from_seqno [" + fromSeqNo + "], to_seqno [" + toSeqNo + "]");
}
return newIndexSearcher(engineSearcher).count(rangeQuery(fromSeqNo, toSeqNo));
}
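// Runs one batch of the seq#-sorted search, resuming after the given FieldDoc;
// an accurate total hit count is only computed for the first batch.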
private TopDocs searchOperations(FieldDoc after, boolean accurateTotalHits) throws IOException {
final Query rangeQuery = rangeQuery(Math.max(fromSeqNo, lastSeenSeqNo), toSeqNo);
assert accurateTotalHits == false || after == null : "accurate total hits is required by the first batch only";
final SortField sortBySeqNo = new SortField(SeqNoFieldMapper.NAME, SortField.Type.LONG);
sortBySeqNo.setCanUsePoints();
final TopFieldCollector collector = TopFieldCollector.create(
new Sort(sortBySeqNo),
searchBatchSize,
after,
accurateTotalHits ? Integer.MAX_VALUE : 0
);
indexSearcher.search(rangeQuery, collector);
return collector.topDocs();
}
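// Converts the hit at docIndex into a Translog.Operation: a no-op if it is a tombstone
// without a uid, a delete if it is a tombstone with a uid, and an index operation otherwise.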
private Translog.Operation readDocAsOp(int docIndex) throws IOException {
final LeafReaderContext leaf = parallelArray.leafReaderContexts[docIndex];
final int segmentDocID = scoreDocs[docIndex].doc - leaf.docBase;
final long primaryTerm = parallelArray.primaryTerm[docIndex];
assert primaryTerm > 0 : "nested child document must be excluded";
final long seqNo = parallelArray.seqNo[docIndex];
// Only pick the first seen seq#
if (seqNo == lastSeenSeqNo) {
skippedOperations++;
return null;
}
final long version = parallelArray.version[docIndex];
final String sourceField = parallelArray.hasRecoverySource[docIndex]
? SourceFieldMapper.RECOVERY_SOURCE_NAME
: SourceFieldMapper.NAME;
final FieldsVisitor fields = new FieldsVisitor(true, sourceField);
if (parallelArray.useSequentialStoredFieldsReader) {
if (storedFieldsReaderOrd != leaf.ord) {
if (leaf.reader() instanceof SequentialStoredFieldsLeafReader) {
storedFieldsReader = ((SequentialStoredFieldsLeafReader) leaf.reader()).getSequentialStoredFieldsReader();
storedFieldsReaderOrd = leaf.ord;
} else {
storedFieldsReader = null;
storedFieldsReaderOrd = -1;
}
}
}
if (storedFieldsReader != null) {
assert singleConsumer : "Sequential access optimization must not be enabled for multiple consumers";
assert parallelArray.useSequentialStoredFieldsReader;
assert storedFieldsReaderOrd == leaf.ord : storedFieldsReaderOrd + " != " + leaf.ord;
storedFieldsReader.visitDocument(segmentDocID, fields);
} else {
leaf.reader().document(segmentDocID, fields);
}
MappingLookup mappingLookup = mapperService.mappingLookup();
fields.postProcess(mapperService::fieldType, mappingLookup.hasMappings() ? mappingLookup.getType() : null);
final Translog.Operation op;
final boolean isTombstone = parallelArray.isTombStone[docIndex];
if (isTombstone && fields.uid() == null) {
op = new Translog.NoOp(seqNo, primaryTerm, fields.source().utf8ToString());
assert version == 1L : "Noop tombstone should have version 1L; actual version [" + version + "]";
assert assertDocSoftDeleted(leaf.reader(), segmentDocID) : "Noop but soft_deletes field is not set [" + op + "]";
} else {
final String id = fields.uid().id();
final String type = fields.uid().type();
final Term uid = new Term(IdFieldMapper.NAME, Uid.encodeId(id));
if (isTombstone) {
op = new Translog.Delete(type, id, uid, seqNo, primaryTerm, version);
assert assertDocSoftDeleted(leaf.reader(), segmentDocID) : "Delete op but soft_deletes field is not set [" + op + "]";
} else {
final BytesReference source = fields.source();
if (source == null) {
// TODO: Callers should ask for the range in which the source should be retained. Thus we should
// always check for the existence of the source once peer-recovery sends ops after the local checkpoint.
if (requiredFullRange) {
throw new MissingHistoryOperationsException(
"source not found for seqno=" + seqNo + " from_seqno=" + fromSeqNo + " to_seqno=" + toSeqNo
);
} else {
skippedOperations++;
return null;
}
}
// TODO: pass the latest timestamp from engine.
final long autoGeneratedIdTimestamp = -1;
op = new Translog.Index(
type,
id,
seqNo,
primaryTerm,
version,
source.toBytesRef().bytes,
fields.routing(),
autoGeneratedIdTimestamp
);
}
}
assert fromSeqNo <= op.seqNo() && op.seqNo() <= toSeqNo && lastSeenSeqNo < op.seqNo()
: "Unexpected operation; "
+ "last_seen_seqno ["
+ lastSeenSeqNo
+ "], from_seqno ["
+ fromSeqNo
+ "], to_seqno ["
+ toSeqNo
+ "], op ["
+ op
+ "]";
return op;
}
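// Assertion helper: verifies that the given segment document is marked as soft-deleted.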
private boolean assertDocSoftDeleted(LeafReader leafReader, int segmentDocId) throws IOException {
final NumericDocValues ndv = leafReader.getNumericDocValues(Lucene.SOFT_DELETES_FIELD);
if (ndv == null || ndv.advanceExact(segmentDocId) == false) {
throw new IllegalStateException("DocValues for field [" + Lucene.SOFT_DELETES_FIELD + "] is not found");
}
return ndv.longValue() == 1;
}
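// A structure-of-arrays buffer holding the per-document metadata for the current batch.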
private static final class ParallelArray {
final LeafReaderContext[] leafReaderContexts;
final long[] version;
final long[] seqNo;
final long[] primaryTerm;
final boolean[] isTombStone;
final boolean[] hasRecoverySource;
boolean useSequentialStoredFieldsReader = false;
ParallelArray(int size) {
version = new long[size];
seqNo = new long[size];
primaryTerm = new long[size];
isTombStone = new boolean[size];
hasRecoverySource = new boolean[size];
leafReaderContexts = new LeafReaderContext[size];
}
}
// for testing
boolean useSequentialStoredFieldsReader() {
return storedFieldsReader != null;
}
}