org.elasticsearch.common.lucene.uid.PerThreadIDVersionAndSeqNoLookup Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.common.lucene.uid;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo;
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.VersionFieldMapper;
import java.io.IOException;
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO;
/** Utility class to do efficient primary-key (only 1 doc contains the
* given term) lookups by segment, re-using the enums. This class is
* not thread safe, so it is the caller's job to create and use one
* instance of this per thread. Do not use this if a term may appear
* in more than one document! It will only return the first one it
* finds.
* This class uses live docs, so it should be cached based on the
* {@link org.apache.lucene.index.IndexReader#getReaderCacheHelper() reader cache helper}
* rather than the {@link LeafReader#getCoreCacheHelper() core cache helper}.
*/
final class PerThreadIDVersionAndSeqNoLookup {
// TODO: do we really need to store all this stuff? some if it might not speed up anything.
// we keep it around for now, to reduce the amount of e.g. hash lookups by field and stuff
/** terms enum for uid field */
final String uidField;
private final TermsEnum termsEnum;
/** Reused for iteration (when the term exists) */
private PostingsEnum docsEnum;
/** used for assertions to make sure class usage meets assumptions */
private final Object readerKey;
/**
* Initialize lookup for the provided segment
*/
PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField, boolean trackReaderKey) throws IOException {
this.uidField = uidField;
final Terms terms = reader.terms(uidField);
if (terms == null) {
// If a segment contains only no-ops, it does not have _uid but has both _soft_deletes and _tombstone fields.
final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETES_FIELD);
final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME);
// this is a special case when we pruned away all IDs in a segment since all docs are deleted.
final boolean allDocsDeleted = (softDeletesDV != null && reader.numDocs() == 0);
if ((softDeletesDV == null || tombstoneDV == null) && allDocsDeleted == false) {
throw new IllegalArgumentException(
"reader does not have _uid terms but not a no-op segment; "
+ "_soft_deletes ["
+ softDeletesDV
+ "], _tombstone ["
+ tombstoneDV
+ "]"
);
}
termsEnum = null;
} else {
termsEnum = terms.iterator();
}
if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) {
throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field; _uid terms [" + terms + "]");
}
Object readerKey = null;
assert trackReaderKey ? (readerKey = reader.getCoreCacheHelper().getKey()) != null : readerKey == null;
this.readerKey = readerKey;
}
PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField) throws IOException {
this(reader, uidField, true);
}
/** Return null if id is not found.
* We pass the {@link LeafReaderContext} as an argument so that things
* still work with reader wrappers that hide some documents while still
* using the same cache key. Otherwise we'd have to disable caching
* entirely for these readers.
*/
public DocIdAndVersion lookupVersion(BytesRef id, boolean loadSeqNo, LeafReaderContext context) throws IOException {
assert readerKey == null || context.reader().getCoreCacheHelper().getKey().equals(readerKey)
: "context's reader is not the same as the reader class was initialized on.";
int docID = getDocID(id, context);
if (docID != DocIdSetIterator.NO_MORE_DOCS) {
final long seqNo;
final long term;
if (loadSeqNo) {
seqNo = readNumericDocValues(context.reader(), SeqNoFieldMapper.NAME, docID);
term = readNumericDocValues(context.reader(), SeqNoFieldMapper.PRIMARY_TERM_NAME, docID);
} else {
seqNo = UNASSIGNED_SEQ_NO;
term = UNASSIGNED_PRIMARY_TERM;
}
final long version = readNumericDocValues(context.reader(), VersionFieldMapper.NAME, docID);
return new DocIdAndVersion(docID, version, seqNo, term, context.reader(), context.docBase);
} else {
return null;
}
}
/**
* returns the internal lucene doc id for the given id bytes.
* {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found
* */
private int getDocID(BytesRef id, LeafReaderContext context) throws IOException {
// termsEnum can possibly be null here if this leaf contains only no-ops.
if (termsEnum != null && termsEnum.seekExact(id)) {
final Bits liveDocs = context.reader().getLiveDocs();
int docID = DocIdSetIterator.NO_MORE_DOCS;
// there may be more than one matching docID, in the case of nested docs, so we want the last one:
docsEnum = termsEnum.postings(docsEnum, 0);
for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
if (liveDocs != null && liveDocs.get(d) == false) {
continue;
}
docID = d;
}
return docID;
} else {
return DocIdSetIterator.NO_MORE_DOCS;
}
}
private static long readNumericDocValues(LeafReader reader, String field, int docId) throws IOException {
final NumericDocValues dv = reader.getNumericDocValues(field);
if (dv == null || dv.advanceExact(docId) == false) {
assert false : "document [" + docId + "] does not have docValues for [" + field + "]";
throw new IllegalStateException("document [" + docId + "] does not have docValues for [" + field + "]");
}
return dv.longValue();
}
/** Return null if id is not found. */
DocIdAndSeqNo lookupSeqNo(BytesRef id, LeafReaderContext context) throws IOException {
assert readerKey == null || context.reader().getCoreCacheHelper().getKey().equals(readerKey)
: "context's reader is not the same as the reader class was initialized on.";
final int docID = getDocID(id, context);
if (docID != DocIdSetIterator.NO_MORE_DOCS) {
final long seqNo = readNumericDocValues(context.reader(), SeqNoFieldMapper.NAME, docID);
return new DocIdAndSeqNo(docID, seqNo, context);
} else {
return null;
}
}
}