
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.internal.hppc.LongArrayList;
import org.apache.lucene.internal.tests.SegmentReaderAccess;
import org.apache.lucene.internal.tests.TestSecrets;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
/**
* IndexReader implementation over a single segment.
*
* Instances pointing to the same segment (but with different deletes, etc.) may share the same
* core data.
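*
* <p>Applications usually do not create a SegmentReader directly; they obtain it as a leaf of
* a composite reader. A minimal sketch (assuming {@code dir} is an open {@link Directory} over
* an existing index):
*
* <pre>{@code
* try (DirectoryReader reader = DirectoryReader.open(dir)) {
*   for (LeafReaderContext ctx : reader.leaves()) {
*     // leaves of a standard DirectoryReader are SegmentReader instances
*     if (ctx.reader() instanceof SegmentReader) {
*       SegmentReader segmentReader = (SegmentReader) ctx.reader();
*       System.out.println(segmentReader.getSegmentName() + " numDocs=" + segmentReader.numDocs());
*     }
*   }
* }
* }</pre>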
*
* @lucene.experimental
*/
public final class SegmentReader extends CodecReader {
private final SegmentCommitInfo si;
// this is the original SI that IW uses internally, but it's mutated behind the scenes
// and we don't want this SI to be used for anything. Yet, IW needs this to do maintenance
// and look up pooled readers etc.
private final SegmentCommitInfo originalSi;
private final LeafMetaData metaData;
private final Bits liveDocs;
private final Bits hardLiveDocs;
// Normally set to si.maxDoc - si.delDocCount, unless we
// were created as an NRT reader from IW, in which case IW
// tells us the number of live docs:
private final int numDocs;
final SegmentCoreReaders core;
final SegmentDocValues segDocValues;
/**
* True if we are holding RAM-only liveDocs or DV updates, i.e. the SegmentCommitInfo delGen
* doesn't match our liveDocs.
*/
final boolean isNRT;
final DocValuesProducer docValuesProducer;
final FieldInfos fieldInfos;
/**
* Constructs a new SegmentReader with a new core.
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
SegmentReader(SegmentCommitInfo si, int createdVersionMajor, IOContext context)
throws IOException {
this.si = si.clone();
this.originalSi = si;
this.metaData =
new LeafMetaData(
createdVersionMajor,
si.info.getMinVersion(),
si.info.getIndexSort(),
si.info.getHasBlocks());
// We pull liveDocs/DV updates from disk:
this.isNRT = false;
core = new SegmentCoreReaders(si.info.dir, si, context);
segDocValues = new SegmentDocValues();
boolean success = false;
final Codec codec = si.info.getCodec();
try {
if (si.hasDeletions()) {
// NOTE: the bitvector is stored using the regular directory, not cfs
hardLiveDocs =
liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE);
} else {
assert si.getDelCount() == 0;
hardLiveDocs = liveDocs = null;
}
numDocs = si.info.maxDoc() - si.getDelCount();
fieldInfos = initFieldInfos();
docValuesProducer = initDocValuesProducer();
assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs);
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
// fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
if (!success) {
doClose();
}
}
}
/**
* Creates a new SegmentReader that shares the core of a previous SegmentReader, uses the
* provided liveDocs, and records whether those liveDocs were carried in RAM (isNRT=true).
*/
SegmentReader(
SegmentCommitInfo si,
SegmentReader sr,
Bits liveDocs,
Bits hardLiveDocs,
int numDocs,
boolean isNRT)
throws IOException {
if (numDocs > si.info.maxDoc()) {
throw new IllegalArgumentException("numDocs=" + numDocs + " but maxDoc=" + si.info.maxDoc());
}
if (liveDocs != null && liveDocs.length() != si.info.maxDoc()) {
throw new IllegalArgumentException(
"maxDoc=" + si.info.maxDoc() + " but liveDocs.size()=" + liveDocs.length());
}
this.si = si.clone();
this.originalSi = si;
this.metaData = sr.getMetaData();
this.liveDocs = liveDocs;
this.hardLiveDocs = hardLiveDocs;
assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs);
this.isNRT = isNRT;
this.numDocs = numDocs;
this.core = sr.core;
core.incRef();
this.segDocValues = sr.segDocValues;
boolean success = false;
try {
fieldInfos = initFieldInfos();
docValuesProducer = initDocValuesProducer();
success = true;
} finally {
if (!success) {
doClose();
}
}
}
private static boolean assertLiveDocs(boolean isNRT, Bits hardLiveDocs, Bits liveDocs) {
if (isNRT) {
assert hardLiveDocs == null || liveDocs != null
: " liveDocs must be non null if hardLiveDocs are non null";
} else {
assert hardLiveDocs == liveDocs : "non-nrt case must have identical liveDocs";
}
return true;
}
/** init most recent DocValues for the current commit */
private DocValuesProducer initDocValuesProducer() throws IOException {
if (fieldInfos.hasDocValues() == false) {
return null;
} else {
Directory dir;
if (core.cfsReader != null) {
dir = core.cfsReader;
} else {
dir = si.info.dir;
}
if (si.hasFieldUpdates()) {
return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
} else {
// simple case, no DocValues updates
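// gen -1 is the key for the base (no field updates) producer; doClose() releases it
// via segDocValues.decRef(LongArrayList.from(-1L))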
return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
}
}
}
/** init most recent FieldInfos for the current commit */
private FieldInfos initFieldInfos() throws IOException {
if (!si.hasFieldUpdates()) {
return core.coreFieldInfos;
} else {
// updates always outside of CFS
FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
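// the gen is base-36 encoded (Character.MAX_RADIX == 36), e.g. fieldInfosGen=37 -> suffix "11"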
return fisFormat.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
}
}
@Override
public Bits getLiveDocs() {
ensureOpen();
return liveDocs;
}
@Override
protected void doClose() throws IOException {
// System.out.println("SR.close seg=" + si);
try {
core.decRef();
} finally {
if (docValuesProducer instanceof SegmentDocValuesProducer) {
segDocValues.decRef(((SegmentDocValuesProducer) docValuesProducer).dvGens);
} else if (docValuesProducer != null) {
segDocValues.decRef(LongArrayList.from(-1L));
}
}
}
@Override
public FieldInfos getFieldInfos() {
ensureOpen();
return fieldInfos;
}
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
return numDocs;
}
@Override
public int maxDoc() {
// Don't call ensureOpen() here (it could affect performance)
return si.info.maxDoc();
}
@Override
public TermVectorsReader getTermVectorsReader() {
ensureOpen();
if (core.termVectorsReaderOrig == null) {
return null;
} else {
return core.termVectorsReaderOrig.clone();
}
}
@Override
public StoredFieldsReader getFieldsReader() {
ensureOpen();
return core.fieldsReaderOrig.clone();
}
@Override
public PointsReader getPointsReader() {
ensureOpen();
return core.pointsReader;
}
@Override
public NormsProducer getNormsReader() {
ensureOpen();
return core.normsProducer;
}
@Override
public DocValuesProducer getDocValuesReader() {
ensureOpen();
return docValuesProducer;
}
@Override
public KnnVectorsReader getVectorReader() {
ensureOpen();
return core.knnVectorsReader;
}
@Override
public FieldsProducer getPostingsReader() {
ensureOpen();
return core.fields;
}
@Override
public String toString() {
// SegmentInfo.toString takes dir and number of
// *pending* deletions; so we reverse compute that here:
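// e.g. maxDoc=10, numDocs=7, delCount=2 -> 10 - 7 - 2 = 1 pending deletion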
return si.toString(si.info.maxDoc() - numDocs - si.getDelCount());
}
/** Return the name of the segment this reader is reading. */
public String getSegmentName() {
return si.info.name;
}
/** Return the SegmentCommitInfo of the segment this reader is reading. */
public SegmentCommitInfo getSegmentInfo() {
return si;
}
/** Returns the directory this index resides in. */
public Directory directory() {
// Don't ensureOpen here -- in certain cases, when a
// cloned/reopened reader needs to commit, it may call
// this method on the closed original reader
return si.info.dir;
}
private final Set<IndexReader.ClosedListener> readerClosedListeners = new CopyOnWriteArraySet<>();
@Override
protected void notifyReaderClosedListeners() throws IOException {
synchronized (readerClosedListeners) {
IOUtils.applyToAll(readerClosedListeners, l -> l.onClose(readerCacheHelper.getKey()));
}
}
private final IndexReader.CacheHelper readerCacheHelper =
new IndexReader.CacheHelper() {
private final IndexReader.CacheKey cacheKey = new IndexReader.CacheKey();
@Override
public CacheKey getKey() {
return cacheKey;
}
@Override
public void addClosedListener(ClosedListener listener) {
ensureOpen();
readerClosedListeners.add(listener);
}
};
@Override
public CacheHelper getReaderCacheHelper() {
return readerCacheHelper;
}
/**
* Wrap the cache helper of the core to add ensureOpen() calls that make sure users do not
* register closed listeners on closed indices.
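*
* <p>Unlike the reader cache key, the core key stays stable across reopens that only change
* live docs, which makes it suitable for per-segment caches. A hedged sketch, where {@code
* segmentReader} is an open instance of this class and {@code computeExpensiveValue} is a
* hypothetical helper, not part of this API:
*
* <pre>{@code
* Map<IndexReader.CacheKey, Object> cache = new ConcurrentHashMap<>();
* Object value =
*     cache.computeIfAbsent(
*         segmentReader.getCoreCacheHelper().getKey(), key -> computeExpensiveValue());
* }</pre>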
*/
private final IndexReader.CacheHelper coreCacheHelper =
new IndexReader.CacheHelper() {
@Override
public CacheKey getKey() {
return core.getCacheHelper().getKey();
}
@Override
public void addClosedListener(ClosedListener listener) {
ensureOpen();
core.getCacheHelper().addClosedListener(listener);
}
};
@Override
public CacheHelper getCoreCacheHelper() {
return coreCacheHelper;
}
@Override
public LeafMetaData getMetaData() {
return metaData;
}
/**
* Returns the original SegmentCommitInfo passed to the segment reader at creation time. {@link
* #getSegmentInfo()} returns a clone of this instance.
*/
SegmentCommitInfo getOriginalSegmentInfo() {
return originalSi;
}
/**
* Returns the live docs that are not hard-deleted. This is an expert API to be used with
* soft-deletes to filter out documents that are hard-deleted (for instance due to aborted
* documents), or to distinguish soft-deleted from hard-deleted documents, i.e. a rolled-back
* tombstone.
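*
* <p>A minimal sketch, where {@code segmentReader} is an open instance of this class, counting
* documents that are soft-deleted but not hard-deleted:
*
* <pre>{@code
* Bits live = segmentReader.getLiveDocs();
* Bits hardLive = segmentReader.getHardLiveDocs();
* int softDeletedOnly = 0;
* for (int doc = 0; doc < segmentReader.maxDoc(); doc++) {
*   boolean isLive = live == null || live.get(doc);
*   boolean isHardLive = hardLive == null || hardLive.get(doc);
*   if (isHardLive && isLive == false) {
*     softDeletedOnly++; // not hard-deleted, but soft-deleted
*   }
* }
* }</pre>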
*
* @lucene.experimental
*/
public Bits getHardLiveDocs() {
return hardLiveDocs;
}
@Override
public void checkIntegrity() throws IOException {
super.checkIntegrity();
if (core.cfsReader != null) {
core.cfsReader.checkIntegrity();
}
}
static {
TestSecrets.setSegmentReaderAccess(
new SegmentReaderAccess() {
@Override
public Object getCore(SegmentReader segmentReader) {
return segmentReader.core;
}
});
}
}