// org.apache.lucene.codecs.memory.DirectDocValuesProducer Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Reader for {@link DirectDocValuesFormat}
*/
class DirectDocValuesProducer extends DocValuesProducer {
// Per-field metadata read from the meta file, keyed by field name
// (just file pointers and minimal stuff).
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorteds = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
// Handle on the data file; readers work on clones of it.
private final IndexInput data;
// RAM instances we have already loaded, keyed by field name.
private final Map<String, NumericRawValues> numericInstances = new HashMap<>();
private final Map<String, BinaryRawValues> binaryInstances = new HashMap<>();
private final Map<String, SortedRawValues> sortedInstances = new HashMap<>();
private final Map<String, SortedSetRawValues> sortedSetInstances = new HashMap<>();
private final Map<String, SortedNumericRawValues> sortedNumericInstances = new HashMap<>();
private final Map<String, FixedBitSet> docsWithFieldInstances = new HashMap<>();
// Number of entries counted while reading the metadata file.
private final int numEntries;
private final int maxDoc;
// Running total reported by ramBytesUsed(); grows as instances are cached.
private final AtomicLong ramBytesUsed;
// Format version read from the metadata header; the data header must match it.
private final int version;
// True for merge instances, which never add to the instance caches.
private final boolean merging;
// Entry type tags stored in the metadata file, one per field entry.
static final byte NUMBER = 0;
static final byte BYTES = 1;
static final byte SORTED = 2;
static final byte SORTED_SET = 3;
static final byte SORTED_SET_SINGLETON = 4;
static final byte SORTED_NUMERIC = 5;
static final byte SORTED_NUMERIC_SINGLETON = 6;
// Accepted header version range for both the meta and the data file.
static final int VERSION_START = 3;
static final int VERSION_CURRENT = VERSION_START;
/**
 * Copy constructor used to build a merge instance: metadata and already-loaded instances are
 * copied from {@code original}, the data input is cloned, and {@code merging} is set so that
 * this instance never does any instances.put()s of its own.
 *
 * <p>The caller must hold the monitor of {@code original} (checked by assertion).
 */
DirectDocValuesProducer(DirectDocValuesProducer original) throws IOException {
  assert Thread.holdsLock(original);

  // scalar state
  merging = true;
  version = original.version;
  maxDoc = original.maxDoc;
  numEntries = original.numEntries;
  ramBytesUsed = new AtomicLong(original.ramBytesUsed.get());

  // private view of the data file
  data = original.data.clone();

  // per-field metadata
  numerics.putAll(original.numerics);
  binaries.putAll(original.binaries);
  sorteds.putAll(original.sorteds);
  sortedSets.putAll(original.sortedSets);
  sortedNumerics.putAll(original.sortedNumerics);

  // snapshot of whatever the original has loaded so far
  numericInstances.putAll(original.numericInstances);
  binaryInstances.putAll(original.binaryInstances);
  sortedInstances.putAll(original.sortedInstances);
  sortedSetInstances.putAll(original.sortedSetInstances);
  sortedNumericInstances.putAll(original.sortedNumericInstances);
  docsWithFieldInstances.putAll(original.docsWithFieldInstances);
}
/**
 * Opens the producer for a segment: reads and fully verifies the metadata file, then opens the
 * data file and checks its header and footer structure.
 *
 * @param state segment being opened
 * @param dataCodec codec name expected in the data file header
 * @param dataExtension file extension of the data file
 * @param metaCodec codec name expected in the metadata file header
 * @param metaExtension file extension of the metadata file
 * @throws IOException if either file cannot be read or fails validation
 */
DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
  merging = false;
  maxDoc = state.segmentInfo.maxDoc();
  ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));

  // read in the entries from the metadata file.
  final String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  final ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaName, state.context);
  boolean metaOk = false;
  try {
    version = CodecUtil.checkIndexHeader(
        metaIn, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    numEntries = readFields(metaIn, state.fieldInfos);
    CodecUtil.checkFooter(metaIn);
    metaOk = true;
  } finally {
    if (!metaOk) {
      // an exception is already propagating: do not let close() mask it
      IOUtils.closeWhileHandlingException(metaIn);
    } else {
      IOUtils.close(metaIn);
    }
  }

  final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  this.data = state.directory.openInput(dataName, state.context);
  boolean dataOk = false;
  try {
    final int dataVersion = CodecUtil.checkIndexHeader(
        data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    if (dataVersion != version) {
      throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + dataVersion, data);
    }
    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(data);
    dataOk = true;
  } finally {
    // on success the data input stays open until close()
    if (!dataOk) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}
/**
 * Reads one numeric entry from the metadata stream. Field order on the wire: offset (long),
 * count (int), missingOffset (long), missingBytes (long, only when missingOffset is not -1),
 * byteWidth (byte).
 */
private NumericEntry readNumericEntry(IndexInput meta) throws IOException {
  final NumericEntry numeric = new NumericEntry();
  numeric.offset = meta.readLong();
  numeric.count = meta.readInt();
  final long missingOffset = meta.readLong();
  numeric.missingOffset = missingOffset;
  // the missing-bytes length is only present when there is a missing offset
  numeric.missingBytes = (missingOffset == -1) ? 0 : meta.readLong();
  numeric.byteWidth = meta.readByte();
  return numeric;
}
/**
 * Reads one binary entry from the metadata stream. Field order on the wire: offset (long),
 * numBytes (int), count (int), missingOffset (long), missingBytes (long, only when
 * missingOffset is not -1).
 */
private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
  final BinaryEntry binary = new BinaryEntry();
  binary.offset = meta.readLong();
  binary.numBytes = meta.readInt();
  binary.count = meta.readInt();
  final long missingOffset = meta.readLong();
  binary.missingOffset = missingOffset;
  // the missing-bytes length is only present when there is a missing offset
  binary.missingBytes = (missingOffset == -1) ? 0 : meta.readLong();
  return binary;
}
/** Reads a sorted entry: a numeric entry (doc to ord) followed by a binary entry (values). */
private SortedEntry readSortedEntry(IndexInput meta) throws IOException {
  // read order matters: the numeric part precedes the binary part in the stream
  final NumericEntry docToOrd = readNumericEntry(meta);
  final BinaryEntry values = readBinaryEntry(meta);
  final SortedEntry sorted = new SortedEntry();
  sorted.docToOrd = docToOrd;
  sorted.values = values;
  return sorted;
}
/**
 * Reads a sorted-set entry: an address numeric entry (skipped for singleton entries, leaving
 * {@code docToOrdAddress} null), then the ords numeric entry, then the binary values entry.
 */
private SortedSetEntry readSortedSetEntry(IndexInput meta, boolean singleton) throws IOException {
  final NumericEntry addresses = singleton ? null : readNumericEntry(meta);
  final NumericEntry ords = readNumericEntry(meta);
  final BinaryEntry values = readBinaryEntry(meta);
  final SortedSetEntry sortedSet = new SortedSetEntry();
  sortedSet.docToOrdAddress = addresses;
  sortedSet.ords = ords;
  sortedSet.values = values;
  return sortedSet;
}
/**
 * Reads a sorted-numeric entry: an address numeric entry (skipped for singleton entries,
 * leaving {@code docToAddress} null) followed by the values numeric entry.
 */
private SortedNumericEntry readSortedNumericEntry(IndexInput meta, boolean singleton) throws IOException {
  final NumericEntry addresses = singleton ? null : readNumericEntry(meta);
  final NumericEntry values = readNumericEntry(meta);
  final SortedNumericEntry sortedNumeric = new SortedNumericEntry();
  sortedNumeric.docToAddress = addresses;
  sortedNumeric.values = values;
  return sortedNumeric;
}
/**
 * Reads every field entry from the metadata stream into the per-type metadata maps.
 *
 * <p>The stream is a sequence of (field number, entry type, entry payload) records terminated
 * by a field number of -1. Sorted and sorted-set entries also register their values under
 * {@code binaries}.
 *
 * @param meta metadata input, positioned just after the header
 * @param infos field infos of the segment, used to resolve field numbers to names
 * @return the number of entries read
 * @throws CorruptIndexException if a field number is unknown to {@code infos} or an entry type
 *     is not recognized
 * @throws IOException if reading from {@code meta} fails
 */
private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
  int entryCount = 0;
  for (int fieldNumber = meta.readVInt(); fieldNumber != -1; fieldNumber = meta.readVInt()) {
    entryCount++;
    final FieldInfo info = infos.fieldInfo(fieldNumber);
    if (info == null) {
      // an unknown field number means the metadata does not belong to this segment or is
      // damaged; report that instead of failing later with a NullPointerException
      throw new CorruptIndexException("invalid field number: " + fieldNumber, meta);
    }
    final int fieldType = meta.readByte();
    switch (fieldType) {
      case NUMBER:
        numerics.put(info.name, readNumericEntry(meta));
        break;
      case BYTES:
        binaries.put(info.name, readBinaryEntry(meta));
        break;
      case SORTED: {
        final SortedEntry entry = readSortedEntry(meta);
        sorteds.put(info.name, entry);
        binaries.put(info.name, entry.values);
        break;
      }
      case SORTED_SET:
      case SORTED_SET_SINGLETON: {
        final SortedSetEntry entry = readSortedSetEntry(meta, fieldType == SORTED_SET_SINGLETON);
        sortedSets.put(info.name, entry);
        binaries.put(info.name, entry.values);
        break;
      }
      case SORTED_NUMERIC:
      case SORTED_NUMERIC_SINGLETON: {
        final SortedNumericEntry entry = readSortedNumericEntry(meta, fieldType == SORTED_NUMERIC_SINGLETON);
        sortedNumerics.put(info.name, entry);
        break;
      }
      default:
        throw new CorruptIndexException("invalid entry type: " + fieldType + ", field= " + info.name, meta);
    }
  }
  return entryCount;
}
/** Returns the current value of the running RAM counter maintained by this producer. */
@Override
public long ramBytesUsed() {
  final long bytes = ramBytesUsed.get();
  return bytes;
}
/**
 * Returns an unmodifiable list with one named accountable per cached instance, grouped by
 * instance kind. Synchronized because the instance caches are filled lazily by synchronized
 * getters.
 */
@Override
public synchronized Collection<Accountable> getChildResources() {
  final List<Accountable> resources = new ArrayList<>();
  resources.addAll(Accountables.namedAccountables("numeric field", numericInstances));
  resources.addAll(Accountables.namedAccountables("binary field", binaryInstances));
  resources.addAll(Accountables.namedAccountables("sorted field", sortedInstances));
  resources.addAll(Accountables.namedAccountables("sorted set field", sortedSetInstances));
  resources.addAll(Accountables.namedAccountables("sorted numeric field", sortedNumericInstances));
  resources.addAll(Accountables.namedAccountables("missing bitset field", docsWithFieldInstances));
  return Collections.unmodifiableList(resources);
}
/** Returns the simple class name followed by the number of metadata entries. */
@Override
public String toString() {
  final StringBuilder sb = new StringBuilder(getClass().getSimpleName());
  sb.append("(entries=").append(numEntries).append(")");
  return sb.toString();
}
/** Checksums the entire data file, reading through a clone of the data input. */
@Override
public void checkIntegrity() throws IOException {
  final IndexInput dataClone = data.clone();
  CodecUtil.checksumEntireFile(dataClone);
}
/**
 * Returns the numeric doc values for {@code field}, loading them on first use. Loaded values
 * are cached and added to the RAM counter, except on merge instances.
 */
@Override
public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
  final String name = field.name;
  final NumericRawValues cached = numericInstances.get(name);
  if (cached != null) {
    return cached.numerics;
  }
  // Lazy load
  final NumericRawValues loaded = loadNumeric(numerics.get(name));
  if (merging == false) {
    numericInstances.put(name, loaded);
    ramBytesUsed.addAndGet(loaded.ramBytesUsed());
  }
  return loaded.numerics;
}
// Loads the numeric values described by entry from a clone of the data input.
// NOTE(review): only the beginning of this method survives in this listing; see the note on the
// truncated loop in "case 2" below.
private NumericRawValues loadNumeric(NumericEntry entry) throws IOException {
NumericRawValues ret = new NumericRawValues();
// read through a clone of the data input
IndexInput data = this.data.clone();
// presumably skips a missing-docs bitset stored ahead of the values; TODO confirm against the writer
data.seek(entry.offset + entry.missingBytes);
switch (entry.byteWidth) {
case 1:
{
// one byte per value: bulk-read entry.count bytes and serve lookups straight from the array
final byte[] values = new byte[entry.count];
data.readBytes(values, 0, entry.count);
ret.bytesUsed = RamUsageEstimator.sizeOf(values);
ret.numerics = new NumericDocValues() {
@Override
public long get(int idx) {
return values[idx];
}
};
return ret;
}
case 2:
{
final short[] values = new short[entry.count];
// NOTE(review): the next line is corrupted. The text between "for(int i=0;i" and "> 3];" was
// lost (most likely eaten as an HTML tag when the listing was extracted). The lost text covers
// the rest of this method and everything up to the allocation of "bits" in what appears to be
// the missing-bits loader called by getDocsWithField. Restore it from the upstream source;
// do not reconstruct it by hand.
for(int i=0;i> 3];
// fill the long[] that backs the bitset from the data input
for (int i = 0; i < bits.length; i++) {
bits[i] = data.readLong();
}
instance = new FixedBitSet(bits, maxDoc);
// merge instances never cache what they load
if (!merging) {
docsWithFieldInstances.put(field.name, instance);
ramBytesUsed.addAndGet(instance.ramBytesUsed());
}
}
}
return instance;
}
}
/**
 * Returns the set of documents that have a value for {@code field}. Sorted, sorted-set and
 * sorted-numeric fields derive it from their values; binary and numeric fields load it from
 * the missing-bits region recorded in their metadata entry.
 */
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
  final Bits docsWithField;
  switch (field.getDocValuesType()) {
    case NUMERIC: {
      final NumericEntry numeric = numerics.get(field.name);
      docsWithField = getMissingBits(field, numeric.missingOffset, numeric.missingBytes);
      break;
    }
    case BINARY: {
      final BinaryEntry binary = binaries.get(field.name);
      docsWithField = getMissingBits(field, binary.missingOffset, binary.missingBytes);
      break;
    }
    case SORTED:
      docsWithField = DocValues.docsWithValue(getSorted(field), maxDoc);
      break;
    case SORTED_NUMERIC:
      docsWithField = DocValues.docsWithValue(getSortedNumeric(field), maxDoc);
      break;
    case SORTED_SET:
      docsWithField = DocValues.docsWithValue(getSortedSet(field), maxDoc);
      break;
    default:
      throw new AssertionError();
  }
  return docsWithField;
}
/**
 * Returns a new producer built with the copy constructor. Synchronized so that the copy
 * constructor's lock assertion on this instance holds.
 */
@Override
public synchronized DocValuesProducer getMergeInstance() throws IOException {
  final DirectDocValuesProducer mergeInstance = new DirectDocValuesProducer(this);
  return mergeInstance;
}
/** Closes the data input held by this producer. */
@Override
public void close() throws IOException {
  IOUtils.close(data);
}
/**
 * In-memory holder for binary values: a {@code bytes} array plus an optional {@code address}
 * array. Reports the size of both arrays as its RAM usage.
 */
static class BinaryRawValues implements Accountable {
  byte[] bytes;
  // may be null, in which case it is left out of the accounting
  int[] address;

  @Override
  public long ramBytesUsed() {
    long bytesUsed = RamUsageEstimator.sizeOf(bytes);
    if (address != null) {
      bytesUsed += RamUsageEstimator.sizeOf(address);
    }
    return bytesUsed;
  }

  @Override
  public Collection<Accountable> getChildResources() {
    final List<Accountable> resources = new ArrayList<>();
    if (address != null) {
      resources.add(Accountables.namedAccountable("addresses", RamUsageEstimator.sizeOf(address)));
    }
    resources.add(Accountables.namedAccountable("bytes", RamUsageEstimator.sizeOf(bytes)));
    return Collections.unmodifiableList(resources);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}
/**
 * In-memory holder for numeric values: the {@link NumericDocValues} view handed to callers and
 * the byte count recorded for it when it was loaded.
 */
static class NumericRawValues implements Accountable {
  // RAM size recorded by the loader for the array behind numerics
  long bytesUsed;
  NumericDocValues numerics;

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

  @Override
  public long ramBytesUsed() {
    return bytesUsed;
  }
}
/**
 * In-memory holder for sorted values; RAM accounting and child resources are delegated to the
 * {@code docToOrd} numeric values.
 */
static class SortedRawValues implements Accountable {
  NumericRawValues docToOrd;

  @Override
  public long ramBytesUsed() {
    return docToOrd.ramBytesUsed();
  }

  @Override
  public Collection<Accountable> getChildResources() {
    return docToOrd.getChildResources();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}
/**
 * In-memory holder for sorted-numeric values: the {@code values} plus an optional
 * {@code docToAddress} lookup. Both are included in the RAM accounting when present.
 */
static class SortedNumericRawValues implements Accountable {
  // may be null, in which case it is left out of the accounting
  NumericRawValues docToAddress;
  NumericRawValues values;

  @Override
  public long ramBytesUsed() {
    long bytesUsed = values.ramBytesUsed();
    if (docToAddress != null) {
      bytesUsed += docToAddress.ramBytesUsed();
    }
    return bytesUsed;
  }

  @Override
  public Collection<Accountable> getChildResources() {
    final List<Accountable> resources = new ArrayList<>();
    if (docToAddress != null) {
      resources.add(Accountables.namedAccountable("addresses", docToAddress));
    }
    resources.add(Accountables.namedAccountable("values", values));
    return Collections.unmodifiableList(resources);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}
/**
 * In-memory holder for sorted-set values: the {@code ords} plus an optional
 * {@code docToOrdAddress} lookup. Both are included in the RAM accounting when present.
 */
static class SortedSetRawValues implements Accountable {
  // may be null, in which case it is left out of the accounting
  NumericRawValues docToOrdAddress;
  NumericRawValues ords;

  @Override
  public long ramBytesUsed() {
    long bytesUsed = ords.ramBytesUsed();
    if (docToOrdAddress != null) {
      bytesUsed += docToOrdAddress.ramBytesUsed();
    }
    return bytesUsed;
  }

  @Override
  public Collection<Accountable> getChildResources() {
    final List<Accountable> resources = new ArrayList<>();
    if (docToOrdAddress != null) {
      resources.add(Accountables.namedAccountable("addresses", docToOrdAddress));
    }
    resources.add(Accountables.namedAccountable("ordinals", ords));
    return Collections.unmodifiableList(resources);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}
/** Metadata for one numeric entry, as read from the metadata file. */
static class NumericEntry {
  // position in the data file; the loader seeks to offset + missingBytes
  long offset;
  // number of values; used as the length of the loaded array
  int count;
  // width selector the loader switches on (1 selects a byte[], 2 a short[])
  byte byteWidth;

  // -1 means no missingBytes value was stored in the metadata
  long missingOffset;
  // 0 when missingOffset is -1
  long missingBytes;

  // not populated when the entry is read from metadata
  int packedIntsVersion;
}
/** Metadata for one binary entry, as read from the metadata file. */
static class BinaryEntry {
  // populated when the entry is read from metadata
  long offset;
  int numBytes;
  int count;
  // -1 means no missingBytes value was stored in the metadata
  long missingOffset;
  // 0 when missingOffset is -1
  long missingBytes;

  // not populated when the entry is read from metadata
  int minLength;
  int maxLength;
  int packedIntsVersion;
  int blockSize;
}
/** Metadata for one sorted entry: a binary entry for the values and a numeric entry for doc to ord. */
static class SortedEntry {
  BinaryEntry values;
  NumericEntry docToOrd;
}
/** Metadata for one sorted-set entry. */
static class SortedSetEntry {
  BinaryEntry values;
  NumericEntry ords;
  // left null for singleton entries
  NumericEntry docToOrdAddress;
}
/** Metadata for one sorted-numeric entry. */
static class SortedNumericEntry {
  NumericEntry values;
  // left null for singleton entries
  NumericEntry docToAddress;
}
/**
 * Offset / ordinal-count pair.
 * NOTE(review): nothing in the visible part of this file references this class; confirm
 * whether it is still needed.
 */
static class FSTEntry {
  long numOrds;
  long offset;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy