org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import com.google.common.base.Preconditions;
import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
import java.util.*;
/**
* Stores numeric data into bit-packed arrays for better memory efficiency.
*/
public class PackedArrayIndexFieldData extends AbstractIndexFieldData implements IndexNumericFieldData {
public static class Builder implements IndexFieldData.Builder {
private NumericType numericType;
public Builder setNumericType(NumericType numericType) {
this.numericType = numericType;
return this;
}
@Override
public IndexFieldData build(Index index, Settings indexSettings, MappedFieldType fieldType,
IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) {
return new PackedArrayIndexFieldData(index, indexSettings, fieldType.names(), fieldType.fieldDataType(), cache, numericType, breakerService);
}
}
private final NumericType numericType;
private final CircuitBreakerService breakerService;
public PackedArrayIndexFieldData(Index index, Settings indexSettings, MappedFieldType.Names fieldNames,
FieldDataType fieldDataType, IndexFieldDataCache cache, NumericType numericType,
CircuitBreakerService breakerService) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
Preconditions.checkNotNull(numericType);
Preconditions.checkArgument(EnumSet.of(NumericType.BOOLEAN, NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
this.numericType = numericType;
this.breakerService = breakerService;
}
@Override
public NumericType getNumericType() {
return numericType;
}
@Override
public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception {
final LeafReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
AtomicNumericFieldData data = null;
PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA), getNumericType(), getFieldNames().fullName());
if (terms == null) {
data = AtomicLongFieldData.empty(reader.maxDoc());
estimator.adjustForNoTerms(data.ramBytesUsed());
return data;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
// Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
// longs is going to be monotonically increasing
final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
TermsEnum termsEnum = estimator.beforeLoad(terms);
assert !getNumericType().isFloatingPoint();
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
BytesRefIterator iter = builder.buildFromTerms(termsEnum);
BytesRef term;
while ((term = iter.next()) != null) {
final long value = numericType.toLong(term);
valuesBuilder.add(value);
}
final PackedLongValues values = valuesBuilder.build();
final Ordinals build = builder.build(fieldDataType.getSettings());
CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType);
RandomAccessOrds ordinals = build.ordinals();
if (FieldData.isMultiValued(ordinals) || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) {
final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
data = new AtomicLongFieldData(ramBytesUsed) {
@Override
public SortedNumericDocValues getLongValues() {
return withOrdinals(build, values, reader.maxDoc());
}
@Override
public Collection getChildResources() {
List resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("ordinals", build));
resources.add(Accountables.namedAccountable("values", values));
return Collections.unmodifiableList(resources);
}
};
} else {
final BitSet docsWithValues = builder.buildDocsWithValuesSet();
long minV, maxV;
minV = maxV = 0;
if (values.size() > 0) {
minV = values.get(0);
maxV = values.get(values.size() - 1);
}
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024);
if (formatHint == null) {
formatHint = chooseStorageFormat(reader, values, build, ordinals, minV, maxV, acceptableOverheadRatio, pageSize);
}
logger.trace("single value format for field [{}] set to [{}]", getFieldNames().fullName(), formatHint);
switch (formatHint) {
case PACKED:
// Encode document without a value with a special value
long missingV = 0;
if (docsWithValues != null) {
if ((maxV - minV + 1) == values.size()) {
// values are dense
if (minV > Long.MIN_VALUE) {
missingV = --minV;
} else {
assert maxV != Long.MAX_VALUE;
missingV = ++maxV;
}
} else {
for (long i = 1; i < values.size(); ++i) {
if (values.get(i) > values.get(i - 1) + 1) {
missingV = values.get(i - 1) + 1;
break;
}
}
}
missingV -= minV;
}
final long missingValue = missingV;
final long minValue = minV;
final long maxValue = maxV;
final long valuesDelta = maxValue - minValue;
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
if (docsWithValues != null) {
sValues.fill(0, sValues.size(), missingV);
}
for (int i = 0; i < reader.maxDoc(); i++) {
ordinals.setDocument(i);
if (ordinals.cardinality() > 0) {
final long ord = ordinals.ordAt(0);
long value = values.get(ord);
sValues.set(i, value - minValue);
}
}
long ramBytesUsed = values.ramBytesUsed() + (docsWithValues == null ? 0 : docsWithValues.ramBytesUsed());
data = new AtomicLongFieldData(ramBytesUsed) {
@Override
public SortedNumericDocValues getLongValues() {
if (docsWithValues == null) {
return singles(sValues, minValue);
} else {
return sparseSingles(sValues, minValue, missingValue, reader.maxDoc());
}
}
@Override
public Collection getChildResources() {
List resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("values", sValues));
if (docsWithValues != null) {
resources.add(Accountables.namedAccountable("missing bitset", docsWithValues));
}
return Collections.unmodifiableList(resources);
}
};
break;
case PAGED:
final PackedLongValues.Builder dpValues = PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);
long lastValue = 0;
for (int i = 0; i < reader.maxDoc(); i++) {
ordinals.setDocument(i);
if (ordinals.cardinality() > 0) {
final long ord = ordinals.ordAt(i);
lastValue = values.get(ord);
}
dpValues.add(lastValue);
}
final PackedLongValues pagedValues = dpValues.build();
ramBytesUsed = pagedValues.ramBytesUsed();
if (docsWithValues != null) {
ramBytesUsed += docsWithValues.ramBytesUsed();
}
data = new AtomicLongFieldData(ramBytesUsed) {
@Override
public SortedNumericDocValues getLongValues() {
return pagedSingles(pagedValues, docsWithValues);
}
@Override
public Collection getChildResources() {
List resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("values", pagedValues));
if (docsWithValues != null) {
resources.add(Accountables.namedAccountable("missing bitset", docsWithValues));
}
return Collections.unmodifiableList(resources);
}
};
break;
case ORDINALS:
ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
data = new AtomicLongFieldData(ramBytesUsed) {
@Override
public SortedNumericDocValues getLongValues() {
return withOrdinals(build, values, reader.maxDoc());
}
@Override
public Collection getChildResources() {
List resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("ordinals", build));
resources.add(Accountables.namedAccountable("values", values));
return Collections.unmodifiableList(resources);
}
};
break;
default:
throw new ElasticsearchException("unknown memory format: " + formatHint);
}
}
success = true;
return data;
} finally {
if (!success) {
// If something went wrong, unwind any current estimations we've made
estimator.afterLoad(termsEnum, 0);
} else {
// Adjust as usual, based on the actual size of the field data
estimator.afterLoad(termsEnum, data.ramBytesUsed());
}
}
}
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(LeafReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals,
long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {
CommonSettings.MemoryStorageFormat format;
// estimate memory usage for a single packed array
long packedDelta = maxValue - minValue + 1; // allow for a missing value
// valuesDelta can be negative if the difference between max and min values overflows the positive side of longs.
int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
// ordinal memory usage
final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();
// estimate the memory signature of paged packing
long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages
int pageIndex = 0;
long pageMinOrdinal = Long.MAX_VALUE;
long pageMaxOrdinal = Long.MIN_VALUE;
for (int i = 1; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) {
ordinals.setDocument(i);
if (ordinals.cardinality() > 0) {
long ordinal = ordinals.ordAt(0);
pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal);
pageMinOrdinal = Math.min(ordinal, pageMinOrdinal);
}
if (pageIndex == pageSize - 1) {
// end of page, we now know enough to estimate memory usage
pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
pageMinOrdinal = Long.MAX_VALUE;
pageMaxOrdinal = Long.MIN_VALUE;
}
}
if (pageIndex > 0) {
// last page estimation
pageIndex++;
pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
}
if (ordinalsSize < singleValuesSize) {
if (ordinalsSize < pagedSingleValuesSize) {
format = CommonSettings.MemoryStorageFormat.ORDINALS;
} else {
format = CommonSettings.MemoryStorageFormat.PAGED;
}
} else {
if (pagedSingleValuesSize < singleValuesSize) {
format = CommonSettings.MemoryStorageFormat.PAGED;
} else {
format = CommonSettings.MemoryStorageFormat.PACKED;
}
}
return format;
}
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
int bitsRequired;
long pageMemorySize = 0;
PackedInts.FormatAndBits formatAndBits;
if (pageMaxOrdinal == Long.MIN_VALUE) {
// empty page - will use the null reader which just stores size
pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
} else {
long pageMinValue = values.get(pageMinOrdinal);
long pageMaxValue = values.get(pageMaxOrdinal);
long pageDelta = pageMaxValue - pageMinValue;
if (pageDelta != 0) {
bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
} else {
// empty page
pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
}
}
return pageMemorySize;
}
@Override
protected AtomicNumericFieldData empty(int maxDoc) {
return AtomicLongFieldData.empty(maxDoc);
}
@Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}
/**
* Estimator that wraps numeric field data loading in a
* RamAccountingTermsEnum, adjusting the breaker after data has been
* loaded
*/
public class PackedArrayEstimator implements PerValueEstimator {
private final CircuitBreaker breaker;
private final NumericType type;
private final String fieldName;
public PackedArrayEstimator(CircuitBreaker breaker, NumericType type, String fieldName) {
this.breaker = breaker;
this.type = type;
this.fieldName = fieldName;
}
/**
* @return number of bytes per term, based on the NumericValue.requiredBits()
*/
@Override
public long bytesPerValue(BytesRef term) {
// Estimate about about 0.8 (8 / 10) compression ratio for
// numbers, but at least 4 bytes
return Math.max(type.requiredBits() / 10, 4);
}
/**
* @return A TermsEnum wrapped in a RamAccountingTermsEnum
*/
@Override
public TermsEnum beforeLoad(Terms terms) throws IOException {
return new RamAccountingTermsEnum(type.wrapTermsEnum(terms.iterator()), breaker, this, this.fieldName);
}
/**
* Adjusts the breaker based on the aggregated value from the RamAccountingTermsEnum
*
* @param termsEnum terms that were wrapped and loaded
* @param actualUsed actual field data memory usage
*/
@Override
public void afterLoad(TermsEnum termsEnum, long actualUsed) {
assert termsEnum instanceof RamAccountingTermsEnum;
long estimatedBytes = ((RamAccountingTermsEnum) termsEnum).getTotalBytes();
breaker.addWithoutBreaking(-(estimatedBytes - actualUsed));
}
/**
* Adjust the breaker when no terms were actually loaded, but the field
* data takes up space regardless. For instance, when ordinals are
* used.
*
* @param actualUsed bytes actually used
*/
public void adjustForNoTerms(long actualUsed) {
breaker.addWithoutBreaking(actualUsed);
}
}
private static SortedNumericDocValues withOrdinals(Ordinals ordinals, final LongValues values, int maxDoc) {
final RandomAccessOrds ords = ordinals.ordinals();
final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords);
if (singleOrds != null) {
final NumericDocValues singleValues = new NumericDocValues() {
@Override
public long get(int docID) {
final int ord = singleOrds.getOrd(docID);
if (ord >= 0) {
return values.get(singleOrds.getOrd(docID));
} else {
return 0;
}
}
};
return DocValues.singleton(singleValues, DocValues.docsWithValue(ords, maxDoc));
} else {
return new SortedNumericDocValues() {
@Override
public long valueAt(int index) {
return values.get(ords.ordAt(index));
}
@Override
public void setDocument(int doc) {
ords.setDocument(doc);
}
@Override
public int count() {
return ords.cardinality();
}
};
}
}
private static SortedNumericDocValues singles(final NumericDocValues deltas, final long minValue) {
final NumericDocValues values;
if (minValue == 0) {
values = deltas;
} else {
values = new NumericDocValues() {
@Override
public long get(int docID) {
return minValue + deltas.get(docID);
}
};
}
return DocValues.singleton(values, null);
}
private static SortedNumericDocValues sparseSingles(final NumericDocValues deltas, final long minValue, final long missingValue, final int maxDoc) {
final NumericDocValues values = new NumericDocValues() {
@Override
public long get(int docID) {
final long delta = deltas.get(docID);
if (delta == missingValue) {
return 0;
}
return minValue + delta;
}
};
final Bits docsWithFields = new Bits() {
@Override
public boolean get(int index) {
return deltas.get(index) != missingValue;
}
@Override
public int length() {
return maxDoc;
}
};
return DocValues.singleton(values, docsWithFields);
}
private static SortedNumericDocValues pagedSingles(final PackedLongValues values, final Bits docsWithValue) {
return DocValues.singleton(new NumericDocValues() {
// we need to wrap since NumericDocValues must return 0 when a doc has no value
@Override
public long get(int docID) {
if (docsWithValue == null || docsWithValue.get(docID)) {
return values.get(docID);
} else {
return 0;
}
}
}, docsWithValue);
}
}