// org.elasticsearch.index.fielddata.plain.ParentChildIndexFieldData (Maven / Gradle / Ivy)
// The newest version!
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import com.carrotsearch.hppc.ObjectObjectHashMap;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import org.apache.lucene.index.*;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentTypeListener;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MappedFieldType.Names;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
 * ParentChildIndexFieldData is responsible for loading the id cache mapping
 * needed for has_child and has_parent queries into memory.
 * <p>
 * The superclass is parameterized with {@link AtomicParentChildFieldData} so that the segment-level
 * values obtained through the field data cache have the type returned by {@code load}.
 */
public class ParentChildIndexFieldData extends AbstractIndexFieldData<AtomicParentChildFieldData> implements IndexParentChildFieldData, DocumentTypeListener {
    // Names of the types that some mapping's _parent field points to, in sorted order.
    private final NavigableSet<String> parentTypes;
    // Supplies the field data circuit breaker used for memory accounting while loading.
    private final CircuitBreakerService breakerService;
    // If child type (a type with _parent field) is added or removed, we want to make sure modifications don't happen
    // while loading.
    private final Object lock = new Object();
/**
 * Creates the parent/child field data and determines the set of parent types.
 * <p>
 * For indices created before 2.0.0-beta1 the set is a mutable {@link TreeSet}: it is seeded by calling
 * {@link #beforeCreate(DocumentMapper)} for every mapper currently known to the mapper service, and this
 * instance is then registered as a type listener. For all other indices the set is built once, as an
 * immutable sorted set, from the mappers whose parent field mapper is active.
 */
public ParentChildIndexFieldData(Index index, Settings indexSettings, MappedFieldType.Names fieldNames,
        FieldDataType fieldDataType, IndexFieldDataCache cache, MapperService mapperService,
        CircuitBreakerService breakerService) {
    super(index, indexSettings, fieldNames, fieldDataType, cache);
    this.breakerService = breakerService;
    if (Version.indexCreated(indexSettings).before(Version.V_2_0_0_beta1)) {
        parentTypes = new TreeSet<>();
        for (DocumentMapper documentMapper : mapperService.docMappers(false)) {
            beforeCreate(documentMapper);
        }
        mapperService.addTypeListener(this);
    } else {
        // Typed builder: only parent type names (strings) are ever added.
        ImmutableSortedSet.Builder<String> builder = ImmutableSortedSet.naturalOrder();
        for (DocumentMapper mapper : mapperService.docMappers(false)) {
            ParentFieldMapper parentFieldMapper = mapper.parentFieldMapper();
            if (parentFieldMapper.active()) {
                builder.add(parentFieldMapper.type());
            }
        }
        parentTypes = builder.build();
    }
}
/**
 * Returns a comparator source for sorting on this field, implemented by
 * {@link BytesRefFieldComparatorSource}.
 */
@Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
    final BytesRefFieldComparatorSource source =
            new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
    return source;
}
/**
 * Loads the segment-level parent/child field data for the given leaf.
 * <p>
 * For indices created on or after 2.0.0-beta1 nothing is cached: a lightweight view is returned that reads
 * the sorted doc values of the join field of the requested type directly from the leaf reader. For older
 * indices the data is obtained through the field data cache.
 *
 * @throws ElasticsearchException if loading through the cache fails; any other throwable is wrapped
 */
@Override
public AtomicParentChildFieldData load(LeafReaderContext context) {
    if (Version.indexCreated(indexSettings()).onOrAfter(Version.V_2_0_0_beta1)) {
        final LeafReader reader = context.reader();
        return new AbstractAtomicParentChildFieldData() {
            @Override
            public Set<String> types() {
                return parentTypes;
            }

            @Override
            public SortedDocValues getOrdinalsValues(String type) {
                try {
                    return DocValues.getSorted(reader, ParentFieldMapper.joinField(type));
                } catch (IOException e) {
                    throw new IllegalStateException("cannot load join doc values field for type [" + type + "]", e);
                }
            }

            @Override
            public long ramBytesUsed() {
                // unknown
                return 0;
            }

            @Override
            public Collection<Accountable> getChildResources() {
                return Collections.emptyList();
            }

            @Override
            public void close() throws ElasticsearchException {
                // nothing to release: this view holds no resources of its own
            }
        };
    } else {
        try {
            return cache.load(context, this);
        } catch (Throwable e) {
            if (e instanceof ElasticsearchException) {
                throw (ElasticsearchException) e;
            } else {
                throw new ElasticsearchException(e.getMessage(), e);
            }
        }
    }
}
/**
 * Builds the in-memory parent/child id data for one segment by walking the uid and parent terms.
 * <p>
 * Terms are read through a {@link ParentChildFilteredTermsEnum} restricted to the known parent types and
 * wrapped by the {@link ParentChildEstimator}, so the field data circuit breaker is charged while loading.
 * For each type a {@link TypeBuilder} collects the ids and the documents that carry them; the builders are
 * then frozen into one {@link PagedBytesAtomicFieldData} per type.
 */
@Override
public AbstractAtomicParentChildFieldData loadDirect(LeafReaderContext context) throws Exception {
    // Make this method throw an UnsupportedOperationException in 3.0, only
    // needed for indices created BEFORE 2.0
    LeafReader reader = context.reader();
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO
    );

    // Snapshot the parent types under the lock so type additions cannot interleave with loading.
    final NavigableSet<BytesRef> parentTypes = new TreeSet<>();
    synchronized (lock) {
        for (String parentType : this.parentTypes) {
            parentTypes.add(new BytesRef(parentType));
        }
    }

    boolean success = false;
    ParentChildAtomicFieldData data = null;
    ParentChildFilteredTermsEnum termsEnum = new ParentChildFilteredTermsEnum(
            new ParentChildIntersectTermsEnum(reader, UidFieldMapper.NAME, ParentFieldMapper.NAME),
            parentTypes
    );
    ParentChildEstimator estimator = new ParentChildEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA), termsEnum);
    TermsEnum estimatedTermsEnum = estimator.beforeLoad(null);
    ObjectObjectHashMap<String, TypeBuilder> typeBuilders = new ObjectObjectHashMap<>();
    try {
        try {
            PostingsEnum docsEnum = null;
            for (BytesRef term = estimatedTermsEnum.next(); term != null; term = estimatedTermsEnum.next()) {
                // Usually this would be estimatedTermsEnum, but the
                // abstract TermsEnum class does not support the .type()
                // and .id() methods, so we skip using the wrapped
                // TermsEnum and delegate directly to the
                // ParentChildFilteredTermsEnum that was originally wrapped
                String type = termsEnum.type();
                TypeBuilder typeBuilder = typeBuilders.get(type);
                if (typeBuilder == null) {
                    typeBuilder = new TypeBuilder(acceptableTransientOverheadRatio, reader);
                    typeBuilders.put(type, typeBuilder);
                }

                BytesRef id = termsEnum.id();
                final long termOrd = typeBuilder.builder.nextOrdinal();
                assert termOrd == typeBuilder.termOrdToBytesOffset.size();
                typeBuilder.termOrdToBytesOffset.add(typeBuilder.bytes.copyUsingLengthPrefix(id));
                docsEnum = estimatedTermsEnum.postings(docsEnum, PostingsEnum.NONE);
                for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                    typeBuilder.builder.addDoc(docId);
                }
            }

            ImmutableOpenMap.Builder<String, AtomicOrdinalsFieldData> typeToAtomicFieldData =
                    ImmutableOpenMap.builder(typeBuilders.size());
            for (ObjectObjectCursor<String, TypeBuilder> cursor : typeBuilders) {
                PagedBytes.Reader bytesReader = cursor.value.bytes.freeze(true);
                final Ordinals ordinals = cursor.value.builder.build(fieldDataType.getSettings());

                typeToAtomicFieldData.put(
                        cursor.key,
                        new PagedBytesAtomicFieldData(bytesReader, cursor.value.termOrdToBytesOffset.build(), ordinals)
                );
            }
            data = new ParentChildAtomicFieldData(typeToAtomicFieldData.build());
        } finally {
            // The ordinals builders are closed whether or not loading succeeded.
            for (ObjectObjectCursor<String, TypeBuilder> cursor : typeBuilders) {
                cursor.value.builder.close();
            }
        }
        success = true;
        return data;
    } finally {
        // Reconcile the breaker: on failure report zero actual usage.
        if (success) {
            estimator.afterLoad(estimatedTermsEnum, data.ramBytesUsed());
        } else {
            estimator.afterLoad(estimatedTermsEnum, 0);
        }
    }
}
/**
 * Type listener callback: records the parent type of a mapper whose parent field mapper is active and
 * clears this field data when that type was not known before.
 */
@Override
public void beforeCreate(DocumentMapper mapper) {
    // Remove in 3.0
    synchronized (lock) {
        final ParentFieldMapper parentMapper = mapper.parentFieldMapper();
        if (!parentMapper.active()) {
            return;
        }
        // A _parent field can never be added to an existing mapping, so a _parent field either exists on
        // a newly created mapping or doesn't exist at all. This is why we can update the known parent types
        // via DocumentTypeListener
        final boolean newlyRegistered = parentTypes.add(parentMapper.type());
        if (newlyRegistered) {
            clear();
        }
    }
}
/**
 * Returns field data that contains no types at all.
 *
 * @param maxDoc not used by this implementation
 */
@Override
protected AtomicParentChildFieldData empty(int maxDoc) {
    // Explicit type witness: without it, pre-Java-8 inference resolves of() to <Object, Object>.
    return new ParentChildAtomicFieldData(ImmutableOpenMap.<String, AtomicOrdinalsFieldData>of());
}
/**
 * Accumulates, for a single type, the id bytes, the offset of each id within those bytes (indexed by term
 * ordinal) and the ordinals builder that records which documents carry which id.
 */
class TypeBuilder {
    final PagedBytes bytes = new PagedBytes(15);
    final PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    final OrdinalsBuilder builder;

    TypeBuilder(float acceptableTransientOverheadRatio, LeafReader reader) throws IOException {
        this.builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    }
}
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData> build(Index index, Settings indexSettings, MappedFieldType fieldType,
IndexFieldDataCache cache, CircuitBreakerService breakerService,
MapperService mapperService) {
return new ParentChildIndexFieldData(index, indexSettings, fieldType.names(), fieldType.fieldDataType(), cache,
mapperService, breakerService);
}
}
/**
 * Memory estimator for parent/child id loading. It hands out the already filtered terms enum wrapped in a
 * {@link RamAccountingTermsEnum} and corrects the breaker once the real size is known.
 */
public class ParentChildEstimator implements PerValueEstimator {
    private final CircuitBreaker breaker;
    private final TermsEnum filteredEnum;

    // The TermsEnum is handed in through the constructor rather than created in
    // beforeLoad(), because the filtering already happened in the TermsEnum
    // wrappers that were built earlier
    public ParentChildEstimator(CircuitBreaker breaker, TermsEnum filteredEnum) {
        this.filteredEnum = filteredEnum;
        this.breaker = breaker;
    }

    /**
     * Estimated overhead of an id: twice its length, or zero for a {@code null} term.
     */
    @Override
    public long bytesPerValue(BytesRef term) {
        return term == null ? 0 : 2 * term.length;
    }

    /**
     * Returns the filtered {@link TermsEnum} given at construction time, wrapped in a
     * {@link RamAccountingTermsEnum}. The {@code terms} argument is not consulted.
     */
    @Override
    public TermsEnum beforeLoad(Terms terms) throws IOException {
        final TermsEnum accounting = new RamAccountingTermsEnum(filteredEnum, breaker, this, "parent/child id cache");
        return accounting;
    }

    /**
     * Adds the difference between the actual usage and the accumulated estimate to the breaker
     * (a negative amount when the estimate was too high).
     */
    @Override
    public void afterLoad(TermsEnum termsEnum, long actualUsed) {
        assert termsEnum instanceof RamAccountingTermsEnum;
        final RamAccountingTermsEnum accounting = (RamAccountingTermsEnum) termsEnum;
        final long estimatedBytes = accounting.getTotalBytes();
        breaker.addWithoutBreaking(actualUsed - estimatedBytes);
    }
}
/**
 * Returns field data whose ordinals are global across all segments of {@code indexReader}.
 * With at most one leaf this instance itself is returned; otherwise the global data comes from the cache.
 *
 * @throws ElasticsearchException if loading through the cache fails; any other throwable is wrapped
 */
@Override
public IndexParentChildFieldData loadGlobal(DirectoryReader indexReader) {
    final boolean hasMultipleLeaves = indexReader.leaves().size() > 1;
    if (!hasMultipleLeaves) {
        // ordinals are already global
        return this;
    }
    try {
        return cache.load(indexReader, this);
    } catch (Throwable failure) {
        if (failure instanceof ElasticsearchException) {
            throw (ElasticsearchException) failure;
        }
        throw new ElasticsearchException(failure.getMessage(), failure);
    }
}
/**
 * Builds the ordinal map for {@code parentType} from the per-segment ordinals values of every entry in
 * {@code atomicFD}, in array order.
 */
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException {
    final SortedDocValues[] perSegment = new SortedDocValues[atomicFD.length];
    int segment = 0;
    for (AtomicParentChildFieldData leafData : atomicFD) {
        perSegment[segment++] = leafData.getOrdinalsValues(parentType);
    }
    return OrdinalMap.build(null, perSegment, PackedInts.DEFAULT);
}
/**
 * Simple holder pairing an ordinal map with the array of segment-level field data it belongs to.
 */
private static class OrdinalMapAndAtomicFieldData {
    final OrdinalMap ordMap;
    final AtomicParentChildFieldData[] fieldData;

    public OrdinalMapAndAtomicFieldData(OrdinalMap ordMap, AtomicParentChildFieldData[] fieldData) {
        this.fieldData = fieldData;
        this.ordMap = ordMap;
    }
}
/**
 * Builds the global-ordinals view over all segments of {@code indexReader}.
 * <p>
 * For every parent type the segment-level field data of each leaf is loaded and an ordinal map is built
 * from it. The summed size of the ordinal maps is added to the field data breaker (without tripping it)
 * and reported as the memory usage of the returned {@link GlobalFieldData}.
 */
@Override
public IndexParentChildFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
    final long startTime = System.nanoTime();
    final Set<String> parentTypes;
    if (Version.indexCreated(indexSettings()).before(Version.V_2_0_0_beta1)) {
        // The set is mutable for these indices: take a snapshot under the lock.
        synchronized (lock) {
            parentTypes = ImmutableSet.copyOf(this.parentTypes);
        }
    } else {
        parentTypes = this.parentTypes;
    }

    final List<LeafReaderContext> leaves = indexReader.leaves();
    long ramBytesUsed = 0;
    final Map<String, OrdinalMapAndAtomicFieldData> perType = new HashMap<>();
    for (String type : parentTypes) {
        final AtomicParentChildFieldData[] fieldData = new AtomicParentChildFieldData[leaves.size()];
        for (LeafReaderContext context : leaves) {
            fieldData[context.ord] = load(context);
        }
        final OrdinalMap ordMap = buildOrdinalMap(fieldData, type);
        ramBytesUsed += ordMap.ramBytesUsed();
        perType.put(type, new OrdinalMapAndAtomicFieldData(ordMap, fieldData));
    }

    // One global view per segment; each one knows its own segment index.
    final AtomicParentChildFieldData[] fielddata = new AtomicParentChildFieldData[leaves.size()];
    for (int i = 0; i < fielddata.length; ++i) {
        fielddata[i] = new GlobalAtomicFieldData(parentTypes, perType, i);
    }

    breakerService.getBreaker(CircuitBreaker.FIELDDATA).addWithoutBreaking(ramBytesUsed);
    if (logger.isDebugEnabled()) {
        logger.debug(
                "Global-ordinals[_parent] took {}",
                new TimeValue(System.nanoTime() - startTime, TimeUnit.NANOSECONDS)
        );
    }

    return new GlobalFieldData(indexReader, fielddata, ramBytesUsed, perType);
}
/**
 * Segment-level view that translates the segment ordinals of one segment (identified by
 * {@code segmentIndex}) into global ordinals using the per-type ordinal maps.
 */
private static class GlobalAtomicFieldData extends AbstractAtomicParentChildFieldData {
    private final Set<String> types;
    private final Map<String, OrdinalMapAndAtomicFieldData> atomicFD;
    private final int segmentIndex;

    public GlobalAtomicFieldData(Set<String> types, Map<String, OrdinalMapAndAtomicFieldData> atomicFD, int segmentIndex) {
        this.types = types;
        this.atomicFD = atomicFD;
        this.segmentIndex = segmentIndex;
    }

    @Override
    public Set<String> types() {
        return types;
    }

    /**
     * Returns doc values whose ordinals are global for {@code type}; empty values when the type is unknown.
     * When this segment already holds every value of the ordinal map, its own values are returned as is.
     */
    @Override
    public SortedDocValues getOrdinalsValues(String type) {
        final OrdinalMapAndAtomicFieldData atomicFD = this.atomicFD.get(type);
        if (atomicFD == null) {
            return DocValues.emptySorted();
        }

        final OrdinalMap ordMap = atomicFD.ordMap;
        final SortedDocValues[] allSegmentValues = new SortedDocValues[atomicFD.fieldData.length];
        for (int i = 0; i < allSegmentValues.length; ++i) {
            allSegmentValues[i] = atomicFD.fieldData[i].getOrdinalsValues(type);
        }
        final SortedDocValues segmentValues = allSegmentValues[segmentIndex];
        if (segmentValues.getValueCount() == ordMap.getValueCount()) {
            // ords are already global
            return segmentValues;
        }
        final LongValues globalOrds = ordMap.getGlobalOrds(segmentIndex);
        return new SortedDocValues() {

            @Override
            public BytesRef lookupOrd(int ord) {
                // Resolve the global ordinal through the first segment that contains it.
                final int firstSegment = ordMap.getFirstSegmentNumber(ord);
                final int segmentOrd = (int) ordMap.getFirstSegmentOrd(ord);
                return allSegmentValues[firstSegment].lookupOrd(segmentOrd);
            }

            @Override
            public int getValueCount() {
                return (int) ordMap.getValueCount();
            }

            @Override
            public int getOrd(int docID) {
                final int segmentOrd = segmentValues.getOrd(docID);
                // TODO: is there a way we can get rid of this branch?
                if (segmentOrd >= 0) {
                    return (int) globalOrds.get(segmentOrd);
                } else {
                    // negative ordinals are passed through unmapped
                    return segmentOrd;
                }
            }
        };
    }

    @Override
    public long ramBytesUsed() {
        // this class does not take memory on its own, the index-level field data does
        // it through the use of ordinal maps
        return 0;
    }

    @Override
    public Collection<Accountable> getChildResources() {
        return Collections.emptyList();
    }

    /**
     * Closes the segment-level field data of every type referenced by this view.
     */
    @Override
    public void close() {
        List<Releasable> closeables = new ArrayList<>();
        for (OrdinalMapAndAtomicFieldData fds : atomicFD.values()) {
            closeables.addAll(Arrays.asList(fds.fieldData));
        }
        Releasables.close(closeables);
    }
}
/**
 * Index-level field data with global ordinals, bound to the top-level reader it was built for
 * (identified by that reader's core cache key).
 */
public class GlobalFieldData implements IndexParentChildFieldData, Accountable {

    private final Object coreCacheKey;
    private final List<LeafReaderContext> leaves;
    private final AtomicParentChildFieldData[] fielddata;
    private final long ramBytesUsed;
    private final Map<String, OrdinalMapAndAtomicFieldData> ordinalMapPerType;

    GlobalFieldData(IndexReader reader, AtomicParentChildFieldData[] fielddata, long ramBytesUsed,
            Map<String, OrdinalMapAndAtomicFieldData> ordinalMapPerType) {
        this.coreCacheKey = reader.getCoreCacheKey();
        this.leaves = reader.leaves();
        this.ramBytesUsed = ramBytesUsed;
        this.fielddata = fielddata;
        this.ordinalMapPerType = ordinalMapPerType;
    }

    @Override
    public Names getFieldNames() {
        return ParentChildIndexFieldData.this.getFieldNames();
    }

    @Override
    public FieldDataType getFieldDataType() {
        return ParentChildIndexFieldData.this.getFieldDataType();
    }

    /**
     * Returns the precomputed global view for the segment at {@code context.ord}.
     */
    @Override
    public AtomicParentChildFieldData load(LeafReaderContext context) {
        assert context.reader().getCoreCacheKey() == leaves.get(context.ord).reader().getCoreCacheKey();
        return fielddata[context.ord];
    }

    @Override
    public AtomicParentChildFieldData loadDirect(LeafReaderContext context) throws Exception {
        return load(context);
    }

    @Override
    public XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
        throw new UnsupportedOperationException("No sorting on global ords");
    }

    @Override
    public void clear() {
        ParentChildIndexFieldData.this.clear();
    }

    @Override
    public Index index() {
        return ParentChildIndexFieldData.this.index();
    }

    @Override
    public long ramBytesUsed() {
        return ramBytesUsed;
    }

    @Override
    public Collection<Accountable> getChildResources() {
        return Collections.emptyList();
    }

    /**
     * Returns this instance when {@code indexReader} is the reader it was built for.
     *
     * @throws IllegalStateException if the reader's core cache key differs from the one captured at construction
     */
    @Override
    public IndexParentChildFieldData loadGlobal(DirectoryReader indexReader) {
        if (indexReader.getCoreCacheKey() == coreCacheKey) {
            return this;
        }
        throw new IllegalStateException("global field data was built for a different top-level reader");
    }

    @Override
    public IndexParentChildFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
        return loadGlobal(indexReader);
    }
}
/**
 * Looks up the global ordinal map of {@code type}.
 *
 * @return the ordinal map when the given field data is a {@link GlobalFieldData}; {@code null} otherwise
 */
// TODO: OrdinalMap isn't exposed in the field data framework, because it is an implementation detail.
// However the JoinUtil works directly with OrdinalMap, so this is a hack to get access to OrdinalMap.
// I don't think we should expose OrdinalMap in IndexFieldData, because only parent/child relies on it and for the
// rest of the code OrdinalMap is an implementation detail, but maybe we can expose it in the
// IndexParentChildFieldData interface?
public static MultiDocValues.OrdinalMap getOrdinalMap(IndexParentChildFieldData indexParentChildFieldData, String type) {
    if (!(indexParentChildFieldData instanceof ParentChildIndexFieldData.GlobalFieldData)) {
        // one segment, local ordinals are global
        return null;
    }
    final GlobalFieldData global = (GlobalFieldData) indexParentChildFieldData;
    return global.ordinalMapPerType.get(type).ordMap;
}
}