/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.groupby.epinephelinae;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.collect.Iterators;
import net.jpountz.lz4.LZ4BlockInputStream;
import net.jpountz.lz4.LZ4BlockOutputStream;
import org.apache.druid.java.util.common.CloseableIterators;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.jackson.JacksonUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.aggregation.AggregatorAdapters;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.segment.ColumnSelectorFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
* Grouper based around a single underlying {@link BufferHashGrouper}. Not thread-safe.
*
* When the underlying grouper is full, its contents are sorted and written to temporary files using "spillMapper".
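*
* A minimal construction sketch (hedged: the collaborators shown must come from the actual query
* machinery, so every name below is illustrative rather than a fixed API):
*
* <pre>{@code
* SpillingGrouper<ByteBuffer> grouper = new SpillingGrouper<>(
*     bufferSupplier,             // Supplier<ByteBuffer> backing the in-memory hash table
*     keySerdeFactory,            // serializes grouping keys and builds key comparators
*     columnSelectorFactory,      // feeds input columns to the aggregators
*     aggregatorFactories,
*     bufferGrouperMaxSize,
*     bufferGrouperMaxLoadFactor,
*     bufferGrouperInitialBuckets,
*     temporaryStorage,           // bounds how many bytes may be spilled to disk
*     spillMapper,                // Jackson mapper used to write and read spill files
*     true,                       // spilling allowed
*     null,                       // no limit push down
*     false,                      // sort uses only grouping fields
*     mergeBufferSize
* );
* grouper.init();
* // grouper.aggregate(key, keyHash) spills to disk automatically once the buffer fills up.
* }</pre>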
*/
public class SpillingGrouper<KeyType> implements Grouper<KeyType>
{
private static final Logger log = new Logger(SpillingGrouper.class);
private static final AggregateResult DISK_FULL = AggregateResult.partial(
0,
"Not enough disk space to execute this query. Try raising druid.query.groupBy.maxOnDiskStorage."
);
private final Grouper<KeyType> grouper;
private final KeySerde<KeyType> keySerde;
private final LimitedTemporaryStorage temporaryStorage;
private final ObjectMapper spillMapper;
private final AggregatorFactory[] aggregatorFactories;
private final Comparator<Grouper.Entry<KeyType>> keyObjComparator;
private final Comparator<Grouper.Entry<KeyType>> defaultOrderKeyObjComparator;
private final List<File> files = new ArrayList<>();
private final List<File> dictionaryFiles = new ArrayList<>();
private final boolean sortHasNonGroupingFields;
private boolean diskFull = false;
private boolean spillingAllowed;
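/**
* Creates a grouper that aggregates into an in-memory buffer and spills to "temporaryStorage" when
* that buffer fills. If "limitSpec" is non-null, a {@link LimitedBufferHashGrouper} is tried first so
* the limit can be applied while grouping; when the merge buffer is too small to support that
* optimization, the constructor falls back to a plain {@link BufferHashGrouper}.
*/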
public SpillingGrouper(
final Supplier<ByteBuffer> bufferSupplier,
final KeySerdeFactory<KeyType> keySerdeFactory,
final ColumnSelectorFactory columnSelectorFactory,
final AggregatorFactory[] aggregatorFactories,
final int bufferGrouperMaxSize,
final float bufferGrouperMaxLoadFactor,
final int bufferGrouperInitialBuckets,
final LimitedTemporaryStorage temporaryStorage,
final ObjectMapper spillMapper,
final boolean spillingAllowed,
final DefaultLimitSpec limitSpec,
final boolean sortHasNonGroupingFields,
final int mergeBufferSize
)
{
this.keySerde = keySerdeFactory.factorize();
this.keyObjComparator = keySerdeFactory.objectComparator(false);
this.defaultOrderKeyObjComparator = keySerdeFactory.objectComparator(true);
if (limitSpec != null) {
// Sanity check; must not have "offset" at this point.
Preconditions.checkState(!limitSpec.isOffset(), "Cannot push down offsets");
LimitedBufferHashGrouper<KeyType> limitGrouper = new LimitedBufferHashGrouper<>(
bufferSupplier,
keySerde,
AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)),
bufferGrouperMaxSize,
bufferGrouperMaxLoadFactor,
bufferGrouperInitialBuckets,
limitSpec.getLimit(),
sortHasNonGroupingFields
);
// if configured buffer size is too small to support limit push down, don't apply that optimization
if (!limitGrouper.validateBufferCapacity(mergeBufferSize)) {
if (sortHasNonGroupingFields) {
log.debug("Ignoring forceLimitPushDown, insufficient buffer capacity.");
}
// sortHasNonGroupingFields can only be true here if the user specified forceLimitPushDown
// in the query context. Result merging requires that all results are sorted by the same
// ordering where all ordering fields are contained in the grouping key.
// If sortHasNonGroupingFields is true, we use the default ordering that sorts by all grouping key fields
// with lexicographic ascending order.
// If sortHasNonGroupingFields is false, then the OrderBy fields are all in the grouping key, so we
// can use that ordering.
this.grouper = new BufferHashGrouper<>(
bufferSupplier,
keySerde,
AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)),
bufferGrouperMaxSize,
bufferGrouperMaxLoadFactor,
bufferGrouperInitialBuckets,
sortHasNonGroupingFields
);
} else {
this.grouper = limitGrouper;
}
} else {
this.grouper = new BufferHashGrouper<>(
bufferSupplier,
keySerde,
AggregatorAdapters.factorizeBuffered(columnSelectorFactory, Arrays.asList(aggregatorFactories)),
bufferGrouperMaxSize,
bufferGrouperMaxLoadFactor,
bufferGrouperInitialBuckets,
true
);
}
this.aggregatorFactories = aggregatorFactories;
this.temporaryStorage = temporaryStorage;
this.spillMapper = spillMapper;
this.spillingAllowed = spillingAllowed;
this.sortHasNonGroupingFields = sortHasNonGroupingFields;
}
@Override
public void init()
{
grouper.init();
}
@Override
public boolean isInitialized()
{
return grouper.isInitialized();
}
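/**
* Aggregates a single key. If the in-memory grouper is full, its contents are spilled and the
* aggregation is retried once; if the temporary storage limit has been reached, a partial
* {@link AggregateResult} is returned for this call and for every subsequent call.
*/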
@Override
public AggregateResult aggregate(KeyType key, int keyHash)
{
if (diskFull) {
// If the prior return was DISK_FULL, then return it again. When we return DISK_FULL to a processing thread,
// it skips the rest of the segment and the query is canceled. However, it's possible that the next segment
// starts processing before cancellation can kick in. We want that one, if it occurs, to see DISK_FULL too.
return DISK_FULL;
}
final AggregateResult result = grouper.aggregate(key, keyHash);
if (result.isOk() || !spillingAllowed || temporaryStorage.maxSize() <= 0) {
return result;
} else {
// Expecting all-or-nothing behavior.
assert result.getCount() == 0;
// Warning: this can potentially block up a processing thread for a while.
try {
spill();
}
catch (TemporaryStorageFullException e) {
diskFull = true;
return DISK_FULL;
}
catch (IOException e) {
throw new RuntimeException(e);
}
// Try again.
return grouper.aggregate(key, keyHash);
}
}
@Override
public void reset()
{
grouper.reset();
deleteFiles();
}
@Override
public void close()
{
grouper.close();
keySerde.reset();
deleteFiles();
}
/**
* Returns a dictionary of string keys added to this grouper. Note that the keySerde's dictionary is
* spilled to local storage whenever the inner grouper is spilled. If there are spilled dictionaries,
* this method loads them from disk and returns a merged dictionary.
*
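* A usage sketch (hedged: "combiningSerdeFactory" is illustrative; any factory able to accept a
* pre-built dictionary would do):
*
* <pre>{@code
* List<String> dictionary = spillingGrouper.mergeAndGetDictionary();
* // share the merged dictionary so every grouper maps strings to the same ids
* KeySerde<ByteBuffer> serde = combiningSerdeFactory.factorizeWithDictionary(dictionary);
* }</pre>
*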
* @return a dictionary which is a list of unique strings
*/
public List<String> mergeAndGetDictionary()
{
final Set<String> mergedDictionary = new HashSet<>(keySerde.getDictionary());
for (File dictFile : dictionaryFiles) {
try (
final InputStream fileStream = Files.newInputStream(dictFile.toPath());
final LZ4BlockInputStream blockStream = new LZ4BlockInputStream(fileStream);
final MappingIterator<String> dictIterator = spillMapper.readValues(
spillMapper.getFactory().createParser(blockStream),
spillMapper.getTypeFactory().constructType(String.class)
)
) {
while (dictIterator.hasNext()) {
mergedDictionary.add(dictIterator.next());
}
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
return new ArrayList<>(mergedDictionary);
}
public boolean isSpillingAllowed()
{
return spillingAllowed;
}
public void setSpillingAllowed(final boolean spillingAllowed)
{
this.spillingAllowed = spillingAllowed;
}
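/**
* Returns an iterator over everything this grouper holds: the in-memory table plus each spilled file.
* Sorted iteration performs an N-way sorted merge of the per-source iterators, while unsorted
* iteration simply concatenates them; in both cases closing the returned iterator closes the
* underlying spill-file streams through the attached {@link Closer}.
*/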
@Override
public CloseableIterator<Entry<KeyType>> iterator(final boolean sorted)
{
final List<CloseableIterator<Entry<KeyType>>> iterators = new ArrayList<>(1 + files.size());
iterators.add(grouper.iterator(sorted));
final Closer closer = Closer.create();
for (final File file : files) {
final MappingIterator<Entry<KeyType>> fileIterator = read(file, keySerde.keyClazz());
iterators.add(
CloseableIterators.withEmptyBaggage(
Iterators.transform(
fileIterator,
new Function<Entry<KeyType>, Entry<KeyType>>()
{
final ReusableEntry<KeyType> reusableEntry =
ReusableEntry.create(keySerde, aggregatorFactories.length);
@Override
public Entry<KeyType> apply(Entry<KeyType> entry)
{
final Object[] deserializedValues = reusableEntry.getValues();
for (int i = 0; i < deserializedValues.length; i++) {
deserializedValues[i] = aggregatorFactories[i].deserialize(entry.getValues()[i]);
if (deserializedValues[i] instanceof Integer) {
// Hack to satisfy the groupBy unit tests; perhaps we could do better by adjusting Jackson config.
deserializedValues[i] = ((Integer) deserializedValues[i]).longValue();
}
}
reusableEntry.setKey(entry.getKey());
return reusableEntry;
}
}
)
)
);
closer.register(fileIterator);
}
final Iterator<Entry<KeyType>> baseIterator;
if (sortHasNonGroupingFields) {
baseIterator = CloseableIterators.mergeSorted(iterators, defaultOrderKeyObjComparator);
} else {
baseIterator = sorted ?
CloseableIterators.mergeSorted(iterators, keyObjComparator) :
CloseableIterators.concat(iterators);
}
return CloseableIterators.wrap(baseIterator, closer);
}
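/**
* Spills the in-memory grouper to disk as two files, one holding the sorted entries and one holding
* the keySerde's current string dictionary, then resets the grouper so aggregation can continue.
*/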
private void spill() throws IOException
{
try (CloseableIterator<Entry<KeyType>> iterator = grouper.iterator(true)) {
files.add(spill(iterator));
dictionaryFiles.add(spill(keySerde.getDictionary().iterator()));
grouper.reset();
}
}
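/**
* Writes the iterator's remaining elements to a new temporary file as LZ4-block-compressed JSON,
* checking for query interruption between elements, and returns that file.
*/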
private <T> File spill(Iterator<T> iterator) throws IOException
{
try (
final LimitedTemporaryStorage.LimitedOutputStream out = temporaryStorage.createFile();
final LZ4BlockOutputStream compressedOut = new LZ4BlockOutputStream(out);
final JsonGenerator jsonGenerator = spillMapper.getFactory().createGenerator(compressedOut)
) {
final SerializerProvider serializers = spillMapper.getSerializerProviderInstance();
while (iterator.hasNext()) {
BaseQuery.checkInterrupted();
JacksonUtils.writeObjectUsingSerializerProvider(jsonGenerator, serializers, iterator.next());
}
return out.getFile();
}
}
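/**
* Opens a file written by {@link #spill(Iterator)} and returns a lazy iterator over its entries.
* The underlying stream is closed when the returned iterator is closed, which
* {@link #iterator(boolean)} arranges through its {@link Closer}.
*/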
private MappingIterator<Entry<KeyType>> read(final File file, final Class<KeyType> keyClazz)
{
try {
return spillMapper.readValues(
spillMapper.getFactory().createParser(new LZ4BlockInputStream(new FileInputStream(file))),
spillMapper.getTypeFactory().constructParametricType(ReusableEntry.class, keyClazz)
);
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
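/**
* Deletes the spilled entry files and clears the file list. Dictionary spill files are not deleted
* here; the {@link LimitedTemporaryStorage} that created them remains responsible for their
* eventual cleanup.
*/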
private void deleteFiles()
{
for (final File file : files) {
temporaryStorage.delete(file);
}
files.clear();
}
}