io.druid.query.topn.PooledTopNAlgorithm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
Show all versions of druid-processing Show documentation
A module that is everything required to understands Druid Segments
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.topn;
import com.metamx.common.Pair;
import com.metamx.common.guava.CloseQuietly;
import io.druid.collections.ResourceHolder;
import io.druid.collections.StupidPool;
import io.druid.query.aggregation.BufferAggregator;
import io.druid.segment.Capabilities;
import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector;
import io.druid.segment.data.IndexedInts;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
/**
*/
public class PooledTopNAlgorithm
extends BaseTopNAlgorithm
{
private final Capabilities capabilities;
private final TopNQuery query;
private final StupidPool bufferPool;
private static final int AGG_UNROLL_COUNT = 8; // Must be able to fit loop below
public PooledTopNAlgorithm(
Capabilities capabilities,
TopNQuery query,
StupidPool bufferPool
)
{
super(capabilities);
this.capabilities = capabilities;
this.query = query;
this.bufferPool = bufferPool;
}
@Override
public PooledTopNParams makeInitParams(
DimensionSelector dimSelector, Cursor cursor
)
{
ResourceHolder resultsBufHolder = bufferPool.take();
ByteBuffer resultsBuf = resultsBufHolder.get();
resultsBuf.clear();
final int cardinality = dimSelector.getValueCardinality();
final TopNMetricSpecBuilder arrayProvider = new BaseArrayProvider(
dimSelector,
query,
capabilities
)
{
private final int[] positions = new int[cardinality];
@Override
public int[] build()
{
Pair startEnd = computeStartEnd(cardinality);
Arrays.fill(positions, 0, startEnd.lhs, SKIP_POSITION_VALUE);
Arrays.fill(positions, startEnd.lhs, startEnd.rhs, INIT_POSITION_VALUE);
Arrays.fill(positions, startEnd.rhs, positions.length, SKIP_POSITION_VALUE);
return positions;
}
};
final int numBytesToWorkWith = resultsBuf.remaining();
final int[] aggregatorSizes = new int[query.getAggregatorSpecs().size()];
int numBytesPerRecord = 0;
for (int i = 0; i < query.getAggregatorSpecs().size(); ++i) {
aggregatorSizes[i] = query.getAggregatorSpecs().get(i).getMaxIntermediateSize();
numBytesPerRecord += aggregatorSizes[i];
}
final int numValuesPerPass = numBytesToWorkWith / numBytesPerRecord;
return PooledTopNParams.builder()
.withDimSelector(dimSelector)
.withCursor(cursor)
.withCardinality(cardinality)
.withResultsBufHolder(resultsBufHolder)
.withResultsBuf(resultsBuf)
.withArrayProvider(arrayProvider)
.withNumBytesPerRecord(numBytesPerRecord)
.withNumValuesPerPass(numValuesPerPass)
.withAggregatorSizes(aggregatorSizes)
.build();
}
@Override
protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess)
{
final TopNMetricSpecBuilder arrayProvider = params.getArrayProvider();
if (!query.getDimensionSpec().preservesOrdering()) {
return arrayProvider.build();
}
arrayProvider.ignoreFirstN(numProcessed);
arrayProvider.keepOnlyN(numToProcess);
return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build();
}
@Override
protected int computeNewLength(int[] dimValSelector, int numProcessed, int numToProcess)
{
int valid = 0;
int length = 0;
for (int i = numProcessed; i < dimValSelector.length && valid < numToProcess; i++) {
length++;
if (SKIP_POSITION_VALUE != dimValSelector[i]) {
valid++;
}
}
return length;
}
@Override
protected int[] updateDimValSelector(int[] dimValSelector, int numProcessed, int numToProcess)
{
final int[] retVal = Arrays.copyOf(dimValSelector, dimValSelector.length);
final int validEnd = Math.min(retVal.length, numProcessed + numToProcess);
final int end = Math.max(retVal.length, validEnd);
Arrays.fill(retVal, 0, numProcessed, SKIP_POSITION_VALUE);
Arrays.fill(retVal, validEnd, end, SKIP_POSITION_VALUE);
return retVal;
}
@Override
protected BufferAggregator[] makeDimValAggregateStore(PooledTopNParams params)
{
return makeBufferAggregators(params.getCursor(), query.getAggregatorSpecs());
}
/**
* Use aggressive loop unrolling to aggregate the data
*
* How this works: The aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
* The offsets into the output buffer are precalculated and stored in aggregatorOffsets
*
* For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates evaluted in a switch statement.
* See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach
*
* This allows out of order execution of the code. In local tests, the JVM inlines all the way to this function.
*
* If there are more than AGG_UNROLL_COUNT aggregates, then the remainder is calculated with the switch, and the
* blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
*
* Putting the switch first allows for optimization for the common case (less than AGG_UNROLL_COUNT aggs) but
* still optimizes the high quantity of aggregate queries which benefit greatly from any speed improvements
* (they simply take longer to start with).
*/
@Override
protected void scanAndAggregate(
final PooledTopNParams params,
final int[] positions,
final BufferAggregator[] theAggregators,
final int numProcessed
)
{
final ByteBuffer resultsBuf = params.getResultsBuf();
final int numBytesPerRecord = params.getNumBytesPerRecord();
final int[] aggregatorSizes = params.getAggregatorSizes();
final Cursor cursor = params.getCursor();
final DimensionSelector dimSelector = params.getDimSelector();
final int[] aggregatorOffsets = new int[aggregatorSizes.length];
for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
aggregatorOffsets[j] = offset;
offset += aggregatorSizes[j];
}
final int aggSize = theAggregators.length;
final int aggExtra = aggSize % AGG_UNROLL_COUNT;
final AtomicInteger currentPosition = new AtomicInteger(0);
while (!cursor.isDone()) {
final IndexedInts dimValues = dimSelector.getRow();
final int dimSize = dimValues.size();
final int dimExtra = dimSize % AGG_UNROLL_COUNT;
switch(dimExtra){
case 7:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(6),
currentPosition
);
case 6:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(5),
currentPosition
);
case 5:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(4),
currentPosition
);
case 4:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(3),
currentPosition
);
case 3:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(2),
currentPosition
);
case 2:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(1),
currentPosition
);
case 1:
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(0),
currentPosition
);
}
for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 1),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 2),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 3),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 4),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 5),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 6),
currentPosition
);
aggregateDimValue(
positions,
theAggregators,
numProcessed,
resultsBuf,
numBytesPerRecord,
aggregatorOffsets,
aggSize,
aggExtra,
dimValues.get(i + 7),
currentPosition
);
}
cursor.advance();
}
}
private static void aggregateDimValue(
final int[] positions,
final BufferAggregator[] theAggregators,
final int numProcessed,
final ByteBuffer resultsBuf,
final int numBytesPerRecord,
final int[] aggregatorOffsets,
final int aggSize,
final int aggExtra,
final int dimIndex,
final AtomicInteger currentPosition
)
{
if (SKIP_POSITION_VALUE == positions[dimIndex]) {
return;
}
if (INIT_POSITION_VALUE == positions[dimIndex]) {
positions[dimIndex] = currentPosition.getAndIncrement() * numBytesPerRecord;
final int pos = positions[dimIndex];
for (int j = 0; j < aggSize; ++j) {
theAggregators[j].init(resultsBuf, pos + aggregatorOffsets[j]);
}
}
final int position = positions[dimIndex];
switch(aggExtra) {
case 7:
theAggregators[6].aggregate(resultsBuf, position + aggregatorOffsets[6]);
case 6:
theAggregators[5].aggregate(resultsBuf, position + aggregatorOffsets[5]);
case 5:
theAggregators[4].aggregate(resultsBuf, position + aggregatorOffsets[4]);
case 4:
theAggregators[3].aggregate(resultsBuf, position + aggregatorOffsets[3]);
case 3:
theAggregators[2].aggregate(resultsBuf, position + aggregatorOffsets[2]);
case 2:
theAggregators[1].aggregate(resultsBuf, position + aggregatorOffsets[1]);
case 1:
theAggregators[0].aggregate(resultsBuf, position + aggregatorOffsets[0]);
}
for (int j = aggExtra; j < aggSize; j += AGG_UNROLL_COUNT) {
theAggregators[j].aggregate(resultsBuf, position + aggregatorOffsets[j]);
theAggregators[j+1].aggregate(resultsBuf, position + aggregatorOffsets[j+1]);
theAggregators[j+2].aggregate(resultsBuf, position + aggregatorOffsets[j+2]);
theAggregators[j+3].aggregate(resultsBuf, position + aggregatorOffsets[j+3]);
theAggregators[j+4].aggregate(resultsBuf, position + aggregatorOffsets[j+4]);
theAggregators[j+5].aggregate(resultsBuf, position + aggregatorOffsets[j+5]);
theAggregators[j+6].aggregate(resultsBuf, position + aggregatorOffsets[j+6]);
theAggregators[j+7].aggregate(resultsBuf, position + aggregatorOffsets[j+7]);
}
}
@Override
protected void updateResults(
PooledTopNParams params,
int[] positions,
BufferAggregator[] theAggregators,
TopNResultBuilder resultBuilder
)
{
final ByteBuffer resultsBuf = params.getResultsBuf();
final int[] aggregatorSizes = params.getAggregatorSizes();
final DimensionSelector dimSelector = params.getDimSelector();
for (int i = 0; i < positions.length; i++) {
int position = positions[i];
if (position >= 0) {
Object[] vals = new Object[theAggregators.length];
for (int j = 0; j < theAggregators.length; j++) {
vals[j] = theAggregators[j].get(resultsBuf, position);
position += aggregatorSizes[j];
}
resultBuilder.addEntry(
dimSelector.lookupName(i),
i,
vals
);
}
}
}
@Override
protected void closeAggregators(BufferAggregator[] bufferAggregators)
{
for (BufferAggregator agg : bufferAggregators) {
agg.close();
}
}
@Override
public void cleanup(PooledTopNParams params)
{
if (params != null) {
ResourceHolder resultsBufHolder = params.getResultsBufHolder();
if (resultsBufHolder != null) {
resultsBufHolder.get().clear();
}
CloseQuietly.close(resultsBufHolder);
}
}
public static class PooledTopNParams extends TopNParams
{
private final ResourceHolder resultsBufHolder;
private final ByteBuffer resultsBuf;
private final int[] aggregatorSizes;
private final int numBytesPerRecord;
private final TopNMetricSpecBuilder arrayProvider;
public PooledTopNParams(
DimensionSelector dimSelector,
Cursor cursor,
int cardinality,
ResourceHolder resultsBufHolder,
ByteBuffer resultsBuf,
int[] aggregatorSizes,
int numBytesPerRecord,
int numValuesPerPass,
TopNMetricSpecBuilder arrayProvider
)
{
super(dimSelector, cursor, cardinality, numValuesPerPass);
this.resultsBufHolder = resultsBufHolder;
this.resultsBuf = resultsBuf;
this.aggregatorSizes = aggregatorSizes;
this.numBytesPerRecord = numBytesPerRecord;
this.arrayProvider = arrayProvider;
}
public static Builder builder()
{
return new Builder();
}
public ResourceHolder getResultsBufHolder()
{
return resultsBufHolder;
}
public ByteBuffer getResultsBuf()
{
return resultsBuf;
}
public int[] getAggregatorSizes()
{
return aggregatorSizes;
}
public int getNumBytesPerRecord()
{
return numBytesPerRecord;
}
public TopNMetricSpecBuilder getArrayProvider()
{
return arrayProvider;
}
public static class Builder
{
private DimensionSelector dimSelector;
private Cursor cursor;
private int cardinality;
private ResourceHolder resultsBufHolder;
private ByteBuffer resultsBuf;
private int[] aggregatorSizes;
private int numBytesPerRecord;
private int numValuesPerPass;
private TopNMetricSpecBuilder arrayProvider;
public Builder()
{
dimSelector = null;
cursor = null;
cardinality = 0;
resultsBufHolder = null;
resultsBuf = null;
aggregatorSizes = null;
numBytesPerRecord = 0;
numValuesPerPass = 0;
arrayProvider = null;
}
public Builder withDimSelector(DimensionSelector dimSelector)
{
this.dimSelector = dimSelector;
return this;
}
public Builder withCursor(Cursor cursor)
{
this.cursor = cursor;
return this;
}
public Builder withCardinality(int cardinality)
{
this.cardinality = cardinality;
return this;
}
public Builder withResultsBufHolder(ResourceHolder resultsBufHolder)
{
this.resultsBufHolder = resultsBufHolder;
return this;
}
public Builder withResultsBuf(ByteBuffer resultsBuf)
{
this.resultsBuf = resultsBuf;
return this;
}
public Builder withAggregatorSizes(int[] aggregatorSizes)
{
this.aggregatorSizes = aggregatorSizes;
return this;
}
public Builder withNumBytesPerRecord(int numBytesPerRecord)
{
this.numBytesPerRecord = numBytesPerRecord;
return this;
}
public Builder withNumValuesPerPass(int numValuesPerPass)
{
this.numValuesPerPass = numValuesPerPass;
return this;
}
public Builder withArrayProvider(TopNMetricSpecBuilder arrayProvider)
{
this.arrayProvider = arrayProvider;
return this;
}
public PooledTopNParams build()
{
return new PooledTopNParams(
dimSelector,
cursor,
cardinality,
resultsBufHolder,
resultsBuf,
aggregatorSizes,
numBytesPerRecord,
numValuesPerPass,
arrayProvider
);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy