// Source listing: org.apache.druid.query.groupby.epinephelinae.Grouper (from the druid-processing artifact)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.groupby.epinephelinae;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.query.aggregation.AggregatorFactory;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.List;
import java.util.function.ToIntFunction;
/**
* Groupers aggregate metrics from rows that they typically get from a ColumnSelectorFactory, under
* grouping keys that some outside driver is passing in. They can also iterate over the grouped
* rows after the aggregation is done.
*
* They work sort of like a map of KeyType to aggregated values, except they don't support
* random lookups.
*
* See {@link VectorGrouper} for a vectorized version.
*
* @param type of the key that will be passed in
*/
public interface Grouper extends Closeable
{
/**
* Initialize the grouper.
* This method needs to be called before calling {@link #aggregate(Object)} and {@link #aggregate(Object, int)}.
*/
void init();
/**
* Check this grouper is initialized or not.
*
* @return true if the grouper is already initialized, otherwise false.
*/
boolean isInitialized();
/**
* Aggregate the current row with the provided key. Some implementations are thread-safe and
* some are not.
*
* @param key key object
* @param keyHash result of {@link #hashFunction()} on the key
*
* @return result that is ok if the row was aggregated, not ok if a resource limit was hit
*/
AggregateResult aggregate(KeyType key, int keyHash);
/**
* Aggregate the current row with the provided key. Some implementations are thread-safe and
* some are not.
*
* @param key key
*
* @return result that is ok if the row was aggregated, not ok if a resource limit was hit
*/
default AggregateResult aggregate(KeyType key)
{
Preconditions.checkNotNull(key, "key");
return aggregate(key, hashFunction().applyAsInt(key));
}
/**
* Reset the grouper to its initial state.
*/
void reset();
default ToIntFunction hashFunction()
{
return Groupers::hashObject;
}
/**
* Close the grouper and release associated resources.
*/
@Override
void close();
/**
* Iterate through entries.
*
* Some implementations allow writes even after this method is called. After you are done with the iterator
* returned by this method, you should either call {@link #close()} (if you are done with the Grouper) or
* {@link #reset()} (if you want to reuse it). Some implementations allow calling {@link #iterator(boolean)} again if
* you want another iterator. But, this method must not be called by multiple threads concurrently.
*
* If "sorted" is true then the iterator will return sorted results. It will use KeyType's natural ordering on
* deserialized objects, and will use the {@link KeySerde#bufferComparator()} on serialized objects. Woe be unto you
* if these comparators are not equivalent.
*
* Callers must process and discard the returned {@link Entry}s immediately because some implementations can reuse the
* key objects.
*
* @param sorted return sorted results
*
* @return entry iterator
*/
CloseableIterator> iterator(boolean sorted);
interface Entry
{
T getKey();
Object[] getValues();
}
interface KeySerdeFactory
{
/**
* Return max dictionary size threshold.
*
* @return max dictionary size
*/
long getMaxDictionarySize();
/**
* Create a new {@link KeySerde}, which may be stateful.
*/
KeySerde factorize();
/**
* Create a new {@link KeySerde} with the given dictionary.
*/
KeySerde factorizeWithDictionary(List dictionary);
/**
* Copies a key. Required if the key from an {@link Entry} from {@link #iterator} will be retained past the
* following call to next().
*/
T copyKey(T key);
/**
* Return an object that knows how to compare two serialized key instances. Will be called by the
* {@link #iterator(boolean)} method if sorting is enabled.
*
* @param forceDefaultOrder Return a comparator that sorts by the key in default lexicographic ascending order,
* regardless of any other conditions (e.g., presence of OrderBySpecs).
*
* @return comparator for key objects.
*/
Comparator> objectComparator(boolean forceDefaultOrder);
}
/**
* Possibly-stateful object responsible for serde and comparison of keys. Does not need to be thread-safe.
*/
interface KeySerde
{
/**
* Size of the keys returned by {@link #toByteBuffer(Object)} (which must be a fixed size)
*/
int keySize();
/**
* Class of the keys.
*/
Class keyClazz();
/**
* Return the dictionary of this KeySerde. The return value should not be null.
*/
List getDictionary();
/**
* Serialize a key. This will be called by the {@link #aggregate(Object)} method. The buffer will not
* be retained after the aggregate method returns, so reusing buffers is OK.
*
* This method may return null, which indicates that some internal resource limit has been reached and
* no more keys can be generated. In this situation you can call {@link #reset()} and try again, although
* beware the caveats on that method.
*
* @param key key object
*
* @return serialized key, or null if we are unable to serialize more keys due to resource limits
*/
@Nullable
ByteBuffer toByteBuffer(T key);
/**
* Create a reusable key that can be passed to {@link #readFromByteBuffer}.
*/
T createKey();
/**
* Deserialize a key from a buffer. Will be called by the {@link #iterator(boolean)} method.
*
* @param key object from {@link #createKey()}
* @param buffer buffer containing the key
* @param position key start position in the buffer
*/
void readFromByteBuffer(T key, ByteBuffer buffer, int position);
/**
* Return an object that knows how to compare two serialized keys. Will be called by the
* {@link #iterator(boolean)} method if sorting is enabled.
*
* @return comparator for keys
*/
BufferComparator bufferComparator();
/**
* When pushing down limits, it may also be necessary to compare aggregated values along with the key
* using the bufferComparator.
*
* @param aggregatorFactories Array of aggregators from a GroupByQuery
* @param aggregatorOffsets Offsets for each aggregator in aggregatorFactories pointing to their location
* within the grouping key + aggs buffer.
*
* @return comparator for keys + aggs
*/
BufferComparator bufferComparatorWithAggregators(AggregatorFactory[] aggregatorFactories, int[] aggregatorOffsets);
/**
* Reset the keySerde to its initial state. After this method is called, {@link #readFromByteBuffer}
* and {@link #bufferComparator()} may no longer work properly on previously-serialized keys.
*/
void reset();
}
interface BufferComparator
{
int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition);
}
}