All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.druid.query.groupby.epinephelinae.Grouper Maven / Gradle / Ivy

/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.query.groupby.epinephelinae;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import io.druid.query.aggregation.AggregatorFactory;

import java.io.Closeable;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.function.ToIntFunction;

/**
 * Groupers aggregate metrics from rows that they typically get from a ColumnSelectorFactory, under
 * grouping keys that some outside driver is passing in. They can also iterate over the grouped
 * rows after the aggregation is done.
 * 

* They work sort of like a map of KeyType to aggregated values, except they don't support * random lookups. * * @param type of the key that will be passed in */ public interface Grouper extends Closeable { /** * Initialize the grouper. * This method needs to be called before calling {@link #aggregate(Object)} and {@link #aggregate(Object, int)}. */ void init(); /** * Check this grouper is initialized or not. * * @return true if the grouper is already initialized, otherwise false. */ boolean isInitialized(); /** * Aggregate the current row with the provided key. Some implementations are thread-safe and * some are not. * * @param key key object * @param keyHash result of {@link #hashFunction()} on the key * * @return result that is ok if the row was aggregated, not ok if a resource limit was hit */ AggregateResult aggregate(KeyType key, int keyHash); /** * Aggregate the current row with the provided key. Some implementations are thread-safe and * some are not. * * @param key key * * @return result that is ok if the row was aggregated, not ok if a resource limit was hit */ default AggregateResult aggregate(KeyType key) { Preconditions.checkNotNull(key, "key"); return aggregate(key, hashFunction().applyAsInt(key)); } /** * Reset the grouper to its initial state. */ void reset(); default ToIntFunction hashFunction() { return Groupers::hash; } /** * Close the grouper and release associated resources. */ @Override void close(); /** * Iterate through entries. *

* Once this method is called, writes are no longer safe. After you are done with the iterator returned by this * method, you should either call {@link #close()} (if you are done with the Grouper), {@link #reset()} (if you * want to reuse it), or {@link #iterator(boolean)} again if you want another iterator. This method is not thread-safe * and must not be called by multiple threads concurrently. *

* If "sorted" is true then the iterator will return sorted results. It will use KeyType's natural ordering on * deserialized objects, and will use the {@link KeySerde#comparator()} on serialized objects. Woe be unto you * if these comparators are not equivalent. *

* Callers must process and discard the returned {@link Entry}s immediately because some implementations can reuse the * key objects. * * @param sorted return sorted results * * @return entry iterator */ Iterator> iterator(final boolean sorted); class Entry { final T key; final Object[] values; @JsonCreator public Entry( @JsonProperty("k") T key, @JsonProperty("v") Object[] values ) { this.key = key; this.values = values; } @JsonProperty("k") public T getKey() { return key; } @JsonProperty("v") public Object[] getValues() { return values; } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } Entry entry = (Entry) o; if (!key.equals(entry.key)) { return false; } // Probably incorrect - comparing Object[] arrays with Arrays.equals return Arrays.equals(values, entry.values); } @Override public int hashCode() { int result = key.hashCode(); result = 31 * result + Arrays.hashCode(values); return result; } @Override public String toString() { return "Entry{" + "key=" + key + ", values=" + Arrays.toString(values) + '}'; } } interface KeySerdeFactory { /** * Create a new KeySerde, which may be stateful. */ KeySerde factorize(); /** * Return an object that knows how to compare two serialized key instances. Will be called by the * {@link #iterator(boolean)} method if sorting is enabled. * * @param forceDefaultOrder Return a comparator that sorts by the key in default lexicographic ascending order, * regardless of any other conditions (e.g., presence of OrderBySpecs). * * @return comparator for key objects. */ Comparator> objectComparator(boolean forceDefaultOrder); } /** * Possibly-stateful object responsible for serde and comparison of keys. Does not need to be thread-safe. */ interface KeySerde { /** * Size of the keys returned by {@link #toByteBuffer(Object)} (which must be a fixed size) */ int keySize(); /** * Class of the keys. */ Class keyClazz(); /** * Serialize a key. This will be called by the {@link #aggregate(Comparable)} method. The buffer will not * be retained after the aggregate method returns, so reusing buffers is OK. *

* This method may return null, which indicates that some internal resource limit has been reached and * no more keys can be generated. In this situation you can call {@link #reset()} and try again, although * beware the caveats on that method. * * @param key key object * * @return serialized key, or null if we are unable to serialize more keys due to resource limits */ ByteBuffer toByteBuffer(T key); /** * Deserialize a key from a buffer. Will be called by the {@link #iterator(boolean)} method. * * @param buffer buffer containing the key * @param position key start position in the buffer * * @return key object */ T fromByteBuffer(ByteBuffer buffer, int position); /** * Return an object that knows how to compare two serialized keys. Will be called by the * {@link #iterator(boolean)} method if sorting is enabled. * * @return comparator for keys */ BufferComparator bufferComparator(); /** * When pushing down limits, it may also be necessary to compare aggregated values along with the key * using the bufferComparator. * * @param aggregatorFactories Array of aggregators from a GroupByQuery * @param aggregatorOffsets Offsets for each aggregator in aggregatorFactories pointing to their location * within the grouping key + aggs buffer. * * @return comparator for keys + aggs */ BufferComparator bufferComparatorWithAggregators(AggregatorFactory[] aggregatorFactories, int[] aggregatorOffsets); /** * Reset the keySerde to its initial state. After this method is called, {@link #fromByteBuffer(ByteBuffer, int)} * and {@link #bufferComparator()} may no longer work properly on previously-serialized keys. */ void reset(); } interface BufferComparator { int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy