All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.query.groupby.epinephelinae.Grouper Maven / Gradle / Ivy

There is a newer version: 31.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.groupby.epinephelinae;

import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.query.aggregation.AggregatorFactory;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.List;
import java.util.function.ToIntFunction;

/**
 * Groupers aggregate metrics from rows that they typically get from a ColumnSelectorFactory, under
 * grouping keys that some outside driver is passing in. They can also iterate over the grouped
 * rows after the aggregation is done.
 *
 * They work sort of like a map of KeyType to aggregated values, except they don't support
 * random lookups.
 *
 * See {@link VectorGrouper} for a vectorized version.
 *
 * @param  type of the key that will be passed in
 */
public interface Grouper extends Closeable
{
  /**
   * Initialize the grouper.
   * This method needs to be called before calling {@link #aggregate(Object)} and {@link #aggregate(Object, int)}.
   */
  void init();

  /**
   * Check this grouper is initialized or not.
   *
   * @return true if the grouper is already initialized, otherwise false.
   */
  boolean isInitialized();

  /**
   * Aggregate the current row with the provided key. Some implementations are thread-safe and
   * some are not.
   *
   * @param key     key object
   * @param keyHash result of {@link #hashFunction()} on the key
   *
   * @return result that is ok if the row was aggregated, not ok if a resource limit was hit
   */
  AggregateResult aggregate(KeyType key, int keyHash);

  /**
   * Aggregate the current row with the provided key. Some implementations are thread-safe and
   * some are not.
   *
   * @param key key
   *
   * @return result that is ok if the row was aggregated, not ok if a resource limit was hit
   */
  default AggregateResult aggregate(KeyType key)
  {
    Preconditions.checkNotNull(key, "key");
    return aggregate(key, hashFunction().applyAsInt(key));
  }

  /**
   * Reset the grouper to its initial state.
   */
  void reset();

  default ToIntFunction hashFunction()
  {
    return Groupers::hashObject;
  }

  /**
   * Close the grouper and release associated resources.
   */
  @Override
  void close();

  /**
   * Iterate through entries.
   * 

* Some implementations allow writes even after this method is called. After you are done with the iterator * returned by this method, you should either call {@link #close()} (if you are done with the Grouper) or * {@link #reset()} (if you want to reuse it). Some implementations allow calling {@link #iterator(boolean)} again if * you want another iterator. But, this method must not be called by multiple threads concurrently. *

* If "sorted" is true then the iterator will return sorted results. It will use KeyType's natural ordering on * deserialized objects, and will use the {@link KeySerde#bufferComparator()} on serialized objects. Woe be unto you * if these comparators are not equivalent. *

* Callers must process and discard the returned {@link Entry}s immediately because some implementations can reuse the * key objects. * * @param sorted return sorted results * * @return entry iterator */ CloseableIterator> iterator(boolean sorted); interface Entry { T getKey(); Object[] getValues(); } interface KeySerdeFactory { /** * Return max dictionary size threshold. * * @return max dictionary size */ long getMaxDictionarySize(); /** * Create a new {@link KeySerde}, which may be stateful. */ KeySerde factorize(); /** * Create a new {@link KeySerde} with the given dictionary. */ KeySerde factorizeWithDictionary(List dictionary); /** * Copies a key. Required if the key from an {@link Entry} from {@link #iterator} will be retained past the * following call to next(). */ T copyKey(T key); /** * Return an object that knows how to compare two serialized key instances. Will be called by the * {@link #iterator(boolean)} method if sorting is enabled. * * @param forceDefaultOrder Return a comparator that sorts by the key in default lexicographic ascending order, * regardless of any other conditions (e.g., presence of OrderBySpecs). * * @return comparator for key objects. */ Comparator> objectComparator(boolean forceDefaultOrder); } /** * Possibly-stateful object responsible for serde and comparison of keys. Does not need to be thread-safe. */ interface KeySerde { /** * Size of the keys returned by {@link #toByteBuffer(Object)} (which must be a fixed size) */ int keySize(); /** * Class of the keys. */ Class keyClazz(); /** * Return the dictionary of this KeySerde. The return value should not be null. */ List getDictionary(); /** * Serialize a key. This will be called by the {@link #aggregate(Object)} method. The buffer will not * be retained after the aggregate method returns, so reusing buffers is OK. *

* This method may return null, which indicates that some internal resource limit has been reached and * no more keys can be generated. In this situation you can call {@link #reset()} and try again, although * beware the caveats on that method. * * @param key key object * * @return serialized key, or null if we are unable to serialize more keys due to resource limits */ @Nullable ByteBuffer toByteBuffer(T key); /** * Create a reusable key that can be passed to {@link #readFromByteBuffer}. */ T createKey(); /** * Deserialize a key from a buffer. Will be called by the {@link #iterator(boolean)} method. * * @param key object from {@link #createKey()} * @param buffer buffer containing the key * @param position key start position in the buffer */ void readFromByteBuffer(T key, ByteBuffer buffer, int position); /** * Return an object that knows how to compare two serialized keys. Will be called by the * {@link #iterator(boolean)} method if sorting is enabled. * * @return comparator for keys */ BufferComparator bufferComparator(); /** * When pushing down limits, it may also be necessary to compare aggregated values along with the key * using the bufferComparator. * * @param aggregatorFactories Array of aggregators from a GroupByQuery * @param aggregatorOffsets Offsets for each aggregator in aggregatorFactories pointing to their location * within the grouping key + aggs buffer. * * @return comparator for keys + aggs */ BufferComparator bufferComparatorWithAggregators(AggregatorFactory[] aggregatorFactories, int[] aggregatorOffsets); /** * Reset the keySerde to its initial state. After this method is called, {@link #readFromByteBuffer} * and {@link #bufferComparator()} may no longer work properly on previously-serialized keys. */ void reset(); } interface BufferComparator { int compare(ByteBuffer lhsBuffer, ByteBuffer rhsBuffer, int lhsPosition, int rhsPosition); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy