All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.com.android.internal.util.HeavyHitterSketch Maven / Gradle / Ivy

Go to download

A library jar that provides APIs for Applications written for the Google Android Platform.

There is a newer version: 15-robolectric-12650502
Show newest version
/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.internal.util;

import android.annotation.NonNull;
import android.annotation.Nullable;
import android.util.SparseArray;
import android.util.SparseIntArray;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * 

A utility which processes a sequence of input (stream) and output the heavy hitters * (the frequent ones).

* * @param The type of the input. * @see Stream Algorithm for * the definion of heavy hitters and the list of algorithms for detecting it. *

* {@hide} */ public interface HeavyHitterSketch { /** * Return the default implementation. * * @return The default implementation. */ static @NonNull HeavyHitterSketch newDefault() { return new HeavyHitterSketchImpl(); } /** * Set the configuration with given parameters * * @param inputSize The amount of the input. * @param capacity The maximum number of distinct input it should track; it defines the lower * bound of the output. */ void setConfig(int inputSize, int capacity); /** * Add a new input to the current sketch. * * @param newInstance The new input */ void add(@Nullable T newInstance); /** * @param k The number of heavy hitters it should return, k < capacity, a value of 0 * will be equivalent to capacity - 1 * @param holder The list array into which the elements of the tops are to be stored; a new list * would be created and returned if this parameter is null. * @param freqs Optional, the frequencies of each items in the returned result * @return The top K heavy hitters(frequent input) */ @Nullable List getTopHeavyHitters(int k, @Nullable List holder, @Nullable List freqs); /** * @param holder The list array into which the elements of the candidates are to be stored; a * new list would be created and returned if this parameter is null. * @return The candidate heavy hitters so far, it could include false postives. */ @Nullable List getCandidates(@Nullable List holder); /** * Reset this heavy hitter counter */ void reset(); /** * @return The ratio of the input to be used as the validation data, Float.NaN means no * validation is needed. */ float getRequiredValidationInputRatio(); /** * The default implementation of the {@link HeavyHitterSketch}. * *

Currently it consists of two passes: the first pass will take the input into * the MG(Misra–Gries) summary; while the secondary pass will validate the output against the * input in order to eliminate false postivies.

* *

For sure there are better approaches which takes only one pass, but also comes along with * overheads in terms of cpu/memory cost; the MG summary would be a trivial and good enough * pick.

* * @param The type of the input. * @see Misra–Gries * summary for the detailed explanation of the algorithm. */ final class HeavyHitterSketchImpl implements HeavyHitterSketch { /** * The array to track the current heavy hitters, its size < {@link #mCapacity}. */ private final SparseArray mObjects = new SparseArray<>(); /** * The frequencies of the current heavy hitters, its size < {@link #mCapacity}. */ private final SparseIntArray mFrequencies = new SparseIntArray(); /** * The amount of the input of each pass */ private int mPassSize; /** * The amount of the total input it expects */ private int mTotalSize; /** * The maximum number of distinct input it should track */ private int mCapacity; /** * The amount of inputs it already received. */ private int mNumInputs; /** * Whether or not it's configured properly. */ private boolean mConfigured; /** * Set the configuration with given parameters * * @param inputSize The amount of the input. * @param capacity The maximum number of distinct input it should track; it defines the * lower bound of the output. */ public void setConfig(final int inputSize, final int capacity) { if (inputSize < capacity || inputSize <= 1) { mConfigured = false; throw new IllegalArgumentException(); } reset(); mTotalSize = inputSize; mPassSize = inputSize >> 1; mCapacity = capacity; mConfigured = true; } /** * Add a new input to the current sketch. * * @param newInstance The new input */ @Override public void add(@Nullable final T newInstance) { if (!mConfigured) { throw new IllegalStateException(); } if (mNumInputs < mPassSize) { addToMGSummary(newInstance); } else if (mNumInputs < mTotalSize) { // Validation pass validate(newInstance); } } /** * Add an input to the MG summary. * *

Note the frequency in the result set is an estimation. Every (obj, freq') pair * in the result set, will have the following property: * (freq - inputSize / capacity) ≤ freq' ≤ freq * The above freq' is the estimated frequency, while the freq is the actual frequency. *

*/ private void addToMGSummary(@Nullable final T newInstance) { final int hashCode = newInstance != null ? newInstance.hashCode() : 0; final int index = mObjects.indexOfKey(hashCode); // MG summary if (index >= 0) { mFrequencies.setValueAt(index, mFrequencies.valueAt(index) + 1); } else if (mObjects.size() < mCapacity - 1) { mObjects.put(hashCode, newInstance); mFrequencies.put(hashCode, 1); } else { for (int i = mFrequencies.size() - 1; i >= 0; i--) { final int val = mFrequencies.valueAt(i) - 1; if (val == 0) { mObjects.removeAt(i); mFrequencies.removeAt(i); } else { mFrequencies.setValueAt(i, val); } } } if (++mNumInputs == mPassSize) { // Clear all the frequencies as we are going to validate them next for (int i = mFrequencies.size() - 1; i >= 0; i--) { mFrequencies.setValueAt(i, 0); } } } /** * Validate the results from MG summary; the ones with frequencies less than lower boundary * will be removed from the set. */ private void validate(@Nullable final T newInstance) { final int hashCode = newInstance != null ? newInstance.hashCode() : 0; final int index = mObjects.indexOfKey(hashCode); if (index >= 0) { mFrequencies.setValueAt(index, mFrequencies.valueAt(index) + 1); } if (++mNumInputs == mTotalSize) { final int lower = mPassSize / mCapacity; // Remove any occurrences with frequencies less than lower boundary for (int i = mFrequencies.size() - 1; i >= 0; i--) { final int val = mFrequencies.valueAt(i); if (val < lower) { mFrequencies.removeAt(i); mObjects.removeAt(i); } } } } /** * @param k The number of heavy hitters it should return, k < capacity, a value of 0 * will be equivalent to capacity - 1 * @param holder The list array into which the elements of the tops are to be stored; a new * list would be created and returned if this parameter is null. * @param freqs Optional, the frequencies of each items in the returned result * @return The top K heavy hitters(frequent input) */ @Override @Nullable public List getTopHeavyHitters(final int k, final @Nullable List holder, final @Nullable List freqs) { if (!mConfigured) { throw new IllegalStateException(); } if (k >= mCapacity) { throw new IllegalArgumentException(); } if (mNumInputs < mTotalSize) { // It hasn't had all the inputs yet. throw new IllegalStateException(); } ArrayList indexes = null; for (int i = mFrequencies.size() - 1; i >= 0; i--) { final int val = mFrequencies.valueAt(i); if (val > 0) { if (indexes == null) { indexes = new ArrayList<>(); } indexes.add(i); } } if (indexes == null) { return null; } Collections.sort(indexes, (a, b) -> mFrequencies.valueAt(b) - mFrequencies.valueAt(a)); final List result = holder != null ? holder : new ArrayList(); final int max = Math.min(k == 0 ? (mCapacity - 1) : k, indexes.size()); for (int i = 0; i < max; i++) { final int index = indexes.get(i); final T obj = mObjects.valueAt(index); if (obj != null) { result.add(obj); if (freqs != null) { freqs.add((float) mFrequencies.valueAt(index) / mPassSize); } } } return result; } /** * @param holder The list array into which the elements of the candidates are to be stored; * a new list would be created and returned if this parameter is null. * @return The candidate heavy hitters so far, it could include false postives. */ @Nullable public List getCandidates(final @Nullable List holder) { if (!mConfigured) { throw new IllegalStateException(); } if (mNumInputs < mPassSize) { // It hasn't done with the first pass yet, return nothing return null; } List result = holder != null ? holder : new ArrayList(); for (int i = mObjects.size() - 1; i >= 0; i--) { final T obj = mObjects.valueAt(i); if (obj != null) { result.add(obj); } } return result; } /** * Reset this heavy hitter counter */ @Override public void reset() { mNumInputs = 0; mObjects.clear(); mFrequencies.clear(); } /** * @return The ratio of the input to be used as the validation data, Float.NaN means no * validation is needed. */ public float getRequiredValidationInputRatio() { return 0.5f; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy