All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.symcpe.wraith.aggregators.FineCountingAggregator Maven / Gradle / Ivy

/**
 * Copyright 2016 Symantec Corporation.
 * 
 * Licensed under the Apache License, Version 2.0 (the “License”); 
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.symcpe.wraith.aggregators;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import io.symcpe.wraith.Constants;

/**
 * Counts exact (distinct) values for a given quantity by storing them in a
 * {@link HashSet} of {@link Integer}s.
 * <p>
 * {@link #add(Object)} casts its argument to {@link Integer}, so values must
 * already be reduced to integers before they reach this class. The original
 * notes describe an MD5-based pre-hash that keeps the per-value size
 * predictable; that hashing step is not performed in this class.
 * <p>
 * The hard limit is read from configuration
 * ({@code Constants.AGGREGATIONS_FCOUNT_LIMIT}, falling back to
 * {@code Constants.DEFAULT_AGGREGATION_FCOUNT_LIMIT}). The original notes cite
 * an intended ceiling of 100K accurately counted values, i.e. up to roughly
 * 400K bytes per instance, so that a decent number of accurate aggregations
 * can run side by side.
 * <p>
 * For quantities greater than 100K, use {@link CoarseCountingAggregator},
 * which utilizes the HyperLogLog statistical data structure for cardinality
 * analysis.
 * <p>
 * Limits are enforced by dropping data. NOTE(review): this class does not
 * check the limit inside {@link #add(Object)}; presumably the caller enforces
 * it using {@link #getHardLimit()} and {@link #disableLimitChecks()} - confirm
 * against the caller.
 *
 * @author ambud_sharma
 */
public class FineCountingAggregator implements CountingAggregator {

    /** Value reported by {@link #disableLimitChecks()}; always {@code false} here. */
    private static final boolean DISABLE_HARD_LIMIT_CHECKS = false;
    private static final long serialVersionUID = 1L;

    /** Configured hard limit; also used to size the backing set. */
    private int hardLimit;

    /**
     * Backing store of counted values. It stays {@code null} until one of the
     * {@code initialize} methods has run (the public no-arg constructor does
     * not allocate it).
     */
    private Set<Integer> set;

    /**
     * Creates an unconfigured aggregator. {@link #initialize(Map)} must be
     * called before any method that touches the backing set.
     */
    public FineCountingAggregator() {
    }

    /**
     * Creates a ready-to-use aggregator with the supplied hard limit.
     *
     * @param hardLimit hard limit used to size the backing set
     */
    private FineCountingAggregator(int hardLimit) {
        this.hardLimit = hardLimit;
        initialize();
    }

    /**
     * Reads the hard limit from the configuration and allocates the backing
     * set.
     *
     * @param conf configuration map; the limit is looked up under
     *             {@code Constants.AGGREGATIONS_FCOUNT_LIMIT} and defaults to
     *             {@code Constants.DEFAULT_AGGREGATION_FCOUNT_LIMIT}
     * @throws NumberFormatException if the configured value is not a valid
     *                               integer
     */
    // The parameter stays raw because the overridden declaration is not
    // visible here; a raw signature remains override-compatible with any
    // parameterization of Map used by the interface.
    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    public void initialize(Map conf) {
        Object configuredLimit = conf.getOrDefault(Constants.AGGREGATIONS_FCOUNT_LIMIT,
                Constants.DEFAULT_AGGREGATION_FCOUNT_LIMIT);
        hardLimit = Integer.parseInt(configuredLimit.toString());
        initialize();
    }

    /**
     * Allocates a fresh backing set sized from the current hard limit,
     * replacing any set that was held before.
     */
    protected void initialize() {
        int initialCapacity = (int) (hardLimit * Constants.SET_CAPACITY_AMPLIFICATION);
        set = new HashSet<>(initialCapacity, Constants.HASHSET_LOAD_FACTOR);
    }

    /**
     * @return the configured hard limit
     */
    @Override
    public int getHardLimit() {
        return hardLimit;
    }

    /**
     * @return a new, empty aggregator configured with this instance's hard
     *         limit
     */
    @Override
    public CountingAggregator getInstance() {
        return new FineCountingAggregator(hardLimit);
    }

    /**
     * @return the number of distinct values currently stored
     */
    @Override
    public long size() {
        return set.size();
    }

    /**
     * Adds a value to the backing set.
     *
     * @param aggregationValue the value to count; must be an {@link Integer}
     * @return {@code true} if the value was not already present
     * @throws ClassCastException if {@code aggregationValue} is not an
     *                            {@link Integer}
     */
    @Override
    public boolean add(Object aggregationValue) {
        return set.add((Integer) aggregationValue);
    }

    /**
     * @return {@code false}; hard limit checks are not disabled for this
     *         aggregator
     */
    @Override
    public boolean disableLimitChecks() {
        return DISABLE_HARD_LIMIT_CHECKS;
    }

    /**
     * Exposes the live backing set (not a copy), so changes made through the
     * returned reference are reflected in this aggregator.
     *
     * @return the backing set of counted values
     */
    // Return type stays raw to match the declaration visible in this file.
    @SuppressWarnings("rawtypes")
    @Override
    public Set getDatastructure() {
        return set;
    }

    /**
     * Removes all counted values while keeping the allocated set.
     */
    @Override
    public void reset() {
        set.clear();
    }

    /**
     * @return the exact cardinality, which is the same value as {@link #size()}
     */
    @Override
    public long getCardinality() {
        return size();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy