
com.hazelcast.mapreduce.aggregation.impl.DistinctValuesAggregation Maven / Gradle / Ivy
/*
* Copyright (c) 2008-2016, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.mapreduce.aggregation.impl;
import com.hazelcast.mapreduce.Collator;
import com.hazelcast.mapreduce.Combiner;
import com.hazelcast.mapreduce.CombinerFactory;
import com.hazelcast.mapreduce.Context;
import com.hazelcast.mapreduce.Mapper;
import com.hazelcast.mapreduce.Reducer;
import com.hazelcast.mapreduce.ReducerFactory;
import com.hazelcast.mapreduce.aggregation.Supplier;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.nio.serialization.IdentifiedDataSerializable;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
/**
* The predefined distinct value aggregation.
*
* @param the input key type
* @param the input value type
* @param the common super type for all distinct values
*/
public class DistinctValuesAggregation
implements AggType, Set, Set> {
private static final int DEFAULT_DISTRIBUTION_FACTOR = 20;
@Override
public Collator>, Set> getCollator() {
return new Collator>, Set>() {
@Override
public Set collate(Iterable>> values) {
Set distinctValues = new HashSet();
for (Map.Entry> value : values) {
distinctValues.addAll(value.getValue());
}
return distinctValues;
}
};
}
@Override
public Mapper getMapper(Supplier supplier) {
return new DistinctValueMapper(supplier);
}
@Override
public CombinerFactory> getCombinerFactory() {
return new DistinctValuesCombinerFactory();
}
@Override
public ReducerFactory, Set> getReducerFactory() {
return new DistinctValuesReducerFactory();
}
/**
* Distinct values CombinerFactory
*
* @param the distinct values type
*/
static class DistinctValuesCombinerFactory
extends AbstractAggregationCombinerFactory> {
@Override
public Combiner> newCombiner(Integer key) {
return new DistinctValuesCombiner();
}
@Override
public int getId() {
return AggregationsDataSerializerHook.DISTINCT_VALUES_COMBINER_FACTORY;
}
}
/**
* Distinct values Combiner
*
* @param the distinct values type
*/
private static class DistinctValuesCombiner
extends Combiner> {
private final Set distinctValues = new HashSet();
@Override
public void combine(DistinctType value) {
distinctValues.add(value);
}
@Override
public Set finalizeChunk() {
Set distinctValues = new SetAdapter();
distinctValues.addAll(this.distinctValues);
this.distinctValues.clear();
return distinctValues;
}
}
/**
* Distinct values ReducerFactory
*
* @param the distinct values type
*/
static class DistinctValuesReducerFactory
extends AbstractAggregationReducerFactory, Set> {
@Override
public Reducer, Set> newReducer(Integer key) {
return new DistinctValuesReducer();
}
@Override
public int getId() {
return AggregationsDataSerializerHook.DISTINCT_VALUES_REDUCER_FACTORY;
}
}
/**
* Distinct values Reducer
*
* @param the distinct values type
*/
private static class DistinctValuesReducer
extends Reducer, Set> {
private final Set distinctValues = new SetAdapter();
@Override
public void reduce(Set value) {
distinctValues.addAll(value);
}
@Override
public Set finalizeReduce() {
return distinctValues;
}
}
/**
* A special mapper for distributing reducing of distinct values
*
* @param the input key type
* @param the input value type
* @param the type of distinct values
*/
@SuppressFBWarnings("SE_NO_SERIALVERSIONID")
static class DistinctValueMapper
implements Mapper, IdentifiedDataSerializable {
// These keys are used to distribute reducer steps around the cluster
private static final int[] DISTRIBUTION_KEYS;
static {
Random random = new Random();
DISTRIBUTION_KEYS = new int[DEFAULT_DISTRIBUTION_FACTOR];
for (int i = 0; i < DISTRIBUTION_KEYS.length; i++) {
DISTRIBUTION_KEYS[i] = random.nextInt();
}
}
private transient SimpleEntry entry = new SimpleEntry();
private transient int keyPosition;
private Supplier supplier;
DistinctValueMapper() {
}
DistinctValueMapper(Supplier supplier) {
this.supplier = supplier;
}
@Override
public void map(Key key, Value value, Context context) {
int mappingKey = key();
entry.key = key;
entry.value = value;
DistinctType valueOut = supplier.apply(entry);
if (valueOut != null) {
context.emit(mappingKey, valueOut);
}
}
@Override
public int getFactoryId() {
return AggregationsDataSerializerHook.F_ID;
}
@Override
public int getId() {
return AggregationsDataSerializerHook.DISTINCT_VALUES_MAPPER;
}
@Override
public void writeData(ObjectDataOutput out)
throws IOException {
out.writeObject(supplier);
}
@Override
public void readData(ObjectDataInput in)
throws IOException {
supplier = in.readObject();
}
private int key() {
if (keyPosition >= DISTRIBUTION_KEYS.length) {
keyPosition = 0;
}
return keyPosition++;
}
}
/**
* Internal implementation of an map entry with changeable value to prevent
* to much object allocation while supplying
*
* @param key type
* @param value type
*/
private static final class SimpleEntry
implements Map.Entry {
private K key;
private V value;
@Override
public K getKey() {
return key;
}
@Override
public V getValue() {
return value;
}
@Override
public V setValue(V value) {
throw new UnsupportedOperationException();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy