All downloads are free. Search and download functionality uses the official Maven repository.

com.hazelcast.mapreduce.aggregation.impl.DistinctValuesAggregation Maven / Gradle / Ivy

There is a newer version: 5.4.0
Show newest version
/*
 * Copyright (c) 2008-2016, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.mapreduce.aggregation.impl;

import com.hazelcast.mapreduce.Collator;
import com.hazelcast.mapreduce.Combiner;
import com.hazelcast.mapreduce.CombinerFactory;
import com.hazelcast.mapreduce.Context;
import com.hazelcast.mapreduce.Mapper;
import com.hazelcast.mapreduce.Reducer;
import com.hazelcast.mapreduce.ReducerFactory;
import com.hazelcast.mapreduce.aggregation.Supplier;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.nio.serialization.IdentifiedDataSerializable;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;

/**
 * The predefined distinct value aggregation.
 *
 * @param           the input key type
 * @param         the input value type
 * @param  the common super type for all distinct values
 */
public class DistinctValuesAggregation
        implements AggType, Set, Set> {

    private static final int DEFAULT_DISTRIBUTION_FACTOR = 20;

    @Override
    public Collator>, Set> getCollator() {
        return new Collator>, Set>() {

            @Override
            public Set collate(Iterable>> values) {
                Set distinctValues = new HashSet();
                for (Map.Entry> value : values) {
                    distinctValues.addAll(value.getValue());
                }
                return distinctValues;
            }
        };
    }

    @Override
    public Mapper getMapper(Supplier supplier) {
        return new DistinctValueMapper(supplier);
    }

    @Override
    public CombinerFactory> getCombinerFactory() {
        return new DistinctValuesCombinerFactory();
    }

    @Override
    public ReducerFactory, Set> getReducerFactory() {
        return new DistinctValuesReducerFactory();
    }

    /**
     * Distinct values CombinerFactory
     *
     * @param  the distinct values type
     */
    static class DistinctValuesCombinerFactory
            extends AbstractAggregationCombinerFactory> {

        @Override
        public Combiner> newCombiner(Integer key) {
            return new DistinctValuesCombiner();
        }

        @Override
        public int getId() {
            return AggregationsDataSerializerHook.DISTINCT_VALUES_COMBINER_FACTORY;
        }
    }

    /**
     * Distinct values Combiner
     *
     * @param  the distinct values type
     */
    private static class DistinctValuesCombiner
            extends Combiner> {

        private final Set distinctValues = new HashSet();

        @Override
        public void combine(DistinctType value) {
            distinctValues.add(value);
        }

        @Override
        public Set finalizeChunk() {
            Set distinctValues = new SetAdapter();
            distinctValues.addAll(this.distinctValues);
            this.distinctValues.clear();
            return distinctValues;
        }
    }

    /**
     * Distinct values ReducerFactory
     *
     * @param  the distinct values type
     */
    static class DistinctValuesReducerFactory
            extends AbstractAggregationReducerFactory, Set> {

        @Override
        public Reducer, Set> newReducer(Integer key) {
            return new DistinctValuesReducer();
        }

        @Override
        public int getId() {
            return AggregationsDataSerializerHook.DISTINCT_VALUES_REDUCER_FACTORY;
        }
    }

    /**
     * Distinct values Reducer
     *
     * @param  the distinct values type
     */
    private static class DistinctValuesReducer
            extends Reducer, Set> {

        private final Set distinctValues = new SetAdapter();

        @Override
        public void reduce(Set value) {
            distinctValues.addAll(value);
        }

        @Override
        public Set finalizeReduce() {
            return distinctValues;
        }
    }

    /**
     * A special mapper for distributing reducing of distinct values
     *
     * @param           the input key type
     * @param         the input value type
     * @param  the type of distinct values
     */
    @SuppressFBWarnings("SE_NO_SERIALVERSIONID")
    static class DistinctValueMapper
            implements Mapper, IdentifiedDataSerializable {

        // These keys are used to distribute reducer steps around the cluster
        private static final int[] DISTRIBUTION_KEYS;

        static {
            Random random = new Random();
            DISTRIBUTION_KEYS = new int[DEFAULT_DISTRIBUTION_FACTOR];
            for (int i = 0; i < DISTRIBUTION_KEYS.length; i++) {
                DISTRIBUTION_KEYS[i] = random.nextInt();
            }
        }

        private transient SimpleEntry entry = new SimpleEntry();
        private transient int keyPosition;

        private Supplier supplier;

        DistinctValueMapper() {
        }

        DistinctValueMapper(Supplier supplier) {
            this.supplier = supplier;
        }

        @Override
        public void map(Key key, Value value, Context context) {
            int mappingKey = key();
            entry.key = key;
            entry.value = value;
            DistinctType valueOut = supplier.apply(entry);
            if (valueOut != null) {
                context.emit(mappingKey, valueOut);
            }
        }

        @Override
        public int getFactoryId() {
            return AggregationsDataSerializerHook.F_ID;
        }

        @Override
        public int getId() {
            return AggregationsDataSerializerHook.DISTINCT_VALUES_MAPPER;
        }

        @Override
        public void writeData(ObjectDataOutput out)
                throws IOException {

            out.writeObject(supplier);
        }

        @Override
        public void readData(ObjectDataInput in)
                throws IOException {

            supplier = in.readObject();
        }

        private int key() {
            if (keyPosition >= DISTRIBUTION_KEYS.length) {
                keyPosition = 0;
            }
            return keyPosition++;
        }
    }

    /**
     * Internal implementation of an map entry with changeable value to prevent
     * to much object allocation while supplying
     *
     * @param  key type
     * @param  value type
     */
    private static final class SimpleEntry
            implements Map.Entry {

        private K key;
        private V value;

        @Override
        public K getKey() {
            return key;
        }

        @Override
        public V getValue() {
            return value;
        }

        @Override
        public V setValue(V value) {
            throw new UnsupportedOperationException();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy