// com.hazelcast.mapreduce.impl.task.DefaultContext (artifact-browser header: Maven / Gradle / Ivy)
/*
* Copyright (c) 2008-2015, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.mapreduce.impl.task;
import com.hazelcast.mapreduce.Combiner;
import com.hazelcast.mapreduce.CombinerFactory;
import com.hazelcast.mapreduce.Context;
import com.hazelcast.mapreduce.impl.CombinerResultList;
import com.hazelcast.mapreduce.impl.HashMapAdapter;
import com.hazelcast.mapreduce.impl.MapReduceUtil;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
/**
 * This is the internal default implementation of a map reduce context mappers emit values to. It controls the emitted
 * values to be combined using either the set {@link com.hazelcast.mapreduce.Combiner} or by utilizing the internal
 * collecting combiner (which is just a better HashMap ;-)).
 * In addition to that it is responsible to notify the {@link com.hazelcast.mapreduce.impl.task.MapCombineTask}
 * about an emitted value to eventually send out chunks on reaching the chunk size limit.
 *
 * @param <KeyIn>   type of the emitted key
 * @param <ValueIn> type of the emitted value
 */
public class DefaultContext
implements Context {
private static final AtomicIntegerFieldUpdater COLLECTED_UPDATER = AtomicIntegerFieldUpdater
.newUpdater(DefaultContext.class, "collected");
private final ConcurrentMap> combiners = new ConcurrentHashMap>();
private final CombinerFactory combinerFactory;
private final MapCombineTask mapCombineTask;
// This field is only accessed through the updater
private volatile int collected;
private volatile int partitionId;
protected DefaultContext(CombinerFactory combinerFactory, MapCombineTask mapCombineTask) {
this.mapCombineTask = mapCombineTask;
this.combinerFactory = combinerFactory != null ? combinerFactory : new CollectingCombinerFactory();
}
public void setPartitionId(int partitionId) {
this.partitionId = partitionId;
}
@Override
public void emit(KeyIn key, ValueIn value) {
Combiner combiner = getOrCreateCombiner(key);
combiner.combine(value);
COLLECTED_UPDATER.incrementAndGet(this);
mapCombineTask.onEmit(this, partitionId);
}
public Map requestChunk() {
int mapSize = MapReduceUtil.mapSize(combiners.size());
Map chunkMap = new HashMapAdapter(mapSize);
for (Map.Entry> entry : combiners.entrySet()) {
Combiner combiner = entry.getValue();
Chunk chunk = (Chunk) combiner.finalizeChunk();
combiner.reset();
if (chunk != null) {
chunkMap.put(entry.getKey(), chunk);
}
}
COLLECTED_UPDATER.set(this, 0);
return chunkMap;
}
public int getCollected() {
return collected;
}
public void finalizeCombiners() {
for (Combiner combiner : combiners.values()) {
combiner.finalizeCombine();
}
}
public Combiner getOrCreateCombiner(KeyIn key) {
Combiner combiner = combiners.get(key);
if (combiner == null) {
combiner = combinerFactory.newCombiner(key);
combiner.beginCombine();
Combiner temp = combiners.putIfAbsent(key, combiner);
if (temp != null) {
combiner = temp;
}
}
return combiner;
}
/**
* This {@link com.hazelcast.mapreduce.CombinerFactory} implementation is used
* if no specific CombinerFactory was set in the configuration of the job to
* do mapper aside combining of the emitted values.
*
* @param type of the key
* @param type of the value
*/
private static class CollectingCombinerFactory
implements CombinerFactory> {
@Override
public Combiner> newCombiner(KeyIn key) {
return new Combiner>() {
private final List values = new ArrayList();
@Override
public void combine(ValueIn value) {
values.add(value);
}
@Override
public List finalizeChunk() {
return new CombinerResultList(this.values);
}
@Override
public void reset() {
this.values.clear();
}
};
}
}
}