All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.aamend.hadoop.clustering.mapreduce.CanopyCreateInitMapper Maven / Gradle / Ivy

package com.aamend.hadoop.clustering.mapreduce;

import com.aamend.hadoop.clustering.cluster.Canopy;
import com.aamend.hadoop.clustering.cluster.CanopyWritable;
import com.aamend.hadoop.clustering.cluster.Cluster;
import com.aamend.hadoop.clustering.distance.DistanceMeasure;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;

/**
 * Author: [email protected]
 * Date: 21/03/14
 */
public class CanopyCreateInitMapper extends
        Mapper {

    private float t1;
    private float t2;
    private int nextCanopyId;
    private DistanceMeasure measure;
    private Collection canopies = Lists.newArrayList();

    private static final Text KEY = new Text();
    private static final Logger LOGGER =
            LoggerFactory.getLogger(CanopyCreateInitMapper.class);

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

        // Retrieve params fom configuration
        Configuration conf = context.getConfiguration();
        measure = Canopy.configureMeasure(conf);
        t1 = conf.getFloat(Canopy.CLUSTER_T1, 1.0f);
        t2 = conf.getFloat(Canopy.CLUSTER_T2, 0.8f);
    }

    @Override
    protected void map(WritableComparable key, ArrayPrimitiveWritable value, Context context)
            throws IOException, InterruptedException {

        int[] point = (int[]) value.get();
        boolean stronglyBound = false;
        for (Cluster canopy : canopies) {
            double dist = measure.distance(canopy.getCenter(), point);
            if (dist < t1) {
                KEY.set(Arrays.toString(canopy.getCenter()));
                Cluster newCluster;
                if (dist < t2) {
                    newCluster = new Canopy(canopy.getId(), point, 1L);
                    LOGGER.debug("Adding (T2) {} to Cluster center {}", Arrays.toString((int[]) value.get()),
                            Arrays.toString(canopy.getCenter()));
                } else {
                    newCluster = new Canopy(canopy.getId(), point, 0L);
                    LOGGER.debug("Adding (T1) {} to Cluster center {}", Arrays.toString((int[]) value.get()),
                            Arrays.toString(canopy.getCenter()));
                }
                context.write(KEY, new CanopyWritable(newCluster));
            }
            stronglyBound = stronglyBound || dist < t2;
        }
        if (!stronglyBound) {
            nextCanopyId++;
            Cluster canopy = new Canopy(nextCanopyId, point, 1L);
            LOGGER.debug("Creating a new Cluster {}", canopy.asFormattedString());
            canopies.add(canopy);
            KEY.set(Arrays.toString(canopy.getCenter()));
            context.write(KEY, new CanopyWritable(canopy));
        }

    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy