All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.mahout.clustering.spectral.AffinityMatrixInputMapper Maven / Gradle / Ivy

There is a newer version: 0.13.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.clustering.spectral;

import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * 

Handles reading the files representing the affinity matrix. Since the affinity * matrix is representative of a graph, each line in all the files should * take the form:

* * {@code i,j,value} * *

where {@code i} and {@code j} are the {@code i}th and * {@code j} data points in the entire set, and {@code value} * represents some measurement of their relative absolute magnitudes. This * is, simply, a method for representing a graph textually. */ public class AffinityMatrixInputMapper extends Mapper { private static final Logger log = LoggerFactory.getLogger(AffinityMatrixInputMapper.class); private static final Pattern COMMA_PATTERN = Pattern.compile(","); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] elements = COMMA_PATTERN.split(value.toString()); log.debug("(DEBUG - MAP) Key[{}], Value[{}]", key.get(), value); // enforce well-formed textual representation of the graph if (elements.length != 3) { throw new IOException("Expected input of length 3, received " + elements.length + ". Please make sure you adhere to " + "the structure of (i,j,value) for representing a graph in text. " + "Input line was: '" + value + "'."); } if (elements[0].isEmpty() || elements[1].isEmpty() || elements[2].isEmpty()) { throw new IOException("Found an element of 0 length. Please be sure you adhere to the structure of " + "(i,j,value) for representing a graph in text."); } // parse the line of text into a DistributedRowMatrix entry, // making the row (elements[0]) the key to the Reducer, and // setting the column (elements[1]) in the entry itself DistributedRowMatrix.MatrixEntryWritable toAdd = new DistributedRowMatrix.MatrixEntryWritable(); IntWritable row = new IntWritable(Integer.valueOf(elements[0])); toAdd.setRow(-1); // already set as the Reducer's key toAdd.setCol(Integer.valueOf(elements[1])); toAdd.setVal(Double.valueOf(elements[2])); context.write(row, toAdd); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy