/**
 * 
 */
package edu.umd.cloud9.example.hits;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.util.map.HMapIV;
import edu.umd.cloud9.util.map.MapIV;

/**
 * Hadoop tool that counts inlinks in a web graph: for every node it sums how
 * many other nodes link to it, and writes out the nodes whose inlink count
 * exceeds a fixed threshold.
 * 
 * @author michaelmcgrath
 */
public class InlinkCounter extends Configured implements Tool {

	private static final Logger sLogger = Logger.getLogger(InlinkCounter.class);

	/**
	 * Mapper for the adjacency-list input: each line holds a source node id
	 * followed by the ids it links to. For every outlink the mapper emits
	 * (target id, 1), so the reducer can sum inlink counts per node.
	 */
	private static class AFormatMapper extends MapReduceBase implements
			Mapper<LongWritable, Text, IntWritable, LongWritable> {
		private LongWritable valOut = new LongWritable(1);
		private IntWritable keyOut = new IntWritable();

		public void map(LongWritable key, Text value,
				OutputCollector<IntWritable, LongWritable> output,
				Reporter reporter) throws IOException {

			ArrayListOfIntsWritable links = new ArrayListOfIntsWritable();
			String line = value.toString();
			StringTokenizer itr = new StringTokenizer(line);
			// The first token is the source node id; skip it.
			if (itr.hasMoreTokens()) {
				itr.nextToken();
			}
			// Each remaining token is a link target: emit (target, 1).
			while (itr.hasMoreTokens()) {
				keyOut.set(Integer.parseInt(itr.nextToken()));
				output.collect(keyOut, valOut);
			}
			// emit mentioned mentioner -> mentioned (mentioners) in links
			// emit mentioner mentioned -> mentioner (mentions) outlinks
			// emit mentioned a
			// emit mentioner 1

		}

	}

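	/**
	 * Variant of the mapper that uses in-mapper combining: instead of emitting
	 * one record per link, it accumulates inlink adjacency lists in a per-task
	 * map and emits them as HITSNode records in close(). Note that the
	 * list-merging step is still marked FIXME and this class is not wired into
	 * the job configured in run() below.
	 */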
	private static class AFormatMapperIMC extends MapReduceBase implements
			Mapper<LongWritable, Text, IntWritable, HITSNode> {
		private HITSNode valOut = new HITSNode();
		private IntWritable keyOut = new IntWritable();
		private static OutputCollector<IntWritable, HITSNode> mOutput;
		private static HMapIV<ArrayListOfIntsWritable> adjLists = new HMapIV<ArrayListOfIntsWritable>();

		public void configure(JobConf jc) {
			adjLists.clear();
		}

		public void map(LongWritable key, Text value,
				OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
				throws IOException {

			mOutput = output;

			ArrayListOfIntsWritable links = new ArrayListOfIntsWritable();
			String line = value.toString();
			StringTokenizer itr = new StringTokenizer(line);
			// The first token is the source node id; keep it as the inlink to
			// record against every target on this line.
			if (itr.hasMoreTokens()) {
				links.add(Integer.parseInt(itr.nextToken()));
			}
			while (itr.hasMoreTokens()) {
				int curr = Integer.parseInt(itr.nextToken());
				if (adjLists.containsKey(curr)) {
					ArrayListOfIntsWritable list = adjLists.get(curr);
					list.trimToSize();
					links.trimToSize();
					//FIXME
					//list.addAll(links.getArray());
					adjLists.put(curr, list);
				} else {
					links.trimToSize();
					adjLists.put(curr, links);
				}
			}
		}

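		// Flush step of the in-mapper-combining pattern: once the map task has
		// consumed all its input, emit one HITSNode per accumulated entry.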
		public void close() throws IOException {
			for (MapIV.Entry<ArrayListOfIntsWritable> e : adjLists.entrySet()) {
				keyOut.set(e.getKey());
				valOut.setNodeId(e.getKey());
				valOut.setARank((float) 0.0);
				valOut.setHRank((float) 0.0);
				valOut.setType(HITSNode.TYPE_AUTH_COMPLETE);
				//FIXME
				//valOut.setAdjacencyList(e.getValue());
				mOutput.collect(keyOut, valOut);
			}
		}

	}

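	/**
	 * Combiner: partially sums the per-link counts for each target node to cut
	 * down the data shuffled to the reducers.
	 */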
	private static class AFormatCombiner extends MapReduceBase implements
			Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
		private LongWritable valIn;
		private LongWritable valOut = new LongWritable();
		ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();

		public void reduce(IntWritable key, Iterator<LongWritable> values,
				OutputCollector<IntWritable, LongWritable> output,
				Reporter reporter) throws IOException {
			// ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();
			long sum = 0;
			// System.out.println(key.toString());
			// System.out.println(adjList.toString());
			while (values.hasNext()) {
				sum += values.next().get();
			}
			valOut.set(sum);
			output.collect(key, valOut);
		}
	}

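	/**
	 * Reducer: sums the counts for each node and emits only those nodes whose
	 * total inlink count exceeds 100,000.
	 */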
	private static class AFormatReducer extends MapReduceBase implements
			Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
		private LongWritable valIn;
		private LongWritable valOut = new LongWritable();
		ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();

		public void reduce(IntWritable key, Iterator<LongWritable> values,
				OutputCollector<IntWritable, LongWritable> output,
				Reporter reporter) throws IOException {
			// ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();
			long sum = 0;
			// System.out.println(key.toString());
			// System.out.println(adjList.toString());
			while (values.hasNext()) {
				sum += values.next().get();
			}

			if (sum > 100000) {
				valOut.set(sum);
				output.collect(key, valOut);
			}

		}
	}

	private static int printUsage() {
		System.out
				.println("usage: [input-path] [output-path] [num-mappers] [num-reducers]");
		ToolRunner.printGenericCommandUsage(System.out);
		return -1;
	}

	public int run(String[] args) throws Exception {

		if (args.length != 4) {
			printUsage();
			return -1;
		}

		String inputPath = args[0];
		String outputPath = args[1];

		int mapTasks = Integer.parseInt(args[2]);
		int reduceTasks = Integer.parseInt(args[3]);

		sLogger.info("Tool: Counter");
		sLogger.info(" - input path: " + inputPath);
		sLogger.info(" - output path: " + outputPath);
		sLogger.info(" - number of mappers: " + mapTasks);
		sLogger.info(" - number of reducers: " + reduceTasks);

		JobConf conf = new JobConf(InlinkCounter.class);
		conf.setJobName("InlinkCounter -- Web Graph");

		conf.setNumMapTasks(mapTasks);
		conf.setNumReduceTasks(reduceTasks);

		FileInputFormat.setInputPaths(conf, new Path(inputPath));
		FileOutputFormat.setOutputPath(conf, new Path(outputPath));
		FileOutputFormat.setCompressOutput(conf, false);

		// conf.setInputFormat(SequenceFileInputFormat.class);
		conf.setOutputKeyClass(IntWritable.class);
		conf.setOutputValueClass(LongWritable.class);
		// conf.setOutputFormat(SequenceFileOutputFormat.class);

		// InputSampler.Sampler sampler = new
		// InputSampler.RandomSampler(0.1, 10, 10);
		// InputSampler.writePartitionFile(conf, sampler);
		// conf.setPartitionerClass(TotalOrderPartitioner.class);
		conf.setMapperClass(AFormatMapper.class);
		conf.setCombinerClass(AFormatCombiner.class);
		conf.setReducerClass(AFormatReducer.class);

		// Delete the output directory if it exists already
		Path outputDir = new Path(outputPath);
		FileSystem.get(conf).delete(outputDir, true);

		long startTime = System.currentTimeMillis();
		sLogger.info("Starting job");
		JobClient.runJob(conf);
		sLogger.info("Job Finished in "
				+ (System.currentTimeMillis() - startTime) / 1000.0
				+ " seconds");

		return 0;
	}

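	/**
	 * Dispatches command-line arguments to the tool via ToolRunner. A typical
	 * invocation (jar name and paths are illustrative, not taken from this
	 * project's build) would look like:
	 *
	 *   hadoop jar cloud9.jar edu.umd.cloud9.example.hits.InlinkCounter \
	 *     input/adjacency-lists output/inlink-counts 100 10
	 */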
	public static void main(String[] args) throws Exception {
		int res = ToolRunner
				.run(new Configuration(), new InlinkCounter(), args);
		System.exit(res);
	}

}