org.apache.hama.examples.SemiClusterJobDriver Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hama.examples;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.HashPartitioner;
import org.apache.hama.bsp.TextInputFormat;
import org.apache.hama.bsp.TextOutputFormat;
import org.apache.hama.bsp.message.compress.Bzip2Compressor;
import org.apache.hama.graph.GraphJob;
import org.apache.hama.ml.semiclustering.SemiClusterMessage;
import org.apache.hama.ml.semiclustering.SemiClusterTextReader;
import org.apache.hama.ml.semiclustering.SemiClusterVertexOutputWriter;
import org.apache.hama.ml.semiclustering.SemiClusteringVertex;
public class SemiClusterJobDriver {
/** Commons-logging logger bound to this driver class. */
protected static final Log LOG = LogFactory
.getLog(SemiClusterJobDriver.class);
/** Configuration key whose value startTask uses as the job's output path. */
private static final String outputPathString = "semicluster.outputpath";
/** Configuration key whose value startTask uses as the job's input path. */
private static final String inputPathString = "semicluster.inputmatrixpath";
// NOTE(review): the four keys below are not referenced in the visible part
// of this file; presumably they are populated from command-line arguments
// further down -- confirm against the rest of the class.
/** Configuration key "hama.graph.max.iteration"; presumably caps the number of graph iterations. */
private static final String requestedGraphJobMaxIterationString = "hama.graph.max.iteration";
/** Configuration key "semicluster.max.vertex.count"; presumably limits vertices per semi-cluster. */
private static final String semiClusterMaximumVertexCount = "semicluster.max.vertex.count";
/** Configuration key "semicluster.max.message.sent.count"; presumably limits messages sent by a vertex. */
private static final String graphJobMessageSentCount = "semicluster.max.message.sent.count";
/** Configuration key "vertex.max.cluster.count"; presumably limits clusters tracked per vertex. */
private static final String graphJobVertexMaxClusterCount = "vertex.max.cluster.count";
/**
 * Builds the semi-clustering {@link GraphJob} from the given configuration,
 * runs it and waits for it to finish.
 *
 * <p>The input and output locations are read from the configuration under
 * the {@code semicluster.inputmatrixpath} and {@code semicluster.outputpath}
 * keys. When {@code waitForCompletion(true)} reports success, the elapsed
 * wall-clock time in seconds is printed to standard output; otherwise the
 * method returns without printing anything.
 *
 * @param conf configuration used to construct the job and to look up the
 *        input and output paths
 * @throws IOException if raised while setting up or running the job
 * @throws InterruptedException if raised while waiting for the job
 * @throws ClassNotFoundException if raised while running the job
 */
public static void startTask(HamaConfiguration conf) throws IOException,
    InterruptedException, ClassNotFoundException {
  final GraphJob job = new GraphJob(conf, SemiClusterJobDriver.class);

  // Job identity, message compression and the vertex implementation.
  job.setCompressionCodec(Bzip2Compressor.class);
  job.setVertexOutputWriterClass(SemiClusterVertexOutputWriter.class);
  job.setJobName("SemiClusterJob");
  job.setVertexClass(SemiClusteringVertex.class);

  // Input and output locations come from the caller's configuration.
  final Path inputPath = new Path(conf.get(inputPathString));
  job.setInputPath(inputPath);
  final Path outputPath = new Path(conf.get(outputPathString));
  job.setOutputPath(outputPath);

  // Graph-level flags, both set to the string "true".
  job.set("hama.graph.self.ref", "true");
  job.set("hama.graph.repair", "true");

  // Vertex id, vertex value and edge value types.
  job.setVertexIDClass(Text.class);
  job.setVertexValueClass(SemiClusterMessage.class);
  job.setEdgeValueClass(DoubleWritable.class);

  // Input side: text records parsed into vertices by SemiClusterTextReader.
  job.setInputKeyClass(LongWritable.class);
  job.setInputValueClass(Text.class);
  job.setInputFormat(TextInputFormat.class);
  job.setVertexInputReaderClass(SemiClusterTextReader.class);
  job.setPartitioner(HashPartitioner.class);

  // Output side: text key/value pairs.
  job.setOutputFormat(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  final long startTime = System.currentTimeMillis();
  if (!job.waitForCompletion(true)) {
    return;
  }
  final double elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
  System.out.println("Job Finished in " + elapsedSeconds + " seconds");
}
private static void printUsage() {
LOG.info("Usage: SemiClusterO
© 2015 - 2025 Weber Informatics LLC | Privacy Policy