org.apache.hadoop.examples.BaileyBorweinPlouffe Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-mapreduce-examples Show documentation
Show all versions of hadoop-mapreduce-examples Show documentation
Apache Hadoop MapReduce Examples
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.examples;
import java.io.BufferedOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.google.common.base.Charsets;
/**
* A map/reduce program that uses Bailey-Borwein-Plouffe to compute exact
* digits of Pi.
* This program is able to calculate digit positions
* lower than a certain limit, which is roughly 10^8.
* If the limit is exceeded,
* the corresponding results may be incorrect due to overflow errors.
* For computing higher bits of Pi, consider using distbbp.
*
* Reference:
*
* [1] David H. Bailey, Peter B. Borwein and Simon Plouffe. On the Rapid
* Computation of Various Polylogarithmic Constants.
* Math. Comp., 66:903-913, 1996.
*/
public class BaileyBorweinPlouffe extends Configured implements Tool {
public static final String DESCRIPTION
= "A map/reduce program that uses Bailey-Borwein-Plouffe to compute exact digits of Pi.";
private static final String NAME = "mapreduce." +
BaileyBorweinPlouffe.class.getSimpleName();
//custom job properties
private static final String WORKING_DIR_PROPERTY = NAME + ".dir";
private static final String HEX_FILE_PROPERTY = NAME + ".hex.file";
private static final String DIGIT_START_PROPERTY = NAME + ".digit.start";
private static final String DIGIT_SIZE_PROPERTY = NAME + ".digit.size";
private static final String DIGIT_PARTS_PROPERTY = NAME + ".digit.parts";
private static final Log LOG = LogFactory.getLog(BaileyBorweinPlouffe.class);
/** Mapper class computing digits of Pi. */
public static class BbpMapper extends
Mapper {
/** Compute the (offset+1)th to (offset+length)th digits. */
protected void map(LongWritable offset, IntWritable length,
final Context context) throws IOException, InterruptedException {
LOG.info("offset=" + offset + ", length=" + length);
// compute digits
final byte[] bytes = new byte[length.get() >> 1];
long d = offset.get();
for (int i = 0; i < bytes.length; d += 4) {
final long digits = hexDigits(d);
bytes[i++] = (byte) (digits >> 8);
bytes[i++] = (byte) digits;
}
// output map results
context.write(offset, new BytesWritable(bytes));
}
}
/** Reducer for concatenating map outputs. */
public static class BbpReducer extends
Reducer {
/** Storing hex digits */
private final List hex = new ArrayList();
/** Concatenate map outputs. */
@Override
protected void reduce(LongWritable offset, Iterable values,
Context context) throws IOException, InterruptedException {
// read map outputs
for (BytesWritable bytes : values) {
for (int i = 0; i < bytes.getLength(); i++)
hex.add(bytes.getBytes()[i]);
}
LOG.info("hex.size() = " + hex.size());
}
/** Write output to files. */
@Override
protected void cleanup(Context context
) throws IOException, InterruptedException {
final Configuration conf = context.getConfiguration();
final Path dir = new Path(conf.get(WORKING_DIR_PROPERTY));
final FileSystem fs = dir.getFileSystem(conf);
// write hex output
{
final Path hexfile = new Path(conf.get(HEX_FILE_PROPERTY));
final OutputStream out = new BufferedOutputStream(fs.create(hexfile));
try {
for (byte b : hex)
out.write(b);
} finally {
out.close();
}
}
// If the starting digit is 1,
// the hex value can be converted to decimal value.
if (conf.getInt(DIGIT_START_PROPERTY, 1) == 1) {
final Path outfile = new Path(dir, "pi.txt");
LOG.info("Writing text output to " + outfile);
final OutputStream outputstream = fs.create(outfile);
try {
final PrintWriter out = new PrintWriter(
new OutputStreamWriter(outputstream, Charsets.UTF_8), true);
// write hex text
print(out, hex.iterator(), "Pi = 0x3.", "%02X", 5, 5);
out.println("Total number of hexadecimal digits is "
+ 2 * hex.size() + ".");
// write decimal text
final Fraction dec = new Fraction(hex);
final int decDigits = 2 * hex.size(); // TODO: this is conservative.
print(out, new Iterator() {
private int i = 0;
public boolean hasNext() {
return i < decDigits;
}
public Integer next() {
i++;
return dec.times10();
}
public void remove() {
}
}, "Pi = 3.", "%d", 10, 5);
out.println("Total number of decimal digits is " + decDigits + ".");
} finally {
outputstream.close();
}
}
}
}
/** Print out elements in a nice format. */
private static void print(PrintWriter out, Iterator iterator,
String prefix, String format, int elementsPerGroup, int groupsPerLine) {
final StringBuilder sb = new StringBuilder("\n");
for (int i = 0; i < prefix.length(); i++)
sb.append(" ");
final String spaces = sb.toString();
out.print("\n" + prefix);
for (int i = 0; iterator.hasNext(); i++) {
if (i > 0 && i % elementsPerGroup == 0)
out.print((i / elementsPerGroup) % groupsPerLine == 0 ? spaces : " ");
out.print(String.format(format, iterator.next()));
}
out.println();
}
/** Input split for the {@link BbpInputFormat}. */
public static class BbpSplit extends InputSplit implements Writable {
private final static String[] EMPTY = {};
private long offset;
private int size;
/** Public default constructor for the Writable interface. */
public BbpSplit() {
}
private BbpSplit(int i, long offset, int size) {
LOG.info("Map #" + i + ": workload=" + workload(offset, size)
+ ", offset=" + offset + ", size=" + size);
this.offset = offset;
this.size = size;
}
private long getOffset() {
return offset;
}
/** {@inheritDoc} */
public long getLength() {
return size;
}
/** No location is needed. */
public String[] getLocations() {
return EMPTY;
}
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
offset = in.readLong();
size = in.readInt();
}
/** {@inheritDoc} */
public void write(DataOutput out) throws IOException {
out.writeLong(offset);
out.writeInt(size);
}
}
/**
* Input format for the {@link BbpMapper}.
* Keys and values represent offsets and sizes, respectively.
*/
public static class BbpInputFormat
extends InputFormat {
/** {@inheritDoc} */
public List getSplits(JobContext context) {
//get the property values
final int startDigit = context.getConfiguration().getInt(
DIGIT_START_PROPERTY, 1);
final int nDigits = context.getConfiguration().getInt(
DIGIT_SIZE_PROPERTY, 100);
final int nMaps = context.getConfiguration().getInt(
DIGIT_PARTS_PROPERTY, 1);
//create splits
final List splits = new ArrayList(nMaps);
final int[] parts = partition(startDigit - 1, nDigits, nMaps);
for (int i = 0; i < parts.length; ++i) {
final int k = i < parts.length - 1 ? parts[i+1]: nDigits+startDigit-1;
splits.add(new BbpSplit(i, parts[i], k - parts[i]));
}
return splits;
}
/** {@inheritDoc} */
public RecordReader createRecordReader(
InputSplit generic, TaskAttemptContext context) {
final BbpSplit split = (BbpSplit)generic;
//return a record reader
return new RecordReader() {
boolean done = false;
public void initialize(InputSplit split, TaskAttemptContext context) {
}
public boolean nextKeyValue() {
//Each record only contains one key.
return !done ? done = true : false;
}
public LongWritable getCurrentKey() {
return new LongWritable(split.getOffset());
}
public IntWritable getCurrentValue() {
return new IntWritable((int)split.getLength());
}
public float getProgress() {
return done? 1f: 0f;
}
public void close() {
}
};
}
}
/** Create and setup a job */
private static Job createJob(String name, Configuration conf
) throws IOException {
final Job job = Job.getInstance(conf, NAME + "_" + name);
final Configuration jobconf = job.getConfiguration();
job.setJarByClass(BaileyBorweinPlouffe.class);
// setup mapper
job.setMapperClass(BbpMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(BytesWritable.class);
// setup reducer
job.setReducerClass(BbpReducer.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(BytesWritable.class);
job.setNumReduceTasks(1);
// setup input
job.setInputFormatClass(BbpInputFormat.class);
// disable task timeout
jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
// do not use speculative execution
jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
return job;
}
/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps,
String workingDir, Configuration conf, PrintStream out
) throws IOException {
final String name = startDigit + "_" + nDigits;
//setup wroking directory
out.println("Working Directory = " + workingDir);
out.println();
final FileSystem fs = FileSystem.get(conf);
final Path dir = fs.makeQualified(new Path(workingDir));
if (fs.exists(dir)) {
throw new IOException("Working directory " + dir
+ " already exists. Please remove it first.");
} else if (!fs.mkdirs(dir)) {
throw new IOException("Cannot create working directory " + dir);
}
out.println("Start Digit = " + startDigit);
out.println("Number of Digits = " + nDigits);
out.println("Number of Maps = " + nMaps);
// setup a job
final Job job = createJob(name, conf);
final Path hexfile = new Path(dir, "pi_" + name + ".hex");
FileOutputFormat.setOutputPath(job, new Path(dir, "out"));
// setup custom properties
job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());
job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);
// start a map/reduce job
out.println("\nStarting Job ...");
final long startTime = System.currentTimeMillis();
try {
if (!job.waitForCompletion(true)) {
out.println("Job failed.");
System.exit(1);
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
final double duration = (System.currentTimeMillis() - startTime)/1000.0;
out.println("Duration is " + duration + " seconds.");
}
out.println("Output file: " + hexfile);
}
/**
* Parse arguments and then runs a map/reduce job.
* @return a non-zero value if there is an error. Otherwise, return 0.
*/
public int run(String[] args) throws IOException {
if (args.length != 4) {
System.err.println("Usage: java " + getClass().getName()
+ " ");
ToolRunner.printGenericCommandUsage(System.err);
return -1;
}
final int startDigit = Integer.parseInt(args[0]);
final int nDigits = Integer.parseInt(args[1]);
final int nMaps = Integer.parseInt(args[2]);
final String workingDir = args[3];
if (startDigit <= 0) {
throw new IllegalArgumentException("startDigit = " + startDigit+" <= 0");
} else if (nDigits <= 0) {
throw new IllegalArgumentException("nDigits = " + nDigits + " <= 0");
} else if (nDigits % BBP_HEX_DIGITS != 0) {
throw new IllegalArgumentException("nDigits = " + nDigits
+ " is not a multiple of " + BBP_HEX_DIGITS);
} else if (nDigits - 1L + startDigit > IMPLEMENTATION_LIMIT + BBP_HEX_DIGITS) {
throw new UnsupportedOperationException("nDigits - 1 + startDigit = "
+ (nDigits - 1L + startDigit)
+ " > IMPLEMENTATION_LIMIT + BBP_HEX_DIGITS,"
+ ", where IMPLEMENTATION_LIMIT=" + IMPLEMENTATION_LIMIT
+ "and BBP_HEX_DIGITS=" + BBP_HEX_DIGITS);
} else if (nMaps <= 0) {
throw new IllegalArgumentException("nMaps = " + nMaps + " <= 0");
}
compute(startDigit, nDigits, nMaps, workingDir, getConf(), System.out);
return 0;
}
/** The main method for running it as a stand alone command. */
public static void main(String[] argv) throws Exception {
System.exit(ToolRunner.run(null, new BaileyBorweinPlouffe(), argv));
}
/////////////////////////////////////////////////////////////////////
// static fields and methods for Bailey-Borwein-Plouffe algorithm. //
/////////////////////////////////////////////////////////////////////
/** Limitation of the program.
* The program may return incorrect results if the limit is exceeded.
* The default value is 10^8.
* The program probably can handle some higher values such as 2^28.
*/
private static final long IMPLEMENTATION_LIMIT = 100000000;
private static final long ACCURACY_BIT = 32;
private static final long BBP_HEX_DIGITS = 4;
private static final long BBP_MULTIPLIER = 1 << (4 * BBP_HEX_DIGITS);
/**
* Compute the exact (d+1)th to (d+{@link #BBP_HEX_DIGITS})th
* hex digits of pi.
*/
static long hexDigits(final long d) {
if (d < 0) {
throw new IllegalArgumentException("d = " + d + " < 0");
} else if (d > IMPLEMENTATION_LIMIT) {
throw new IllegalArgumentException("d = " + d
+ " > IMPLEMENTATION_LIMIT = " + IMPLEMENTATION_LIMIT);
}
final double s1 = sum(1, d);
final double s4 = sum(4, d);
final double s5 = sum(5, d);
final double s6 = sum(6, d);
double pi = s1 + s1;
if (pi >= 1)
pi--;
pi *= 2;
if (pi >= 1)
pi--;
pi -= s4;
if (pi < 0)
pi++;
pi -= s4;
if (pi < 0)
pi++;
pi -= s5;
if (pi < 0)
pi++;
pi -= s6;
if (pi < 0)
pi++;
return (long) (pi * BBP_MULTIPLIER);
}
/**
* Approximate the fraction part of
* $16^d \sum_{k=0}^\infty \frac{16^{d-k}}{8k+j}$
* for d > 0 and j = 1, 4, 5, 6.
*/
private static double sum(final long j, final long d) {
long k = j == 1 ? 1 : 0;
double s = 0;
if (k <= d) {
s = 1.0 / ((d << 3) | j);
for (; k < d; k++) {
final long n = (k << 3) | j;
s += mod((d - k) << 2, n) * 1.0 / n;
if (s >= 1)
s--;
}
k++;
}
if (k >= 1L << (ACCURACY_BIT - 7))
return s;
for (;; k++) {
final long n = (k << 3) | j;
final long shift = (k - d) << 2;
if (ACCURACY_BIT <= shift || 1L << (ACCURACY_BIT - shift) < n) {
return s;
}
s += 1.0 / (n << shift);
if (s >= 1)
s--;
}
}
/** Compute $2^e \mod n$ for e > 0, n > 2 */
static long mod(final long e, final long n) {
long mask = (e & 0xFFFFFFFF00000000L) == 0 ? 0x00000000FFFFFFFFL
: 0xFFFFFFFF00000000L;
mask &= (e & 0xFFFF0000FFFF0000L & mask) == 0 ? 0x0000FFFF0000FFFFL
: 0xFFFF0000FFFF0000L;
mask &= (e & 0xFF00FF00FF00FF00L & mask) == 0 ? 0x00FF00FF00FF00FFL
: 0xFF00FF00FF00FF00L;
mask &= (e & 0xF0F0F0F0F0F0F0F0L & mask) == 0 ? 0x0F0F0F0F0F0F0F0FL
: 0xF0F0F0F0F0F0F0F0L;
mask &= (e & 0xCCCCCCCCCCCCCCCCL & mask) == 0 ? 0x3333333333333333L
: 0xCCCCCCCCCCCCCCCCL;
mask &= (e & 0xAAAAAAAAAAAAAAAAL & mask) == 0 ? 0x5555555555555555L
: 0xAAAAAAAAAAAAAAAAL;
long r = 2;
for (mask >>= 1; mask > 0; mask >>= 1) {
r *= r;
r %= n;
if ((e & mask) != 0) {
r += r;
if (r >= n)
r -= n;
}
}
return r;
}
/** Represent a number x in hex for 1 > x >= 0 */
private static class Fraction {
private final int[] integers; // only use 24-bit
private int first = 0; // index to the first non-zero integer
/** Construct a fraction represented by the bytes. */
Fraction(List bytes) {
integers = new int[(bytes.size() + 2) / 3];
for (int i = 0; i < bytes.size(); i++) {
final int b = 0xFF & bytes.get(i);
integers[integers.length - 1 - i / 3] |= b << ((2 - i % 3) << 3);
}
skipZeros();
}
/**
* Compute y = 10*x and then set x to the fraction part of y, where x is the
* fraction represented by this object.
* @return integer part of y
*/
int times10() {
int carry = 0;
for (int i = first; i < integers.length; i++) {
integers[i] <<= 1;
integers[i] += carry + (integers[i] << 2);
carry = integers[i] >> 24;
integers[i] &= 0xFFFFFF;
}
skipZeros();
return carry;
}
private void skipZeros() {
for(; first < integers.length && integers[first] == 0; first++)
;
}
}
/**
* Partition input so that the workload of each part is
* approximately the same.
*/
static int[] partition(final int offset, final int size, final int nParts) {
final int[] parts = new int[nParts];
final long total = workload(offset, size);
final int remainder = offset % 4;
parts[0] = offset;
for (int i = 1; i < nParts; i++) {
final long target = offset + i*(total/nParts) + i*(total%nParts)/nParts;
//search the closest value
int low = parts[i - 1];
int high = offset + size;
for (; high > low + 4;) {
final int mid = (high + low - 2 * remainder) / 8 * 4 + remainder;
final long midvalue = workload(mid);
if (midvalue == target)
high = low = mid;
else if (midvalue > target)
high = mid;
else
low = mid;
}
parts[i] = high == low? high:
workload(high)-target > target-workload(low)?
low: high;
}
return parts;
}
private static final long MAX_N = 4294967295L; // prevent overflow
/** Estimate the workload for input size n (in some unit). */
private static long workload(final long n) {
if (n < 0) {
throw new IllegalArgumentException("n = " + n + " < 0");
} else if (n > MAX_N) {
throw new IllegalArgumentException("n = " + n + " > MAX_N = " + MAX_N);
}
return (n & 1L) == 0L ? (n >> 1) * (n + 1) : n * ((n + 1) >> 1);
}
private static long workload(long offset, long size) {
return workload(offset + size) - workload(offset);
}
}