All downloads are free. Search and download functionality uses the official Maven repository.

com.twitter.elephantbird.util.HadoopUtils Maven / Gradle / Ivy

The newest version!
package com.twitter.elephantbird.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Iterator;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.json.simple.JSONArray;
import org.json.simple.JSONValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Various Hadoop specific utilities: counter lookup plus helpers that store
 * class names, serialized objects and string lists in a {@link Configuration}.
 */
public class HadoopUtils {
  private static final Logger LOG = LoggerFactory.getLogger(HadoopUtils.class);
  private static final Splitter COMMA_SPLITTER = Splitter.on(',');

  /** Static utility holder; not meant to be instantiated. */
  private HadoopUtils() { }

  /**
   * MapReduce counters are available only with {@link TaskInputOutputContext},
   * but most interfaces use super classes, though the actual object is a
   * subclass (e.g. Mapper.Context).
   *
   * <p>This utility method checks the type and returns the appropriate counter.
   * In the rare (maybe unexpected) case where ctx is not a
   * TaskInputOutputContext, or the lookup yields no counter, a dummy counter
   * is returned after logging a warning.
   *
   * @param ctx     the job or task context
   * @param group   counter group name
   * @param counter counter name within the group
   * @return the counter from the context, or a dummy counter named
   *         {@code group:counter} when none is available
   */
  public static Counter getCounter(JobContext ctx, String group, String counter) {
    if (ctx instanceof TaskInputOutputContext) {
      Counter c = HadoopCompat.getCounter(
          (TaskInputOutputContext<?, ?, ?, ?>) ctx, group, counter);
      if (c != null) {
        return c;
      }
    }
    String name = group + ":" + counter;
    LOG.warn("Using a dummy counter for {} because it does not already exist.", name);
    return HadoopCompat.newGenericCounter(name, name, 0);
  }

  /**
   * Delegates to {@link #setClassConf(Configuration, String, Class)}.
   *
   * @deprecated use {@link #setClassConf(Configuration, String, Class)}
   */
  @Deprecated
  public static void setInputFormatClass(Configuration conf, String configKey, Class<?> clazz) {
    setClassConf(conf, configKey, clazz);
  }

  /**
   * A helper to set a configuration key to a class name.
   *
   * @param conf      configuration to update
   * @param configKey key under which the class name is stored
   * @param clazz     class whose name is stored
   * @throws RuntimeException if the key is already set to a different class name
   */
  public static void setClassConf(Configuration conf, String configKey, Class<?> clazz) {
    String existingClass = conf.get(configKey);
    String className = clazz.getName();
    if (existingClass != null && !existingClass.equals(className)) {
      throw new RuntimeException(
          "Already registered a different thriftClass for "
              + configKey
              + ". old: " + existingClass
              + " new: " + className);
    }
    conf.set(configKey, className);
  }

  /**
   * Writes an object into a configuration by converting it to a base64 encoded string.
   * This is done by serializing the object to bytes via an {@link ObjectOutputStream},
   * gzip compressing those bytes, and then base64 encoding the compressed bytes.
   *
   * <p>NOTE: obj must implement {@link java.io.Serializable}
   *
   * @param key  for the configuration
   * @param obj  to write (must be Serializable)
   * @param conf to write to
   * @throws IOException if serialization, compression or closing a stream fails
   */
  public static void writeObjectToConfAsBase64(String key, Object obj, Configuration conf)
      throws IOException {
    ByteArrayOutputStream baos = null;
    GZIPOutputStream gos = null;
    ObjectOutputStream oos = null;

    try {
      baos = new ByteArrayOutputStream();
      gos = new GZIPOutputStream(baos);
      oos = new ObjectOutputStream(gos);
      oos.writeObject(obj);
    } finally {
      // Close outermost first so buffered data and the gzip trailer reach baos
      // before its contents are read below.
      Closeables.close(oos, false);
      Closeables.close(gos, false);
      Closeables.close(baos, false);
    }

    conf.set(key, new String(Base64.encodeBase64(baos.toByteArray()), Charsets.UTF_8));
  }

  /**
   * Reads an object (that was written using
   * {@link #writeObjectToConfAsBase64}) from a configuration.
   *
   * @param key  for the configuration
   * @param conf to read from
   * @param <T>  type the caller expects the stored object to have
   * @return the read object, or null if key is not present in conf
   * @throws IOException if decoding or deserialization fails, including when the
   *         class of the stored object cannot be found
   */
  @SuppressWarnings("unchecked")
  public static <T> T readObjectFromConfAsBase64(String key, Configuration conf)
      throws IOException {
    String b64 = conf.get(key);
    if (b64 == null) {
      return null;
    }

    byte[] bytes = Base64.decodeBase64(b64.getBytes(Charsets.UTF_8));

    ByteArrayInputStream bais = null;
    GZIPInputStream gis = null;
    ObjectInputStream ois = null;

    try {
      bais = new ByteArrayInputStream(bytes);
      gis = new GZIPInputStream(bais);
      ois = new ObjectInputStream(gis);
      return (T) ois.readObject();
    } catch (ClassNotFoundException e) {
      LOG.error("Could not read object from config with key " + key, e);
      throw new IOException(e);
    } catch (ClassCastException e) {
      LOG.error("Couldn't cast object read from config with key " + key, e);
      throw new IOException(e);
    } finally {
      Closeables.close(ois, false);
      Closeables.close(gis, false);
      Closeables.close(bais, false);
    }
  }

  /**
   * Writes a list of strings into a configuration by converting it to a json array.
   *
   * @param key  for the configuration
   * @param list to write (must not be null)
   * @param conf to write to
   */
  public static void writeStringListToConfAsJson(String key,
                                                 List<String> list,
                                                 Configuration conf) {
    Preconditions.checkNotNull(list);
    conf.set(key, JSONArray.toJSONString(list));
  }

  /**
   * Reads a list of strings stored as a json array from a configuration.
   *
   * @param key  for the configuration
   * @param conf to read from
   * @return the read list of strings, or null if key is not present in conf
   */
  @SuppressWarnings("unchecked")
  public static List<String> readStringListFromConfAsJson(String key, Configuration conf) {
    String json = conf.get(key);
    if (json == null) {
      return null;
    }
    return Lists.newArrayList(((JSONArray) JSONValue.parse(json)));
  }

  /**
   * Writes a list of strings into a configuration by base64 encoding them and separating
   * them with commas.
   *
   * <p>NOTE(review): an empty list and a list holding a single empty string both
   * appear to serialize to the empty string, so the reader cannot tell them
   * apart. Confirm whether any caller relies on either case.
   *
   * @param key  for the configuration
   * @param list to write (must not be null)
   * @param conf to write to
   */
  public static void writeStringListToConfAsBase64(String key,
                                                   List<String> list,
                                                   Configuration conf) {
    Preconditions.checkNotNull(list);
    Iterator<String> iter = list.iterator();
    StringBuilder sb = new StringBuilder();
    while (iter.hasNext()) {
      // 'false' disables chunking, so no line breaks end up in the encoded value.
      byte[] bytes = Base64.encodeBase64(iter.next().getBytes(Charsets.UTF_8), false);
      sb.append(new String(bytes, Charsets.UTF_8));
      if (iter.hasNext()) {
        sb.append(',');
      }
    }
    conf.set(key, sb.toString());
  }

  /**
   * Reads a list of strings stored as comma separated base64.
   *
   * @param key  for the configuration
   * @param conf to read from
   * @return the read list of strings, or null if key is not present in conf
   */
  public static List<String> readStringListFromConfAsBase64(String key, Configuration conf) {
    String b64List = conf.get(key);
    if (b64List == null) {
      return null;
    }

    List<String> strings = Lists.newArrayList();
    for (String b64 : COMMA_SPLITTER.split(b64List)) {
      byte[] bytes = Base64.decodeBase64(b64.getBytes(Charsets.UTF_8));
      strings.add(new String(bytes, Charsets.UTF_8));
    }
    return strings;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy