All downloads are free. The search and download functionality uses the official Maven repository.

datafu.hourglass.mapreduce.DistributedCacheHelper Maven / Gradle / Ivy

/**
* Copyright 2013 LinkedIn, Inc
* 
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* 
* http://www.apache.org/licenses/LICENSE-2.0
* 
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package datafu.hourglass.mapreduce;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Static helpers for passing Java-serialized objects between a job client and
 * its tasks through the Hadoop distributed cache.
 *
 * <p>{@link #writeObject} serializes an object to a file system path and
 * registers that path as a cache file in the configuration;
 * {@link #readObject} locates the matching localized cache file and
 * deserializes it.</p>
 * 
 * @author "Matthew Hayes"
 *
 */
public class DistributedCacheHelper 
{
  /**
   * Deserializes an object from the local copy of a distributed cache file.
   *
   * <p>The local cache files listed in the configuration are scanned in order
   * and the first one whose path string ends with {@code path.toString()} is
   * read with Java serialization.</p>
   *
   * <p>NOTE(review): this uses native Java deserialization, so the cache file
   * must come from a trusted source (normally the job that wrote it).</p>
   * 
   * @param conf Hadoop configuration
   * @param path Path the object was serialized to; matched as a suffix of the local cache file paths
   * @return Deserialized object
   * @throws IOException if the local file cannot be opened or read
   * @throws RuntimeException if no local cache file matches {@code path}, or if
   *         the class of the serialized object cannot be found
   */
  public static Object readObject(Configuration conf, org.apache.hadoop.fs.Path path) throws IOException
  {
    String localPath = null;
    Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(conf);
    // Guard against a null array (no cache files registered) so the caller
    // gets the descriptive "Could not find" error rather than a bare NPE.
    if (localCacheFiles != null)
    {
      String suffix = path.toString();
      for (Path localCacheFile : localCacheFiles)
      {
        String candidate = localCacheFile.toString();
        if (candidate.endsWith(suffix))
        {
          localPath = candidate;
          break;
        }
      }
    }
    if (localPath == null)
    {
      throw new RuntimeException("Could not find " + path + " in local cache");
    }

    FileInputStream inputStream = new FileInputStream(new File(localPath));
    try
    {
      // The ObjectInputStream constructor reads the stream header and can
      // throw; the outer finally makes sure the file handle is still released.
      ObjectInputStream objStream = new ObjectInputStream(inputStream);
      try
      {
        return objStream.readObject();
      }
      catch (ClassNotFoundException e)
      {
        throw new RuntimeException(e);
      }
      finally
      {
        objStream.close();
      }
    }
    finally
    {
      inputStream.close();
    }
  }
  
  /**
   * Serializes an object to a path on the configured file system and adds the
   * file to the distributed cache.
   *
   * <p>Any existing file at {@code path} is overwritten. The path is only
   * registered as a cache file after the object has been written and the
   * streams closed without error.</p>
   * 
   * @param conf Hadoop configuration; updated with the new cache file entry
   * @param obj Object to serialize
   * @param path Path to serialize object to
   * @throws IOException if the file cannot be created or the object cannot be written
   */
  public static void writeObject(Configuration conf, Object obj, org.apache.hadoop.fs.Path path) throws IOException
  {
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream outputStream = fs.create(path, true);
    try
    {
      ObjectOutputStream objStream = new ObjectOutputStream(outputStream);
      try
      {
        objStream.writeObject(obj);
      }
      finally
      {
        objStream.close();
      }
    }
    finally
    {
      // Released even when serialization fails part-way through.
      outputStream.close();
    }
    DistributedCache.addCacheFile(path.toUri(), conf);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy