All downloads are free. Search and download functionality uses the official Maven repository.

com.twitter.elephantbird.pig.store.LzoJsonStorage Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.pig.store;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.pig.data.Tuple;
import org.json.simple.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Joiner;
import com.google.common.collect.Sets;
import com.twitter.elephantbird.mapreduce.output.LzoTextOutputFormat;

/**
 * A Pig store function that serializes the first field of each tuple, which is
 * expected to be a map, as a JSON string and writes it through
 * {@link LzoTextOutputFormat}. Optionally, only a fixed set of map keys is kept.
 */
public class LzoJsonStorage extends BaseStoreFunc {
  private static final Logger LOG = LoggerFactory.getLogger(LzoJsonStorage.class);

  // If null, keep all keys.
  private final Set<String> keysToKeep_;
  // Reused for every record; cleared at the start of each putNext() call.
  private final JSONObject json = new JSONObject();

  /**
   * Creates a storage that writes every key of each map.
   */
  public LzoJsonStorage() {
    keysToKeep_ = null;
    LOG.info("Initialized LzoJsonStorage. Keeping all keys.");
  }

  /**
   * Creates a storage that writes only the given keys of each map.
   * @param keysToKeep the map keys to retain; entries with any other key are dropped.
   */
  public LzoJsonStorage(String...keysToKeep) {
    keysToKeep_ = Sets.newHashSet(keysToKeep);
    LOG.info("Initialized LzoJsonStorage. Keeping keys {}.", Joiner.on(", ").join(keysToKeep_));
  }


  /**
   * Writes one tuple as a JSON object. The first element is expected to be a map, or null.
   * A null tuple, an empty tuple, or a null first element produces an empty JSON object.
   * Any other non-map first element causes a {@link ClassCastException}.
   * @param tuple the tuple to write.
   * @throws IOException if the underlying writer fails, or if the write is interrupted
   *         (in which case the thread's interrupt flag is restored before throwing).
   */
  @Override
  @SuppressWarnings("unchecked")
  public void putNext(Tuple tuple) throws IOException {
    json.clear();
    if (tuple != null && tuple.size() >= 1) {
      Map<String, Object> map = (Map<String, Object>) tuple.get(0);
      // The first field is documented as possibly null; treat it like an empty map
      // instead of failing with a NullPointerException.
      if (map != null) {
        if (keysToKeep_ == null) {
          json.putAll(map);
        } else {
          for (Map.Entry<String, Object> entry : map.entrySet()) {
            if (keysToKeep_.contains(entry.getKey())) {
              json.put(entry.getKey(), entry.getValue());
            }
          }
        }
      }
    }
    try {
      // A null key is passed so that only the JSON text is handed to the writer as the value.
      writer.write(null, new Text(json.toString()));
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can still observe it.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while writing JSON record", e);
    }
  }

  /**
   * Returns the output format used to write the JSON records.
   * @return a new {@link LzoTextOutputFormat} instance.
   */
  @Override
  public OutputFormat getOutputFormat() {
    return new LzoTextOutputFormat();
  }


}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy