All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.hadoop.func.JSONViewEvalFunc Maven / Gradle / Ivy

The newest version!
package org.archive.hadoop.func;

import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Logger;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.archive.format.json.JSONUtils;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Pig UDF that "flattens" selected values of a JSON document into a tuple.
 *
 * <p>Field 0 of the input tuple is the JSON text. Every following field is a
 * path string that is passed, together with the parsed JSON, to
 * {@code JSONUtils.extractSingle}; the value returned for each path becomes
 * one field of the output tuple, in the same order as the paths.
 *
 * <p>The type argument of {@code EvalFunc} is the type returned by
 * {@link #exec(Tuple)}; Pig resolves the UDF's output type from it.
 */
public class JSONViewEvalFunc extends EvalFunc<Tuple> {
	private static final Logger LOG =
		Logger.getLogger(JSONViewEvalFunc.class.getName());

	protected TupleFactory mTupleFactory = TupleFactory.getInstance();

	// Scratch buffer reused across exec() calls. exec() empties it on every
	// exit path so values from one call can never show up in the next.
	private final ArrayList<Object> mProtoTuple = new ArrayList<Object>();

	/** Creates the UDF; no configuration is required. */
	public JSONViewEvalFunc() {
	}

	/**
	 * Extracts one value per requested path from the JSON in field 0.
	 *
	 * @param tup input tuple: [0] is the JSON, remaining elements are
	 *            Strings describing paths into the JSON
	 * @return a tuple with one entry per path (a null entry for a null
	 *         path), or {@code null} if the input tuple is null or empty,
	 *         the JSON field is null, or a {@code JSONException} is raised
	 *         while parsing or extracting
	 * @throws IOException if a field cannot be read from the input tuple
	 */
	@Override
	public Tuple exec(Tuple tup) throws IOException {
		if(tup == null || tup.size() == 0) {
			return null;
		}
		Object jsonField = tup.get(0);
		if(jsonField == null) {
			// Nothing to parse; treat like any other unusable input.
			return null;
		}
		try {
			JSONObject json = new JSONObject(jsonField.toString());
			for(int i = 1; i < tup.size(); i++) {
				Object pathField = tup.get(i);
				if(pathField == null) {
					// Keep output positions aligned with the path arguments.
					mProtoTuple.add(null);
					continue;
				}
				String result = JSONUtils.extractSingle(json, pathField.toString());
				mProtoTuple.add(result);
			}
			// The tuple is built before the finally block empties the buffer,
			// the same build-then-clear order the buffer has always relied on.
			return mTupleFactory.newTuple(mProtoTuple);
		} catch (JSONException e) {
			LOG.warning("Failed to parse JSON:"+e.getMessage());
			return null;
		} finally {
			// Also runs on failure, so a partially filled buffer is discarded
			// instead of leaking into the next call's result.
			mProtoTuple.clear();
		}
	}
}