// parquet.pig.summary.Summary Maven / Gradle / Ivy
/**
* Copyright 2012 Twitter, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet.pig.summary;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.parser.ParserException;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
/**
* Computes a summary of the input and renders it as a JSON string.
*
* @author Julien Le Dem
*
*/
public class Summary extends EvalFunc implements Algebraic {
/** Shared tuple factory; JSONTuple.write uses it to wrap the JSON string in a tuple before writing. */
private static final TupleFactory TF = TupleFactory.getInstance();
/** NOTE(review): presumably the schema of the UDF input; where it is assigned is not visible here. */
private Schema inputSchema;
/** NOTE(review): presumably the UDF context signature supplied by Pig; confirm against its setter. */
private String signature;
/**
 * Initial-stage function of this {@link Algebraic} UDF: summarizes one
 * input tuple and wraps the resulting summary in a {@link JSONTuple}.
 *
 * <p>Declared as {@code EvalFunc<Tuple>} (rather than the raw type) so the
 * declared type argument matches what {@link #exec(Tuple)} returns.
 */
public static class Initial extends EvalFunc<Tuple> {
  private Schema inputSchema;

  /**
   * Resolves the input schema for this function through
   * {@code Summary.getInputSchema(signature)}.
   *
   * @param signature the UDF context signature handed over by Pig
   */
  @Override
  public void setUDFContextSignature(String signature) {
    inputSchema = Summary.getInputSchema(signature);
  }

  /**
   * Summarizes the given tuple against the resolved input schema.
   *
   * @param t the input tuple
   * @return a {@link JSONTuple} holding the result of {@code sumUp(inputSchema, t)}
   * @throws IOException if computing the summary fails
   */
  @Override
  public Tuple exec(Tuple t) throws IOException {
    return new JSONTuple(sumUp(inputSchema, t));
  }
}
/**
 * Intermediate-stage function of this {@link Algebraic} UDF: merges the
 * content of the incoming tuple and wraps the result in a {@link JSONTuple}.
 *
 * <p>Declared as {@code EvalFunc<Tuple>} (rather than the raw type) so the
 * declared type argument matches what {@link #exec(Tuple)} returns.
 */
public static class Intermediate extends EvalFunc<Tuple> {

  /**
   * @param t the tuple handed to {@code merge(Tuple)}
   * @return a {@link JSONTuple} holding the merged summary
   * @throws IOException if merging fails
   */
  @Override
  public Tuple exec(Tuple t) throws IOException {
    return new JSONTuple(merge(t));
  }
}
/**
 * Final-stage function of this {@link Algebraic} UDF: merges the content of
 * the incoming tuple and renders the result as pretty-printed JSON.
 *
 * <p>Declared as {@code EvalFunc<String>} (rather than the raw type) so the
 * declared type argument matches what {@link #exec(Tuple)} returns.
 */
public static class Final extends EvalFunc<String> {

  /**
   * @param t the tuple handed to {@code merge(Tuple)}
   * @return the merged summary formatted by {@code SummaryData.toPrettyJSON}
   * @throws IOException if merging fails
   */
  @Override
  public String exec(Tuple t) throws IOException {
    return SummaryData.toPrettyJSON(merge(t));
  }
}
private static final class JSONTuple implements Tuple {
/** Version id for Java serialization. */
private static final long serialVersionUID = 1L;
/** The summary this tuple exposes; converted to JSON on demand by json(). */
private TupleSummaryData data;
/**
 * Creates a tuple backed by the given summary.
 *
 * @param data the summary to expose; stored as-is, without copying
 */
public JSONTuple(TupleSummaryData data) {
this.data = data;
}
/**
 * Not supported: this tuple cannot be read back from a stream.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
throw new UnsupportedOperationException();
}
/**
 * Serializes this tuple by wrapping its JSON string in a tuple obtained
 * from the shared factory and delegating the actual write to that tuple.
 *
 * @param dataOutput the destination to write to
 * @throws IOException if the delegate tuple fails to write
 */
@Override
public void write(DataOutput dataOutput) throws IOException {
  TF.newTuple(json()).write(dataOutput);
}
/**
 * Not supported: this tuple defines no ordering.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public int compareTo(Object o) {
throw new UnsupportedOperationException();
}
/**
 * Not supported: fields cannot be appended to this tuple.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void append(Object o) {
throw new UnsupportedOperationException();
}
/**
 * Returns the JSON form of the summary, which is exposed at index 0.
 *
 * @param i the field index; only 0 is supported
 * @return the JSON string produced by {@code json()} when {@code i} is 0
 * @throws ExecException if {@code i} is anything other than 0
 */
@Override
public Object get(int i) throws ExecException {
  if (i != 0) {
    // Name the rejected index so the failure is diagnosable from the job logs.
    throw new ExecException("JSONTuple only supports field index 0, got: " + i);
  }
  return json();
}
/** Converts the wrapped summary to its JSON string via SummaryData.toJSON; recomputed on every call. */
private String json() {
return SummaryData.toJSON(data);
}
@Override
public List