// com.proofpoint.hive.serde.JsonSerde (source listing retrieved via Maven / Gradle / Ivy)
// The newest version.
/*
* Copyright 2011 Proofpoint, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.proofpoint.hive.serde;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Writable;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import static java.lang.String.format;
import static java.util.Arrays.asList;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import static org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString;
public class JsonSerde
implements SerDe
{
// Shared Jackson factory for creating per-record parsers; ObjectMapper-backed so
// readValueAsTree() yields JsonNode trees.
private final JsonFactory jsonFactory = new ObjectMapper().getJsonFactory();
// Inspector describing the row layout derived from the table's column names/types.
private ObjectInspector rowObjectInspector;
// Struct type for the whole row; built in initialize() from the table properties.
protected StructTypeInfo rootTypeInfo;
// Maps JSON field names to column positions (project-local helper; populated in initialize()).
protected ColumnNameMap columnNameMap;
// When true (table property "errors.ignore"), malformed records deserialize to null
// instead of raising SerDeException.
private boolean ignoreErrors;
@Override
public void initialize(Configuration configuration, Properties table)
        throws SerDeException
{
    // Initializes the SerDe from the table's column metadata, building the row
    // struct type, its object inspector, and the JSON-field-to-column mapping.
    //
    // @param configuration Hadoop configuration (unused here)
    // @param table         table properties; must define column names and types
    // @throws SerDeException if columns are missing or names/types disagree in count
    String columnNamesProperty = table.getProperty(Constants.LIST_COLUMNS);
    if ((columnNamesProperty == null) || columnNamesProperty.isEmpty()) {
        throw new SerDeException("table has no columns");
    }
    String columnTypesProperty = table.getProperty(Constants.LIST_COLUMN_TYPES);
    if ((columnTypesProperty == null) || columnTypesProperty.isEmpty()) {
        throw new SerDeException("table has no column types");
    }
    // Fix: use parameterized lists instead of raw types. asList(String...) yields
    // List<String> and getTypeInfosFromTypeString returns List<TypeInfo>; the raw
    // declarations lost compile-time checking (the generics were stripped in transit).
    List<String> columnNames = asList(columnNamesProperty.split(","));
    List<TypeInfo> columnTypes = getTypeInfosFromTypeString(columnTypesProperty);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException(format("columns size (%s) does not match column types size (%s)", columnNames.size(), columnTypes.size()));
    }
    rootTypeInfo = (StructTypeInfo) getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(rootTypeInfo);
    columnNameMap = new ColumnNameMap(rootTypeInfo);
    // "errors.ignore" defaults to false when the property is absent.
    ignoreErrors = Boolean.parseBoolean(table.getProperty("errors.ignore"));
}
@Override
public Class extends Writable> getSerializedClass()
{
throw new UnsupportedOperationException("serialization not supported");
}
/**
 * Unsupported: this SerDe only deserializes JSON; Hive cannot write through it.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Writable serialize(Object o, ObjectInspector objectInspector)
        throws SerDeException
{
    throw new UnsupportedOperationException("serialization not supported");
}
/**
 * Deserializes one record of raw JSON bytes into a Hive row object.
 * When the "errors.ignore" table property is set, records that fail to
 * parse are mapped to {@code null} instead of aborting the query.
 *
 * @param writable the serialized record; must be a {@link BinaryComparable}
 * @return the row struct, or {@code null} for a bad record when errors are ignored
 * @throws SerDeException on parse failure (unless errors are ignored)
 */
@Override
public Object deserialize(Writable writable)
        throws SerDeException
{
    if (!(writable instanceof BinaryComparable)) {
        throw new SerDeException("expected BinaryComparable: " + writable.getClass().getName());
    }
    BinaryComparable binary = (BinaryComparable) writable;
    try {
        return doDeserialize(binary);
    }
    catch (SerDeException e) {
        if (!ignoreErrors) {
            throw e;
        }
        // Best-effort mode: swallow the failure and emit a null row.
        return null;
    }
}
/**
 * Parses the record's bytes as a JSON tree and converts it to a row struct.
 *
 * @param binary raw bytes of one JSON document
 * @return the row struct produced by {@link #buildStruct}
 * @throws SerDeException wrapping any {@link IOException} from the JSON parser
 */
private Object doDeserialize(BinaryComparable binary)
        throws SerDeException
{
    try {
        // Parse only the valid portion of the backing array: [0, getLength()).
        JsonParser parser = jsonFactory.createJsonParser(binary.getBytes(), 0, binary.getLength());
        JsonNode tree = parser.readValueAsTree();
        return buildStruct(tree);
    }
    catch (IOException e) {
        throw new SerDeException("error parsing JSON", e);
    }
}
/**
 * Returns the inspector for rows produced by {@link #deserialize};
 * built once in {@link #initialize} from the table's column metadata.
 */
@Override
public ObjectInspector getObjectInspector()
        throws SerDeException
{
    return this.rowObjectInspector;
}
/**
 * Converts a parsed JSON tree into the row object. Protected hook so
 * subclasses can post-process or wrap the result of {@link #processFields}.
 *
 * @param tree root of the parsed JSON document
 * @return the row struct
 */
protected Object buildStruct(JsonNode tree)
        throws SerDeException
{
    Object row = processFields(tree);
    return row;
}
/**
 * Maps the JSON tree's fields onto the row's columns using the root struct
 * type computed in {@link #initialize}. The {@code null} column name marks
 * the top-level (unnamed) struct.
 *
 * @param tree root of the parsed JSON document
 * @return one value per column, in column order
 */
protected Object[] processFields(JsonNode tree)
        throws SerDeException
{
    return getStructNodeValue(null, tree, this.rootTypeInfo);
}
/**
 * Dispatches a JSON node to the converter matching the column's type category.
 *
 * @param columnName column being converted (used in error messages); may be null for the root
 * @param node       JSON value to convert; a JSON null yields Java null
 * @param typeInfo   declared Hive type for this column
 * @return the converted value, or null for a JSON null
 * @throws SerDeException for type categories this SerDe does not handle
 */
private Object getNodeValue(String columnName, JsonNode node, TypeInfo typeInfo)
        throws SerDeException
{
    if (node.isNull()) {
        return null;
    }
    Category category = typeInfo.getCategory();
    if (category == Category.LIST) {
        return getListNodeValue(columnName, node, (ListTypeInfo) typeInfo);
    }
    if (category == Category.MAP) {
        return getMapNodeValue(columnName, node, (MapTypeInfo) typeInfo);
    }
    if (category == Category.PRIMITIVE) {
        return getPrimitiveNodeValue(columnName, node, (PrimitiveTypeInfo) typeInfo);
    }
    if (category == Category.STRUCT) {
        return getStructNodeValue(columnName, node, (StructTypeInfo) typeInfo);
    }
    // UNION (and any future category) is not supported by this SerDe.
    throw new SerDeException(format("unexpected type category (%s) for column: %s", category, columnName));
}
private Object getListNodeValue(String columnName, JsonNode node, ListTypeInfo typeInfo)
throws SerDeException
{
if (!node.isArray()) {
throw new SerDeException(format("expected list, found %s for column %s", node.getClass().getSimpleName(), columnName));
}
List