// org.apache.hadoop.hive.serde2.SerDeUtils Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
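/**
 * Static helper methods for SerDe code: JSON-style rendering of values described by an
 * ObjectInspector, detection of nested nulls, SerDe initialization, and small
 * charset / byte-buffer utilities.
 */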
public final class SerDeUtils {
// Punctuation appended by buildJSONString when rendering values as JSON-style text.
public static final char QUOTE = '"';
public static final char COLON = ':';
public static final char COMMA = ',';
// we should use '\0' for COLUMN_NAME_DELIMITER if column name contains COMMA
// but we should also take care of the backward compatibility
// NOTE(review): the comment above talks about COLUMN_NAME_DELIMITER while the constant
// declared here is COLUMN_COMMENTS_DELIMITER; it is not referenced elsewhere in this file.
public static final char COLUMN_COMMENTS_DELIMITER = '\0';
// Delimiters that open/close a list ([ ]) and a map, struct or union ({ }) in buildJSONString.
public static final String LBRACKET = "[";
public static final String RBRACKET = "]";
public static final String LBRACE = "{";
public static final String RBRACE = "}";
// lower case null is used within json objects
private static final String JSON_NULL = "null";
// String keys that are not read anywhere in this file; presumably configuration/property
// names consumed by other classes - confirm against their users.
public static final String LIST_SINK_OUTPUT_FORMATTER = "list.sink.output.formatter";
public static final String LIST_SINK_OUTPUT_PROTOCOL = "list.sink.output.protocol";
// Shared logger, named after this class.
public static final Logger LOG = LoggerFactory.getLogger(SerDeUtils.class.getName());
/**
 * Escapes a string so that it can be placed inside a JSON string literal.
 *
 * Double quotes and backslashes are prefixed with a backslash. Backspace, form feed,
 * newline, carriage return and tab are written as their two-character short escapes.
 * Every other character below the space character (0x20) is written as a backslash,
 * the letter 'u' and four zero-padded lowercase hex digits. Everything else is copied
 * unchanged.
 *
 * @param str the text to escape; must not be null
 * @return the escaped text, without surrounding quotes
 */
public static String escapeString(String str) {
  final int total = str.length();
  final StringBuilder out = new StringBuilder(total + 16);
  for (int pos = 0; pos < total; pos++) {
    final char ch = str.charAt(pos);
    if (ch == '"' || ch == '\\') {
      out.append('\\').append(ch);
    } else if (ch == '\b') {
      out.append('\\').append('b');
    } else if (ch == '\f') {
      out.append('\\').append('f');
    } else if (ch == '\n') {
      out.append('\\').append('n');
    } else if (ch == '\r') {
      out.append('\\').append('r');
    } else if (ch == '\t') {
      out.append('\\').append('t');
    } else if (ch < ' ') {
      // Remaining control characters: JSON requires a four-hex-digit escape.
      final String hex = Integer.toHexString(ch);
      out.append('\\').append('u');
      for (int pad = hex.length(); pad < 4; pad++) {
        out.append('0');
      }
      out.append(hex);
    } else {
      out.append(ch);
    }
  }
  return out.toString();
}
/**
 * Escapes only newline, carriage return and tab, replacing each with its two-character
 * backslash form. All other characters, including quotes and backslashes, are copied
 * unchanged.
 *
 * @param str the text to escape; must not be null
 * @return the text with the three whitespace control characters escaped
 */
public static String lightEscapeString(String str) {
  final int total = str.length();
  final StringBuilder out = new StringBuilder(total + 16);
  for (int pos = 0; pos < total; pos++) {
    final char ch = str.charAt(pos);
    if (ch == '\n') {
      out.append('\\').append('n');
    } else if (ch == '\r') {
      out.append('\\').append('r');
    } else if (ch == '\t') {
      out.append('\\').append('t');
    } else {
      out.append(ch);
    }
  }
  return out.toString();
}
/**
 * Converts a value into the object that is sent back to the client.
 *
 * Primitive values are copied to their standard Java form (see JDBC 3.0 Specification,
 * Table B-3: Mapping from JDBC Types to Java Object Types); a null primitive stays null.
 * Non-primitive values are currently exposed as their JSON string.
 *
 * This method is kept consistent with HiveResultSetMetaData#hiveTypeToSqlType .
 *
 * @param val the value to convert, may be null
 * @param valOI the ObjectInspector describing {@code val}
 * @param version the client protocol version; for values below 5 a BINARY primitive is
 *        returned as a String instead of a byte array
 * @return the converted value
 */
public static Object toThriftPayload(Object val, ObjectInspector valOI, int version) {
  if (valOI.getCategory() != ObjectInspector.Category.PRIMITIVE) {
    // for now, expose non-primitive as a string
    // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
    return SerDeUtils.getJSONString(val, valOI);
  }
  if (val == null) {
    return null;
  }
  final Object standard = ObjectInspectorUtils.copyToStandardObject(val, valOI,
      ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);
  // uses string type for binary before HIVE_CLI_SERVICE_PROTOCOL_V6
  // NOTE(review): the threshold is the literal 5 - presumably the numeric value of that
  // protocol constant; confirm.
  final boolean binaryAsString = version < 5
      && ((PrimitiveObjectInspector) valOI).getPrimitiveCategory()
          == PrimitiveObjectInspector.PrimitiveCategory.BINARY;
  if (binaryAsString) {
    // todo HIVE-5269 (bytes are decoded with the platform default charset)
    return new String((byte[]) standard);
  }
  return standard;
}
/**
 * Renders an object as JSON-style text, using the lower-case string "null" for a
 * top-level null.
 *
 * @param o Object to render, may be null
 * @param oi ObjectInspector describing {@code o}
 * @return the rendered text
 */
public static String getJSONString(Object o, ObjectInspector oi) {
return getJSONString(o, oi, JSON_NULL);
}
/**
 * Use this if you need a custom representation of a top-level null
 * (i.e. something other than 'null'),
 * e.g. for hive output, we want to print NULL for a null map object.
 * Nulls nested inside lists, maps, structs and unions are still rendered as 'null'.
 * @param o Object
 * @param oi ObjectInspector
 * @param nullStr The custom string used to represent a top-level null value
 * @return the JSON-style text built for {@code o}
 */
public static String getJSONString(Object o, ObjectInspector oi, String nullStr) {
// Delegate to the recursive builder, accumulating into a fresh buffer.
StringBuilder sb = new StringBuilder();
buildJSONString(sb, o, oi, nullStr);
return sb.toString();
}
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
switch (oi.getCategory()) {
case PRIMITIVE: {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
if (o == null) {
sb.append(nullStr);
} else {
switch (poi.getPrimitiveCategory()) {
case BOOLEAN: {
boolean b = ((BooleanObjectInspector) poi).get(o);
sb.append(b ? "true" : "false");
break;
}
case BYTE: {
sb.append(((ByteObjectInspector) poi).get(o));
break;
}
case SHORT: {
sb.append(((ShortObjectInspector) poi).get(o));
break;
}
case INT: {
sb.append(((IntObjectInspector) poi).get(o));
break;
}
case LONG: {
sb.append(((LongObjectInspector) poi).get(o));
break;
}
case FLOAT: {
sb.append(((FloatObjectInspector) poi).get(o));
break;
}
case DOUBLE: {
sb.append(((DoubleObjectInspector) poi).get(o));
break;
}
case STRING: {
sb.append('"');
sb.append(escapeString(((StringObjectInspector) poi)
.getPrimitiveJavaObject(o)));
sb.append('"');
break;
}
case CHAR: {
sb.append('"');
sb.append(escapeString(((HiveCharObjectInspector) poi)
.getPrimitiveJavaObject(o).toString()));
sb.append('"');
break;
}
case VARCHAR: {
sb.append('"');
sb.append(escapeString(((HiveVarcharObjectInspector) poi)
.getPrimitiveJavaObject(o).toString()));
sb.append('"');
break;
}
case DATE: {
sb.append('"');
sb.append(((DateObjectInspector) poi)
.getPrimitiveWritableObject(o));
sb.append('"');
break;
}
case TIMESTAMP: {
sb.append('"');
sb.append(((TimestampObjectInspector) poi)
.getPrimitiveWritableObject(o));
sb.append('"');
break;
}
case TIMESTAMPLOCALTZ: {
sb.append('"');
sb.append(((TimestampLocalTZObjectInspector) poi).getPrimitiveWritableObject(o));
sb.append('"');
break;
}
case BINARY: {
BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
Text txt = new Text();
txt.set(bw.getBytes(), 0, bw.getLength());
sb.append(txt.toString());
break;
}
case DECIMAL: {
sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
break;
}
case INTERVAL_YEAR_MONTH: {
sb.append(((HiveIntervalYearMonthObjectInspector) oi).getPrimitiveJavaObject(o));
break;
}
case INTERVAL_DAY_TIME: {
sb.append(((HiveIntervalDayTimeObjectInspector) oi).getPrimitiveJavaObject(o));
break;
}
default:
throw new RuntimeException("Unknown primitive type: "
+ poi.getPrimitiveCategory());
}
}
break;
}
case LIST: {
ListObjectInspector loi = (ListObjectInspector) oi;
ObjectInspector listElementObjectInspector = loi
.getListElementObjectInspector();
List> olist = loi.getList(o);
if (olist == null) {
sb.append(nullStr);
} else {
sb.append(LBRACKET);
for (int i = 0; i < olist.size(); i++) {
if (i > 0) {
sb.append(COMMA);
}
buildJSONString(sb, olist.get(i), listElementObjectInspector, JSON_NULL);
}
sb.append(RBRACKET);
}
break;
}
case MAP: {
MapObjectInspector moi = (MapObjectInspector) oi;
ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
ObjectInspector mapValueObjectInspector = moi
.getMapValueObjectInspector();
Map, ?> omap = moi.getMap(o);
if (omap == null) {
sb.append(nullStr);
} else {
sb.append(LBRACE);
boolean first = true;
for (Object entry : omap.entrySet()) {
if (first) {
first = false;
} else {
sb.append(COMMA);
}
Map.Entry, ?> e = (Map.Entry, ?>) entry;
buildJSONString(sb, e.getKey(), mapKeyObjectInspector, JSON_NULL);
sb.append(COLON);
buildJSONString(sb, e.getValue(), mapValueObjectInspector, JSON_NULL);
}
sb.append(RBRACE);
}
break;
}
case STRUCT: {
StructObjectInspector soi = (StructObjectInspector) oi;
List extends StructField> structFields = soi.getAllStructFieldRefs();
if (o == null) {
sb.append(nullStr);
} else {
sb.append(LBRACE);
for (int i = 0; i < structFields.size(); i++) {
if (i > 0) {
sb.append(COMMA);
}
sb.append(QUOTE);
sb.append(structFields.get(i).getFieldName());
sb.append(QUOTE);
sb.append(COLON);
buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
structFields.get(i).getFieldObjectInspector(), JSON_NULL);
}
sb.append(RBRACE);
}
break;
}
case UNION: {
UnionObjectInspector uoi = (UnionObjectInspector) oi;
if (o == null) {
sb.append(nullStr);
} else {
sb.append(LBRACE);
sb.append(uoi.getTag(o));
sb.append(COLON);
buildJSONString(sb, uoi.getField(o),
uoi.getObjectInspectors().get(uoi.getTag(o)), JSON_NULL);
sb.append(RBRACE);
}
break;
}
default:
throw new RuntimeException("Unknown type in ObjectInspector!");
}
}
/**
* return false though element is null if nullsafe flag is true for that
*/
public static boolean hasAnyNullObject(List o, StructObjectInspector loi,
boolean[] nullSafes) {
List extends StructField> fields = loi.getAllStructFieldRefs();
for (int i = 0; i < o.size();i++) {
if ((nullSafes == null || !nullSafes[i])
&& hasAnyNullObject(o.get(i), fields.get(i).getFieldObjectInspector())) {
return true;
}
}
return false;
}
/**
* True if Object passed is representing null object.
*
* @param o The object
* @param oi The ObjectInspector
*
* @return true if the object passed is representing NULL object
* false otherwise
*/
public static boolean hasAnyNullObject(Object o, ObjectInspector oi) {
switch (oi.getCategory()) {
case PRIMITIVE: {
if (o == null) {
return true;
}
return false;
}
case LIST: {
ListObjectInspector loi = (ListObjectInspector) oi;
ObjectInspector listElementObjectInspector = loi
.getListElementObjectInspector();
List> olist = loi.getList(o);
if (olist == null) {
return true;
} else {
// there are no elements in the list
if (olist.size() == 0) {
return false;
}
// if all the elements are representing null, then return true
for (int i = 0; i < olist.size(); i++) {
if (hasAnyNullObject(olist.get(i), listElementObjectInspector)) {
return true;
}
}
return false;
}
}
case MAP: {
MapObjectInspector moi = (MapObjectInspector) oi;
ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
ObjectInspector mapValueObjectInspector = moi
.getMapValueObjectInspector();
Map, ?> omap = moi.getMap(o);
if (omap == null) {
return true;
} else {
// there are no elements in the map
if (omap.entrySet().size() == 0) {
return false;
}
// if all the entries of map are representing null, then return true
for (Map.Entry, ?> entry : omap.entrySet()) {
if (hasAnyNullObject(entry.getKey(), mapKeyObjectInspector)
|| hasAnyNullObject(entry.getValue(), mapValueObjectInspector)) {
return true;
}
}
return false;
}
}
case STRUCT: {
StructObjectInspector soi = (StructObjectInspector) oi;
List extends StructField> structFields = soi.getAllStructFieldRefs();
if (o == null) {
return true;
} else {
// there are no fields in the struct
if (structFields.size() == 0) {
return false;
}
// if any the fields of struct are representing null, then return true
for (int i = 0; i < structFields.size(); i++) {
if (hasAnyNullObject(soi.getStructFieldData(o, structFields.get(i)),
structFields.get(i).getFieldObjectInspector())) {
return true;
}
}
return false;
}
}
case UNION: {
UnionObjectInspector uoi = (UnionObjectInspector) oi;
if (o == null) {
return true;
} else {
// there are no elements in the union
if (uoi.getObjectInspectors().size() == 0) {
return false;
}
return hasAnyNullObject(uoi.getField(o),
uoi.getObjectInspectors().get(uoi.getTag(o)));
}
}
default:
throw new RuntimeException("Unknown type in ObjectInspector!");
}
}
/**
 * Returns the union of table and partition properties,
 * with partition properties taking precedence.
 *
 * A new Properties object is always returned; neither argument is modified. Either
 * argument may be null, in which case it contributes no entries.
 *
 * @param tblProps table-level properties, may be null
 * @param partProps partition-level properties, may be null
 * @return the overlayed properties
 */
public static Properties createOverlayedProperties(Properties tblProps, Properties partProps) {
  Properties props = new Properties();
  if (tblProps != null) {
    props.putAll(tblProps);
  }
  // Applied second so that partition entries overwrite table entries with the same key.
  if (partProps != null) {
    props.putAll(partProps);
  }
  return props;
}
/**
 * Initializes a SerDe and fails if it reports configuration errors.
 *
 * An AbstractSerDe receives the table and partition properties separately and is then
 * asked for its configuration errors; a non-empty error message is raised as a
 * SerDeException. Any other Deserializer is initialized with the overlayed
 * (partition-over-table) properties and no error check is made.
 *
 * @param deserializer the SerDe to initialize
 * @param conf the configuration passed to the SerDe
 * @param tblProps table properties
 * @param partProps partition properties, may be null
 * @throws SerDeException if initialization fails or configuration errors are reported
 */
public static void initializeSerDe(Deserializer deserializer, Configuration conf,
    Properties tblProps, Properties partProps)
    throws SerDeException {
  if (!(deserializer instanceof AbstractSerDe)) {
    deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
    return;
  }
  final AbstractSerDe serDe = (AbstractSerDe) deserializer;
  serDe.initialize(conf, tblProps, partProps);
  final String configErrors = serDe.getConfigurationErrors();
  if (configErrors == null || configErrors.isEmpty()) {
    return;
  }
  throw new SerDeException(configErrors);
}
/**
 * Initializes a SerDe without looking at its configuration errors.
 *
 * An AbstractSerDe receives the table and partition properties separately; any other
 * Deserializer is initialized with the overlayed (partition-over-table) properties.
 *
 * @param deserializer the SerDe to initialize
 * @param conf the configuration passed to the SerDe
 * @param tblProps table properties
 * @param partProps partition properties, may be null
 * @throws SerDeException if the SerDe's own initialization fails
 */
public static void initializeSerDeWithoutErrorCheck(Deserializer deserializer,
    Configuration conf, Properties tblProps,
    Properties partProps) throws SerDeException {
  if (!(deserializer instanceof AbstractSerDe)) {
    deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
    return;
  }
  final AbstractSerDe serDe = (AbstractSerDe) deserializer;
  serDe.initialize(conf, tblProps, partProps);
}
/** Private constructor: this class only offers static members and is never instantiated. */
private SerDeUtils() {
// prevent instantiation
}
/**
 * Re-reads the bytes held by {@code text} as {@code previousCharset} and returns a new
 * Text built from the decoded string.
 *
 * @param text the source text; only its first {@code getLength()} bytes are decoded
 * @param previousCharset the charset the bytes are currently encoded in
 * @return a new Text holding the decoded string (presumably stored as UTF-8 by Text)
 */
public static Text transformTextToUTF8(Text text, Charset previousCharset) {
  final byte[] raw = text.getBytes();
  final int validLength = text.getLength();
  final String decoded = new String(raw, 0, validLength, previousCharset);
  return new Text(decoded);
}
/**
 * Decodes the bytes held by {@code text} as UTF-8 and returns a new Text wrapping the
 * same string encoded in {@code targetCharset}.
 *
 * The bytes are decoded explicitly as UTF-8 rather than with the platform default
 * charset, so the result does not depend on the JVM's file.encoding setting.
 *
 * @param text the UTF-8 source text; only its first {@code getLength()} bytes are decoded
 * @param targetCharset the charset to encode the result in
 * @return a new Text whose bytes are the string encoded in {@code targetCharset}
 */
public static Text transformTextFromUTF8(Text text, Charset targetCharset) {
  String decoded = new String(text.getBytes(), 0, text.getLength(), StandardCharsets.UTF_8);
  return new Text(decoded.getBytes(targetCharset));
}
/**
 * Writes {@code value} into {@code writeBuffer} as eight bytes starting at
 * {@code offset}, least significant byte first.
 *
 * @param writeBuffer destination array; must have room for eight bytes from {@code offset}
 * @param offset index of the first byte to write
 * @param value the long to store
 */
public static void writeLong(byte[] writeBuffer, int offset, long value) {
  for (int byteIndex = 0; byteIndex < 8; byteIndex++) {
    // Shift the wanted byte into the low position; the cast keeps only those 8 bits.
    writeBuffer[offset + byteIndex] = (byte) (value >> (8 * byteIndex));
  }
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy