All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.serde2.SerDeUtils Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.serde2;

import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;

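/**
 * Static helper methods for SerDe code: JSON-style string rendering of objects through
 * their ObjectInspectors, detection of nested nulls, SerDe initialization, and small
 * charset and byte-encoding utilities.
 */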
public final class SerDeUtils {

  // Single-character tokens appended while rendering JSON-style text in this class.
  public static final char QUOTE = '"';
  public static final char COLON = ':';
  public static final char COMMA = ',';
  // we should use '\0' for COLUMN_NAME_DELIMITER if column name contains COMMA
  // but we should also take care of the backward compatibility
  // NOTE(review): the comment above talks about COLUMN_NAME_DELIMITER while the constant
  // declared here is COLUMN_COMMENTS_DELIMITER -- confirm which one it is meant to describe.
  public static final char COLUMN_COMMENTS_DELIMITER = '\0';
  // Opening/closing tokens for rendered lists ([...]) and maps/structs/unions ({...}).
  public static final String LBRACKET = "[";
  public static final String RBRACKET = "]";
  public static final String LBRACE = "{";
  public static final String RBRACE = "}";

  // lower case null is used within json objects
  private static final String JSON_NULL = "null";
  // NOTE(review): these two look like configuration property names; nothing in this file
  // reads them, so their consumers must live elsewhere -- verify before renaming.
  public static final String LIST_SINK_OUTPUT_FORMATTER = "list.sink.output.formatter";
  public static final String LIST_SINK_OUTPUT_PROTOCOL = "list.sink.output.protocol";
  // Logger named after this class.
  public static final Logger LOG = LoggerFactory.getLogger(SerDeUtils.class.getName());

  /**
   * Escapes a string so it can be placed between the quotes of a JSON string literal.
   *
   * <p>Double quotes and backslashes are prefixed with a backslash. Backspace, form feed,
   * newline, carriage return and tab use their two-character short escapes. Every other
   * character below the space character is written as a backslash, the letter u and four
   * lower-case hex digits. All remaining characters are copied unchanged.
   *
   * @param str the text to escape; must not be null
   * @return the escaped text, without surrounding quotes
   */
  public static String escapeString(String str) {
    final int n = str.length();
    final StringBuilder out = new StringBuilder(n + 16);

    for (int pos = 0; pos < n; pos++) {
      final char ch = str.charAt(pos);
      if (ch == '"' || ch == '\\') {
        out.append('\\').append(ch);
      } else if (ch == '\b') {
        out.append('\\').append('b');
      } else if (ch == '\f') {
        out.append('\\').append('f');
      } else if (ch == '\n') {
        out.append('\\').append('n');
      } else if (ch == '\r') {
        out.append('\\').append('r');
      } else if (ch == '\t') {
        out.append('\\').append('t');
      } else if (ch >= ' ') {
        // Ordinary character: copy through untouched.
        out.append(ch);
      } else {
        // Remaining control characters (below the space character) have no short escape,
        // so emit the numeric form, left-padded with zeros to four hex digits.
        final String hex = Integer.toHexString(ch);
        out.append('\\').append('u');
        for (int pad = hex.length(); pad < 4; pad++) {
          out.append('0');
        }
        out.append(hex);
      }
    }
    return out.toString();
  }

  /**
   * Escapes only newline, carriage return and tab, replacing each with its two-character
   * backslash form; every other character (including quotes and backslashes) is copied
   * unchanged.
   *
   * @param str the text to escape; must not be null
   * @return the text with line breaks and tabs made visible
   */
  public static String lightEscapeString(String str) {
    final StringBuilder out = new StringBuilder(str.length() + 16);

    for (char ch : str.toCharArray()) {
      if (ch == '\n') {
        out.append('\\').append('n');
      } else if (ch == '\r') {
        out.append('\\').append('r');
      } else if (ch == '\t') {
        out.append('\\').append('t');
      } else {
        out.append(ch);
      }
    }
    return out.toString();
  }

  /**
   * Converts an object to a standard Java object in compliance with JDBC 3.0 (see JDBC 3.0
   * Specification, Table B-3: Mapping from JDBC Types to Java Object Types).
   *
   * <p>This method is kept consistent with HiveResultSetMetaData#hiveTypeToSqlType.
   *
   * @param val the value to convert; may be null
   * @param valOI the inspector describing {@code val}
   * @param version the client protocol version; values below 5 receive binary data as a String
   * @return for primitives, null when {@code val} is null, otherwise the standard Java copy
   *         (or a String for binary on old protocol versions); for every non-primitive
   *         category, the JSON string produced by {@code getJSONString}
   */
  public static Object toThriftPayload(Object val, ObjectInspector valOI, int version) {
    if (valOI.getCategory() != ObjectInspector.Category.PRIMITIVE) {
      // for now, expose non-primitive as a string
      // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
      return SerDeUtils.getJSONString(val, valOI);
    }
    if (val == null) {
      return null;
    }

    Object standard = ObjectInspectorUtils.copyToStandardObject(val, valOI,
        ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);

    // uses string type for binary before HIVE_CLI_SERVICE_PROTOCOL_V6
    if (version >= 5) {
      return standard;
    }
    PrimitiveObjectInspector primitiveOI = (PrimitiveObjectInspector) valOI;
    if (primitiveOI.getPrimitiveCategory()
        != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
      return standard;
    }
    // todo HIVE-5269 (the bytes are decoded with the platform default charset)
    return new String((byte[]) standard);
  }

  /**
   * Renders {@code o} as a JSON-style string, printing a top-level null as the lower-case
   * JSON {@code null}.
   *
   * @param o the object to render; may be null
   * @param oi the inspector describing {@code o}
   * @return the rendered string
   */
  public static String getJSONString(Object o, ObjectInspector oi) {
    final String topLevelNull = JSON_NULL;
    return getJSONString(o, oi, topLevelNull);
  }

  /**
   * Renders {@code o} as a JSON-style string with a caller-chosen representation for a
   * top-level null (i.e. something other than 'null'). For example, Hive output prints
   * NULL for a null map object.
   *
   * @param o Object to render; may be null
   * @param oi ObjectInspector describing {@code o}
   * @param nullStr the custom string used to represent a top-level null value
   * @return the rendered string
   */
  public static String getJSONString(Object o, ObjectInspector oi, String nullStr) {
    final StringBuilder json = new StringBuilder();
    buildJSONString(json, o, oi, nullStr);
    final String rendered = json.toString();
    return rendered;
  }


  /**
   * Appends a JSON-style rendering of {@code o}, as seen through {@code oi}, to {@code sb}.
   *
   * <p>Rendering rules, by inspector category:
   * <ul>
   *   <li>PRIMITIVE: booleans, integers, floats, doubles, decimals, intervals and binary
   *       values are appended unquoted; string, char and varchar values are wrapped in
   *       double quotes and escaped with {@link #escapeString(String)}; date and timestamp
   *       values are wrapped in double quotes without escaping.</li>
   *   <li>LIST: elements separated by commas inside square brackets.</li>
   *   <li>MAP: key:value pairs separated by commas inside curly braces.</li>
   *   <li>STRUCT: "fieldName":value pairs separated by commas inside curly braces; the
   *       field name is quoted but not escaped.</li>
   *   <li>UNION: tag:value inside curly braces; the tag is not quoted.</li>
   * </ul>
   *
   * <p>{@code nullStr} is used only when the object at this level is null; nested nulls
   * are always rendered as the lower-case JSON {@code null}.
   *
   * @param sb the builder receiving the output
   * @param o the object to render; may be null
   * @param oi the inspector describing {@code o}
   * @param nullStr the text to append when {@code o} itself is null
   * @throws RuntimeException if the inspector reports an unknown category or primitive type
   */
  static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
    switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        switch (poi.getPrimitiveCategory()) {
        case BOOLEAN: {
          boolean b = ((BooleanObjectInspector) poi).get(o);
          sb.append(b ? "true" : "false");
          break;
        }
        case BYTE: {
          sb.append(((ByteObjectInspector) poi).get(o));
          break;
        }
        case SHORT: {
          sb.append(((ShortObjectInspector) poi).get(o));
          break;
        }
        case INT: {
          sb.append(((IntObjectInspector) poi).get(o));
          break;
        }
        case LONG: {
          sb.append(((LongObjectInspector) poi).get(o));
          break;
        }
        case FLOAT: {
          sb.append(((FloatObjectInspector) poi).get(o));
          break;
        }
        case DOUBLE: {
          sb.append(((DoubleObjectInspector) poi).get(o));
          break;
        }
        case STRING: {
          sb.append('"');
          sb.append(escapeString(((StringObjectInspector) poi)
              .getPrimitiveJavaObject(o)));
          sb.append('"');
          break;
        }
        case CHAR: {
          sb.append('"');
          sb.append(escapeString(((HiveCharObjectInspector) poi)
              .getPrimitiveJavaObject(o).toString()));
          sb.append('"');
          break;
        }
        case VARCHAR: {
          sb.append('"');
          sb.append(escapeString(((HiveVarcharObjectInspector) poi)
              .getPrimitiveJavaObject(o).toString()));
          sb.append('"');
          break;
        }
        case DATE: {
          sb.append('"');
          sb.append(((DateObjectInspector) poi)
              .getPrimitiveWritableObject(o));
          sb.append('"');
          break;
        }
        case TIMESTAMP: {
          sb.append('"');
          sb.append(((TimestampObjectInspector) poi)
              .getPrimitiveWritableObject(o));
          sb.append('"');
          break;
        }
        case TIMESTAMPLOCALTZ: {
          sb.append('"');
          sb.append(((TimestampLocalTZObjectInspector) poi).getPrimitiveWritableObject(o));
          sb.append('"');
          break;
        }
        case BINARY: {
          // Only the first getLength() bytes of the backing array are valid content.
          BytesWritable bw = ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o);
          Text txt = new Text();
          txt.set(bw.getBytes(), 0, bw.getLength());
          sb.append(txt.toString());
          break;
        }
        case DECIMAL: {
          sb.append(((HiveDecimalObjectInspector) poi).getPrimitiveJavaObject(o));
          break;
        }
        case INTERVAL_YEAR_MONTH: {
          sb.append(((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveJavaObject(o));
          break;
        }
        case INTERVAL_DAY_TIME: {
          sb.append(((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveJavaObject(o));
          break;
        }

        default:
          throw new RuntimeException("Unknown primitive type: "
              + poi.getPrimitiveCategory());
        }
      }
      break;
    }
    case LIST: {
      ListObjectInspector loi = (ListObjectInspector) oi;
      ObjectInspector listElementObjectInspector = loi
          .getListElementObjectInspector();
      List<?> olist = loi.getList(o);
      if (olist == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
          if (i > 0) {
            sb.append(COMMA);
          }
          buildJSONString(sb, olist.get(i), listElementObjectInspector, JSON_NULL);
        }
        sb.append(RBRACKET);
      }
      break;
    }
    case MAP: {
      MapObjectInspector moi = (MapObjectInspector) oi;
      ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
      ObjectInspector mapValueObjectInspector = moi
          .getMapValueObjectInspector();
      Map<?, ?> omap = moi.getMap(o);
      if (omap == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        boolean first = true;
        for (Map.Entry<?, ?> e : omap.entrySet()) {
          if (first) {
            first = false;
          } else {
            sb.append(COMMA);
          }
          buildJSONString(sb, e.getKey(), mapKeyObjectInspector, JSON_NULL);
          sb.append(COLON);
          buildJSONString(sb, e.getValue(), mapValueObjectInspector, JSON_NULL);
        }
        sb.append(RBRACE);
      }
      break;
    }
    case STRUCT: {
      StructObjectInspector soi = (StructObjectInspector) oi;
      // The element type must be declared: with a raw List the StructField accessors
      // below are not visible to the compiler.
      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
      if (o == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        for (int i = 0; i < structFields.size(); i++) {
          if (i > 0) {
            sb.append(COMMA);
          }
          StructField field = structFields.get(i);
          sb.append(QUOTE);
          sb.append(field.getFieldName());
          sb.append(QUOTE);
          sb.append(COLON);
          buildJSONString(sb, soi.getStructFieldData(o, field),
              field.getFieldObjectInspector(), JSON_NULL);
        }
        sb.append(RBRACE);
      }
      break;
    }
    case UNION: {
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        // The tag both labels the output and selects the inspector for the active field.
        byte tag = uoi.getTag(o);
        sb.append(LBRACE);
        sb.append(tag);
        sb.append(COLON);
        buildJSONString(sb, uoi.getField(o),
              uoi.getObjectInspectors().get(tag), JSON_NULL);
        sb.append(RBRACE);
      }
      break;
    }
    default:
      throw new RuntimeException("Unknown type in ObjectInspector!");
    }
  }

  /**
   * Checks a row of field values for nulls, field by field, using
   * {@link #hasAnyNullObject(Object, ObjectInspector)} with the inspector of the struct
   * field at the same position.
   *
   * <p>A position whose {@code nullSafes} flag is true is skipped, so a null there does
   * not make this method return true.
   *
   * @param o the field values, in the same order as the struct fields of {@code loi}
   * @param loi the struct inspector supplying one field inspector per position
   * @param nullSafes per-position flags marking fields to skip; null means check every field
   * @return true if any checked position is null or contains a nested null
   */
  public static boolean hasAnyNullObject(List<?> o, StructObjectInspector loi,
      boolean[] nullSafes) {
    // The element type must be declared: with a raw List, getFieldObjectInspector()
    // is not visible to the compiler.
    List<? extends StructField> fields = loi.getAllStructFieldRefs();
    for (int i = 0; i < o.size(); i++) {
      if ((nullSafes == null || !nullSafes[i])
          && hasAnyNullObject(o.get(i), fields.get(i).getFieldObjectInspector())) {
        return true;
      }
    }
    return false;
  }
  /**
   * Tells whether the object is null or contains a null anywhere inside it.
   *
   * <p>A primitive is checked directly. A list, map, struct or union is reported as
   * containing a null when the container itself is null or when ANY element, key, value,
   * field or active union member (recursively) is null. Empty lists, maps and structs
   * contain no nulls.
   *
   * @param o The object
   * @param oi The ObjectInspector
   *
   * @return true if the object is null or has a null nested inside it,
   *         false otherwise
   * @throws RuntimeException if the inspector reports an unknown category
   */
  public static boolean hasAnyNullObject(Object o, ObjectInspector oi) {
    switch (oi.getCategory()) {
    case PRIMITIVE: {
      return o == null;
    }
    case LIST: {
      ListObjectInspector loi = (ListObjectInspector) oi;
      ObjectInspector listElementObjectInspector = loi
          .getListElementObjectInspector();
      List<?> olist = loi.getList(o);
      if (olist == null) {
        return true;
      }
      // if any element contains a null, then return true (an empty list falls through)
      for (int i = 0; i < olist.size(); i++) {
        if (hasAnyNullObject(olist.get(i), listElementObjectInspector)) {
          return true;
        }
      }
      return false;
    }
    case MAP: {
      MapObjectInspector moi = (MapObjectInspector) oi;
      ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
      ObjectInspector mapValueObjectInspector = moi
          .getMapValueObjectInspector();
      Map<?, ?> omap = moi.getMap(o);
      if (omap == null) {
        return true;
      }
      // if any key or value contains a null, then return true (an empty map falls through)
      for (Map.Entry<?, ?> entry : omap.entrySet()) {
        if (hasAnyNullObject(entry.getKey(), mapKeyObjectInspector)
            || hasAnyNullObject(entry.getValue(), mapValueObjectInspector)) {
          return true;
        }
      }
      return false;
    }
    case STRUCT: {
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
      if (o == null) {
        return true;
      }
      // if any field contains a null, then return true (a field-less struct falls through)
      for (int i = 0; i < structFields.size(); i++) {
        StructField field = structFields.get(i);
        if (hasAnyNullObject(soi.getStructFieldData(o, field),
            field.getFieldObjectInspector())) {
          return true;
        }
      }
      return false;
    }
    case UNION: {
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      if (o == null) {
        return true;
      }
      // a union with no member types has nothing that could be null
      if (uoi.getObjectInspectors().size() == 0) {
        return false;
      }
      // only the active member, selected by the tag, is inspected
      return hasAnyNullObject(uoi.getField(o),
          uoi.getObjectInspectors().get(uoi.getTag(o)));
    }
    default:
      throw new RuntimeException("Unknown type in ObjectInspector!");
    }
  }

  /**
   * Builds a new {@link Properties} holding every table property, then overlays the
   * partition properties on top so that a partition value wins when both define a key.
   * Neither argument is modified.
   *
   * @param tblProps the table-level properties; must not be null
   * @param partProps the partition-level properties; may be null, in which case the result
   *        contains the table properties only
   * @return the overlayed properties
   */
  public static Properties createOverlayedProperties(Properties tblProps, Properties partProps) {
    final Properties merged = new Properties();
    merged.putAll(tblProps);
    if (partProps == null) {
      return merged;
    }
    merged.putAll(partProps);
    return merged;
  }

  /**
   * Initializes a SerDe and fails if it reports configuration errors.
   *
   * <p>An {@link AbstractSerDe} is initialized with the table and partition properties
   * kept separate, and its configuration errors are then checked. Any other deserializer
   * is initialized with a single overlayed property set (see
   * {@link #createOverlayedProperties(Properties, Properties)}) and no error check.
   *
   * @param deserializer the SerDe to initialize
   * @param conf the configuration passed through to the SerDe
   * @param tblProps the table properties
   * @param partProps the partition properties
   * @throws SerDeException if initialization fails, or if an {@link AbstractSerDe} reports
   *         a non-empty configuration error message
   */
  public static void initializeSerDe(Deserializer deserializer, Configuration conf,
                                            Properties tblProps, Properties partProps)
                                                throws SerDeException {
    if (!(deserializer instanceof AbstractSerDe)) {
      deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
      return;
    }

    AbstractSerDe serDe = (AbstractSerDe) deserializer;
    serDe.initialize(conf, tblProps, partProps);

    String configErrors = serDe.getConfigurationErrors();
    boolean clean = configErrors == null || configErrors.isEmpty();
    if (!clean) {
      throw new SerDeException(configErrors);
    }
  }

  /**
   * Initializes a SerDe without inspecting its configuration errors afterwards.
   *
   * <p>An {@link AbstractSerDe} is initialized with the table and partition properties
   * kept separate; any other deserializer receives a single overlayed property set (see
   * {@link #createOverlayedProperties(Properties, Properties)}).
   *
   * @param deserializer the SerDe to initialize
   * @param conf the configuration passed through to the SerDe
   * @param tblProps the table properties
   * @param partProps the partition properties
   * @throws SerDeException if the SerDe's own initialization fails
   */
  public static void initializeSerDeWithoutErrorCheck(Deserializer deserializer,
                                                      Configuration conf, Properties tblProps,
                                                      Properties partProps) throws SerDeException {
    if (!(deserializer instanceof AbstractSerDe)) {
      deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
      return;
    }
    AbstractSerDe serDe = (AbstractSerDe) deserializer;
    serDe.initialize(conf, tblProps, partProps);
  }

  /** Private constructor: this final class is used only through its static members. */
  private SerDeUtils() {
    // prevent instantiation
  }

  /**
   * Decodes the valid bytes of {@code text} using {@code previousCharset} and wraps the
   * resulting string in a new {@link Text}.
   *
   * <p>Only the first {@code text.getLength()} bytes of the backing array are decoded.
   * NOTE(review): the result is UTF-8 on the assumption that {@code Text(String)} encodes
   * as UTF-8, as the method name implies -- confirm against the Hadoop Text documentation.
   *
   * @param text the source text whose bytes are encoded in {@code previousCharset}
   * @param previousCharset the charset the bytes are currently encoded in
   * @return a new {@link Text}; {@code text} itself is not modified
   */
  public static Text transformTextToUTF8(Text text, Charset previousCharset) {
    final byte[] raw = text.getBytes();
    final String decoded = new String(raw, 0, text.getLength(), previousCharset);
    return new Text(decoded);
  }

  /**
   * Decodes the valid bytes of {@code text} as UTF-8 and re-encodes them in
   * {@code targetCharset}, returning the re-encoded bytes in a new {@link Text}.
   *
   * <p>Only the first {@code text.getLength()} bytes of the backing array are decoded.
   * The source bytes are always decoded as UTF-8, never with the platform default
   * charset, so the result does not depend on the JVM's file.encoding setting.
   *
   * @param text the UTF-8 encoded source text
   * @param targetCharset the charset to encode the result in
   * @return a new {@link Text} holding the bytes in {@code targetCharset}; {@code text}
   *         itself is not modified
   */
  public static Text transformTextFromUTF8(Text text, Charset targetCharset) {
    // Fully-qualified so this fix does not depend on a new import in the file header.
    String decoded = new String(text.getBytes(), 0, text.getLength(),
        java.nio.charset.StandardCharsets.UTF_8);
    return new Text(decoded.getBytes(targetCharset));
  }

  /**
   * Writes {@code value} into {@code writeBuffer} as eight bytes starting at
   * {@code offset}, least-significant byte first.
   *
   * <p>Bytes are stored in ascending index order, so if the buffer is too short the bytes
   * before the failing index have already been written when the exception is thrown.
   *
   * @param writeBuffer the destination array; needs at least {@code offset + 8} elements
   * @param offset the index that receives the least-significant byte
   * @param value the value to encode
   */
  public static void writeLong(byte[] writeBuffer, int offset, long value) {
    for (int i = 0; i < 8; i++) {
      int shift = 8 * i;
      writeBuffer[offset + i] = (byte) ((value >> shift) & 0xff);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy