All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.hive.orc.OrcSerde Maven / Gradle / Ivy

There is a newer version: 0.18.9
Show newest version
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.orc;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import com.facebook.hive.orc.lazy.OrcLazyRowObjectInspector;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;

/**
 * A serde class for ORC.
 * It transparently passes the object to/from the ORC file reader/writer.
 */
public class OrcSerde implements SerDe {
  private static final ArrayList EMPTY_STRING_ARRAYLIST = new ArrayList(0);
  private final OrcSerdeRow row = new OrcSerdeRow();
  private ObjectInspector inspector = null;

  final class OrcSerdeRow implements Writable {
    private Object realRow;
    private ObjectInspector inspector;

    @Override
    public void write(DataOutput dataOutput) throws IOException {
      throw new UnsupportedOperationException("can't write the bundle");
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
      throw new UnsupportedOperationException("can't read the bundle");
    }

    ObjectInspector getInspector() {
      return inspector;
    }

    Object getRow() {
      return realRow;
    }
  }

  @Override
  public void initialize(Configuration conf, Properties table) {
    // Read the configuration parameters
    String columnNameProperty = table.getProperty("columns");
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = table.getProperty("columns.types");

    // Parse the configuration parameters
    ArrayList columnNames = EMPTY_STRING_ARRAYLIST;
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
      String[] splits = columnNameProperty.split(",");
      columnNames = new ArrayList(splits.length);

      for(String name: splits) {
        columnNames.add(name);
      }
    }
    if (columnTypeProperty == null) {
      // Default type: all string
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < columnNames.size(); i++) {
        if (i > 0) {
          sb.append(":");
        }
        sb.append("string");
      }
      columnTypeProperty = sb.toString();
    }

    ArrayList fieldTypes =
      TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    StructTypeInfo rootType = new StructTypeInfo();
    rootType.setAllStructFieldNames(columnNames);
    rootType.setAllStructFieldTypeInfos(fieldTypes);
    inspector = new OrcLazyRowObjectInspector(rootType);
  }

  @Override
  public Class getSerializedClass() {
    return OrcSerdeRow.class;
  }

  @Override
  public Writable serialize(Object realRow, ObjectInspector inspector) {
    row.realRow = realRow;
    row.inspector = inspector;
    return row;
  }

  @Override
  public Object deserialize(Writable writable) throws SerDeException {
    return writable;
  }

  @Override
  public ObjectInspector getObjectInspector() throws SerDeException {
    return inspector;
  }

  /**
   * Always returns null, since serialized size doesn't make sense in the
   * context of ORC files.
   * @return null
   */
  @Override
  public SerDeStats getSerDeStats() {
    return null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy