org.apache.hadoop.hive.ql.metadata.VirtualColumn Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-exec
There is a newer version: 4.0.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.metadata;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

/**
 * Enumeration of the virtual columns known to this module. Each constant carries the
 * column name, its {@link TypeInfo}, a "hidden" flag and the {@link ObjectInspector}
 * used to read its values.
 *
 * <p>Besides the constants, the enum exposes name-based lookup structures
 * ({@link #VIRTUAL_COLUMN_NAMES}, {@link #VIRTUAL_COLUMN_NAME_MAP}) and helpers that
 * strip virtual columns from column-name / column-type lists.
 */
@InterfaceAudience.Private
public enum VirtualColumn {
  FILENAME("INPUT__FILE__NAME", TypeInfoFactory.stringTypeInfo),
  BLOCKOFFSET("BLOCK__OFFSET__INSIDE__FILE", TypeInfoFactory.longTypeInfo),
  ROWOFFSET("ROW__OFFSET__INSIDE__BLOCK", TypeInfoFactory.longTypeInfo),

  RAWDATASIZE("RAW__DATA__SIZE", TypeInfoFactory.longTypeInfo),
  /**
   * {@link org.apache.hadoop.hive.ql.io.RecordIdentifier}
   */
  ROWID("ROW__ID", RecordIdentifier.StructInfo.typeInfo, true, RecordIdentifier.StructInfo.oi),
  ROWISDELETED("ROW__IS__DELETED", TypeInfoFactory.booleanTypeInfo),
  PARTITION_SPEC_ID("PARTITION__SPEC__ID", TypeInfoFactory.intTypeInfo),
  PARTITION_HASH("PARTITION__HASH", TypeInfoFactory.longTypeInfo),
  FILE_PATH("FILE__PATH", TypeInfoFactory.stringTypeInfo),
  ROW_POSITION("ROW__POSITION", TypeInfoFactory.longTypeInfo),
  SNAPSHOT_ID("SNAPSHOT__ID", TypeInfoFactory.longTypeInfo),

  /**
   * GROUPINGID is used with GROUP BY GROUPING SETS, ROLLUP and CUBE.
   * It composes a bit vector with the "0" and "1" values for every
   * column which is in the GROUP BY section. "1" is for a row in the result
   * set if that column has been aggregated in that row. Otherwise the
   * value is "0".  Returns the decimal representation of the bit vector.
   */
  GROUPINGID("GROUPING__ID", TypeInfoFactory.longTypeInfo);

  /**
   * Names treated as virtual columns by the name-based helpers in this enum.
   *
   * <p>NOTE(review): {@code SNAPSHOT_ID} is declared above but is not part of this set;
   * confirm whether that omission is intentional before relying on this set being
   * exhaustive.
   */
  public static final ImmutableSet<String> VIRTUAL_COLUMN_NAMES =
      ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(),
          RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName(), ROWISDELETED.getName(),
          PARTITION_SPEC_ID.getName(), PARTITION_HASH.getName(), FILE_PATH.getName(), ROW_POSITION.getName());

  /**
   * Immutable lookup from column name to its enum constant, covering every constant
   * returned by {@link #values()}.
   */
  public static final ImmutableMap<String, VirtualColumn> VIRTUAL_COLUMN_NAME_MAP =
      new ImmutableMap.Builder<String, VirtualColumn>().putAll(getColumnNameMap()).build();

  /**
   * Builds a mutable name-to-constant map over all enum constants; used only to seed
   * {@link #VIRTUAL_COLUMN_NAME_MAP}.
   */
  private static Map<String, VirtualColumn> getColumnNameMap() {
    Map<String, VirtualColumn> map = new HashMap<>();
    for (VirtualColumn virtualColumn : values()) {
      map.put(virtualColumn.name, virtualColumn);
    }
    return map;
  }

  private final String name;
  private final TypeInfo typeInfo;
  private final boolean isHidden;
  private final ObjectInspector oi;

  /**
   * Creates a hidden virtual column of a primitive type; the object inspector is the
   * primitive writable inspector matching {@code typeInfo}.
   */
  VirtualColumn(String name, PrimitiveTypeInfo typeInfo) {
    this(name, typeInfo, true,
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo));
  }

  /**
   * Creates a virtual column with an explicitly supplied hidden flag and object inspector.
   */
  VirtualColumn(String name, TypeInfo typeInfo, boolean isHidden, ObjectInspector oi) {
    this.name = name;
    this.typeInfo = typeInfo;
    this.isHidden = isHidden;
    this.oi = oi;
  }

  /**
   * Returns a new mutable list containing only {@link #RAWDATASIZE}.
   *
   * @param conf not consulted by this method
   * @return a freshly allocated list the caller may modify
   */
  public static List<VirtualColumn> getStatsRegistry(Configuration conf) {
    List<VirtualColumn> l = new ArrayList<>();
    l.add(RAWDATASIZE);
    return l;
  }

  /**
   * Returns a new mutable list with {@link #BLOCKOFFSET}, {@link #FILENAME},
   * {@link #ROWID} and {@link #ROWISDELETED}; {@link #ROWOFFSET} is included only when
   * {@code HiveConf.ConfVars.HIVEROWOFFSET} is enabled in {@code conf}.
   *
   * @param conf configuration consulted for the row-offset flag
   * @return a freshly allocated list the caller may modify
   */
  public static List<VirtualColumn> getRegistry(Configuration conf) {
    List<VirtualColumn> l = new ArrayList<>();
    l.add(BLOCKOFFSET);
    l.add(FILENAME);
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEROWOFFSET)) {
      l.add(ROWOFFSET);
    }
    l.add(ROWID);
    l.add(ROWISDELETED);

    return l;
  }

  /** @return the type of this virtual column */
  public TypeInfo getTypeInfo() {
    return typeInfo;
  }

  /** @return the column name (for example {@code "ROW__ID"}), not the enum constant name */
  public String getName() {
    return this.name;
  }

  /** @return the hidden flag supplied at construction */
  public boolean isHidden() {
    return isHidden;
  }

  /** Bean-style alias of {@link #isHidden()}. */
  public boolean getIsHidden() {
    return isHidden();
  }

  /** @return the object inspector associated with this virtual column */
  public ObjectInspector getObjectInspector() {
    return oi;
  }

  /**
   * Removes, in place, every element of {@code columns} that is contained in
   * {@link #VIRTUAL_COLUMN_NAMES}. Names are compared exactly as given.
   *
   * @param columns mutable collection of column names; modified by this call
   * @return the same {@code columns} instance
   */
  public static Collection<String> removeVirtualColumns(final Collection<String> columns) {
    Iterables.removeAll(columns, VIRTUAL_COLUMN_NAMES);
    return columns;
  }

  /**
   * Removes, in place, the entries of {@code columnTypes} whose positionally matching
   * entry in {@code columnNames} is contained in {@link #VIRTUAL_COLUMN_NAMES}.
   * {@code columnNames} itself is left untouched.
   *
   * @param columnNames column names, parallel to {@code columnTypes}
   * @param columnTypes mutable list of column types; modified by this call
   * @return the same {@code columnTypes} instance
   * @throws IllegalArgumentException if the two lists differ in size
   */
  public static List<TypeInfo> removeVirtualColumnTypes(final List<String> columnNames,
      final List<TypeInfo> columnTypes) {
    if (columnNames.size() != columnTypes.size()) {
      throw new IllegalArgumentException("Number of column names in configuration " +
          columnNames.size() + " differs from column types " + columnTypes.size());
    }

    // 'i' tracks the original position: only columnTypes shrinks, so it keeps indexing
    // columnNames correctly while the iterator handles removals from columnTypes.
    int i = 0;
    ListIterator<TypeInfo> it = columnTypes.listIterator();
    while (it.hasNext()) {
      it.next();
      if (VIRTUAL_COLUMN_NAMES.contains(columnNames.get(i))) {
        it.remove();
      }
      ++i;
    }
    return columnTypes;
  }

  /**
   * Builds a standard struct object inspector whose fields are the given virtual
   * columns, in list order, using each column's name and object inspector.
   *
   * @param vcs virtual columns to expose as struct fields
   * @return struct inspector over the given columns
   */
  public static StructObjectInspector getVCSObjectInspector(List<VirtualColumn> vcs) {
    List<String> names = new ArrayList<>(vcs.size());
    List<ObjectInspector> inspectors = new ArrayList<>(vcs.size());
    for (VirtualColumn vc : vcs) {
      names.add(vc.getName());
      inspectors.add(vc.oi);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
  }

  /**
   * Tells whether the alias of {@code column}, upper-cased, is one of
   * {@link #VIRTUAL_COLUMN_NAMES}.
   *
   * @param column column whose alias is inspected
   * @return {@code true} if the alias is non-null and names a virtual column
   */
  public static boolean isVirtualColumnBasedOnAlias(ColumnInfo column) {
    // Not using method column.getIsVirtualCol() because partitioning columns
    // are also treated as virtual columns in ColumnInfo.
    String alias = column.getAlias();
    // Locale.ROOT keeps the case mapping independent of the JVM default locale
    // (e.g. Turkish would map 'i' to a dotted capital and miss INPUT__FILE__NAME).
    return alias != null && VIRTUAL_COLUMN_NAMES.contains(alias.toUpperCase(Locale.ROOT));
  }
}