org.apache.hadoop.hive.ql.metadata.VirtualColumn Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.metadata;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.ListIterator;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
/**
 * Describes a virtual column: a name, a {@link TypeInfo}, a hidden flag and the
 * {@link ObjectInspector} used for the column's values.
 *
 * <p>Instances cannot be created outside this class (both constructors are private);
 * callers use the predefined constants and the static registry/helper methods.
 *
 * <p>NOTE(review): the class is {@link Serializable} but holds an {@link ObjectInspector}
 * field; whether every inspector used here is Java-serializable is not visible from this
 * file -- confirm against the serialization mechanism actually in use.
 */
@InterfaceAudience.Private
public class VirtualColumn implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Virtual column named {@code INPUT__FILE__NAME}, typed as string. */
  public static final VirtualColumn FILENAME =
      new VirtualColumn("INPUT__FILE__NAME", (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo);

  /** Virtual column named {@code BLOCK__OFFSET__INSIDE__FILE}, typed as long. */
  public static final VirtualColumn BLOCKOFFSET =
      new VirtualColumn("BLOCK__OFFSET__INSIDE__FILE", (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo);

  /** Virtual column named {@code ROW__OFFSET__INSIDE__BLOCK}, typed as long. */
  public static final VirtualColumn ROWOFFSET =
      new VirtualColumn("ROW__OFFSET__INSIDE__BLOCK", (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo);

  /** Virtual column named {@code RAW__DATA__SIZE}, typed as long. */
  public static final VirtualColumn RAWDATASIZE =
      new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo);

  /**
   * Virtual column named {@code ROW__ID}; its type and inspector come from
   * {@link org.apache.hadoop.hive.ql.io.RecordIdentifier}.
   */
  public static final VirtualColumn ROWID =
      new VirtualColumn("ROW__ID", RecordIdentifier.StructInfo.typeInfo, true,
          RecordIdentifier.StructInfo.oi);

  /**
   * GROUPINGID is used with GROUP BY GROUPING SETS, ROLLUP and CUBE.
   * It composes a bit vector with the "0" and "1" values for every
   * column which is in the GROUP BY section. "1" is for a row in the result
   * set if that column has been aggregated in that row. Otherwise the
   * value is "0". Returns the decimal representation of the bit vector.
   */
  public static final VirtualColumn GROUPINGID =
      new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo);

  /**
   * Names of all virtual columns declared above. Must stay below those constants:
   * static initializers run in textual order and this set reads their names.
   */
  public static final ImmutableSet<String> VIRTUAL_COLUMN_NAMES =
      ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(),
          RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName());

  private final String name;
  private final TypeInfo typeInfo;
  private final boolean isHidden;
  private final ObjectInspector oi;

  /**
   * Creates a hidden virtual column of a primitive type, using the writable
   * object inspector that corresponds to that primitive type.
   *
   * @param name column name
   * @param typeInfo primitive type of the column
   */
  private VirtualColumn(String name, PrimitiveTypeInfo typeInfo) {
    this(name, typeInfo, true,
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo));
  }

  /**
   * Creates a virtual column from fully specified attributes.
   *
   * @param name column name
   * @param typeInfo type of the column
   * @param isHidden value reported by {@link #isHidden()}
   * @param oi object inspector for the column's values
   */
  private VirtualColumn(String name, TypeInfo typeInfo, boolean isHidden, ObjectInspector oi) {
    this.name = name;
    this.typeInfo = typeInfo;
    this.isHidden = isHidden;
    this.oi = oi;
  }

  /**
   * Returns the virtual columns used for statistics collection.
   *
   * @param conf configuration consulted for {@code HIVE_STATS_COLLECT_RAWDATASIZE}
   * @return a new mutable list containing {@link #RAWDATASIZE} when that setting is true,
   *         otherwise an empty list
   */
  public static List<VirtualColumn> getStatsRegistry(Configuration conf) {
    List<VirtualColumn> registry = new ArrayList<VirtualColumn>();
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_RAWDATASIZE)) {
      registry.add(RAWDATASIZE);
    }
    return registry;
  }

  /**
   * Returns the general registry of virtual columns.
   *
   * @param conf configuration consulted for {@code HIVEROWOFFSET}
   * @return a new mutable list holding, in order, {@link #BLOCKOFFSET}, {@link #FILENAME},
   *         {@link #ROWOFFSET} (only when the setting is true) and {@link #ROWID}
   */
  public static List<VirtualColumn> getRegistry(Configuration conf) {
    List<VirtualColumn> registry = new ArrayList<VirtualColumn>();
    registry.add(BLOCKOFFSET);
    registry.add(FILENAME);
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEROWOFFSET)) {
      registry.add(ROWOFFSET);
    }
    registry.add(ROWID);
    return registry;
  }

  /** @return the type of this column */
  public TypeInfo getTypeInfo() {
    return typeInfo;
  }

  /** @return the name of this column */
  public String getName() {
    return this.name;
  }

  /** @return the hidden flag this column was created with */
  public boolean isHidden() {
    return isHidden;
  }

  /**
   * Same value as {@link #isHidden()}.
   *
   * @return the hidden flag this column was created with
   */
  public boolean getIsHidden() {
    return isHidden;
  }

  /** @return the object inspector for this column's values */
  public ObjectInspector getObjectInspector() {
    return oi;
  }

  /**
   * Two virtual columns are equal when both their names and the type names of their
   * {@link TypeInfo} are equal; the hidden flag and the inspector are not compared.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof VirtualColumn)) {
      return false;
    }
    VirtualColumn other = (VirtualColumn) o;
    return this.name.equals(other.name)
        && this.typeInfo.getTypeName().equals(other.getTypeInfo().getTypeName());
  }

  /** Hash over the same two attributes that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    int c = 19;
    c = 31 * name.hashCode() + c;
    return 31 * typeInfo.getTypeName().hashCode() + c;
  }

  /**
   * Removes every name contained in {@link #VIRTUAL_COLUMN_NAMES} from the given collection.
   * The collection is modified in place, so it must support removal.
   *
   * @param columns column names to filter
   * @return the same {@code columns} instance, after removal
   */
  public static Collection<String> removeVirtualColumns(final Collection<String> columns) {
    Iterables.removeAll(columns, VIRTUAL_COLUMN_NAMES);
    return columns;
  }

  /**
   * Removes from {@code columnTypes} each entry whose position corresponds to a virtual
   * column name in {@code columnNames}. Only {@code columnTypes} is modified (in place);
   * {@code columnNames} is left untouched.
   *
   * @param columnNames column names, positionally aligned with {@code columnTypes}
   * @param columnTypes column types to filter; must support removal through its iterator
   * @return the same {@code columnTypes} instance, after removal
   * @throws IllegalArgumentException if the two lists differ in size
   */
  public static List<TypeInfo> removeVirtualColumnTypes(final List<String> columnNames,
      final List<TypeInfo> columnTypes) {
    if (columnNames.size() != columnTypes.size()) {
      throw new IllegalArgumentException("Number of column names in configuration " +
          columnNames.size() + " differs from column types " + columnTypes.size());
    }
    // The index tracks the position in the untouched names list, so it advances on every
    // step regardless of whether the current type was removed.
    int nameIndex = 0;
    ListIterator<TypeInfo> typeIt = columnTypes.listIterator();
    while (typeIt.hasNext()) {
      typeIt.next();
      if (VIRTUAL_COLUMN_NAMES.contains(columnNames.get(nameIndex))) {
        typeIt.remove();
      }
      ++nameIndex;
    }
    return columnTypes;
  }

  /**
   * Builds a standard struct object inspector whose fields are the given virtual columns,
   * in list order, using each column's name and object inspector.
   *
   * @param vcs virtual columns to expose as struct fields
   * @return struct inspector over the given columns
   */
  public static StructObjectInspector getVCSObjectInspector(List<VirtualColumn> vcs) {
    List<String> names = new ArrayList<String>(vcs.size());
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(vcs.size());
    for (VirtualColumn vc : vcs) {
      names.add(vc.getName());
      inspectors.add(vc.oi);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy