org.elasticsearch.hadoop.hive.HiveUtils Maven / Gradle / Ivy
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.hive;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.InitializationUtils;
import org.elasticsearch.hadoop.util.FieldAlias;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.SettingsUtils;
import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.unit.Booleans;
abstract class HiveUtils {
// Date type available since Hive 0.12
static final boolean DATE_WRITABLE_AVAILABLE = ObjectUtils.isClassPresent(HiveConstants.DATE_WRITABLE,
TimestampWritable.class.getClassLoader());
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
// extract column info - don't use Hive constants as they were renamed in 0.9 breaking compatibility
// the column names are saved as the given inspector to #serialize doesn't preserves them (maybe because it's an external table)
// use the class since StructType requires it ...
List columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
List colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));
// create a standard writable Object Inspector - used later on by serialization/deserialization
List inspectors = new ArrayList();
for (TypeInfo typeInfo : colTypes) {
inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
}
return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
static StructTypeInfo typeInfo(StructObjectInspector inspector) {
return (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);
}
static Collection columnToAlias(Settings settings) {
FieldAlias fa = alias(settings);
List columnNames = StringUtils.tokenize(settings.getProperty(HiveConstants.COLUMNS), ",");
// eliminate virtual columns
// we can't use virtual columns since some distro don't have this field...
// for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
// columnNames.remove(vc.getName());
// }
for (String vc : HiveConstants.VIRTUAL_COLUMNS) {
columnNames.remove(vc);
}
for (int i = 0; i < columnNames.size(); i++) {
String original = columnNames.get(i);
String alias = fa.toES(original);
if (alias != null) {
columnNames.set(i, alias);
}
}
return columnNames;
}
static FieldAlias alias(Settings settings) {
Map aliasMap = SettingsUtils.aliases(settings.getProperty(HiveConstants.MAPPING_NAMES), true);
// add default aliases for serialization (_colX -> mapping name)
Map columnMap = columnMap(settings);
for (Entry entry : columnMap.entrySet()) {
String columnName = entry.getKey();
String columnIndex = entry.getValue();
if (!aliasMap.isEmpty()) {
String alias = aliasMap.get(columnName);
if (alias != null) {
columnName = alias;
}
}
aliasMap.put(columnIndex, columnName);
}
return new FieldAlias(aliasMap, true);
}
static Map columnMap(Settings settings) {
return columnMap(settings.getProperty(HiveConstants.COLUMNS));
}
// returns a map of {:_colX}
private static Map columnMap(String columnString) {
// add default aliases for serialization (mapping name -> _colX)
List columnNames = StringUtils.tokenize(columnString, ",");
if (columnNames.isEmpty()) {
return Collections.emptyMap();
}
Map columns = new LinkedHashMap();
for (int i = 0; i < columnNames.size(); i++) {
columns.put(columnNames.get(i), HiveConstants.UNNAMED_COLUMN_PREFIX + i);
}
return columns;
}
static void init(Settings settings, Log log) {
InitializationUtils.checkIdForOperation(settings);
InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
InitializationUtils.discoverEsVersion(settings, log);
}
static void fixHive13InvalidComments(Settings settings, Properties tbl) {
if (Booleans.parseBoolean(settings.getProperty("es.hive.disable.columns.comments.fix"))) {
return;
}
settings.setProperty(HiveConstants.COLUMN_COMMENTS, "");
tbl.setProperty(HiveConstants.COLUMN_COMMENTS, "");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy