// org.apache.hadoop.hive.hbase.ColumnMappings Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This source file is based on code taken from SQLLine 1.0.2
* See SQLLine notice in LICENSE
*/
package org.apache.hadoop.hive.hbase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import com.google.common.collect.Iterators;
public class ColumnMappings implements Iterable {
private final int keyIndex;
private final int timestampIndex;
private final ColumnMapping[] columnsMapping;
public ColumnMappings(List columnMapping, int keyIndex) {
this(columnMapping, keyIndex, -1);
}
public ColumnMappings(List columnMapping, int keyIndex, int timestampIndex) {
this.columnsMapping = columnMapping.toArray(new ColumnMapping[columnMapping.size()]);
this.keyIndex = keyIndex;
this.timestampIndex = timestampIndex;
}
@Override
public Iterator iterator() {
return Iterators.forArray(columnsMapping);
}
public int size() {
return columnsMapping.length;
}
String toNamesString(Properties tbl, String autogenerate) {
if (autogenerate != null && autogenerate.equals("true")) {
StringBuilder sb = new StringBuilder();
HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
return sb.toString();
}
return StringUtils.EMPTY; // return empty string
}
String toTypesString(Properties tbl, Configuration conf, String autogenerate)
throws SerDeException {
StringBuilder sb = new StringBuilder();
if (autogenerate != null && autogenerate.equals("true")) {
HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
} else {
for (ColumnMapping colMap : columnsMapping) {
if (sb.length() > 0) {
sb.append(":");
}
if (colMap.hbaseRowKey) {
// the row key column becomes a STRING
sb.append(serdeConstants.STRING_TYPE_NAME);
} else if (colMap.qualifierName == null) {
// a column family become a MAP
sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+ serdeConstants.STRING_TYPE_NAME + ">");
} else {
// an individual column becomes a STRING
sb.append(serdeConstants.STRING_TYPE_NAME);
}
}
}
return sb.toString();
}
void setHiveColumnDescription(String serdeName,
List columnNames, List columnTypes) throws SerDeException {
if (columnsMapping.length != columnNames.size()) {
throw new SerDeException(serdeName + ": columns has " + columnNames.size() +
" elements while hbase.columns.mapping has " + columnsMapping.length + " elements" +
" (counting the key if implicit)");
}
// check that the mapping schema is right;
// check that the "column-family:" is mapped to Map
// where key extends LazyPrimitive, ?> and thus has type Category.PRIMITIVE
for (int i = 0; i < columnNames.size(); i++) {
ColumnMapping colMap = columnsMapping[i];
colMap.columnName = columnNames.get(i);
colMap.columnType = columnTypes.get(i);
if (colMap.qualifierName == null && !colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
TypeInfo typeInfo = columnTypes.get(i);
if ((typeInfo.getCategory() != ObjectInspector.Category.MAP) ||
(((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory()
!= ObjectInspector.Category.PRIMITIVE)) {
throw new SerDeException(
serdeName + ": hbase column family '" + colMap.familyName
+ "' should be mapped to Map extends LazyPrimitive, ?>,?>, that is "
+ "the Key for the map should be of primitive type, but is mapped to "
+ typeInfo.getTypeName());
}
}
if (colMap.hbaseTimestamp) {
TypeInfo typeInfo = columnTypes.get(i);
if (!colMap.isCategory(PrimitiveCategory.TIMESTAMP) &&
!colMap.isCategory(PrimitiveCategory.LONG)) {
throw new SerDeException(serdeName + ": timestamp columns should be of " +
"timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
}
}
}
}
/**
* Utility method for parsing a string of the form '-,b,s,-,s:b,...' as a means of specifying
* whether to use a binary or an UTF string format to serialize and de-serialize primitive
* data types like boolean, byte, short, int, long, float, and double. This applies to
* regular columns and also to map column types which are associated with an HBase column
* family. For the map types, we apply the specification to the key or the value provided it
* is one of the above primitive types. The specifier is a colon separated value of the form
* -:s, or b:b where we have 's', 'b', or '-' on either side of the colon. 's' is for string
* format storage, 'b' is for native fixed width byte oriented storage, and '-' uses the
* table level default.
*
* @param hbaseTableDefaultStorageType - the specification associated with the table property
* hbase.table.default.storage.type
* @throws SerDeException on parse error.
*/
void parseColumnStorageTypes(String hbaseTableDefaultStorageType) throws SerDeException {
boolean tableBinaryStorage = false;
if (hbaseTableDefaultStorageType != null && !"".equals(hbaseTableDefaultStorageType)) {
if (hbaseTableDefaultStorageType.equals("binary")) {
tableBinaryStorage = true;
} else if (!hbaseTableDefaultStorageType.equals("string")) {
throw new SerDeException("Error: " + HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE +
" parameter must be specified as" +
" 'string' or 'binary'; '" + hbaseTableDefaultStorageType +
"' is not a valid specification for this table/serde property.");
}
}
// parse the string to determine column level storage type for primitive types
// 's' is for variable length string format storage
// 'b' is for fixed width binary storage of bytes
// '-' is for table storage type, which defaults to UTF8 string
// string data is always stored in the default escaped storage format; the data types
// byte, short, int, long, float, and double have a binary byte oriented storage option
for (ColumnMapping colMap : columnsMapping) {
TypeInfo colType = colMap.columnType;
String mappingSpec = colMap.mappingSpec;
String[] mapInfo = mappingSpec.split("#");
String[] storageInfo = null;
if (mapInfo.length == 2) {
storageInfo = mapInfo[1].split(":");
}
if (storageInfo == null) {
// use the table default storage specification
if (colType.getCategory() == ObjectInspector.Category.PRIMITIVE) {
if (!colType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
colMap.binaryStorage.add(tableBinaryStorage);
} else {
colMap.binaryStorage.add(false);
}
} else if (colType.getCategory() == ObjectInspector.Category.MAP) {
TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();
if (keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
!keyTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
colMap.binaryStorage.add(tableBinaryStorage);
} else {
colMap.binaryStorage.add(false);
}
if (valueTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
!valueTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
colMap.binaryStorage.add(tableBinaryStorage);
} else {
colMap.binaryStorage.add(false);
}
} else {
colMap.binaryStorage.add(false);
}
} else if (storageInfo.length == 1) {
// we have a storage specification for a primitive column type
String storageOption = storageInfo[0];
if ((colType.getCategory() == ObjectInspector.Category.MAP) ||
!(storageOption.equals("-") || "string".startsWith(storageOption) ||
"binary".startsWith(storageOption))) {
throw new SerDeException("Error: A column storage specification is one of the following:"
+ " '-', a prefix of 'string', or a prefix of 'binary'. "
+ storageOption + " is not a valid storage option specification for "
+ colMap.columnName);
}
if (colType.getCategory() == ObjectInspector.Category.PRIMITIVE &&
!colType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
if ("-".equals(storageOption)) {
colMap.binaryStorage.add(tableBinaryStorage);
} else if ("binary".startsWith(storageOption)) {
colMap.binaryStorage.add(true);
} else {
colMap.binaryStorage.add(false);
}
} else {
colMap.binaryStorage.add(false);
}
} else if (storageInfo.length == 2) {
// we have a storage specification for a map column type
String keyStorage = storageInfo[0];
String valStorage = storageInfo[1];
if ((colType.getCategory() != ObjectInspector.Category.MAP) ||
!(keyStorage.equals("-") || "string".startsWith(keyStorage) ||
"binary".startsWith(keyStorage)) ||
!(valStorage.equals("-") || "string".startsWith(valStorage) ||
"binary".startsWith(valStorage))) {
throw new SerDeException("Error: To specify a valid column storage type for a Map"
+ " column, use any two specifiers from '-', a prefix of 'string', "
+ " and a prefix of 'binary' separated by a ':'."
+ " Valid examples are '-:-', 's:b', etc. They specify the storage type for the"
+ " key and value parts of the Map,?> respectively."
+ " Invalid storage specification for column "
+ colMap.columnName
+ "; " + storageInfo[0] + ":" + storageInfo[1]);
}
TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();
if (keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
!keyTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
if (keyStorage.equals("-")) {
colMap.binaryStorage.add(tableBinaryStorage);
} else if ("binary".startsWith(keyStorage)) {
colMap.binaryStorage.add(true);
} else {
colMap.binaryStorage.add(false);
}
} else {
colMap.binaryStorage.add(false);
}
if (valueTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
!valueTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
if (valStorage.equals("-")) {
colMap.binaryStorage.add(tableBinaryStorage);
} else if ("binary".startsWith(valStorage)) {
colMap.binaryStorage.add(true);
} else {
colMap.binaryStorage.add(false);
}
} else {
colMap.binaryStorage.add(false);
}
if (colMap.binaryStorage.size() != 2) {
throw new SerDeException("Error: In parsing the storage specification for column "
+ colMap.columnName);
}
} else {
// error in storage specification
throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING + " storage specification "
+ mappingSpec + " is not valid for column: "
+ colMap.columnName);
}
}
}
public ColumnMapping getKeyMapping() {
return columnsMapping[keyIndex];
}
public ColumnMapping getTimestampMapping() {
return timestampIndex < 0 ? null : columnsMapping[timestampIndex];
}
public int getKeyIndex() {
return keyIndex;
}
public int getTimestampIndex() {
return timestampIndex;
}
public ColumnMapping[] getColumnsMapping() {
return columnsMapping;
}
/**
* Represents a mapping from a single Hive column to an HBase column qualifier, column family or row key.
*/
// todo use final fields
public static class ColumnMapping {
ColumnMapping() {
binaryStorage = new ArrayList(2);
}
String columnName;
TypeInfo columnType;
String familyName;
String qualifierName;
byte[] familyNameBytes;
byte[] qualifierNameBytes;
List binaryStorage;
boolean hbaseRowKey;
boolean hbaseTimestamp;
String mappingSpec;
String qualifierPrefix;
byte[] qualifierPrefixBytes;
public String getColumnName() {
return columnName;
}
public TypeInfo getColumnType() {
return columnType;
}
public String getFamilyName() {
return familyName;
}
public String getQualifierName() {
return qualifierName;
}
public byte[] getFamilyNameBytes() {
return familyNameBytes;
}
public byte[] getQualifierNameBytes() {
return qualifierNameBytes;
}
public List getBinaryStorage() {
return binaryStorage;
}
public boolean isHbaseRowKey() {
return hbaseRowKey;
}
public String getMappingSpec() {
return mappingSpec;
}
public String getQualifierPrefix() {
return qualifierPrefix;
}
public byte[] getQualifierPrefixBytes() {
return qualifierPrefixBytes;
}
public boolean isCategory(ObjectInspector.Category category) {
return columnType.getCategory() == category;
}
public boolean isCategory(PrimitiveCategory category) {
return columnType.getCategory() == ObjectInspector.Category.PRIMITIVE &&
((PrimitiveTypeInfo)columnType).getPrimitiveCategory() == category;
}
public boolean isComparable() {
return binaryStorage.get(0) || isCategory(PrimitiveCategory.STRING);
}
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy