All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.hbase.ColumnMappings Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This source file is based on code taken from SQLLine 1.0.2
 * See SQLLine notice in LICENSE
 */

package org.apache.hadoop.hive.hbase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

import com.google.common.collect.Iterators;

public class ColumnMappings implements Iterable {

  private final int keyIndex;
  private final int timestampIndex;
  private final ColumnMapping[] columnsMapping;

  public ColumnMappings(List columnMapping, int keyIndex) {
    this(columnMapping, keyIndex, -1);
  }

  public ColumnMappings(List columnMapping, int keyIndex, int timestampIndex) {
    this.columnsMapping = columnMapping.toArray(new ColumnMapping[columnMapping.size()]);
    this.keyIndex = keyIndex;
    this.timestampIndex = timestampIndex;
  }

  @Override
  public Iterator iterator() {
    return Iterators.forArray(columnsMapping);
  }

  public int size() {
    return columnsMapping.length;
  }

  String toNamesString(Properties tbl, String autogenerate) {
    if (autogenerate != null && autogenerate.equals("true")) {
      StringBuilder sb = new StringBuilder();
      HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
      return sb.toString();
    }

    return StringUtils.EMPTY; // return empty string
  }

  String toTypesString(Properties tbl, Configuration conf, String autogenerate)
      throws SerDeException {
    StringBuilder sb = new StringBuilder();

    if (autogenerate != null && autogenerate.equals("true")) {
      HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
    } else {
      for (ColumnMapping colMap : columnsMapping) {
        if (sb.length() > 0) {
          sb.append(":");
        }
        if (colMap.hbaseRowKey) {
          // the row key column becomes a STRING
          sb.append(serdeConstants.STRING_TYPE_NAME);
        } else if (colMap.qualifierName == null) {
          // a column family become a MAP
          sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
              + serdeConstants.STRING_TYPE_NAME + ">");
        } else {
          // an individual column becomes a STRING
          sb.append(serdeConstants.STRING_TYPE_NAME);
        }
      }
    }

    return sb.toString();
  }

  void setHiveColumnDescription(String serdeName,
      List columnNames, List columnTypes) throws SerDeException {
    if (columnsMapping.length != columnNames.size()) {
      throw new SerDeException(serdeName + ": columns has " + columnNames.size() +
          " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" +
          " (counting the key if implicit)");
    }

    // check that the mapping schema is right;
    // check that the "column-family:" is mapped to  Map
    // where key extends LazyPrimitive and thus has type Category.PRIMITIVE
    for (int i = 0; i < columnNames.size(); i++) {
      ColumnMapping colMap = columnsMapping[i];
      colMap.columnName = columnNames.get(i);
      colMap.columnType = columnTypes.get(i);
      if (colMap.qualifierName == null && !colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
        TypeInfo typeInfo = columnTypes.get(i);
        if ((typeInfo.getCategory() != ObjectInspector.Category.MAP) ||
            (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory()
                != ObjectInspector.Category.PRIMITIVE)) {

          throw new SerDeException(
              serdeName + ": hbase column family '" + colMap.familyName
                  + "' should be mapped to Map,?>, that is "
                  + "the Key for the map should be of primitive type, but is mapped to "
                  + typeInfo.getTypeName());
        }
      }
      if (colMap.hbaseTimestamp) {
        TypeInfo typeInfo = columnTypes.get(i);
        if (!colMap.isCategory(PrimitiveCategory.TIMESTAMP) &&
            !colMap.isCategory(PrimitiveCategory.LONG)) {
          throw new SerDeException(serdeName + ": timestamp columns should be of " +
              "timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
        }
      }
    }
  }

  /**
   * Utility method for parsing a string of the form '-,b,s,-,s:b,...' as a means of specifying
   * whether to use a binary or an UTF string format to serialize and de-serialize primitive
   * data types like boolean, byte, short, int, long, float, and double. This applies to
   * regular columns and also to map column types which are associated with an HBase column
   * family. For the map types, we apply the specification to the key or the value provided it
   * is one of the above primitive types. The specifier is a colon separated value of the form
   * -:s, or b:b where we have 's', 'b', or '-' on either side of the colon. 's' is for string
   * format storage, 'b' is for native fixed width byte oriented storage, and '-' uses the
   * table level default.
   *
   * @param hbaseTableDefaultStorageType - the specification associated with the table property
   *        hbase.table.default.storage.type
   * @throws SerDeException on parse error.
   */
  void parseColumnStorageTypes(String hbaseTableDefaultStorageType) throws SerDeException {

    boolean tableBinaryStorage = false;

    if (hbaseTableDefaultStorageType != null && !"".equals(hbaseTableDefaultStorageType)) {
      if (hbaseTableDefaultStorageType.equals("binary")) {
        tableBinaryStorage = true;
      } else if (!hbaseTableDefaultStorageType.equals("string")) {
        throw new SerDeException("Error: " + HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE +
            " parameter must be specified as" +
            " 'string' or 'binary'; '" + hbaseTableDefaultStorageType +
            "' is not a valid specification for this table/serde property.");
      }
    }

    // parse the string to determine column level storage type for primitive types
    // 's' is for variable length string format storage
    // 'b' is for fixed width binary storage of bytes
    // '-' is for table storage type, which defaults to UTF8 string
    // string data is always stored in the default escaped storage format; the data types
    // byte, short, int, long, float, and double have a binary byte oriented storage option
    for (ColumnMapping colMap : columnsMapping) {
      TypeInfo colType = colMap.columnType;
      String mappingSpec = colMap.mappingSpec;
      String[] mapInfo = mappingSpec.split("#");
      String[] storageInfo = null;

      if (mapInfo.length == 2) {
        storageInfo = mapInfo[1].split(":");
      }

      if (storageInfo == null) {

        // use the table default storage specification
        if (colType.getCategory() == ObjectInspector.Category.PRIMITIVE) {
          if (!colType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
            colMap.binaryStorage.add(tableBinaryStorage);
          } else {
            colMap.binaryStorage.add(false);
          }
        } else if (colType.getCategory() == ObjectInspector.Category.MAP) {
          TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
          TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();

          if (keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
              !keyTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
            colMap.binaryStorage.add(tableBinaryStorage);
          } else {
            colMap.binaryStorage.add(false);
          }

          if (valueTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
              !valueTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
            colMap.binaryStorage.add(tableBinaryStorage);
          } else {
            colMap.binaryStorage.add(false);
          }
        } else {
          colMap.binaryStorage.add(false);
        }

      } else if (storageInfo.length == 1) {
        // we have a storage specification for a primitive column type
        String storageOption = storageInfo[0];

        if ((colType.getCategory() == ObjectInspector.Category.MAP) ||
            !(storageOption.equals("-") || "string".startsWith(storageOption) ||
                "binary".startsWith(storageOption))) {
          throw new SerDeException("Error: A column storage specification is one of the following:"
              + " '-', a prefix of 'string', or a prefix of 'binary'. "
              + storageOption + " is not a valid storage option specification for "
              + colMap.columnName);
        }

        if (colType.getCategory() == ObjectInspector.Category.PRIMITIVE &&
            !colType.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {

          if ("-".equals(storageOption)) {
            colMap.binaryStorage.add(tableBinaryStorage);
          } else if ("binary".startsWith(storageOption)) {
            colMap.binaryStorage.add(true);
          } else {
            colMap.binaryStorage.add(false);
          }
        } else {
          colMap.binaryStorage.add(false);
        }

      } else if (storageInfo.length == 2) {
        // we have a storage specification for a map column type

        String keyStorage = storageInfo[0];
        String valStorage = storageInfo[1];

        if ((colType.getCategory() != ObjectInspector.Category.MAP) ||
            !(keyStorage.equals("-") || "string".startsWith(keyStorage) ||
                "binary".startsWith(keyStorage)) ||
            !(valStorage.equals("-") || "string".startsWith(valStorage) ||
                "binary".startsWith(valStorage))) {
          throw new SerDeException("Error: To specify a valid column storage type for a Map"
              + " column, use any two specifiers from '-', a prefix of 'string', "
              + " and a prefix of 'binary' separated by a ':'."
              + " Valid examples are '-:-', 's:b', etc. They specify the storage type for the"
              + " key and value parts of the Map respectively."
              + " Invalid storage specification for column "
              + colMap.columnName
              + "; " + storageInfo[0] + ":" + storageInfo[1]);
        }

        TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
        TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();

        if (keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
            !keyTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {

          if (keyStorage.equals("-")) {
            colMap.binaryStorage.add(tableBinaryStorage);
          } else if ("binary".startsWith(keyStorage)) {
            colMap.binaryStorage.add(true);
          } else {
            colMap.binaryStorage.add(false);
          }
        } else {
          colMap.binaryStorage.add(false);
        }

        if (valueTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE &&
            !valueTypeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) {
          if (valStorage.equals("-")) {
            colMap.binaryStorage.add(tableBinaryStorage);
          } else if ("binary".startsWith(valStorage)) {
            colMap.binaryStorage.add(true);
          } else {
            colMap.binaryStorage.add(false);
          }
        } else {
          colMap.binaryStorage.add(false);
        }

        if (colMap.binaryStorage.size() != 2) {
          throw new SerDeException("Error: In parsing the storage specification for column "
              + colMap.columnName);
        }

      } else {
        // error in storage specification
        throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING + " storage specification "
            + mappingSpec + " is not valid for column: "
            + colMap.columnName);
      }
    }
  }

  public ColumnMapping getKeyMapping() {
    return columnsMapping[keyIndex];
  }

  public ColumnMapping getTimestampMapping() {
    return timestampIndex < 0 ? null : columnsMapping[timestampIndex];
  }

  public int getKeyIndex() {
    return keyIndex;
  }

  public int getTimestampIndex() {
    return timestampIndex;
  }

  public ColumnMapping[] getColumnsMapping() {
    return columnsMapping;
  }

  /**
   * Represents a mapping from a single Hive column to an HBase column qualifier, column family or row key.
   */
  // todo use final fields
  public static class ColumnMapping {

    ColumnMapping() {
      binaryStorage = new ArrayList(2);
    }

    String columnName;
    TypeInfo columnType;

    String familyName;
    String qualifierName;
    byte[] familyNameBytes;
    byte[] qualifierNameBytes;
    List binaryStorage;
    boolean hbaseRowKey;
    boolean hbaseTimestamp;
    String mappingSpec;
    String qualifierPrefix;
    byte[] qualifierPrefixBytes;
    boolean doPrefixCut;

    public String getColumnName() {
      return columnName;
    }

    public TypeInfo getColumnType() {
      return columnType;
    }

    public String getFamilyName() {
      return familyName;
    }

    public String getQualifierName() {
      return qualifierName;
    }

    public byte[] getFamilyNameBytes() {
      return familyNameBytes;
    }

    public byte[] getQualifierNameBytes() {
      return qualifierNameBytes;
    }

    public List getBinaryStorage() {
      return binaryStorage;
    }

    public boolean isHbaseRowKey() {
      return hbaseRowKey;
    }

    public String getMappingSpec() {
      return mappingSpec;
    }

    public String getQualifierPrefix() {
      return qualifierPrefix;
    }

    public byte[] getQualifierPrefixBytes() {
      return qualifierPrefixBytes;
    }

    public boolean isDoPrefixCut(){
      return doPrefixCut;
    }

    public boolean isCategory(ObjectInspector.Category category) {
      return columnType.getCategory() == category;
    }

    public boolean isCategory(PrimitiveCategory category) {
      return columnType.getCategory() == ObjectInspector.Category.PRIMITIVE &&
          ((PrimitiveTypeInfo)columnType).getPrimitiveCategory() == category;
    }

    public boolean isComparable() {
      return binaryStorage.get(0) || isCategory(PrimitiveCategory.STRING);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy