/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hive.hcatalog.mapreduce;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
import org.apache.orc.OrcConf;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import com.google.common.collect.Maps;

/**
 * This class is a place to put all the code associated with
 * special cases. If there is a corner case required to make
 * a particular format work that is above and beyond the generic
 * use, it belongs here. Over time, the goal is to minimize
 * usage of this class, but it is a useful overflow that allows
 * the main code path to stay as generic as possible while
 * calling attention to the special cases here.
 *
 * Note: for all methods introduced here, please document why
 * the special case is necessary, providing a JIRA number if
 * possible.
*/
public class SpecialCases {
  private static final Logger LOG = LoggerFactory.getLogger(SpecialCases.class);

/**
   * Method to do any file-format-specific special casing while
   * instantiating a storage handler to write. We set any parameters
   * we want to be visible to the job in jobProperties, and these will
   * be available to the job via the jobconf at run time.
   *
   * This is mostly intended to be used by StorageHandlers that wrap
   * file-based OutputFormats, such as FosterStorageHandler, which wraps
   * RCFile, ORC, and others.
*
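   * For illustration, a wrapping storage handler might invoke this roughly as
   * follows (a sketch only; the exact call site and surrounding plumbing vary):
   *
   * <pre>{@code
   * Map<String, String> jobProperties = new HashMap<String, String>();
   * SpecialCases.addSpecialCasesParametersToOutputJobProperties(
   *     jobProperties, jobInfo, ofclass);
   * // The storage handler then merges jobProperties into the job configuration.
   * }</pre>
   *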
* @param jobProperties : map to write to
* @param jobInfo : information about this output job to read from
* @param ofclass : the output format in use
*/
public static void addSpecialCasesParametersToOutputJobProperties(
      Map<String, String> jobProperties,
      OutputJobInfo jobInfo, Class<? extends OutputFormat> ofclass) {
if (ofclass == RCFileOutputFormat.class) {
// RCFile specific parameter
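      // Pass the column count from the HCat output schema so RCFileOutputFormat
      // knows how many columns to expect.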
jobProperties.put(HiveConf.ConfVars.HIVE_RCFILE_COLUMN_NUMBER_CONF.varname,
Integer.toOctalString(
jobInfo.getOutputSchema().getFields().size()));
} else if (ofclass == OrcOutputFormat.class) {
// Special cases for ORC
// We need to check table properties to see if a couple of parameters,
// such as compression parameters are defined. If they are, then we copy
// them to job properties, so that it will be available in jobconf at runtime
// See HIVE-5504 for details
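      // For example, a table created with TBLPROPERTIES ("orc.compress"="SNAPPY")
      // would have that setting forwarded to the job configuration here.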
      Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (OrcConf property : OrcConf.values()){
String propName = property.getAttribute();
if (tableProps.containsKey(propName)){
jobProperties.put(propName, tableProps.get(propName));
}
}
} else if (ofclass == AvroContainerOutputFormat.class) {
// Special cases for Avro. As with ORC, we make table properties that
// Avro is interested in available in jobconf at runtime
      Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (AvroSerdeUtils.AvroTableProperties property : AvroSerdeUtils.AvroTableProperties.values()) {
String propName = property.getPropName();
if (tableProps.containsKey(propName)){
          jobProperties.put(propName, tableProps.get(propName));
}
}
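      // Build a minimal Properties object carrying the table name, which the
      // Avro schema generation below uses when constructing the record schema.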
Properties properties = new Properties();
properties.put("name",jobInfo.getTableName());
      List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
      List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()){
colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
}
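      // If the table does not already define avro.schema.literal, derive an
      // Avro schema from the HCat output columns so one is available at runtime.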
if (jobProperties.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName())==null
|| jobProperties.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()).isEmpty()) {
jobProperties.put(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(),
AvroSerDe.getSchemaFromCols(properties, colNames, colTypes, null).toString());
}
} else if (ofclass == MapredParquetOutputFormat.class) {
      // Handle table properties
      Properties tblProperties = new Properties();
      Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (String key : tableProps.keySet()) {
if (ParquetTableUtils.isParquetProperty(key)) {
tblProperties.put(key, tableProps.get(key));
}
}
      // Handle table schema
      List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
      List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()){
colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
}
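      // Convert the HCat output columns into the Parquet schema string expected
      // by the Parquet write support at run time.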
String parquetSchema = HiveSchemaConverter.convert(colNames, colTypes).toString();
jobProperties.put(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA, parquetSchema);
jobProperties.putAll(Maps.fromProperties(tblProperties));
}
}
/**
   * Method to do any storage-handler-specific special casing while
   * instantiating an HCatLoader.
*
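   * For illustration, a loader might invoke this roughly as follows (a sketch
   * only; the exact call site varies):
   *
   * <pre>{@code
   * // "job" here stands for the MapReduce Job being configured (illustrative).
   * SpecialCases.addSpecialCasesParametersForHCatLoader(
   *     job.getConfiguration(), tableInfo);
   * }</pre>
   *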
* @param conf : configuration to write to
* @param tableInfo : the table definition being used
*/
public static void addSpecialCasesParametersForHCatLoader(
Configuration conf, HCatTableInfo tableInfo) {
if ((tableInfo == null) || (tableInfo.getStorerInfo() == null)){
return;
}
String shClass = tableInfo.getStorerInfo().getStorageHandlerClass();
if ((shClass != null) && shClass.equals("org.apache.hadoop.hive.hbase.HBaseStorageHandler")){
// NOTE: The reason we use a string name of the hive hbase handler here is
// because we do not want to introduce a compile-dependency on the hive-hbase-handler
// module from within hive-hcatalog.
// This parameter was added due to the requirement in HIVE-7072
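      // Setting pig.noSplitCombination prevents Pig from combining input splits,
      // which does not work correctly with the splits produced for HBase-backed tables.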
conf.set("pig.noSplitCombination", "true");
}
}
}