org.apache.hadoop.hive.serde2.avro.AvroSerDe Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.avro;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.hadoop.hive.common.StringInternUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
/**
* Read or write Avro data from Hive.
*/
@SerDeSpec(schemaProps = {
serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES,
AvroSerDe.LIST_COLUMN_COMMENTS, AvroSerDe.TABLE_NAME, AvroSerDe.TABLE_COMMENT,
AvroSerdeUtils.SCHEMA_LITERAL, AvroSerdeUtils.SCHEMA_URL,
AvroSerdeUtils.SCHEMA_NAMESPACE, AvroSerdeUtils.SCHEMA_NAME, AvroSerdeUtils.SCHEMA_DOC})
public class AvroSerDe extends AbstractSerDe {
private static final Logger LOG = LoggerFactory.getLogger(AvroSerDe.class);
public static final String TABLE_NAME = "name";
public static final String TABLE_COMMENT = "comment";
public static final String LIST_COLUMN_COMMENTS = "columns.comments";
public static final String DECIMAL_TYPE_NAME = "decimal";
public static final String CHAR_TYPE_NAME = "char";
public static final String VARCHAR_TYPE_NAME = "varchar";
public static final String DATE_TYPE_NAME = "date";
public static final String TIMESTAMP_TYPE_NAME = "timestamp-millis";
public static final String WRITER_TIME_ZONE = "writer.time.zone";
public static final String WRITER_PROLEPTIC = "writer.proleptic";
public static final String WRITER_ZONE_CONVERSION_LEGACY = "writer.zone.conversion.legacy";
public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType";
public static final String AVRO_PROP_PRECISION = "precision";
public static final String AVRO_PROP_SCALE = "scale";
public static final String AVRO_PROP_MAX_LENGTH = "maxLength";
public static final String AVRO_STRING_TYPE_NAME = "string";
public static final String AVRO_INT_TYPE_NAME = "int";
public static final String AVRO_LONG_TYPE_NAME = "long";
private ObjectInspector oi;
private List columnNames;
private List columnTypes;
private Schema schema;
private AvroDeserializer avroDeserializer = null;
private AvroSerializer avroSerializer = null;
private boolean badSchema = false;
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties)
throws SerDeException {
/*
* Avro should always use the table properties for initialization (see
* HIVE-6835). The tableProperties is modified directly by this SerDe when
* the user supplies a schema file so do not make a copy.
*/
super.initialize(configuration, tableProperties, null);
// Reset member variables so we don't get in a half-constructed state
if (schema != null) {
LOG.debug("Resetting already initialized AvroSerDe");
}
LOG.debug("AvroSerde::initialize(): Preset value of avro.schema.literal == "
+ tableProperties.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()));
schema = null;
oi = null;
columnNames = null;
columnTypes = null;
final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
final String columnCommentProperty = tableProperties.getProperty(LIST_COLUMN_COMMENTS, "");
final String columnNameDelimiter = tableProperties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
? tableProperties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
: String.valueOf(SerDeUtils.COMMA);
boolean gotColTypesFromColProps = true;
if (hasExternalSchema(tableProperties)
|| columnNameProperty == null || columnNameProperty.isEmpty()
|| columnTypeProperty == null || columnTypeProperty.isEmpty()) {
schema = determineSchemaOrReturnErrorSchema(configuration, tableProperties);
gotColTypesFromColProps = false;
} else {
// Get column names and sort order
columnNames = StringInternUtils.internStringsInList(
Arrays.asList(columnNameProperty.split(columnNameDelimiter)));
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
schema = getSchemaFromCols(tableProperties, columnNames, columnTypes, columnCommentProperty);
}
tableProperties.setProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
LOG.debug("Avro schema is: {}", schema);
if (this.configuration.isPresent()) {
this.configuration.get().set(AvroSerdeUtils.AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(),
schema.toString(false));
} else {
LOG.debug("Configuration null, not inserting schema");
}
badSchema = (schema == SchemaResolutionProblem.SIGNAL_BAD_SCHEMA);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(schema);
this.columnNames = StringInternUtils.internStringsInList(aoig.getColumnNames());
this.columnTypes = aoig.getColumnTypes();
this.oi = aoig.getObjectInspector();
// HIVE-22595: Update the column/type properties to reflect the current, since the
// these properties may be used
if (!gotColTypesFromColProps) {
LOG.info("Updating column name/type properties based on current schema");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, String.join(",", columnNames));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, String.join(",", TypeInfoUtils.getTypeStringsFromTypeInfo(columnTypes)));
}
if (badSchema) {
throw new SerDeException("Invalid schema reported");
}
this.avroSerializer = new AvroSerializer(configuration);
this.avroDeserializer = new AvroDeserializer(configuration);
}
private boolean hasExternalSchema(Properties properties) {
return properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()) != null
|| properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName()) != null;
}
private boolean hasExternalSchema(Map tableParams) {
return tableParams.containsKey(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName())
|| tableParams.containsKey(AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName());
}
public static Schema getSchemaFromCols(Properties properties,
List columnNames, List columnTypes, String columnCommentProperty) {
List columnComments;
if (columnCommentProperty == null || columnCommentProperty.isEmpty()) {
columnComments = new ArrayList();
} else {
//Comments are separated by "\0" in columnCommentProperty, see method getSchema
//in MetaStoreUtils where this string columns.comments is generated
columnComments = Arrays.asList(columnCommentProperty.split("\0"));
LOG.debug("columnComments is {}", columnCommentProperty);
}
if (columnNames.size() != columnTypes.size()) {
throw new IllegalArgumentException("AvroSerde initialization failed. Number of column " +
"name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
columnTypes);
}
final String tableName = properties.getProperty(TABLE_NAME);
final String tableComment = properties.getProperty(TABLE_COMMENT);
TypeInfoToSchema typeInfoToSchema = new TypeInfoToSchema();
return typeInfoToSchema.convert(columnNames, columnTypes, columnComments,
properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAMESPACE.getPropName()),
properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAME.getPropName(), tableName),
properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_DOC.getPropName(), tableComment));
}
/**
* Attempt to determine the schema via the usual means, but do not throw
* an exception if we fail. Instead, signal failure via a special
* schema. This is used because Hive calls init on the serde during
* any call, including calls to update the serde properties, meaning
* if the serde is in a bad state, there is no way to update that state.
*/
private Schema determineSchemaOrReturnErrorSchema(Configuration conf, Properties props) {
try {
return AvroSerdeUtils.determineSchemaOrThrowException(conf, props);
} catch (AvroSerdeException he) {
LOG.warn("Encountered AvroSerdeException determining schema. Returning signal schema to indicate problem",
he);
return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
} catch (Exception e) {
LOG.warn("Encountered exception determining schema. Returning signal schema to indicate problem", e);
return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
}
}
@Override
public Class extends Writable> getSerializedClass() {
return AvroGenericRecordWritable.class;
}
@Override
public Writable serialize(Object o, ObjectInspector objectInspector) throws SerDeException {
if(badSchema) {
throw new BadSchemaException();
}
return avroSerializer.serialize(o, objectInspector, columnNames, columnTypes, schema);
}
@Override
public Object deserialize(Writable writable) throws SerDeException {
if(badSchema) {
throw new BadSchemaException();
}
return avroDeserializer.deserialize(columnNames, columnTypes, writable, schema);
}
@Override
public ObjectInspector getObjectInspector() throws SerDeException {
return oi;
}
@Override
public boolean shouldStoreFieldsInMetastore(Map tableParams) {
return !hasExternalSchema(tableParams);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy