// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.parquet.write;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Map;
/**
*
* DataWritableWriter sends a record to the Parquet API with the expected schema in order
* to be written to a file.
* This class is only used through DataWritableWriteSupport class.
*/
public class DataWritableWriter {
// Logger used to report a failure while the message writer is being created.
private static final Logger LOG = LoggerFactory.getLogger(DataWritableWriter.class);
// Parquet consumer that receives every start/end/add call issued by the nested writers.
protected final RecordConsumer recordConsumer;
// Parquet schema of the whole message; handed to the message writer when it is created.
private final GroupType schema;
/* This writer will be created when writing the first row in order to get
information about how to inspect the record data. It stays null until then
and is reused for every following record. */
private DataWriter messageWriter;
/**
 * Builds a writer bound to a Parquet record consumer and a message schema.
 * No data writer is created here; that happens on the first call to
 * {@code write}.
 * @param recordConsumer Consumer that receives the Parquet write calls.
 * @param schema Parquet schema of the records that will be written.
 */
public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema) {
  this.schema = schema;
  this.recordConsumer = recordConsumer;
}
/**
 * Sends one record to Parquet. A null record is silently ignored.
 * The message writer is created from the record's object inspector the first
 * time a non-null record arrives and is reused afterwards.
 * @param record Contains the record that is going to be written.
 * @throws RuntimeException if the message writer cannot be created; the
 *         original failure is logged and attached as the cause.
 */
public void write(final ParquetHiveRecord record) {
  if (record == null) {
    return;
  }
  if (messageWriter == null) {
    try {
      messageWriter = createMessageWriter(record.getObjectInspector(), schema);
    } catch (RuntimeException cause) {
      final String message = "Parquet record is malformed: " + cause.getMessage();
      LOG.error(message, cause);
      throw new RuntimeException(message, cause);
    }
  }
  messageWriter.write(record.getObject());
}
/**
 * Creates the top-level writer that wraps a whole record in a Parquet message.
 * @param inspector Struct inspector describing the record fields.
 * @param schema Parquet group type describing the message fields.
 * @return A new message writer for the given inspector and schema.
 */
private MessageDataWriter createMessageWriter(StructObjectInspector inspector, GroupType schema) {
  final MessageDataWriter rootWriter = new MessageDataWriter(inspector, schema);
  return rootWriter;
}
/**
 * Builds the writer matching a Parquet type and the Hive object inspector
 * that reads the corresponding values.
 * Group types become a list, map or struct writer depending on their original
 * type annotation (anything that is neither LIST nor MAP is treated as a
 * struct). Primitive types are dispatched on the inspector's primitive category.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the type schema.
 * @return A DataWriter object used to call the Parquet API for the specific data type.
 * @throws IllegalArgumentException if the inspector category does not match
 *         the Parquet type, or the primitive category is not supported.
 */
private DataWriter createWriter(ObjectInspector inspector, Type type) {
  if (!type.isPrimitive()) {
    final GroupType group = type.asGroupType();
    final OriginalType annotation = type.getOriginalType();
    if (annotation == OriginalType.LIST) {
      checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
      return new ListDataWriter((ListObjectInspector) inspector, group);
    }
    if (annotation == OriginalType.MAP) {
      checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
      return new MapDataWriter((MapObjectInspector) inspector, group);
    }
    checkInspectorCategory(inspector, ObjectInspector.Category.STRUCT);
    return new StructDataWriter((StructObjectInspector) inspector, group);
  }

  checkInspectorCategory(inspector, ObjectInspector.Category.PRIMITIVE);
  final PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) inspector;
  final PrimitiveObjectInspector.PrimitiveCategory category = primitive.getPrimitiveCategory();
  switch (category) {
    case BOOLEAN:
      return new BooleanDataWriter((BooleanObjectInspector) inspector);
    case BYTE:
      return new ByteDataWriter((ByteObjectInspector) inspector);
    case SHORT:
      return new ShortDataWriter((ShortObjectInspector) inspector);
    case INT:
      return new IntDataWriter((IntObjectInspector) inspector);
    case LONG:
      return new LongDataWriter((LongObjectInspector) inspector);
    case FLOAT:
      return new FloatDataWriter((FloatObjectInspector) inspector);
    case DOUBLE:
      return new DoubleDataWriter((DoubleObjectInspector) inspector);
    case STRING:
      return new StringDataWriter((StringObjectInspector) inspector);
    case CHAR:
      return new CharDataWriter((HiveCharObjectInspector) inspector);
    case VARCHAR:
      return new VarcharDataWriter((HiveVarcharObjectInspector) inspector);
    case BINARY:
      return new BinaryDataWriter((BinaryObjectInspector) inspector);
    case TIMESTAMP:
      return new TimestampDataWriter((TimestampObjectInspector) inspector);
    case DECIMAL:
      return new DecimalDataWriter((HiveDecimalObjectInspector) inspector);
    case DATE:
      return new DateDataWriter((DateObjectInspector) inspector);
    default:
      throw new IllegalArgumentException("Unsupported primitive data type: " + category);
  }
}
/**
 * Verifies that an object inspector belongs to the expected category.
 * @param inspector The object inspector to check
 * @param category The category the inspector must report
 * @throws IllegalArgumentException if the inspector reports another category
 */
private void checkInspectorCategory(ObjectInspector inspector, ObjectInspector.Category category) {
  final ObjectInspector.Category actual = inspector.getCategory();
  if (actual.equals(category)) {
    return;
  }
  throw new IllegalArgumentException("Invalid data type: expected " + category
      + " type, but found: " + actual);
}
/**
 * Contract shared by every writer in this class: take one value and emit it
 * to the record consumer. Implementations exist for the message, for groups
 * (struct, list, map) and for each supported primitive category.
 */
private interface DataWriter {
/**
 * Writes a single value.
 * @param value The object to write; each implementation interprets it in its own way,
 *              in most cases through an object inspector.
 */
void write(Object value);
}
/**
 * Writes the fields of a struct value. One nested writer is created per struct
 * field at construction time, pairing the i-th struct field with the i-th type
 * of the Parquet group. This class emits only the fields themselves; the
 * surrounding message or group markers are added by its subclasses.
 */
private class GroupDataWriter implements DataWriter {
  private final StructObjectInspector inspector;
  // Typed list (not a raw List) so elements can be read as StructField without casts.
  private final List<? extends StructField> structFields;
  private final DataWriter[] structWriters;

  /**
   * @param inspector Struct inspector used to enumerate and read the fields.
   * @param groupType Parquet group whose i-th type describes the i-th struct field.
   */
  public GroupDataWriter(StructObjectInspector inspector, GroupType groupType) {
    this.inspector = inspector;
    structFields = this.inspector.getAllStructFieldRefs();
    structWriters = new DataWriter[structFields.size()];
    for (int i = 0; i < structFields.size(); i++) {
      StructField field = structFields.get(i);
      structWriters[i] = createWriter(field.getFieldObjectInspector(), groupType.getType(i));
    }
  }

  /**
   * Writes every non-null field of the struct. A null field is skipped
   * entirely: no startField/endField pair is emitted for it.
   * @param value The struct object read through the struct inspector.
   */
  @Override
  public void write(Object value) {
    for (int i = 0; i < structFields.size(); i++) {
      StructField field = structFields.get(i);
      Object fieldValue = inspector.getStructFieldData(value, field);
      if (fieldValue != null) {
        String fieldName = field.getFieldName();
        DataWriter writer = structWriters[i];
        recordConsumer.startField(fieldName, i);
        writer.write(fieldValue);
        recordConsumer.endField(fieldName, i);
      }
    }
  }
}
/**
 * Top-level writer: wraps the struct fields of a record between
 * startMessage and endMessage. A null record object produces an empty message.
 */
private class MessageDataWriter extends GroupDataWriter {
  public MessageDataWriter(StructObjectInspector inspector, GroupType groupType) {
    super(inspector, groupType);
  }

  /**
   * @param value The record object, or null to emit a message without fields.
   */
  @Override
  public void write(Object value) {
    final boolean hasFields = value != null;
    recordConsumer.startMessage();
    if (hasFields) {
      super.write(value);
    }
    recordConsumer.endMessage();
  }
}
/**
 * Writer for a nested struct: wraps the struct fields between
 * startGroup and endGroup.
 */
private class StructDataWriter extends GroupDataWriter {
  public StructDataWriter(StructObjectInspector inspector, GroupType groupType) {
    super(inspector, groupType);
  }

  /**
   * @param value The struct object whose fields are written inside one group.
   */
  @Override
  public void write(Object value) {
    recordConsumer.startGroup();
    // The field-by-field work is done by GroupDataWriter.
    super.write(value);
    recordConsumer.endGroup();
  }
}
/**
 * Writer for list values. The Parquet group is expected to hold one repeated
 * group at index 0, which in turn holds the element type at index 0; both
 * names are read from the schema at construction time.
 */
private class ListDataWriter implements DataWriter {
  private final ListObjectInspector inspector;
  private final String repeatedGroupName;
  private final String elementName;
  private final DataWriter elementWriter;

  /**
   * @param inspector List inspector used to read the length and the elements.
   * @param groupType Parquet group describing the list structure.
   */
  public ListDataWriter(ListObjectInspector inspector, GroupType groupType) {
    this.inspector = inspector;
    // Unwrap the repeated group, then the element type nested inside it.
    final GroupType repeated = groupType.getType(0).asGroupType();
    this.repeatedGroupName = repeated.getName();
    final Type element = repeated.getType(0);
    this.elementName = element.getName();
    this.elementWriter = createWriter(this.inspector.getListElementObjectInspector(), element);
  }

  /**
   * Writes the list as a group. When the reported length is not positive the
   * group is left empty; otherwise each element gets its own nested group.
   * @param value The list object read through the list inspector.
   */
  @Override
  public void write(Object value) {
    recordConsumer.startGroup();
    final int size = inspector.getListLength(value);
    if (size > 0) {
      recordConsumer.startField(repeatedGroupName, 0);
      int index = 0;
      while (index < size) {
        writeElement(inspector.getListElement(value, index));
        index++;
      }
      recordConsumer.endField(repeatedGroupName, 0);
    }
    recordConsumer.endGroup();
  }

  /** Writes one element group; a null element yields a group without fields. */
  private void writeElement(Object element) {
    recordConsumer.startGroup();
    if (element != null) {
      recordConsumer.startField(elementName, 0);
      elementWriter.write(element);
      recordConsumer.endField(elementName, 0);
    }
    recordConsumer.endGroup();
  }
}
/**
 * Writer for map values. The Parquet group is expected to hold one repeated
 * group at index 0 whose type 0 is the key and type 1 is the value; the names
 * and nested writers are resolved at construction time.
 */
private class MapDataWriter implements DataWriter {
  private final MapObjectInspector inspector;
  private final String repeatedGroupName;
  private final String keyName;
  private final String valueName;
  private final DataWriter keyWriter;
  private final DataWriter valueWriter;

  /**
   * @param inspector Map inspector used to read the entries.
   * @param groupType Parquet group describing the map structure.
   */
  public MapDataWriter(MapObjectInspector inspector, GroupType groupType) {
    this.inspector = inspector;
    // Get the internal map structure (MAP_KEY_VALUE)
    GroupType repeatedType = groupType.getType(0).asGroupType();
    this.repeatedGroupName = repeatedType.getName();
    // Get key element information
    Type keyType = repeatedType.getType(0);
    ObjectInspector keyInspector = this.inspector.getMapKeyObjectInspector();
    this.keyName = keyType.getName();
    this.keyWriter = createWriter(keyInspector, keyType);
    // Get value element information
    Type valueType = repeatedType.getType(1);
    ObjectInspector valueInspector = this.inspector.getMapValueObjectInspector();
    this.valueName = valueType.getName();
    this.valueWriter = createWriter(valueInspector, valueType);
  }

  /**
   * Writes the map as a group. A null or empty map produces an empty group.
   * Each entry is written as a nested group holding the key (always written)
   * and the value (skipped when null).
   * @param value The map object read through the map inspector.
   */
  @Override
  public void write(Object value) {
    recordConsumer.startGroup();
    // Wildcard types (not raw Map) so the entry set can be iterated as Map.Entry.
    Map<?, ?> mapValues = inspector.getMap(value);
    if (mapValues != null && !mapValues.isEmpty()) {
      recordConsumer.startField(repeatedGroupName, 0);
      for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) {
        recordConsumer.startGroup();
        if (keyValue != null) {
          // write key element
          Object keyElement = keyValue.getKey();
          recordConsumer.startField(keyName, 0);
          keyWriter.write(keyElement);
          recordConsumer.endField(keyName, 0);
          // write value element
          Object valueElement = keyValue.getValue();
          if (valueElement != null) {
            recordConsumer.startField(valueName, 1);
            valueWriter.write(valueElement);
            recordConsumer.endField(valueName, 1);
          }
        }
        recordConsumer.endGroup();
      }
      recordConsumer.endField(repeatedGroupName, 0);
    }
    recordConsumer.endGroup();
  }
}
/** Writes boolean values through {@code recordConsumer.addBoolean}. */
private class BooleanDataWriter implements DataWriter {
  private final BooleanObjectInspector inspector;

  public BooleanDataWriter(BooleanObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the boolean.
   */
  @Override
  public void write(Object value) {
    final boolean flag = inspector.get(value);
    recordConsumer.addBoolean(flag);
  }
}
/** Writes byte values, widened to int, through {@code recordConsumer.addInteger}. */
private class ByteDataWriter implements DataWriter {
  private final ByteObjectInspector inspector;

  public ByteDataWriter(ByteObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the byte.
   */
  @Override
  public void write(Object value) {
    final int widened = inspector.get(value);
    recordConsumer.addInteger(widened);
  }
}
/** Writes short values, widened to int, through {@code recordConsumer.addInteger}. */
private class ShortDataWriter implements DataWriter {
  private final ShortObjectInspector inspector;

  public ShortDataWriter(ShortObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the short.
   */
  @Override
  public void write(Object value) {
    final int widened = inspector.get(value);
    recordConsumer.addInteger(widened);
  }
}
/** Writes int values through {@code recordConsumer.addInteger}. */
private class IntDataWriter implements DataWriter {
  private final IntObjectInspector inspector;

  public IntDataWriter(IntObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the int.
   */
  @Override
  public void write(Object value) {
    final int number = inspector.get(value);
    recordConsumer.addInteger(number);
  }
}
/** Writes long values through {@code recordConsumer.addLong}. */
private class LongDataWriter implements DataWriter {
  private final LongObjectInspector inspector;

  public LongDataWriter(LongObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the long.
   */
  @Override
  public void write(Object value) {
    final long number = inspector.get(value);
    recordConsumer.addLong(number);
  }
}
/** Writes float values through {@code recordConsumer.addFloat}. */
private class FloatDataWriter implements DataWriter {
  private final FloatObjectInspector inspector;

  public FloatDataWriter(FloatObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the float.
   */
  @Override
  public void write(Object value) {
    final float number = inspector.get(value);
    recordConsumer.addFloat(number);
  }
}
/** Writes double values through {@code recordConsumer.addDouble}. */
private class DoubleDataWriter implements DataWriter {
  private final DoubleObjectInspector inspector;

  public DoubleDataWriter(DoubleObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the double.
   */
  @Override
  public void write(Object value) {
    final double number = inspector.get(value);
    recordConsumer.addDouble(number);
  }
}
/** Writes string values as Parquet binaries built with {@code Binary.fromString}. */
private class StringDataWriter implements DataWriter {
  private final StringObjectInspector inspector;

  public StringDataWriter(StringObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the Java string.
   */
  @Override
  public void write(Object value) {
    final Binary encoded = Binary.fromString(inspector.getPrimitiveJavaObject(value));
    recordConsumer.addBinary(encoded);
  }
}
/**
 * Writes char values as Parquet binaries. The text written is the stripped
 * value reported by the HiveChar object (presumably without its padding;
 * see HiveChar.getStrippedValue to confirm).
 */
private class CharDataWriter implements DataWriter {
  private final HiveCharObjectInspector inspector;

  public CharDataWriter(HiveCharObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the char value.
   */
  @Override
  public void write(Object value) {
    final String stripped = inspector.getPrimitiveJavaObject(value).getStrippedValue();
    final Binary encoded = Binary.fromString(stripped);
    recordConsumer.addBinary(encoded);
  }
}
/** Writes varchar values as Parquet binaries built from the varchar's string value. */
private class VarcharDataWriter implements DataWriter {
  private final HiveVarcharObjectInspector inspector;

  public VarcharDataWriter(HiveVarcharObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the varchar value.
   */
  @Override
  public void write(Object value) {
    final String text = inspector.getPrimitiveJavaObject(value).getValue();
    final Binary encoded = Binary.fromString(text);
    recordConsumer.addBinary(encoded);
  }
}
/** Writes byte-array values as Parquet binaries built with {@code Binary.fromByteArray}. */
private class BinaryDataWriter implements DataWriter {
  private final BinaryObjectInspector inspector;

  public BinaryDataWriter(BinaryObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the byte array.
   */
  @Override
  public void write(Object value) {
    final Binary encoded = Binary.fromByteArray(inspector.getPrimitiveJavaObject(value));
    recordConsumer.addBinary(encoded);
  }
}
/**
 * Writes timestamp values as Parquet binaries, using the nano-time
 * representation produced by {@code NanoTimeUtils.getNanoTime}.
 */
private class TimestampDataWriter implements DataWriter {
  private final TimestampObjectInspector inspector;

  public TimestampDataWriter(TimestampObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the timestamp.
   */
  @Override
  public void write(Object value) {
    final Timestamp timestamp = inspector.getPrimitiveJavaObject(value);
    // The boolean flag is passed as false; its meaning is defined by NanoTimeUtils.
    final Binary encoded = NanoTimeUtils.getNanoTime(timestamp, false).toBinary();
    recordConsumer.addBinary(encoded);
  }
}
/**
 * Writes decimal values as Parquet binaries whose length is taken from
 * {@code ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT} for the declared precision.
 */
private class DecimalDataWriter implements DataWriter {
  private final HiveDecimalObjectInspector inspector;

  public DecimalDataWriter(HiveDecimalObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the decimal.
   */
  @Override
  public void write(Object value) {
    final HiveDecimal decimal = inspector.getPrimitiveJavaObject(value);
    final DecimalTypeInfo typeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
    final Binary encoded = decimalToBinary(decimal, typeInfo);
    recordConsumer.addBinary(encoded);
  }

  /**
   * Converts a decimal into a binary of the byte width associated with the
   * declared precision. The bytes obtained for the declared scale are
   * right-aligned in the target array; the leading bytes are 0x00 for
   * non-negative values and 0xFF for negative ones.
   * @param hiveDecimal The decimal value to encode.
   * @param decimalTypeInfo Supplies the precision and scale of the column.
   * @return The padded (or already correctly sized) binary value.
   */
  private Binary decimalToBinary(final HiveDecimal hiveDecimal, final DecimalTypeInfo decimalTypeInfo) {
    final int precision = decimalTypeInfo.precision();
    final int scale = decimalTypeInfo.scale();
    final byte[] unscaled = hiveDecimal.bigIntegerBytesScaled(scale);
    // Byte width looked up for this precision (table is indexed by precision - 1).
    final int width = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[precision - 1];
    if (unscaled.length == width) {
      // Already the right size, no padding needed.
      return Binary.fromByteArray(unscaled);
    }
    final byte[] padded = new byte[width];
    if (hiveDecimal.signum() == -1) {
      // Negative number: pre-fill with all-ones bytes.
      java.util.Arrays.fill(padded, (byte) 0xFF);
    }
    // Right-align the value, leaving the leading zeroes/ones as padding.
    System.arraycopy(unscaled, 0, padded, width - unscaled.length, unscaled.length);
    return Binary.fromByteArray(padded);
  }
}
/**
 * Writes date values as integers, using the day count returned by
 * {@code DateWritableV2.dateToDays}.
 */
private class DateDataWriter implements DataWriter {
  private final DateObjectInspector inspector;

  public DateDataWriter(DateObjectInspector inspector) {
    this.inspector = inspector;
  }

  /**
   * @param value The object from which the inspector extracts the date.
   */
  @Override
  public void write(Object value) {
    final Date date = inspector.getPrimitiveJavaObject(value);
    final int days = DateWritableV2.dateToDays(date);
    recordConsumer.addInteger(days);
  }
}
}