// org.apache.hadoop.hive.serde2.fast.DeserializeRead Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.fast;
import java.io.IOException;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
/*
* Directly deserialize with the caller reading field-by-field a serialization format.
*
* The caller is responsible for calling the read method for the right type of each field
* (after calling readNextField).
*
* Reading some fields requires a results object to receive value information. A separate
* results object is created by the caller at initialization for each field, even for fields
* of the same type.
*
* Some type values are by reference to either bytes in the deserialization buffer or to
* other type specific buffers. So, those references are only valid until the next time set is
* called.
*/
public abstract class DeserializeRead {

  /** Type information for every field, in field order. */
  protected TypeInfo[] typeInfos;

  /**
   * True when the caller is prepared to provide a bytes buffer to receive a
   * string/char/varchar/binary field that needs format conversion.
   */
  protected boolean useExternalBuffer;

  /** Category of each field; parallel to {@link #typeInfos}. */
  protected Category[] categories;

  /**
   * Primitive category of each field; parallel to {@link #typeInfos}. Entries for
   * non-primitive fields are left null.
   */
  protected PrimitiveCategory[] primitiveCategories;

  /**
   * Constructor.
   *
   * When useExternalBuffer is specified true and readNextField reads a string/char/varchar/binary
   * field, it will request an external buffer to receive the data of format conversion.
   *
   * <pre>{@code
   *   if (deserializeRead.readNextField()) {
   *     if (deserializeRead.currentExternalBufferNeeded) {
   *       // Ensure the external buffer holds at least
   *       // deserializeRead.currentExternalBufferNeededLen bytes, then:
   *       deserializeRead.copyToExternalBuffer(externalBuffer, externalBufferStart);
   *     } else {
   *       // The field data is available in currentBytes, currentBytesStart, and
   *       // currentBytesLength of deserializeRead.
   *     }
   *   }
   * }</pre>
   *
   * One shared writable is allocated for each of the DATE, TIMESTAMP, INTERVAL_YEAR_MONTH,
   * INTERVAL_DAY_TIME, and DECIMAL primitive categories that appears in typeInfos.
   *
   * @param typeInfos the type information of every field, in field order; must not be null
   * @param useExternalBuffer Specify true when the caller is prepared to provide a bytes buffer
   *                          to receive a string/char/varchar/binary field that needs format
   *                          conversion.
   */
  public DeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) {
    this.typeInfos = typeInfos;

    // Record the flag before (and independently of) the per-field loop so that it is
    // honored even when there are zero fields.
    this.useExternalBuffer = useExternalBuffer;

    final int count = typeInfos.length;
    categories = new Category[count];
    primitiveCategories = new PrimitiveCategory[count];
    for (int i = 0; i < count; i++) {
      TypeInfo typeInfo = typeInfos[i];
      Category category = typeInfo.getCategory();
      categories[i] = category;
      if (category != Category.PRIMITIVE) {
        // Only primitive fields have a primitive category or need a writable.
        continue;
      }
      PrimitiveCategory primitiveCategory =
          ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
      primitiveCategories[i] = primitiveCategory;
      allocateWritableFor(primitiveCategory);
    }
  }

  // Don't allow for public.
  protected DeserializeRead() {
  }

  /*
   * Lazily creates the shared current* writable used by the given primitive category.
   * Categories other than the five handled here need no writable.
   */
  private void allocateWritableFor(PrimitiveCategory primitiveCategory) {
    switch (primitiveCategory) {
    case DATE:
      if (currentDateWritable == null) {
        currentDateWritable = new DateWritable();
      }
      break;
    case TIMESTAMP:
      if (currentTimestampWritable == null) {
        currentTimestampWritable = new TimestampWritable();
      }
      break;
    case INTERVAL_YEAR_MONTH:
      if (currentHiveIntervalYearMonthWritable == null) {
        currentHiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
      }
      break;
    case INTERVAL_DAY_TIME:
      if (currentHiveIntervalDayTimeWritable == null) {
        currentHiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
      }
      break;
    case DECIMAL:
      if (currentHiveDecimalWritable == null) {
        currentHiveDecimalWritable = new HiveDecimalWritable();
      }
      break;
    default:
      // No writable needed for this data type.
      break;
    }
  }

  /**
   * The type information for all fields.
   *
   * @return the same array instance held by this reader (not a copy)
   */
  public TypeInfo[] typeInfos() {
    return typeInfos;
  }

  /**
   * Set the range of bytes to be deserialized.
   *
   * @param bytes  the buffer containing the serialized data
   * @param offset the start of the range within bytes
   * @param length the number of bytes in the range
   */
  public abstract void set(byte[] bytes, int offset, int length);

  /**
   * Reads the next field.
   *
   * Afterwards, reading is positioned to the next field.
   *
   * @return Return true when the field was not null and data is put in the appropriate
   *         current* member.
   *         Otherwise, false when the field is null.
   * @throws IOException declared for implementations that can fail while reading
   */
  public abstract boolean readNextField() throws IOException;

  /**
   * Reads through an undesired field.
   *
   * No data values are valid after this call.
   * Designed for skipping columns that are not included.
   *
   * @throws IOException declared for implementations that can fail while reading
   */
  public abstract void skipNextField() throws IOException;

  /**
   * Returns true if the readField method is supported.
   *
   * @return false in this base implementation
   */
  public boolean isReadFieldSupported() {
    return false;
  }

  /**
   * When supported, read a field by field number (i.e. random access).
   *
   * Currently, only LazySimpleDeserializeRead supports this. This base implementation
   * always throws a RuntimeException.
   *
   * @param fieldIndex the number of the field to read
   * @return Return true when the field was not null and data is put in the appropriate
   *         current* member.
   *         Otherwise, false when the field is null.
   * @throws IOException declared for implementations that can fail while reading
   */
  public boolean readField(int fieldIndex) throws IOException {
    throw new RuntimeException("Not supported");
  }

  /**
   * This method may be called after all the fields have been read to check
   * for unread fields.
   *
   * Note that when optimizing reading to stop reading unneeded include columns, worrying
   * about whether all data is consumed is not appropriate (often we aren't reading it all by
   * design).
   *
   * Since LazySimpleDeserializeRead parses the line through the last desired column it does
   * support this function.
   *
   * @return whether the end of the input has been reached
   */
  public abstract boolean isEndOfInputReached();

  /**
   * Get detailed read position information to help diagnose exceptions.
   *
   * @return a description of the current read position
   */
  public abstract String getDetailedReadPositionString();

  /*
   * These members hold the current value that was read when readNextField returns true.
   */

  /*
   * BOOLEAN.
   */
  public boolean currentBoolean;

  /*
   * BYTE.
   */
  public byte currentByte;

  /*
   * SHORT.
   */
  public short currentShort;

  /*
   * INT.
   */
  public int currentInt;

  /*
   * LONG.
   */
  public long currentLong;

  /*
   * FLOAT.
   */
  public float currentFloat;

  /*
   * DOUBLE.
   */
  public double currentDouble;

  /*
   * STRING, CHAR, VARCHAR, and BINARY.
   *
   * For CHAR and VARCHAR, the caller takes responsibility for
   * truncation/padding issues.
   *
   * When currentExternalBufferNeeded is true, conversion is needed into an external buffer of
   * at least currentExternalBufferNeededLen bytes. Use copyToExternalBuffer to get the result.
   *
   * Otherwise, currentBytes, currentBytesStart, and currentBytesLength are the result.
   */
  public boolean currentExternalBufferNeeded;
  public int currentExternalBufferNeededLen;

  /**
   * Copies the current field, with format conversion, into the caller's external buffer.
   *
   * This base implementation always throws a RuntimeException; readers that set
   * currentExternalBufferNeeded are expected to override it.
   *
   * @param externalBuffer      the buffer to receive the converted bytes; it should hold at
   *                            least currentExternalBufferNeededLen bytes from the start offset
   * @param externalBufferStart the offset in externalBuffer at which to start writing
   * @throws IOException declared for implementations that can fail while converting
   */
  public void copyToExternalBuffer(byte[] externalBuffer, int externalBufferStart)
      throws IOException {
    throw new RuntimeException("Not implemented");
  }

  public byte[] currentBytes;
  public int currentBytesStart;
  public int currentBytesLength;

  /*
   * DATE.
   */
  public DateWritable currentDateWritable;

  /*
   * TIMESTAMP.
   */
  public TimestampWritable currentTimestampWritable;

  /*
   * INTERVAL_YEAR_MONTH.
   */
  public HiveIntervalYearMonthWritable currentHiveIntervalYearMonthWritable;

  /*
   * INTERVAL_DAY_TIME.
   */
  public HiveIntervalDayTimeWritable currentHiveIntervalDayTimeWritable;

  /*
   * DECIMAL.
   */
  public HiveDecimalWritable currentHiveDecimalWritable;
}