// org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazybinary;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyObject;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
 * LazyBinaryArray is serialized as follows:
 *
 * <pre>
 *            start  size     A      b   b   b   b        b   b   end
 * bytes[] -> |------|--------|---|---|---|---| ... |---|---|
 * </pre>
 *
 * The serialized form starts with a VInt holding the number of elements N in
 * the list.
 *
 * Section A is the null-bytes. Suppose the list has N elements, then there are
 * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
 * indicates whether that element is null (0) or not null (1).
 *
 * After A, all b(s) represent the elements of the list. Each of them is again a
 * LazyBinaryObject.
 *
 */
public class LazyBinaryArray extends
LazyBinaryNonPrimitive {
/**
 * Whether the current bytes have already been parsed. Reset to false by
 * init() and set to true at the end of parse().
 */
boolean parsed = false;
/**
 * The number of elements in the array, as read by parse(). Only valid when
 * the data is parsed.
 */
int arraySize = 0;
/**
 * The start positions and lengths (in bytes) of the array elements inside the
 * backing byte array. Only valid when the data is parsed, and only for
 * elements that are not null: parse() does not write the entries of null
 * elements. These arrays are reused between parses and may be longer than
 * arraySize.
 */
int[] elementStart;
int[] elementLength;
/**
 * Whether arrayElements[i] has been initialized against the currently parsed
 * bytes. Cleared for the first arraySize entries on every parse().
 */
boolean[] elementInited;
/**
 * Whether an element is null or not. A length of 0 does not mean the element
 * is null. In particular, a 0-length string is not null.
 */
boolean[] elementIsNull;
/**
 * The elements of the array. Note that we call arrayElements[i].init(bytes,
 * begin, length) only when that element is accessed.
 */
LazyBinaryObject[] arrayElements;
/**
 * Construct a LazyBinaryArray object with the ObjectInspector. No data is
 * attached here; the object inspector is simply handed to the superclass
 * constructor.
 *
 * @param oi
 * the object inspector representing the type of this LazyBinaryArray
 */
protected LazyBinaryArray(LazyBinaryListObjectInspector oi) {
super(oi);
}
/**
 * Points this LazyBinaryArray at a new byte range and marks it as not yet
 * parsed, so that the next call to an accessor re-parses the data.
 *
 * @param bytes
 *          the byte array reference holding the serialized list
 * @param start
 *          the offset at which the serialized list begins
 * @param length
 *          the number of bytes that belong to the serialized list
 * @see LazyObject#init(ByteArrayRef, int, int)
 */
@Override
public void init(ByteArrayRef bytes, int start, int length) {
  super.init(bytes, start, length);
  // Offsets recorded by an earlier parse() no longer apply to the new bytes.
  this.parsed = false;
}
/**
 * Makes sure the per-element bookkeeping arrays can hold at least
 * {@code newSize} entries. Existing arrays are kept when they are already
 * large enough; otherwise all five arrays are replaced by fresh ones of
 * exactly {@code newSize} entries.
 *
 * @param newSize
 *          the number of elements the arrays must be able to describe
 */
private void adjustArraySize(int newSize) {
  final boolean largeEnough = elementStart != null && elementStart.length >= newSize;
  if (largeEnough) {
    return;
  }
  // All arrays are sized together so elementStart.length can stand in for every one of them.
  elementStart = new int[newSize];
  elementLength = new int[newSize];
  elementInited = new boolean[newSize];
  elementIsNull = new boolean[newSize];
  arrayElements = new LazyBinaryObject[newSize];
}
/**
 * Reusable holder that parse() fills via LazyBinaryUtils.readVInt; its value
 * is used as the array size and its length as the size of the leading VInt.
 */
VInt vInt = new LazyBinaryUtils.VInt();
/**
 * Reusable holder that parse() fills via LazyBinaryUtils.checkObjectByteInfo
 * to obtain the offset and size of each non-null element.
 */
RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
/**
 * Parses the serialized list: reads the element count, walks the null-bytes,
 * and records the start offset and length of every non-null element in
 * elementStart and elementLength. Also fills elementIsNull and clears
 * elementInited for the parsed elements, then marks the data as parsed.
 */
private void parse() {
  final byte[] data = this.bytes.getData();

  // The serialized form begins with a VInt that holds the array size.
  LazyBinaryUtils.readVInt(data, start, vInt);
  arraySize = vInt.value;
  if (arraySize == 0) {
    parsed = true;
    return;
  }

  // Make sure the bookkeeping arrays can describe arraySize elements.
  adjustArraySize(arraySize);

  // The null-bytes follow the size VInt: one bit per element, (arraySize + 7) / 8 bytes.
  final int nullBytesBegin = start + vInt.length;
  final int nullBytesEnd = nullBytesBegin + (arraySize + 7) / 8;

  // The object inspector shared by all elements of the list.
  final ObjectInspector elementInspector =
      ((ListObjectInspector) oi).getListElementObjectInspector();

  // The elements start right after the null-bytes; each one is located
  // starting from the end of the previous non-null element.
  int cursor = nullBytesEnd;
  for (int i = 0; i < arraySize; i++) {
    elementIsNull[i] = true;
    // Bit (i % 8) of null-byte (i / 8) is set when element i is not null.
    final boolean present = (data[nullBytesBegin + i / 8] & (1 << (i % 8))) != 0;
    if (!present) {
      continue;
    }
    elementIsNull[i] = false;
    LazyBinaryUtils.checkObjectByteInfo(elementInspector, data, cursor, recordInfo);
    elementStart[i] = cursor + recordInfo.elementOffset;
    elementLength[i] = recordInfo.elementSize;
    cursor = elementStart[i] + elementLength[i];
  }

  // Cached element objects must be re-initialized against the new bytes on next access.
  Arrays.fill(elementInited, 0, arraySize, false);
  parsed = true;
}
/**
 * Returns the object stored at the given position of the array represented
 * by this LazyBinaryObject, parsing the data first if necessary.
 *
 * @param index
 *          position of the requested element
 * @return the element's object, or null when the index is out of range or
 *         the element itself is null
 */
public Object getListElementObject(int index) {
  if (!parsed) {
    parse();
  }
  final boolean inRange = index >= 0 && index < arraySize;
  return inRange ? uncheckedGetElement(index) : null;
}
/**
 * Returns the element at the given index without validating the index
 * against the array bounds. The element object is created on first use and
 * initialized against the current bytes the first time it is accessed after
 * a parse.
 *
 * @param index
 *          index to the array element
 * @return the element's object, or null when the element is null
 */
private Object uncheckedGetElement(int index) {
  if (elementIsNull[index]) {
    return null;
  }
  if (!elementInited[index]) {
    elementInited[index] = true;
    LazyBinaryObject element = arrayElements[index];
    if (element == null) {
      // First access to this slot: create the lazy object for the element type.
      element = LazyBinaryFactory.createLazyBinaryObject(oi
          .getListElementObjectInspector());
      arrayElements[index] = element;
    }
    element.init(bytes, elementStart[index], elementLength[index]);
  }
  return arrayElements[index].getObject();
}
/**
 * Returns the number of elements in the array, parsing the data first if
 * that has not happened yet.
 */
public int getListLength() {
  if (parsed) {
    return arraySize;
  }
  parse();
  return arraySize;
}
/**
* cachedList is reused every time getList is called. Different
* LazyBinaryArray instances cannot share the same cachedList.
*/
ArrayList