org.apache.hadoop.hive.serde2.lazy.LazyArray Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazy;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector;
/**
* LazyArray stores an array of Lazy Objects.
*
* LazyArray does not deal with the case of a NULL array. That is handled by the
* parent LazyObject.
*/
public class LazyArray extends LazyNonPrimitive {
/**
* Whether the data is already parsed or not.
*/
boolean parsed = false;
/**
* The length of the array. Only valid when the data is parsed. -1 when the
* array is NULL.
*/
int arrayLength = 0;
/**
* The start positions of array elements. Only valid when the data is parsed.
* Note that startPosition[arrayLength] = begin + length + 1; that makes sure
* we can use the same formula to compute the length of each element of the
* array.
*/
int[] startPosition;
/**
* Whether init() has been called on the element or not.
*/
boolean[] elementInited;
/**
* The elements of the array. Note that we do arrayElements[i]. init(bytes,
* begin, length) only when that element is accessed.
*/
LazyObject[] arrayElements;
/**
* Construct a LazyArray object with the ObjectInspector.
*
* @param oi
* the oi representing the type of this LazyArray as well as meta
* information like separator etc.
*/
protected LazyArray(LazyListObjectInspector oi) {
super(oi);
}
/**
* Set the row data for this LazyArray.
*
* @see LazyObject#init(ByteArrayRef, int, int)
*/
@Override
public void init(ByteArrayRef bytes, int start, int length) {
super.init(bytes, start, length);
parsed = false;
cachedList = null;
}
/**
* Enlarge the size of arrays storing information for the elements inside the
* array.
*/
private void enlargeArrays() {
if (startPosition == null) {
int initialSize = 2;
startPosition = new int[initialSize];
arrayElements = new LazyObject[initialSize];
elementInited = new boolean[initialSize];
} else {
startPosition = Arrays.copyOf(startPosition, startPosition.length * 2);
arrayElements = Arrays.copyOf(arrayElements, arrayElements.length * 2);
elementInited = Arrays.copyOf(elementInited, elementInited.length * 2);
}
}
/**
* Parse the bytes and fill arrayLength and startPosition.
*/
private void parse() {
parsed = true;
byte separator = oi.getSeparator();
boolean isEscaped = oi.isEscaped();
byte escapeChar = oi.getEscapeChar();
// empty array?
if (length == 0) {
arrayLength = 0;
return;
}
byte[] bytes = this.bytes.getData();
arrayLength = 0;
int arrayByteEnd = start + length;
int elementByteBegin = start;
int elementByteEnd = start;
// Go through all bytes in the byte[]
while (elementByteEnd <= arrayByteEnd) {
// Reached the end of a field?
if (elementByteEnd == arrayByteEnd || bytes[elementByteEnd] == separator) {
// Array size not big enough?
if (startPosition == null || arrayLength + 1 == startPosition.length) {
enlargeArrays();
}
startPosition[arrayLength] = elementByteBegin;
arrayLength++;
elementByteBegin = elementByteEnd + 1;
elementByteEnd++;
} else {
if (isEscaped && bytes[elementByteEnd] == escapeChar
&& elementByteEnd + 1 < arrayByteEnd) {
// ignore the char after escape_char
elementByteEnd += 2;
} else {
elementByteEnd++;
}
}
}
// Store arrayByteEnd+1 in startPosition[arrayLength]
// so that we can use the same formula to compute the length of
// each element in the array: startPosition[i+1] - startPosition[i] - 1
startPosition[arrayLength] = arrayByteEnd + 1;
if (arrayLength > 0) {
Arrays.fill(elementInited, 0, arrayLength, false);
}
}
/**
* Returns the actual primitive object at the index position inside the array
* represented by this LazyObject.
*/
public Object getListElementObject(int index) {
if (!parsed) {
parse();
}
if (index < 0 || index >= arrayLength) {
return null;
}
return uncheckedGetElement(index);
}
/**
* Get the element without checking out-of-bound index.
*/
private Object uncheckedGetElement(int index) {
if (elementInited[index]) {
return arrayElements[index].getObject();
}
elementInited[index] = true;
int elementStart = startPosition[index];
int elementLength = startPosition[index + 1] - elementStart - 1;
if (arrayElements[index] == null) {
arrayElements[index] = LazyFactory.createLazyObject(oi.getListElementObjectInspector());
}
if (isNull(oi.getNullSequence(), bytes, elementStart, elementLength)) {
arrayElements[index].setNull();
} else {
arrayElements[index].init(bytes, elementStart, elementLength);
}
return arrayElements[index].getObject();
}
/**
* Returns -1 for null array.
*/
public int getListLength() {
if (!parsed) {
parse();
}
return arrayLength;
}
/**
* cachedList is reused every time getList is called. Different LazyArray
* instances cannot share the same cachedList.
*/
ArrayList