All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.lazybinary;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyObject;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

/**
 * LazyBinaryArray is serialized as follows:
 *
 * <pre>
 *            start                                        end
 * bytes[] -> | N |   A   | b | b | b | b | ... | b | b |
 * </pre>
 *
 * Section N is the number of elements in the list, stored as a
 * variable-length integer (VInt).
 *
 * Section A is the null-bytes. Suppose the list has N elements, then there are
 * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
 * indicates whether that element is null (0) or not null (1).
 *
 * After A, all b(s) represent the non-null elements of the list, in order;
 * null elements occupy no bytes. Each b is again a LazyBinaryObject.
 */

public class LazyBinaryArray extends
    LazyBinaryNonPrimitive {

  // NOTE(review): the superclass is referenced as a raw type here. If
  // LazyBinaryNonPrimitive is generic over its ObjectInspector, a type
  // argument is probably missing -- confirm against its declaration. The code
  // below casts oi explicitly so it does not depend on that type argument.

  /**
   * Whether the current bytes have already been parsed. Reset to false by
   * {@link #init(ByteArrayRef, int, int)}.
   */
  boolean parsed = false;

  /**
   * The number of elements in the array. Only valid when the data is parsed.
   */
  int arraySize = 0;

  /**
   * The start positions and lengths of array elements. Only valid when the data
   * is parsed, and only written for elements that are not null.
   */
  int[] elementStart;
  int[] elementLength;

  /**
   * Whether an element object has been initialized against the current bytes.
   * Cleared for all elements on every parse.
   */
  boolean[] elementInited;

  /**
   * Whether an element is null or not. A length of 0 does not mean the element
   * is null. In particular, a 0-length string is not null.
   */
  boolean[] elementIsNull;

  /**
   * The elements of the array. Note that arrayElements[i].init(bytes, begin,
   * length) is called only when that element is accessed. Entries are created
   * on first access and reused afterwards.
   */
  LazyBinaryObject[] arrayElements;

  /**
   * Scratch holders filled by LazyBinaryUtils while parsing; kept as fields so
   * that parsing does not allocate them on every call.
   */
  VInt vInt = new LazyBinaryUtils.VInt();
  RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();

  /**
   * cachedList is reused every time getList is called. Different
   * LazyBinaryArray instances cannot share the same cachedList.
   */
  ArrayList<Object> cachedList;

  /**
   * Construct a LazyBinaryArray object with the ObjectInspector.
   *
   * @param oi
   *          the oi representing the type of this LazyBinaryArray
   */
  protected LazyBinaryArray(LazyBinaryListObjectInspector oi) {
    super(oi);
  }

  /**
   * Set the row data for this LazyBinaryArray. The bytes are not parsed here;
   * parsing is deferred until the array is first read.
   *
   * @see LazyObject#init(ByteArrayRef, int, int)
   */
  @Override
  public void init(ByteArrayRef bytes, int start, int length) {
    super.init(bytes, start, length);
    parsed = false;
  }

  /**
   * Make sure the per-element bookkeeping arrays can hold at least
   * {@code newSize} entries. The arrays are only ever replaced by larger ones;
   * when they are replaced, previously created element objects are dropped.
   *
   * @param newSize
   *          the number of elements that must fit
   */
  private void adjustArraySize(int newSize) {
    if (elementStart == null || elementStart.length < newSize) {
      elementStart = new int[newSize];
      elementLength = new int[newSize];
      elementInited = new boolean[newSize];
      elementIsNull = new boolean[newSize];
      arrayElements = new LazyBinaryObject[newSize];
    }
  }

  /**
   * Parse the bytes and fill arraySize, elementStart, elementLength,
   * elementInited and elementIsNull.
   */
  private void parse() {
    // Named "data" so it does not shadow the inherited "bytes" field.
    byte[] data = this.bytes.getData();

    // The array begins with a VInt holding the number of elements.
    LazyBinaryUtils.readVInt(data, start, vInt);
    arraySize = vInt.value;
    if (arraySize == 0) {
      parsed = true;
      return;
    }

    adjustArraySize(arraySize);

    // The null-bytes follow the element count: one bit per element.
    int nullByteStart = start + vInt.length;
    int nullByteEnd = nullByteStart + (arraySize + 7) / 8;
    // The element payloads begin right after the null-bytes.
    int nextElementPos = nullByteEnd;

    ObjectInspector elementInspector = ((ListObjectInspector) oi)
        .getListElementObjectInspector();

    for (int i = 0; i < arraySize; i++) {
      // Bit (i % 8) of null-byte (i / 8) is set when element i is not null.
      boolean present = (data[nullByteStart + i / 8] & (1 << (i % 8))) != 0;
      elementIsNull[i] = !present;
      if (present) {
        // Only non-null elements occupy bytes, so the cursor advances only here.
        LazyBinaryUtils.checkObjectByteInfo(elementInspector, data,
            nextElementPos, recordInfo);
        elementStart[i] = nextElementPos + recordInfo.elementOffset;
        elementLength[i] = recordInfo.elementSize;
        nextElementPos = elementStart[i] + elementLength[i];
      }
    }

    // Element objects must be re-initialized against the new bytes on access.
    Arrays.fill(elementInited, 0, arraySize, false);
    parsed = true;
  }

  /**
   * Returns the object at the index position inside the array represented by
   * this LazyBinaryObject.
   *
   * @param index
   *          index to the array element
   * @return the element object, or null when the index is out of range or the
   *         element itself is null
   */
  public Object getListElementObject(int index) {
    if (!parsed) {
      parse();
    }
    if (index < 0 || index >= arraySize) {
      return null;
    }
    return uncheckedGetElement(index);
  }

  /**
   * Get the element without checking out-of-bound index. The data must already
   * be parsed.
   *
   * @param index
   *          index to the array element
   * @return the element object, or null when the element is null
   */
  private Object uncheckedGetElement(int index) {
    if (elementIsNull[index]) {
      return null;
    }
    if (!elementInited[index]) {
      if (arrayElements[index] == null) {
        // Same cast as in parse(), so both call sites resolve the element
        // inspector the same way.
        arrayElements[index] = LazyBinaryFactory
            .createLazyBinaryObject(((ListObjectInspector) oi)
                .getListElementObjectInspector());
      }
      arrayElements[index].init(bytes, elementStart[index],
          elementLength[index]);
      // Mark as initialized only after init succeeded, so a failure above is
      // retried on the next access instead of leaving a half-built element.
      elementInited[index] = true;
    }
    return arrayElements[index].getObject();
  }

  /**
   * Returns the array size.
   *
   * @return the number of elements, parsing the data first if needed
   */
  public int getListLength() {
    if (!parsed) {
      parse();
    }
    return arraySize;
  }

  /**
   * Returns the List of element objects; null elements appear as null entries.
   * Never returns null: an empty array yields an empty list.
   *
   * The returned list is the shared cachedList, which is cleared and refilled
   * on every call, so callers that need the contents to survive a later call
   * must copy it.
   *
   * @return the reused list holding all element objects in order
   */
  public List<Object> getList() {
    if (!parsed) {
      parse();
    }
    if (cachedList == null) {
      cachedList = new ArrayList<Object>(arraySize);
    } else {
      cachedList.clear();
    }
    for (int index = 0; index < arraySize; index++) {
      cachedList.add(uncheckedGetElement(index));
    }
    return cachedList;
  }
}