All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.persistence;

import java.io.ObjectOutputStream;
import java.util.AbstractCollection;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;

@SuppressWarnings("deprecation")
public class FlatRowContainer extends AbstractCollection
    implements MapJoinRowContainer, AbstractRowContainer.RowIterator>, List {
  private static final Object[] EMPTY_OBJECT_ARRAY = new Object[0];
  private static final int UNKNOWN = Integer.MAX_VALUE;

  private static Log LOG = LogFactory.getLog(FlatRowContainer.class);

  /**
   * In lazy mode, 0s element contains context for deserialization and all the other
   * elements contains byte arrays to be deserialized. After deserialization, the array
   * contains row count * row size elements - a matrix of rows stored.
   */
  private Object[] array;
  /**
   * This is kind of tricky. UNKNOWN number means unknown. Other positive numbers represent
   * row length (see array javadoc). Non-positive numbers mean row length is zero (thus,
   * array is empty); they represent (negated) number of rows (for joins w/o projections).
   */
  private int rowLength = UNKNOWN;
  private byte aliasFilter = (byte) 0xff;
  private boolean isAliasFilterSet = true; // by default assume no filter tag so we are good

  public FlatRowContainer() {
    this.array = EMPTY_OBJECT_ARRAY;
  }

  /** Called when loading the hashtable. */
  public void add(MapJoinObjectSerDeContext context,
      BytesWritable value) throws HiveException {
    SerDe serde = context.getSerDe();
    isAliasFilterSet = !context.hasFilterTag(); // has tag => need to set later
    if (rowLength == UNKNOWN) {
      try {
        rowLength = ObjectInspectorUtils.getStructSize(serde.getObjectInspector());
      } catch (SerDeException ex) {
        throw new HiveException("Get structure size error", ex);
      }
      if (rowLength == 0) {
        array = EMPTY_OBJECT_ARRAY;
      }
    }
    if (rowLength > 0) {
      int rowCount = (array.length / rowLength);
      listRealloc(array.length + rowLength);
      read(serde, value, rowCount);
    } else {
      --rowLength; // see rowLength javadoc
    }
  }

  // Implementation of AbstractRowContainer and assorted methods

  @Override
  public void addRow(List t) throws HiveException {
    LOG.debug("Add is called with " + t.size() + " objects");
    // This is not called when building HashTable; we don't expect it to be called ever.
    int offset = prepareForAdd(t.size());
    if (offset < 0) return;
    for (int i = 0; i < t.size(); ++i) {
      this.array[offset + i] = t.get(i);
    }
  }

  @Override
  public void addRow(Object[] value) throws HiveException {
    LOG.debug("Add is called with " + value.length + " objects");
    // This is not called when building HashTable; we don't expect it to be called ever.
    int offset = prepareForAdd(value.length);
    if (offset < 0) return;
    System.arraycopy(value, 0, this.array, offset, value.length);
  }

  private int prepareForAdd(int len) throws HiveException {
    if (rowLength < 0) {
      if (len != 0) {
        throw new HiveException("Different size rows: 0 and " + len);
      }
      --rowLength; // see rowLength javadoc
      return -1;
    }
    if (rowLength != len) {
      throw new HiveException("Different size rows: " + rowLength + " and " + len);
    }
    int oldLen = this.array.length;
    listRealloc(oldLen + len);
    return oldLen;
  }

  @Override
  public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out) {
    throw new UnsupportedOperationException(this.getClass().getName() + " cannot be serialized");
  }

  @Override
  public AbstractRowContainer.RowIterator> rowIter() throws HiveException {
    if (array.length == rowLength) {
      // optimize for common case - just one row for a key, container acts as iterator
      return this;
    }
    return rowLength > 0 ? new RowIterator() : new EmptyRowIterator(-rowLength);
  }

  @Override
  public List first() throws HiveException {
    if (array.length != rowLength) {
      throw new AssertionError("Incorrect iterator usage, not single-row");
    }
    return this; // optimize for common case - just one row for a key, container acts as row
  }

  @Override
  public List next() {
    return null; // single-row case, there's no next
  }

  /** Iterator for row length 0. */
  private static class EmptyRowIterator implements AbstractRowContainer.RowIterator> {
    private static final List EMPTY_ROW = new ArrayList();
    private int rowCount;
    public EmptyRowIterator(int rowCount) {
      this.rowCount = rowCount;
    }

    @Override
    public List first() throws HiveException {
      return next();
    }

    @Override
    public List next() throws HiveException {
      return (--rowCount < 0) ? null : EMPTY_ROW;
    }
  }

  /** Row iterator for non-zero-length rows. */
  private class RowIterator implements AbstractRowContainer.RowIterator> {
    private int index = 0;

    @Override
    public List first() throws HiveException {
      index = 0;
      if (array.length > 0) {
        return new ReadOnlySubList(0, rowLength);
      }
      return null;
    }

    @Override
    public List next() {
      index += rowLength;
      if (index < array.length) {
        return new ReadOnlySubList(index, rowLength);
      }
      return null;
    }
  }

  private void read(SerDe serde, Writable writable, int rowOffset) throws HiveException {
    try {
      ObjectInspectorUtils.copyStructToArray(
          serde.deserialize(writable), serde.getObjectInspector(),
          ObjectInspectorCopyOption.WRITABLE, this.array, rowOffset * rowLength);
    } catch (SerDeException ex) {
      throw new HiveException("Lazy deserialize error", ex);
    }
  }

  @Override
  public boolean hasRows() throws HiveException {
    return rowCount() > 0;
  }

  @Override
  public boolean isSingleRow() throws HiveException {
    return rowCount() == 1;
  }

  @Override
  public int rowCount() throws HiveException {
    return rowLength > 0 ? (array.length / rowLength) : -rowLength; // see rowLength javadoc
  }

  @Override
  public void clearRows() {
    array = EMPTY_OBJECT_ARRAY;
    rowLength = 0;
  }

  @Override
  public byte getAliasFilter() throws HiveException {
    ensureAliasFilter();
    return this.aliasFilter;
  }

  private void ensureAliasFilter() throws HiveException {
    if (!isAliasFilterSet && rowLength > 0) {
      for (int offset = rowLength - 1; offset < array.length; offset += rowLength) {
        aliasFilter &= ((ShortWritable)array[offset]).get();
      }
    }
    isAliasFilterSet = true;
  }

  @Override
  public MapJoinRowContainer copy() throws HiveException {
    FlatRowContainer result = new FlatRowContainer();
    result.array = new Object[this.array.length];
    System.arraycopy(this.array, 0, result.array, 0, this.array.length);
    result.rowLength = rowLength;
    result.aliasFilter = aliasFilter;
    return result;
  }

  // Implementation of List and assorted methods

  private void listRealloc(int length) {
    Object[] array = new Object[length];
    if (this.array.length > 0) {
      System.arraycopy(this.array, 0, array, 0, this.array.length);
    }
    this.array = array;
  }

  @Override
  public int size() {
    checkSingleRow();
    return array.length;
  }

  @Override
  public Object get(int index) {
    return array[index];
  }

  private class ReadOnlySubList extends AbstractList {
    private int offset;
    private int size;

    ReadOnlySubList(int from, int size) {
      this.offset = from;
      this.size = size;
    }

    public Object get(int index) {
      return array[index + offset];
    }

    public int size() {
      return size;
    }

    public Iterator iterator() {
      return listIterator();
    }

    public ListIterator listIterator(int index) {
      return listIteratorInternal(offset + index, offset, offset + size);
    }

    public List subList(int fromIndex, int toIndex) {
      return new ReadOnlySubList(offset + fromIndex, toIndex - fromIndex);
    }

    public Object[] toArray() {
      Object[] result = new Object[size];
      System.arraycopy(array, offset, result, 0, size);
      return result;
    }
  } // end ReadOnlySubList

  @Override
  public Object[] toArray() {
    checkSingleRow();
    return array;
  }

  @Override
  public Iterator iterator() {
    return listIterator();
  }

  @Override
  public ListIterator listIterator() {
    return listIterator(0);
  }

  @Override
  public ListIterator listIterator(final int index) {
    checkSingleRow();
    return listIteratorInternal(index, 0, array.length);
  }

  private ListIterator listIteratorInternal(
      final int index, final int iterMinPos, final int iterMaxPos) {
    return new ListIterator() {
      private int pos = index - 1;
      public int nextIndex() {
        return pos + 1;
      }
      public int previousIndex() {
        return pos - 1;
      }
      public boolean hasNext() {
        return nextIndex() < iterMaxPos;
      }
      public boolean hasPrevious() {
        return previousIndex() >= iterMinPos;
      }
      public Object next() {
        if (!hasNext()) throw new NoSuchElementException();
        return get(++pos);
      }
      public Object previous() {
        if (!hasPrevious()) throw new NoSuchElementException();
        return get(--pos);
      }

      public void remove() { throw new UnsupportedOperationException(); }
      public void set(Object e) { throw new UnsupportedOperationException(); }
      public void add(Object e) { throw new UnsupportedOperationException(); }
    }; // end ListIterator
  }

  @Override
  public int indexOf(Object o) {
    checkSingleRow();
    for (int i = 0; i < array.length; ++i) {
      if (o == null) {
        if (array[i] == null) return i;
      } else {
        if (o.equals(array[i])) return i;
      }
    }
    return -1;
  }

  private void checkSingleRow() throws AssertionError {
    if (array.length != rowLength) {
      throw new AssertionError("Incorrect list usage, not single-row");
    }
  }

  @Override
  public int lastIndexOf(Object o) {
    checkSingleRow();
    for (int i = array.length - 1; i >= 0; --i) {
      if (o == null) {
        if (array[i] == null) return i;
      } else {
        if (o.equals(array[i])) return i;
      }
    }
    return -1;
  }

  @Override
  public List subList(int fromIndex, int toIndex) {
    checkSingleRow();
    return new ReadOnlySubList(fromIndex, toIndex - fromIndex);
  }

  public boolean addAll(int index, Collection c) {
    throw new UnsupportedOperationException();
  }
  public Object set(int index, Object element) { throw new UnsupportedOperationException(); }
  public void add(int index, Object element) { throw new UnsupportedOperationException(); }
  public Object remove(int index) { throw new UnsupportedOperationException(); }
}