// org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator (source listing: Maven / Gradle / Ivy)

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.DataOutputBuffer;

/**
 * The *NON-NATIVE* base vector map join operator class used by VectorMapJoinOperator and
 * VectorMapJoinOuterFilteredOperator.
 *
 * It has common variables and code for the output batch, Hybrid Grace spill batch, and more.
 */
public class VectorMapJoinBaseOperator extends MapJoinOperator implements VectorizationContextRegion {

  private static final Log LOG = LogFactory.getLog(VectorMapJoinBaseOperator.class.getName());

  private static final long serialVersionUID = 1L;

  protected VectorizationContext vOutContext;

  // The above members are initialized by the constructor and must not be
  // transient.
  //---------------------------------------------------------------------------

  protected transient VectorizedRowBatch outputBatch;
  protected transient VectorizedRowBatch scratchBatch;  // holds restored (from disk) big table rows

  protected transient Map outputVectorAssignRowMap;

  protected transient VectorizedRowBatchCtx vrbCtx = null;

  protected transient int tag;  // big table alias

  public VectorMapJoinBaseOperator() {
    super();
  }

  public VectorMapJoinBaseOperator (VectorizationContext vContext, OperatorDesc conf)
    throws HiveException {
    super();

    MapJoinDesc desc = (MapJoinDesc) conf;
    this.conf = desc;

    order = desc.getTagOrder();
    numAliases = desc.getExprs().size();
    posBigTable = (byte) desc.getPosBigTable();
    filterMaps = desc.getFilterMap();
    noOuterJoin = desc.isNoOuterJoin();

     // We are making a new output vectorized row batch.
    vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames());
  }

  @Override
  public Collection> initializeOp(Configuration hconf) throws HiveException {

    Collection> result = super.initializeOp(hconf);

    vrbCtx = new VectorizedRowBatchCtx();
    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);

    outputBatch = vrbCtx.createVectorizedRowBatch();

    outputVectorAssignRowMap = new HashMap();

    return result;
  }

  /**
   * 'forwards' the (row-mode) record into the (vectorized) output batch
   */
  @Override
  protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
    Object[] values = (Object[]) row;
    VectorAssignRowSameBatch va = outputVectorAssignRowMap.get(outputOI);
    if (va == null) {
      va = new VectorAssignRowSameBatch();
      va.init((StructObjectInspector) outputOI, vOutContext.getProjectedColumns());
      va.setOneBatch(outputBatch);
      outputVectorAssignRowMap.put(outputOI, va);
    }

    va.assignRow(outputBatch.size, values);

    ++outputBatch.size;
    if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      flushOutput();
    }
  }

  private void flushOutput() throws HiveException {
    forward(outputBatch, null);
    outputBatch.reset();
  }

  @Override
  public void closeOp(boolean aborted) throws HiveException {
    super.closeOp(aborted);
    for (MapJoinTableContainer tableContainer : mapJoinTables) {
      if (tableContainer != null) {
        tableContainer.dumpMetrics();
      }
    }
    if (!aborted && 0 < outputBatch.size) {
      flushOutput();
    }
  }

  /**
   * For a vectorized row batch from the rows feed from the super MapJoinOperator.
   */
  @Override
  protected void reProcessBigTable(int partitionId)
      throws HiveException {

    if (scratchBatch == null) {
      // The process method was not called -- no big table rows.
      return;
    }

    HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
    ObjectContainer bigTable = partition.getMatchfileObjContainer();

    DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
    while (bigTable.hasNext()) {
      Object row = bigTable.next();
      VectorizedBatchUtil.addProjectedRowToBatchFrom(row,
          (StructObjectInspector) inputObjInspectors[posBigTable],
          scratchBatch.size, scratchBatch, dataOutputBuffer);
      scratchBatch.size++;

      if (scratchBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        process(scratchBatch, tag); // call process once we have a full batch
        scratchBatch.reset();
        dataOutputBuffer.reset();
      }
    }
    // Process the row batch that has less than DEFAULT_SIZE rows
    if (scratchBatch.size > 0) {
      process(scratchBatch, tag);
      scratchBatch.reset();
      dataOutputBuffer.reset();
    }
    bigTable.clear();
  }

  @Override
  public VectorizationContext getOuputVectorizationContext() {
    return vOutContext;
  }
}