org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterGenerateResultOperator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.mapjoin;

import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;

/**
 * This class has methods for generating vectorized join results for outer joins.
 *
 * The big difference between inner joins and outer joins is the treatment of null and non-matching
 * keys.
 *
 * Inner joins ignore null keys.  Outer joins include big table rows with null keys in the result.
 *
 * (Left non-full) outer joins include big table rows that do not match the small table.  Small
 * table columns for non-matches will be NULL.
 *
 * Another important difference is filtering.  For outer joins to include the necessary rows,
 * filtering must be done after the hash table lookup.  That is because filtering does not
 * eliminate rows, but changes them from match to non-matching rows.  They will still appear in
 * the join result.
 *
 * One vector outer join optimization is referencing bytes outer keys.  When a bytes key appears
 * in the small table results area, instead of copying the bytes key we reference the big table key.
 * Bytes column vectors allow a by reference entry to bytes.  It is safe to do a by reference
 * since it is within the same row.
 *
 * Outer join uses a hash map since small table columns can be included in the join result.
 */
public abstract class VectorMapJoinOuterGenerateResultOperator
        extends VectorMapJoinGenerateResultOperator {

  private static final long serialVersionUID = 1L;
  private static final Log LOG = LogFactory.getLog(VectorMapJoinOuterGenerateResultOperator.class.getName());

  //---------------------------------------------------------------------------
  // Outer join specific members.
  //

  // An array of hash map results so we can do lookups on the whole batch before output result
  // generation.
  protected transient VectorMapJoinHashMapResult hashMapResults[];

  // Pre-allocated member for remembering the big table's selected array at the beginning of
  // the process method before applying any filter.  For outer join we need to remember which
  // rows did not match since they will appear the in outer join result with NULLs for the
  // small table.
  protected transient int[] inputSelected;

  // Pre-allocated member for storing the (physical) batch index of matching row (single- or
  // multi-small-table-valued) indexes during a process call.
  protected transient int[] allMatchs;

  /*
   *  Pre-allocated members for storing information equal key series for small-table matches.
   *
   *  ~HashMapResultIndices
   *                Index into the hashMapResults array for the match.
   *  ~AllMatchIndices
   *                (Logical) indices into allMatchs to the first row of a match of a
   *                possible series of duplicate keys.
   *  ~IsSingleValue
   *                Whether there is 1 or multiple small table values.
   *  ~DuplicateCounts
   *                The duplicate count for each matched key.
   *
   */
  protected transient int[] equalKeySeriesHashMapResultIndices;
  protected transient int[] equalKeySeriesAllMatchIndices;
  protected transient boolean[] equalKeySeriesIsSingleValue;
  protected transient int[] equalKeySeriesDuplicateCounts;

  // Pre-allocated member for storing the (physical) batch index of rows that need to be spilled.
  protected transient int[] spills;

  // Pre-allocated member for storing index into the hashSetResults for each spilled row.
  protected transient int[] spillHashMapResultIndices;

  // Pre-allocated member for storing any non-spills, non-matches, or merged row indexes during a
  // process method call.
  protected transient int[] nonSpills;
  protected transient int[] noMatchs;
  protected transient int[] merged;

  public VectorMapJoinOuterGenerateResultOperator() {
    super();
  }

  public VectorMapJoinOuterGenerateResultOperator(VectorizationContext vContext, OperatorDesc conf)
              throws HiveException {
    super(vContext, conf);
  }

  /*
   * Setup our outer join specific members.
   */
  protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
    super.commonSetup(batch);

    // Outer join specific.
    VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable;

    hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE];
    for (int i = 0; i < hashMapResults.length; i++) {
      hashMapResults[i] = baseHashMap.createHashMapResult();
    }

    inputSelected = new int[batch.DEFAULT_SIZE];

    allMatchs = new int[batch.DEFAULT_SIZE];

    equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE];
    equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE];
    equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE];
    equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE];

    spills = new int[batch.DEFAULT_SIZE];
    spillHashMapResultIndices = new int[batch.DEFAULT_SIZE];

    nonSpills = new int[batch.DEFAULT_SIZE];
    noMatchs = new int[batch.DEFAULT_SIZE];
    merged = new int[batch.DEFAULT_SIZE];

  }

  //-----------------------------------------------------------------------------------------------

  /*
   * Outer join (hash map).
   */

  /**
   * Do the per-batch setup for an outer join.
   */
  protected void outerPerBatchSetup(VectorizedRowBatch batch) {

    // For join operators that can generate small table results, reset their
    // (target) scratch columns.

    for (int column : smallTableOutputVectorColumns) {
      ColumnVector smallTableColumn = batch.cols[column];
      smallTableColumn.reset();
    }

    for (int column : bigTableOuterKeyOutputVectorColumns) {
      ColumnVector bigTableOuterKeyColumn = batch.cols[column];
      bigTableOuterKeyColumn.reset();
    }
  }

  /**
   * Apply the value expression to rows in the (original) input selected array.
   *
   * @param batch
   *          The vectorized row batch.
   * @param inputSelectedInUse
   *          Whether the (original) input batch is selectedInUse.
   * @param inputLogicalSize
   *          The (original) input batch size.
   */
  private void doValueExprOnInputSelected(VectorizedRowBatch batch,
      boolean inputSelectedInUse, int inputLogicalSize) {

    int saveBatchSize = batch.size;
    int[] saveSelected = batch.selected;
    boolean saveSelectedInUse = batch.selectedInUse;

    batch.size = inputLogicalSize;
    batch.selected = inputSelected;
    batch.selectedInUse = inputSelectedInUse;

    if (bigTableValueExpressions != null) {
      for(VectorExpression ve: bigTableValueExpressions) {
        ve.evaluate(batch);
      }
    }

    batch.size = saveBatchSize;
    batch.selected = saveSelected;
    batch.selectedInUse = saveSelectedInUse;
  }

  /**
   * Apply the value expression to rows specified by a selected array.
   *
   * @param batch
   *          The vectorized row batch.
   * @param selected
   *          The (physical) batch indices to apply the expression to.
   * @param size
   *          The size of selected.
   */
  private void doValueExpr(VectorizedRowBatch batch,
      int[] selected, int size) {

    int saveBatchSize = batch.size;
    int[] saveSelected = batch.selected;
    boolean saveSelectedInUse = batch.selectedInUse;

    batch.size = size;
    batch.selected = selected;
    batch.selectedInUse = true;

    if (bigTableValueExpressions != null) {
      for(VectorExpression ve: bigTableValueExpressions) {
        ve.evaluate(batch);
      }
    }

    batch.size = saveBatchSize;
    batch.selected = saveSelected;
    batch.selectedInUse = saveSelectedInUse;
  }

  /**
   * Remove (subtract) members from the input selected array and produce the results into
   * a difference array.
   *
   * @param inputSelectedInUse
   *          Whether the (original) input batch is selectedInUse.
   * @param inputLogicalSize
   *          The (original) input batch size.
   * @param remove
   *          The indices to remove.  They must all be present in input selected array.
   * @param removeSize
   *          The size of remove.
   * @param difference
   *          The resulting difference -- the input selected array indices not in the
   *          remove array.
   * @return
   *          The resulting size of the difference array.
   * @throws HiveException 
   */
  private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogicalSize,
      int[] remove, int removeSize, int[] difference) throws HiveException {

    // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
    //   throw new HiveException("remove is not in sort order and unique");
    // }

   int differenceCount = 0;

   // Determine which rows are left.
   int removeIndex = 0;
   if (inputSelectedInUse) {
     for (int i = 0; i < inputLogicalSize; i++) {
       int candidateIndex = inputSelected[i];
       if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
         removeIndex++;
       } else {
         difference[differenceCount++] = candidateIndex;
       }
     }
   } else {
     for (int candidateIndex = 0; candidateIndex < inputLogicalSize; candidateIndex++) {
       if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
         removeIndex++;
       } else {
         difference[differenceCount++] = candidateIndex;
       }
     }
   }

   if (removeIndex != removeSize) {
     throw new HiveException("Not all batch indices removed");
   }

   // if (!verifyMonotonicallyIncreasing(difference, differenceCount)) {
   //   throw new HiveException("difference is not in sort order and unique");
   // }

   return differenceCount;
 }

  /**
   * Remove (subtract) members from an array and produce the results into
   * a difference array.

   * @param all
   *          The selected array containing all members.
   * @param allSize
   *          The size of all.
   * @param remove
   *          The indices to remove.  They must all be present in input selected array.
   * @param removeSize
   *          The size of remove.
   * @param difference
   *          The resulting difference -- the all array indices not in the
   *          remove array.
   * @return
   *          The resulting size of the difference array.
   * @throws HiveException 
   */
  private int subtract(int[] all, int allSize,
      int[] remove, int removeSize, int[] difference) throws HiveException {

    // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
    //   throw new HiveException("remove is not in sort order and unique");
    // }

    int differenceCount = 0;

    // Determine which rows are left.
    int removeIndex = 0;
    for (int i = 0; i < allSize; i++) {
      int candidateIndex = all[i];
      if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
        removeIndex++;
      } else {
        difference[differenceCount++] = candidateIndex;
      }
    }

    if (removeIndex != removeSize) {
      throw new HiveException("Not all batch indices removed");
    }

    return differenceCount;
  }

  /**
   * Sort merge two select arrays so the resulting array is ordered by (batch) index.
   *
   * @param selected1
   * @param selected1Count
   * @param selected2
   * @param selected2Count
   * @param sortMerged
   *          The resulting sort merge of selected1 and selected2.
   * @return
   *          The resulting size of the sortMerged array.
   * @throws HiveException 
   */
  private int sortMerge(int[] selected1, int selected1Count,
          int[] selected2, int selected2Count, int[] sortMerged) throws HiveException {

    // if (!verifyMonotonicallyIncreasing(selected1, selected1Count)) {
    //   throw new HiveException("selected1 is not in sort order and unique");
    // }

    // if (!verifyMonotonicallyIncreasing(selected2, selected2Count)) {
    //   throw new HiveException("selected1 is not in sort order and unique");
    // }


    int sortMergeCount = 0;

    int selected1Index = 0;
    int selected2Index = 0;
    for (int i = 0; i < selected1Count + selected2Count; i++) {
      if (selected1Index < selected1Count && selected2Index < selected2Count) {
        if (selected1[selected1Index] < selected2[selected2Index]) {
          sortMerged[sortMergeCount++] = selected1[selected1Index++];
        } else {
          sortMerged[sortMergeCount++] = selected2[selected2Index++];
        }
      } else if (selected1Index < selected1Count) {
        sortMerged[sortMergeCount++] = selected1[selected1Index++];
      } else {
        sortMerged[sortMergeCount++] = selected2[selected2Index++];
      }
    }

    // if (!verifyMonotonicallyIncreasing(sortMerged, sortMergeCount)) {
    //   throw new HiveException("sortMerged is not in sort order and unique");
    // }

    return sortMergeCount;
  }

  /**
   * Generate the outer join output results for one vectorized row batch.
   *
   * @param batch
   *          The big table batch with any matching and any non matching rows both as
   *          selected in use.
   * @param allMatchCount
   *          Number of matches in allMatchs.
   * @param equalKeySeriesCount
   *          Number of single value matches.
   * @param atLeastOneNonMatch
   *          Whether at least one row was a non-match.
   * @param inputSelectedInUse
   *          A copy of the batch's selectedInUse flag on input to the process method.
   * @param inputLogicalSize
   *          The batch's size on input to the process method.
   * @param spillCount
   *          Number of spills in spills.
   * @param hashMapResultCount
   *          Number of entries in hashMapResults.
   */
  public void finishOuter(VectorizedRowBatch batch,
      int allMatchCount, int equalKeySeriesCount, boolean atLeastOneNonMatch,
      boolean inputSelectedInUse, int inputLogicalSize,
      int spillCount, int hashMapResultCount) throws IOException, HiveException {

    // Get rid of spills before we start modifying the batch.
    if (spillCount > 0) {
      spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults,
          spills, spillHashMapResultIndices, spillCount);
    }

    int noMatchCount = 0;
    if (spillCount > 0) {

      // Subtract the spills to get all match and non-match rows.
      int nonSpillCount = subtractFromInputSelected(
              inputSelectedInUse, inputLogicalSize, spills, spillCount, nonSpills);

      if (LOG.isDebugEnabled()) {
        LOG.debug("finishOuter spillCount > 0" +
            " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount));
      }
  
      // Big table value expressions apply to ALL matching and non-matching rows.
      if (bigTableValueExpressions != null) {
  
        doValueExpr(batch, nonSpills, nonSpillCount);
  
      }
  
      if (atLeastOneNonMatch) {
        noMatchCount = subtract(nonSpills, nonSpillCount, allMatchs, allMatchCount,
                noMatchs);

        if (LOG.isDebugEnabled()) {
          LOG.debug("finishOuter spillCount > 0" +
              " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
        }

      }
    } else {

      // Run value expressions over original (whole) input batch.
      doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);

      if (atLeastOneNonMatch) {
        noMatchCount = subtractFromInputSelected(
            inputSelectedInUse, inputLogicalSize, allMatchs, allMatchCount, noMatchs);

        if (LOG.isDebugEnabled()) {
          LOG.debug("finishOuter spillCount == 0" +
              " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
        }
      }
    }

    // When we generate results into the overflow batch, we may still end up with fewer rows
    // in the big table batch.  So, nulSel and the batch's selected array will be rebuilt with
    // just the big table rows that need to be forwarded, minus any rows processed with the
    // overflow batch.
    if (allMatchCount > 0) {

      int numSel = 0;
      for (int i = 0; i < equalKeySeriesCount; i++) {
        int hashMapResultIndex = equalKeySeriesHashMapResultIndices[i];
        VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex];
        int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
        boolean isSingleValue = equalKeySeriesIsSingleValue[i];
        int duplicateCount = equalKeySeriesDuplicateCounts[i];

        if (isSingleValue) {
          numSel = generateHashMapResultSingleValue(
                      batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount, numSel);
        } else {
          generateHashMapResultMultiValue(
              batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount);
        }
      }

      // The number of single value rows that were generated in the big table batch.
      batch.size = numSel;
      batch.selectedInUse = true;

      if (LOG.isDebugEnabled()) {
        LOG.debug("finishOuter allMatchCount > 0" +
            " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
      }

    } else {
      batch.size = 0;
    }

    if (noMatchCount > 0) {
      if (batch.size > 0) {

        generateOuterNulls(batch, noMatchs, noMatchCount);
  
        // Merge noMatchs and (match) selected.
        int mergeCount = sortMerge(
                noMatchs, noMatchCount, batch.selected, batch.size, merged);
    
        if (LOG.isDebugEnabled()) {
          LOG.debug("finishOuter noMatchCount > 0 && batch.size > 0" +
              " merged " + intArrayToRangesString(merged, mergeCount));
        }

        System.arraycopy(merged, 0, batch.selected, 0, mergeCount);
        batch.size = mergeCount;
        batch.selectedInUse = true;
      } else {

        // We can use the whole batch for output of no matches.

        generateOuterNullsRepeatedAll(batch);

        System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
        batch.size = noMatchCount;
        batch.selectedInUse = true;

        if (LOG.isDebugEnabled()) {
          LOG.debug("finishOuter noMatchCount > 0 && batch.size == 0" +
              " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
        }
      }
    }
  }

   /**
    * Generate the non matching outer join output results for one vectorized row batch.
    *
    * For each non matching row specified by parameter, generate nulls for the small table results.
    *
    * @param batch
    *          The big table batch with any matching and any non matching rows both as
    *          selected in use.
    * @param noMatchs
    *          A subset of the rows of the batch that are non matches.
    * @param noMatchSize
    *          Number of non matches in noMatchs.
    */
   protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
       int noMatchSize) throws IOException, HiveException {

     // Set null information in the small table results area.

     for (int i = 0; i < noMatchSize; i++) {
       int batchIndex = noMatchs[i];

       // Mark any scratch small table scratch columns that would normally receive a copy of the
       // key as null, too.
       for (int column : bigTableOuterKeyOutputVectorColumns) {
         ColumnVector colVector = batch.cols[column];
         colVector.noNulls = false;
         colVector.isNull[batchIndex] = true;
       }

       // Small table values are set to null.
       for (int column : smallTableOutputVectorColumns) {
         ColumnVector colVector = batch.cols[column];
         colVector.noNulls = false;
         colVector.isNull[batchIndex] = true;
       }
     }
   }

  /**
   * Generate the outer join output results for one vectorized row batch with a repeated key.
   *
   * Any filter expressions will apply now since hash map lookup for outer join is complete.
   *
   * @param batch
   *          The big table batch with any matching and any non matching rows both as
   *          selected in use.
   * @param joinResult
   *          The hash map lookup result for the repeated key.
   * @param hashMapResults
   *          The array of all hash map results for the batch.
   * @param someRowsFilteredOut
   *          Whether some rows of the repeated key batch were knocked out by the filter.
   * @param inputSelectedInUse
   *          A copy of the batch's selectedInUse flag on input to the process method.
   * @param inputLogicalSize
   *          The batch's size on input to the process method.
   * @param scratch1
   *          Pre-allocated storage to internal use.
   * @param scratch2
   *          Pre-allocated storage to internal use.
   */
  public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
      VectorMapJoinHashMapResult hashMapResult, boolean someRowsFilteredOut,
      boolean inputSelectedInUse, int inputLogicalSize)
          throws IOException, HiveException {

    // LOG.debug("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size + " someRowsFilteredOut " + someRowsFilteredOut);

    switch (joinResult) {
    case MATCH:

      // Rows we looked up as one repeated key are a match.  But filtered out rows
      // need to be generated as non-matches, too.

      if (someRowsFilteredOut) {

        // For the filtered out rows that didn't (logically) get looked up in the hash table,
        // we need to generate no match results for those too...

        // Run value expressions over original (whole) input batch.
        doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);

        // Now calculate which rows were filtered out (they are logically no matches).

        // Determine which rows are non matches by determining the delta between inputSelected and
        // (current) batch selected.

        int noMatchCount = subtractFromInputSelected(
                inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs);

        generateOuterNulls(batch, noMatchs, noMatchCount);

        // Now generate the matchs.  Single small table values will be put into the big table
        // batch and come back in matchs.  Any multiple small table value results will go into
        // the overflow batch.
        generateHashMapResultRepeatedAll(batch, hashMapResult);

        // Merge noMatchs and (match) selected.
        int mergeCount = sortMerge(
                noMatchs, noMatchCount, batch.selected, batch.size, merged);

        System.arraycopy(merged, 0, batch.selected, 0, mergeCount);
        batch.size = mergeCount;
        batch.selectedInUse = true;
      } else {

        // Just run our value expressions over input batch.

        if (bigTableValueExpressions != null) {
          for(VectorExpression ve: bigTableValueExpressions) {
            ve.evaluate(batch);
          }
        }

        generateHashMapResultRepeatedAll(batch, hashMapResult);
      }
      break;

    case SPILL:

      // Rows we looked up as one repeated key need to spill.  But filtered out rows
      // need to be generated as non-matches, too.

      spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResult);

      // After using selected to generate spills, generate non-matches, if any.
      if (someRowsFilteredOut) {

        // Determine which rows are non matches by determining the delta between inputSelected and
        // (current) batch selected.

        int noMatchCount = subtractFromInputSelected(
                inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs);

        System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
        batch.size = noMatchCount;
        batch.selectedInUse = true;

        generateOuterNullsRepeatedAll(batch);
      } else {
        batch.size = 0;
      }

      break;

    case NOMATCH:

      if (someRowsFilteredOut) {

        // When the repeated no match is due to filtering, we need to restore the
        // selected information.

        if (inputSelectedInUse) {
          System.arraycopy(inputSelected, 0, batch.selected, 0, inputLogicalSize);
        }
        batch.size = inputLogicalSize;
      }

      // Run our value expressions over whole batch.
      if (bigTableValueExpressions != null) {
        for(VectorExpression ve: bigTableValueExpressions) {
          ve.evaluate(batch);
        }
      }

      generateOuterNullsRepeatedAll(batch);
      break;
    }
  }

  /**
   * Generate the non-match outer join output results for the whole repeating vectorized
   * row batch.
   *
   * Each row will get nulls for all small table values.
   *
   * @param batch
   *          The big table batch.
   */
  protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {

    for (int column : smallTableOutputVectorColumns) {
      ColumnVector colVector = batch.cols[column];
      colVector.noNulls = false;
      colVector.isNull[0] = true;
      colVector.isRepeating = true;
    }

    // Mark any scratch small table scratch columns that would normally receive a copy of the key
    // as null, too.
   for (int column : bigTableOuterKeyOutputVectorColumns) {
      ColumnVector colVector = batch.cols[column];
      colVector.noNulls = false;
      colVector.isNull[0] = true;
      colVector.isRepeating = true;
    }
  }
}