/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec.spark;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;

import scala.Tuple2;

import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

/**
 * A key/value cache with a fixed-size in-memory buffer. When the buffer is
 * full, new entries are spilled to disk. This class is thread safe since
 * multiple threads may access it (although not necessarily concurrently),
 * for example the StreamThread in ScriptOperator.
 */
@SuppressWarnings("unchecked")
class HiveKVResultCache {
  private static final Log LOG = LogFactory.getLog(HiveKVResultCache.class);

  @VisibleForTesting
  static final int IN_MEMORY_NUM_ROWS = 1024;

  private ObjectPair[] writeBuffer;
  private ObjectPair[] readBuffer;

  private File parentFile;
  private File tmpFile;

  private int readCursor = 0;
  private int writeCursor = 0;

  // Indicates whether the read buffer holds data; for example,
  // while reading, rows spilled to disk may be pulled back in
  private boolean readBufferUsed = false;
  private int rowsInReadBuffer = 0;

  private Input input;
  private Output output;

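  // All ObjectPair holders are pre-allocated up front; add() and next()
  // reuse them rather than allocating a wrapper object per row.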
  public HiveKVResultCache() {
    writeBuffer = new ObjectPair[IN_MEMORY_NUM_ROWS];
    readBuffer = new ObjectPair[IN_MEMORY_NUM_ROWS];
    for (int i = 0; i < IN_MEMORY_NUM_ROWS; i++) {
      writeBuffer[i] = new ObjectPair();
      readBuffer[i] = new ObjectPair();
    }
  }

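  /**
   * Swap the read and write buffers: the full write buffer becomes the
   * read buffer (with rowsInReadBuffer recording how many rows it holds),
   * and the drained read buffer becomes the new, empty write buffer.
   */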
  private void switchBufferAndResetCursor() {
    ObjectPair[] tmp = readBuffer;
    rowsInReadBuffer = writeCursor;
    readBuffer = writeBuffer;
    readBufferUsed = true;
    readCursor = 0;
    writeBuffer = tmp;
    writeCursor = 0;
  }

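  /**
   * Lazily create the temp spill directory and open a Kryo Output over a
   * spill file. A fresh temp file is created on first use, or when the
   * previous spill file is still being consumed through {@code input}.
   */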
  private void setupOutput() throws IOException {
    if (parentFile == null) {
      while (true) {
        parentFile = File.createTempFile("hive-resultcache", "");
        if (parentFile.delete() && parentFile.mkdir()) {
          parentFile.deleteOnExit();
          break;
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Retry creating tmp result-cache directory...");
        }
      }
    }

    if (tmpFile == null || input != null) {
      tmpFile = File.createTempFile("ResultCache", ".tmp", parentFile);
      LOG.info("ResultCache created temp file " + tmpFile.getAbsolutePath());
      tmpFile.deleteOnExit();
    }

    FileOutputStream fos = null;
    try {
      fos = new FileOutputStream(tmpFile);
      output = new Output(fos);
    } finally {
      if (output == null && fos != null) {
        fos.close();
      }
    }
  }

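  // On-disk record layout: a value is written as [int length][bytes]; a key
  // as [int length][bytes][int hashCode][int distKeyLength]. The read and
  // write helpers below must stay symmetric with each other.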
  private BytesWritable readValue(Input input) {
    return new BytesWritable(input.readBytes(input.readInt()));
  }

  private void writeValue(Output output, BytesWritable bytesWritable) {
    int size = bytesWritable.getLength();
    output.writeInt(size);
    output.writeBytes(bytesWritable.getBytes(), 0, size);
  }

  private HiveKey readHiveKey(Input input) {
    HiveKey hiveKey = new HiveKey(
      input.readBytes(input.readInt()), input.readInt());
    hiveKey.setDistKeyLength(input.readInt());
    return hiveKey;
  }

  private void writeHiveKey(Output output, HiveKey hiveKey) {
    int size = hiveKey.getLength();
    output.writeInt(size);
    output.writeBytes(hiveKey.getBytes(), 0, size);
    output.writeInt(hiveKey.hashCode());
    output.writeInt(hiveKey.getDistKeyLength());
  }

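  /**
   * Append a key/value pair. When the write buffer fills up, either swap it
   * with the drained read buffer or, if the read buffer is still in use,
   * spill the entire write buffer to disk before storing the new row.
   */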
  public synchronized void add(HiveKey key, BytesWritable value) {
    if (writeCursor >= IN_MEMORY_NUM_ROWS) { // Write buffer is full
      if (!readBufferUsed) { // Read buffer isn't used, switch buffer
        switchBufferAndResetCursor();
      } else {
        // Need to spill from write buffer to disk
        try {
          if (output == null) {
            setupOutput();
          }
          for (int i = 0; i < IN_MEMORY_NUM_ROWS; i++) {
            ObjectPair pair = writeBuffer[i];
            writeHiveKey(output, pair.getFirst());
            writeValue(output, pair.getSecond());
            pair.setFirst(null);
            pair.setSecond(null);
          }
          writeCursor = 0;
        } catch (Exception e) {
          clear(); // Clean up the cache
          throw new RuntimeException("Failed to spill rows to disk", e);
        }
      }
    }
    ObjectPair pair = writeBuffer[writeCursor++];
    pair.setFirst(key);
    pair.setSecond(value);
  }

  public synchronized void clear() {
    writeCursor = readCursor = rowsInReadBuffer = 0;
    readBufferUsed = false;

    if (parentFile != null) {
      if (input != null) {
        try {
          input.close();
        } catch (Throwable ignored) {
        }
        input = null;
      }
      if (output != null) {
        try {
          output.close();
        } catch (Throwable ignored) {
        }
        output = null;
      }
      try {
        FileUtil.fullyDelete(parentFile);
      } catch (Throwable ignored) {
      }
      parentFile = null;
      tmpFile = null;
    }
  }

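  /**
   * Whether any rows remain. Rows spilled to disk are always accompanied by
   * at least one row in the write buffer (add() stores its row immediately
   * after spilling), so checking the two in-memory indicators is sufficient.
   */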
  public synchronized boolean hasNext() {
    return readBufferUsed || writeCursor > 0;
  }

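  /**
   * Return the next key/value pair. Refills the read buffer from disk when
   * spilled rows exist; otherwise serves rows from the write buffer, either
   * directly (single row) or by swapping the buffers.
   */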
  public synchronized Tuple2 next() {
    Preconditions.checkState(hasNext());
    if (!readBufferUsed) {
      try {
        if (input == null && output != null) {
          // Close output stream if open
          output.close();
          output = null;

          FileInputStream fis = null;
          try {
            fis = new FileInputStream(tmpFile);
            input = new Input(fis);
          } finally {
            if (input == null && fis != null) {
              fis.close();
            }
          }
        }
        if (input != null) {
          // Load next batch from disk
          for (int i = 0; i < IN_MEMORY_NUM_ROWS; i++) {
            ObjectPair pair = readBuffer[i];
            pair.setFirst(readHiveKey(input));
            pair.setSecond(readValue(input));
          }
          if (input.eof()) {
            input.close();
            input = null;
          }
          rowsInReadBuffer = IN_MEMORY_NUM_ROWS;
          readBufferUsed = true;
          readCursor = 0;
        } else if (writeCursor == 1) {
          ObjectPair pair = writeBuffer[0];
          Tuple2 row = new Tuple2(
            pair.getFirst(), pair.getSecond());
          pair.setFirst(null);
          pair.setSecond(null);
          writeCursor = 0;
          return row;
        } else {
          // No record on disk, more data in write buffer
          switchBufferAndResetCursor();
        }
      } catch (Exception e) {
        clear(); // Clean up the cache
        throw new RuntimeException("Failed to load rows from disk", e);
      }
    }
    ObjectPair pair = readBuffer[readCursor];
    Tuple2 row = new Tuple2(
      pair.getFirst(), pair.getSecond());
    pair.setFirst(null);
    pair.setSecond(null);
    if (++readCursor >= rowsInReadBuffer) {
      readBufferUsed = false;
      rowsInReadBuffer = 0;
      readCursor = 0;
    }
    return row;
  }
}
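
// A minimal usage sketch (hypothetical driver code, not part of this file;
// in Hive the cache is consumed through result-list classes such as
// HiveBaseFunctionResultList). Adding more than IN_MEMORY_NUM_ROWS rows
// forces a spill to disk, which next() later reads back in batches:
//
//   HiveKVResultCache cache = new HiveKVResultCache();
//   for (int i = 0; i < 3 * HiveKVResultCache.IN_MEMORY_NUM_ROWS; i++) {
//     HiveKey key = new HiveKey(("key" + i).getBytes(), i);
//     cache.add(key, new BytesWritable(("value" + i).getBytes()));
//   }
//   while (cache.hasNext()) {
//     Tuple2 row = cache.next();     // (HiveKey, BytesWritable)
//     // ... process row._1() and row._2() ...
//   }
//   cache.clear();  // drop buffers and delete the temp spill file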