// org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinRowBytesContainer (Maven / Gradle / Ivy)
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * An eager bytes container that puts row bytes to an output stream.
 *
 * <p>Writing: each row is serialized by the caller into the {@link Output} returned by
 * {@link #getOuputForRowBytes()} and terminated with {@link #finishRow()}, which back-patches a
 * 4-byte length word in front of the row. Once the in-memory buffer grows past
 * {@code THRESHOLD} bytes it is spilled to a temporary file.
 *
 * <p>Reading: after {@link #prepareForReading()}, each successful {@link #readNext()} exposes one
 * row through {@link #currentBytes()}, {@link #currentOffset()} and {@link #currentLength()}.
 * The exposed array is an internal buffer that is reused, so its content is only valid until
 * the next call to {@link #readNext()}.
 *
 * <p>{@link #clear()} closes the streams, deletes the temporary files and resets the container.
 */
public class VectorMapJoinRowBytesContainer {

  private static final Log LOG = LogFactory.getLog(VectorMapJoinRowBytesContainer.class);

  private static final int OUTPUT_SIZE = 4096;

  /** Spill the write buffer to disk once it holds more than this many bytes. */
  private static final int THRESHOLD = 8 * (OUTPUT_SIZE / 10);

  /** Size of the fixed read buffer. */
  private static final int INPUT_SIZE = 4096;

  /** Maximum number of rows indexed per buffer fill. */
  private static final int MAX_READS = 256;

  /** Size of the length word that precedes every row. */
  private static final int LENGTH_WORD_SIZE = 4;

  private File parentFile;
  private File tmpFile;

  // We buffer in a org.apache.hadoop.hive.serde2.ByteStream.Output since that is what
  // is used by VectorSerializeRow / SerializeWrite.  Periodically, we flush this buffer
  // to disk.
  private final Output output;
  private int rowBeginPos;

  private FileOutputStream fileOutputStream;
  private FileInputStream fileInputStream;

  private boolean isOpen;

  // Read side: readBuffer[readOffset, readLength) holds bytes that are not yet indexed.
  private final byte[] readBuffer;
  private byte[] largeRowBuffer;
  private int readOffset;
  private int readLength;

  // Index of the rows found by the last buffer fill; entries [readNextIndex, readNextCount)
  // have not been handed out yet.
  private int readNextCount;
  private int readNextIndex;
  private final byte[][] readNextBytes;
  private final int[] readNextOffsets;
  private final int[] readNextLengths;

  // The row exposed by the last successful readNext().
  private byte[] currentBytes;
  private int currentOffset;
  private int currentLength;

  // Byte counters, used for diagnostics in error messages.
  private long totalWriteLength;
  private long totalReadLength;

  /** Creates an empty container; no file is created until the first row is requested. */
  public VectorMapJoinRowBytesContainer() {
    output = new Output();
    readBuffer = new byte[INPUT_SIZE];
    readNextBytes = new byte[MAX_READS][];
    readNextOffsets = new int[MAX_READS];
    readNextLengths = new int[MAX_READS];
    isOpen = false;
    totalWriteLength = 0;
    totalReadLength = 0;
  }

  /**
   * Creates a private temporary directory with one spill file in it and opens the file for
   * writing. Both are registered for deletion on JVM exit as a fallback for {@link #clear()}.
   *
   * @throws IOException if the directory or the file cannot be created
   */
  private void setupOutputFileStreams() throws IOException {
    // createTempFile reserves a unique name; the placeholder file is then swapped for a
    // directory of the same name.
    parentFile = File.createTempFile("bytes-container", "");
    // Registered by path, so this covers the placeholder file as well as the directory.
    parentFile.deleteOnExit();
    if (!parentFile.delete() || !parentFile.mkdir()) {
      // Fail here with a precise message instead of letting the createTempFile below fail.
      throw new IOException(
          "Failed to create temporary directory " + parentFile.getAbsolutePath());
    }

    tmpFile = File.createTempFile("BytesContainer", ".tmp", parentFile);
    if (LOG.isDebugEnabled()) {
      LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath());
    }
    tmpFile.deleteOnExit();

    fileOutputStream = new FileOutputStream(tmpFile);
  }

  /** Opens the spill file, converting an I/O failure into an unchecked exception. */
  private void initFile() {
    try {
      setupOutputFileStreams();
    } catch (IOException e) {
      throw new RuntimeException("Failed to create temporary output file on disk", e);
    }
  }

  /**
   * Returns the buffer the caller must serialize the next row into, creating the spill file on
   * first use. Space for the row's length word is reserved in front of the row; call
   * {@link #finishRow()} once the row bytes have been written.
   *
   * <p>The misspelled method name is kept because it is part of the public interface.
   *
   * @return the shared write buffer, positioned at the start of the new row
   * @throws RuntimeException if the spill file cannot be created
   */
  public Output getOuputForRowBytes() {
    if (!isOpen) {
      initFile();
      isOpen = true;
    }
    // Reserve space for the int length.
    output.reserve(LENGTH_WORD_SIZE);
    rowBeginPos = output.getLength();
    return output;
  }

  /**
   * Completes the row started by {@link #getOuputForRowBytes()}: writes the row length into the
   * reserved length word and spills the buffer to disk when it has grown past the threshold.
   *
   * @throws IOException if writing to the spill file fails
   */
  public void finishRow() throws IOException {
    int length = output.getLength() - rowBeginPos;
    output.writeInt(rowBeginPos - LENGTH_WORD_SIZE, length);
    if (output.getLength() > THRESHOLD) {
      spillOutput();
    }
  }

  /** Appends the whole write buffer to the spill file and empties the buffer. */
  private void spillOutput() throws IOException {
    int length = output.getLength();
    fileOutputStream.write(output.getData(), 0, length);
    totalWriteLength += length;
    output.reset();
  }

  /**
   * Switches the container to reading from the first row: spills any buffered rows, (re)opens
   * the spill file for reading and resets all read-side state. Does nothing if no row was ever
   * requested.
   *
   * @throws IOException if the spill file cannot be written or opened
   */
  public void prepareForReading() throws IOException {
    if (!isOpen) {
      return;
    }
    if (output.getLength() > 0) {
      spillOutput();
      fileOutputStream.flush();
    }
    if (fileInputStream != null) {
      fileInputStream.close();
    }
    fileInputStream = new FileInputStream(tmpFile);

    // Start from a clean slate so that bytes left over from an earlier, unfinished read pass
    // are not parsed as the beginning of the file.
    resetReadState();
  }

  /** Discards everything buffered or indexed on the read side. */
  private void resetReadState() {
    readOffset = 0;
    readLength = 0;
    readNextIndex = 0;
    readNextCount = 0;
    totalReadLength = 0;
  }

  /** Decodes a big-endian int at {@code readOffset} in the read buffer and steps over it. */
  private int readInt() {
    int value = (((readBuffer[readOffset] & 0xFF) << 24) |
        ((readBuffer[readOffset + 1] & 0xFF) << 16) |
        ((readBuffer[readOffset + 2] & 0xFF) << 8) |
        ((readBuffer[readOffset + 3] & 0xFF)));
    readOffset += LENGTH_WORD_SIZE;
    return value;
  }

  /**
   * Refills the read buffer from the spill file and indexes the complete rows it contains.
   * Call when {@code readNextIndex == readNextCount}. On return, {@code readNextCount} is the
   * number of rows available; zero means the end of the file was reached.
   *
   * @throws IOException if reading fails or the file content is not a sequence of
   *     length-prefixed rows
   */
  private void bufferedRead() throws IOException {
    // Reset for reading.
    readNextIndex = 0;
    // Reset for filling.
    readNextCount = 0;

    // Move unprocessed remainder (a partial row or partial length word) to the beginning of
    // the buffer.
    int unprocessedLength = readLength - readOffset;
    if (unprocessedLength > 0) {
      System.arraycopy(readBuffer, readOffset, readBuffer, 0, unprocessedLength);
    } else {
      unprocessedLength = 0;
    }
    readOffset = 0;
    readLength = unprocessedLength;

    // A single read may deliver fewer bytes than requested, so keep reading until at least one
    // whole length word is buffered or the file is exhausted.
    do {
      int bytesRead =
          fileInputStream.read(readBuffer, readLength, readBuffer.length - readLength);
      if (bytesRead == -1) {
        break;
      }
      readLength += bytesRead;
      totalReadLength += bytesRead;
    } while (readLength < LENGTH_WORD_SIZE);

    if (readLength == 0) {
      // Clean end of file.
      return;
    }
    if (readLength < LENGTH_WORD_SIZE) {
      throw new IOException("Expecting 4 byte length");
    }

    while (true) {
      int lengthWordOffset = readOffset;
      int rowLength = readInt();
      if (rowLength < 0) {
        throw new IOException("Negative row length");
      }

      int remainingLength = readLength - readOffset;
      if (remainingLength < rowLength) {
        if (readNextCount > 0) {
          // Rows were already found in this round; leave this one for the next round.
          readOffset = lengthWordOffset;
        } else {
          // The row does not fit in what is buffered; bridge it through the large row buffer.
          readLargeRow(rowLength, remainingLength);
        }
        break;
      }

      readNextBytes[readNextCount] = readBuffer;
      readNextOffsets[readNextCount] = readOffset;
      readNextLengths[readNextCount] = rowLength;
      readOffset += rowLength;
      readNextCount++;

      if (readNextCount >= readNextBytes.length) {
        break;
      }
      if (readLength - readOffset < LENGTH_WORD_SIZE) {
        // Not even a whole length word left; handle in next round.
        break;
      }
    }
  }

  /**
   * Assembles a row that extends past the buffered bytes in {@code largeRowBuffer} and records
   * it as the only row of the current round.
   *
   * @param rowLength total length of the row in bytes
   * @param bufferedLength number of row bytes already available in the read buffer at
   *     {@code readOffset}
   * @throws IOException if the file ends before the row is complete
   */
  private void readLargeRow(int rowLength, int bufferedLength) throws IOException {
    if (largeRowBuffer == null || largeRowBuffer.length < rowLength) {
      int roundedUpLength = Integer.highestOneBit(rowLength) << 1;
      // The shift overflows to a negative value for rows of 1 GiB or more; use an exact-size
      // buffer in that case.
      int newLargeBufferLength =
          roundedUpLength > 0 ? Math.max(roundedUpLength, INPUT_SIZE) : rowLength;
      largeRowBuffer = new byte[newLargeBufferLength];
    }
    System.arraycopy(readBuffer, readOffset, largeRowBuffer, 0, bufferedLength);

    int expectedPartialLength = rowLength - bufferedLength;
    int partialReadLength = 0;
    // A single read may deliver fewer bytes than requested; loop until the row is complete.
    while (partialReadLength < expectedPartialLength) {
      int bytesRead = fileInputStream.read(largeRowBuffer, bufferedLength + partialReadLength,
          expectedPartialLength - partialReadLength);
      if (bytesRead == -1) {
        if (partialReadLength == 0) {
          throw new IOException("Unexpected EOF (total write length " + totalWriteLength +
              ", total read length " + totalReadLength + ", read length " +
              expectedPartialLength + ")");
        }
        throw new IOException("Unable to read a complete row of length " + rowLength +
            " (total write length " + totalWriteLength +
            ", total read length " + totalReadLength + ", read length " +
            expectedPartialLength + ", actual length " + partialReadLength + ")");
      }
      partialReadLength += bytesRead;
    }
    totalReadLength += partialReadLength;

    readNextBytes[readNextCount] = largeRowBuffer;
    readNextOffsets[readNextCount] = 0;
    readNextLengths[readNextCount] = rowLength;
    readNextCount++;

    // Indicate we used the last row's bytes for large buffer.
    readOffset = readLength;
  }

  /**
   * Advances to the next row.
   *
   * @return {@code true} if a row is now available through {@link #currentBytes()},
   *     {@link #currentOffset()} and {@link #currentLength()}; {@code false} if the container
   *     was never written to or all rows have been read
   * @throws IOException if reading the spill file fails or its content is malformed
   */
  public boolean readNext() throws IOException {
    if (!isOpen) {
      return false;
    }
    if (readNextIndex >= readNextCount) {
      bufferedRead();
      // Any more left?
      if (readNextIndex >= readNextCount) {
        return false;
      }
    }

    currentBytes = readNextBytes[readNextIndex];
    currentOffset = readNextOffsets[readNextIndex];
    currentLength = readNextLengths[readNextIndex];
    readNextIndex++;
    return true;
  }

  /**
   * @return the internal buffer holding the current row; it is reused, so copy the bytes if
   *     they are needed after the next {@link #readNext()}
   */
  public byte[] currentBytes() {
    return currentBytes;
  }

  /** @return the offset of the current row within {@link #currentBytes()} */
  public int currentOffset() {
    return currentOffset;
  }

  /** @return the length in bytes of the current row */
  public int currentLength() {
    return currentLength;
  }

  /**
   * Releases all resources and returns the container to its initial, empty state. Cleanup is
   * best effort: failures to close a stream or delete the temporary directory are logged at
   * debug level and otherwise ignored.
   */
  public void clear() {
    if (fileInputStream != null) {
      try {
        fileInputStream.close();
      } catch (IOException e) {
        LOG.debug("Ignoring failure to close the spill file input stream", e);
      }
      fileInputStream = null;
    }
    if (fileOutputStream != null) {
      try {
        fileOutputStream.close();
      } catch (IOException e) {
        LOG.debug("Ignoring failure to close the spill file output stream", e);
      }
      fileOutputStream = null;
    }

    // Nothing to delete if no row was ever requested.
    if (parentFile != null) {
      try {
        if (!FileUtil.fullyDelete(parentFile) && LOG.isDebugEnabled()) {
          LOG.debug("Unable to delete temporary directory " + parentFile.getAbsolutePath());
        }
      } catch (RuntimeException e) {
        LOG.debug("Ignoring failure to delete the temporary directory", e);
      }
    }
    parentFile = null;
    tmpFile = null;
    isOpen = false;

    // Drop rows that were buffered but never spilled, and any leftover read state, so that a
    // reused container does not see data from its previous life.
    output.reset();
    resetReadState();
    totalWriteLength = 0;
  }
}