/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.persistence;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hive.common.util.ReflectionUtil;

/**
* Simple persistent container for rows.
*
 * This container only supports appending new rows and iterating through the rows in insertion
 * order.
 *
 * The iterator interface is a lightweight first()/next() API rather than the Java Iterator
 * interface. This way we do not need to create an Iterator object every time we want to start a
 * new iteration. Below is a simple example of how to convert typical Java Iterator code to the
 * lightweight iterator interface.
 *
 *   Iterator itr = rowContainer.iterator();
 *   while (itr.hasNext()) {
 *     v = itr.next();
 *     // do anything with v
 *   }
 *
 * can be rewritten to:
 *
 *   for (v = rowContainer.first(); v != null; v = rowContainer.next()) {
 *     // do anything with v
 *   }
 *
 * Once first() has been called, the container can no longer be written to, so there can be no
 * writes after a read. The container can be read multiple times, but it does not support multiple
 * readers interleaving their reads.
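 *
 * As a concrete sketch of the two phases (hypothetical caller code; the constructor shown here is
 * an assumption based on the Configuration and Reporter fields this class keeps, and is not part
 * of this excerpt):
 *
 *   RowContainer<List<Object>> rc = new RowContainer<List<Object>>(jc, reporter);
 *   rc.addRow(row);                  // write phase: append rows one at a time
 *   for (List<Object> v = rc.first(); v != null; v = rc.next()) {
 *     // read phase: rows come back in insertion order
 *   }
 *   rc.clearRows();                  // release in-memory blocks and any spill file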
*
*/
public class RowContainer<ROW extends List<Object>>
    implements AbstractRowContainer<ROW>, AbstractRowContainer.RowIterator<ROW> {
  protected static final Logger LOG = LoggerFactory.getLogger(RowContainer.class);

  // max # of rows that can be put into one block
  private static final int BLOCKSIZE = 25000;

  private ROW[] currentWriteBlock; // the last block that add() should append to
  private ROW[] currentReadBlock; // the current block where the cursor is in
  // since currentReadBlock may be assigned to currentWriteBlock, we need to
  // store the original read block
  private ROW[] firstReadBlockPointer;
  private int blockSize; // number of objects in the block before it is spilled
                         // to disk
  private int numFlushedBlocks; // total # of blocks flushed to disk
  private long size; // total # of elements in the RowContainer
  private File tmpFile; // temporary file holding the spilled blocks
  Path tempOutPath = null;
  private File parentDir;
  private int itrCursor; // iterator cursor in the current read block
  private int readBlockSize; // size of the current read block
  private int addCursor; // append cursor in currentWriteBlock
  private AbstractSerDe serde; // serialization/deserialization for the row
  private ObjectInspector standardOI; // object inspector for the row
  private List<Object> keyObject;
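
  // A sketch of how these fields cooperate (inferred from the field comments
  // above; the method bodies are outside this excerpt): addRow() appends into
  // currentWriteBlock until blockSize rows accumulate, then the full block is
  // serialized through `serde` into tmpFile under parentDir and
  // numFlushedBlocks is incremented. first() switches the container to read
  // mode: rows are served out of currentReadBlock via itrCursor and
  // readBlockSize, refilling the block from disk until all `size` rows have
  // been returned.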