// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.llap.DebugUtils;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;
/**
* A record reader wrapper that converts VRB reader into an OI-based reader.
* Due to the fact that changing table OIs in the plan after compilation is nearly impossible,
* this is made an abstract class where type-specific implementations can plug in certain details,
* so that the data produced after wrapping a vectorized reader would conform to the original OIs.
*/
public abstract class BatchToRowReader
implements RecordReader {
/** Logger for this reader; protected, so it is also accessible to subclasses. */
protected static final Logger LOG = LoggerFactory.getLogger(BatchToRowReader.class);
/** Key object obtained from {@code vrbReader.createKey()} in the constructor. */
private final NullWritable key;
/** Batch object obtained from {@code vrbReader.createValue()} in the constructor. */
private final VectorizedRowBatch batch;
/** The wrapped vectorized reader passed to the constructor. */
private final RecordReader vrbReader;
/** Copy of the row column type infos reported by the {@code VectorizedRowBatchCtx}. */
private final List schema;
/**
 * One flag per schema entry. The constructor sets a flag to true for every index listed in
 * {@code includedCols} (or for all entries when that list is null) and for every virtual
 * column it locates in the batch context.
 */
private final boolean[] included;
/**
 * Starts at 0. NOTE(review): presumably the position of the next row to emit from the current
 * batch; the code that reads it is outside this view, so confirm.
 */
private int rowInBatch = 0;
/** Handlers returned by {@code requestedVirtualColumns()}; assigned in the constructor. */
protected List virtualColumnHandlers;
/**
 * Creates a row-oriented wrapper around a vectorized reader.
 *
 * <p>The key and batch objects are created from {@code vrbReader}, the schema is copied from
 * {@code vrbCtx}, and a per-column inclusion mask is built. For every handler returned by
 * {@code requestedVirtualColumns()} whose virtual column is found in {@code vrbCtx}, the column
 * is marked as included, the handler records its schema index, and the matching batch column
 * is initialised to all-null.
 *
 * <p>NOTE(review): {@code requestedVirtualColumns()} is called during construction; its
 * declaration is outside this view, so confirm that implementations do not depend on
 * subclass state that is not yet initialised.
 *
 * @param vrbReader the vectorized reader to wrap
 * @param vrbCtx batch context supplying the row column type infos and virtual column positions
 * @param includedCols schema indexes of the columns to include, or null to include all columns
 */
public BatchToRowReader(RecordReader vrbReader,
    VectorizedRowBatchCtx vrbCtx, List includedCols) {
  this.vrbReader = vrbReader;
  this.key = vrbReader.createKey();
  this.batch = vrbReader.createValue();
  this.schema = Lists.newArrayList(vrbCtx.getRowColumnTypeInfos());

  // TODO: does this include partition columns?
  boolean[] columnMask = new boolean[schema.size()];
  if (includedCols == null) {
    // No explicit selection: every schema column is included.
    Arrays.fill(columnMask, true);
  } else {
    for (int colIx : includedCols) {
      columnMask[colIx] = true;
    }
  }

  virtualColumnHandlers = requestedVirtualColumns();
  for (VirtualColumnHandler handler : virtualColumnHandlers) {
    int schemaIdx = vrbCtx.findVirtualColumnNum(handler.virtualColumn);
    if (schemaIdx < 0) {
      // A negative index is treated as "not present in this batch context"; skip the handler.
      continue;
    }
    columnMask[schemaIdx] = true;
    handler.indexInSchema = schemaIdx;
    // Start the virtual column as entirely null.
    batch.cols[schemaIdx].noNulls = false;
    Arrays.fill(batch.cols[schemaIdx].isNull, true);
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Including the columns " + DebugUtils.toString(columnMask));
  }
  this.included = columnMask;
}
/**
* Wrapper class to map a virtual column to a handler defined by subclasses of {@link BatchToRowReader}.
* The handler should be a set operation that sets the value of the virtual column
* in the current row.
*/
public static class VirtualColumnHandler {
private final VirtualColumn virtualColumn;
private final Consumer