jp.co.yahoo.yosegi.hive.io.HiveVectorizedReaderSetting Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package jp.co.yahoo.yosegi.hive.io;
import jp.co.yahoo.yosegi.config.Configuration;
import jp.co.yahoo.yosegi.hive.io.vector.ColumnVectorAssignorFactory;
import jp.co.yahoo.yosegi.hive.io.vector.IColumnVectorAssignor;
import jp.co.yahoo.yosegi.spread.expression.IExpressionNode;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import java.io.IOException;
import java.util.List;
public class HiveVectorizedReaderSetting implements IVectorizedReaderSetting {
private final boolean[] projectionColumn;
private final Object[] partitionValues;
private final HiveReaderSetting hiveReaderConfig;
private final VectorizedRowBatchCtx rbCtx;
private final IColumnVectorAssignor[] assignors;
private final int[] needColumnIds;
private final String[] columnNames;
/**
* Initialize.
*/
public HiveVectorizedReaderSetting(
final boolean[] projectionColumn ,
final Object[] partitionValues ,
final VectorizedRowBatchCtx rbCtx ,
final IColumnVectorAssignor[] assignors ,
final int[] needColumnIds ,
final String[] columnNames ,
final HiveReaderSetting hiveReaderConfig ) {
this.projectionColumn = projectionColumn;
this.partitionValues = partitionValues;
this.rbCtx = rbCtx;
this.hiveReaderConfig = hiveReaderConfig;
this.assignors = assignors;
this.needColumnIds = needColumnIds;
this.columnNames = columnNames;
}
/**
* Initialize.
*/
public HiveVectorizedReaderSetting(
final FileSplit split ,
final JobConf job ,
final HiveReaderSetting hiveReaderConfig ) throws IOException {
this.hiveReaderConfig = hiveReaderConfig;
rbCtx = Utilities.getVectorizedRowBatchCtx( job );
partitionValues = new Object[rbCtx.getPartitionColumnCount()];
if ( 0 < partitionValues.length ) {
rbCtx.getPartitionValues( rbCtx, job, split, partitionValues );
}
TypeInfo[] typeInfos = rbCtx.getRowColumnTypeInfos();
columnNames = rbCtx.getRowColumnNames();
needColumnIds = createNeedColumnId( ColumnProjectionUtils.getReadColumnIDs( job ) );
projectionColumn = new boolean[columnNames.length];
assignors = new IColumnVectorAssignor[columnNames.length];
for ( int id : needColumnIds ) {
projectionColumn[id] = true;
assignors[id] = ColumnVectorAssignorFactory.create( typeInfos[id] );
}
}
/**
* Convert from list to array of int.
*/
public int[] createNeedColumnId( final List needColumnIdList ) {
int[] result = new int[needColumnIdList.size()];
for ( int i = 0 ; i < result.length ; i++ ) {
result[i] = needColumnIdList.get(i).intValue();
}
return result;
}
@Override
public boolean isVectorMode() {
return hiveReaderConfig.isVectorMode();
}
@Override
public boolean isDisableSkipBlock() {
return hiveReaderConfig.isDisableSkipBlock();
}
@Override
public boolean isDisableFilterPushdown() {
return hiveReaderConfig.isDisableFilterPushdown();
}
@Override
public Configuration getReaderConfig() {
return hiveReaderConfig.getReaderConfig();
}
@Override
public IExpressionNode getExpressionNode() {
return hiveReaderConfig.getExpressionNode();
}
@Override
public VectorizedRowBatch createVectorizedRowBatch() {
return rbCtx.createVectorizedRowBatch();
}
@Override
public void setPartitionValues( final VectorizedRowBatch outputBatch ) {
if ( 0 < partitionValues.length ) {
rbCtx.addPartitionColsToBatch( outputBatch , partitionValues );
}
}
@Override
public IColumnVectorAssignor[] getAssignors() {
return assignors;
}
@Override
public int[] getNeedColumnIds() {
return needColumnIds;
}
@Override
public String[] getColumnNames() {
return columnNames;
}
}