
jp.co.yahoo.yosegi.hive.io.HiveReaderSetting Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jp.co.yahoo.yosegi.hive.io;

import jp.co.yahoo.yosegi.config.Configuration;
import jp.co.yahoo.yosegi.hive.pushdown.HiveExprOrNode;
import jp.co.yahoo.yosegi.spread.expression.IExpressionNode;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

public class HiveReaderSetting implements IReaderSetting {

  private final Configuration config;
  private final IExpressionNode node;
  private final boolean isVectorModeFlag;
  private final boolean disableSkipBlock;
  private final boolean disableFilterPushdown;

  /**
   * Initialize.
   */
  public HiveReaderSetting(
      final Configuration config ,
      final IExpressionNode node ,
      final boolean isVectorModeFlag ,
      final boolean disableSkipBlock ,
      final boolean disableFilterPushdown ) {
    this.config = config;
    this.node = node;
    this.isVectorModeFlag = isVectorModeFlag;
    this.disableSkipBlock = disableSkipBlock;
    this.disableFilterPushdown = disableFilterPushdown;
  }

  /**
   * Set the object to be read and initialize.
   */
  public HiveReaderSetting( final FileSplit split , final JobConf job ) {
    config = new Configuration();

    // Per-job switches for block skipping and filter pushdown.
    disableSkipBlock = job.getBoolean( "yosegi.disable.block.skip" , false );
    disableFilterPushdown = job.getBoolean( "yosegi.disable.filter.pushdown" , false );

    // Deserialize the Hive filter expression, if one was pushed down.
    List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
    String filterExprSerialized = job.get( TableScanDesc.FILTER_EXPR_CONF_STR );
    if ( filterExprSerialized != null ) {
      filterExprs.add( SerializationUtilities.deserializeExpression( filterExprSerialized ) );
    }

    // MapWork is not always available; treat any failure as "no map work".
    MapWork mapWork;
    try {
      mapWork = Utilities.getMapWork( job );
    } catch ( Exception ex ) {
      mapWork = null;
    }

    // Copy expand/flatten settings from the JobConf into the Yosegi reader configuration.
    if ( job.get( "yosegi.expand" ) != null ) {
      config.set( "spread.reader.expand.column" , job.get( "yosegi.expand" ) );
    }
    Iterator<Map.Entry<String,String>> jobConfIterator = job.iterator();
    while ( jobConfIterator.hasNext() ) {
      Map.Entry<String,String> keyValue = jobConfIterator.next();
      if ( keyValue.getKey().startsWith( "yosegi.flatten" ) ) {
        String yosegiKeyName =
            keyValue.getKey().replace( "yosegi.flatten" , "spread.reader.flatten.column" );
        config.set( yosegiKeyName , keyValue.getValue() );
      }
    }

    if ( mapWork == null ) {
      // Without MapWork, column projection and vectorization cannot be determined.
      node = createExpressionNode( filterExprs );
      isVectorModeFlag = false;
      return;
    }

    node = createExpressionNode( filterExprs );
    config.set( "spread.reader.read.column.names" , createReadColumnNames(
        job.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , null ) ) );
    isVectorModeFlag = Utilities.getIsVectorized( job );
  }

  /**
   * Create the setting of the column to be read by Yosegi.
   */
  public String createReadColumnNames( final String readColumnNames ) {
    if ( readColumnNames == null || readColumnNames.isEmpty() ) {
      return null;
    }
    StringBuilder jsonStringBuilder = new StringBuilder();
    jsonStringBuilder.append( "[" );
    int addCount = 0;
    for ( String readColumnName : readColumnNames.split( "," ) ) {
      if ( readColumnName.isEmpty() ) {
        continue;
      }
      if ( addCount != 0 ) {
        jsonStringBuilder.append( "," );
      }
      jsonStringBuilder.append( "[\"" );
      jsonStringBuilder.append( readColumnName );
      jsonStringBuilder.append( "\"]" );
      addCount++;
    }
    jsonStringBuilder.append( "]" );
    return jsonStringBuilder.toString();
  }

  /**
   * Convert Hive filter condition to Yosegi filter condition.
   */
  public IExpressionNode createExpressionNode( final List<ExprNodeGenericFuncDesc> filterExprs ) {
    HiveExprOrNode hiveOrNode = new HiveExprOrNode();
    for ( ExprNodeGenericFuncDesc filterExpr : filterExprs ) {
      if ( filterExpr != null ) {
        hiveOrNode.addChildNode( filterExpr );
      }
    }

    return hiveOrNode.getPushDownFilterNode();
  }

  /**
   * Create a Set containing path candidates.
   */
  public Set<String> createPathSet( final Path target ) {
    Set<String> result = new HashSet<String>();
    result.add( target.toString() );
    result.add( target.toUri().toString() );
    result.add( target.getParent().toUri().toString() );

    return result;
  }

  @Override
  public boolean isVectorMode() {
    return isVectorModeFlag;
  }

  @Override
  public boolean isDisableSkipBlock() {
    return disableSkipBlock;
  }

  @Override
  public boolean isDisableFilterPushdown() {
    return disableFilterPushdown;
  }

  @Override
  public Configuration getReaderConfig() {
    return config;
  }

  @Override
  public IExpressionNode getExpressionNode() {
    return node;
  }
}
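For reference, a minimal usage sketch (not part of the library source) showing the projection string that createReadColumnNames produces. The class name ReadColumnNamesExample and the column list "col1,col2" are illustrative assumptions; the only requirement is that the yosegi-hive jar is on the classpath.

import jp.co.yahoo.yosegi.config.Configuration;
import jp.co.yahoo.yosegi.hive.io.HiveReaderSetting;

public class ReadColumnNamesExample {

  public static void main( final String[] args ) {
    // Construct the setting directly; the expression node is not needed
    // for this demonstration, so null is passed for it.
    HiveReaderSetting setting = new HiveReaderSetting(
        new Configuration() , null , false , false , false );

    // "col1,col2" is a hypothetical projected-column list, as Hive would
    // place it in ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR.
    String json = setting.createReadColumnNames( "col1,col2" );

    // Prints: [["col1"],["col2"]]
    System.out.println( json );
  }
}

Each projected column is wrapped in its own single-element array, presumably so that nested column paths can also be expressed in the spread.reader.read.column.names setting.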




