jp.co.yahoo.yosegi.hive.io.HiveReaderSetting Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of yosegi-hive Show documentation
Yosegi Hive library.
There is a newer version: 2.0.6_hive-1.2.1000.2.6.4.0-91
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jp.co.yahoo.yosegi.hive.io;

import jp.co.yahoo.yosegi.config.Configuration;
import jp.co.yahoo.yosegi.hive.pushdown.HiveExprOrNode;
import jp.co.yahoo.yosegi.spread.expression.IExpressionNode;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

/**
 * Reader settings built either from explicit values or from a Hive job configuration.
 *
 * <p>Holds the Yosegi reader {@link Configuration}, the push-down filter expression,
 * and the flags exposed through {@link IReaderSetting}.</p>
 */
public class HiveReaderSetting implements IReaderSetting {

  private final Configuration config;
  private final IExpressionNode node;
  private final boolean isVectorModeFlag;
  private final boolean disableSkipBlock;
  private final boolean disableFilterPushdown;

  /**
   * Initialize from explicitly supplied values.
   *
   * @param config reader configuration returned by {@link #getReaderConfig()}
   * @param node filter expression returned by {@link #getExpressionNode()}
   * @param isVectorModeFlag value returned by {@link #isVectorMode()}
   * @param disableSkipBlock value returned by {@link #isDisableSkipBlock()}
   * @param disableFilterPushdown value returned by {@link #isDisableFilterPushdown()}
   */
  public HiveReaderSetting(
      final Configuration config ,
      final IExpressionNode node ,
      final boolean isVectorModeFlag ,
      final boolean disableSkipBlock ,
      final boolean disableFilterPushdown ) {
    this.config = config;
    this.node = node;
    this.isVectorModeFlag = isVectorModeFlag;
    this.disableSkipBlock = disableSkipBlock;
    this.disableFilterPushdown = disableFilterPushdown;
  }

  /**
   * Set the object to be read and initialize.
   *
   * <p>Reads the following from {@code job}:</p>
   * <ul>
   *   <li>{@code yosegi.disable.block.skip} and {@code yosegi.disable.filter.pushdown}
   *       (both default to {@code false});</li>
   *   <li>the serialized filter expression stored under
   *       {@link TableScanDesc#FILTER_EXPR_CONF_STR}, if any;</li>
   *   <li>{@code yosegi.expand}, copied to {@code spread.reader.expand.column};</li>
   *   <li>every key starting with {@code yosegi.flatten}, copied with that text
   *       replaced by {@code spread.reader.flatten.column};</li>
   *   <li>the projected column names, only when a {@link MapWork} could be obtained.</li>
   * </ul>
   *
   * @param split the file split; not read by this constructor
   * @param job the Hive job configuration
   */
  public HiveReaderSetting( final FileSplit split, final JobConf job ) {
    config = new Configuration();

    disableSkipBlock = job.getBoolean( "yosegi.disable.block.skip" , false );
    disableFilterPushdown = job.getBoolean( "yosegi.disable.filter.pushdown" , false );

    List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
    String filterExprSerialized = job.get( TableScanDesc.FILTER_EXPR_CONF_STR );
    if ( filterExprSerialized != null ) {
      filterExprs.add( SerializationUtilities.deserializeExpression( filterExprSerialized ) );
    }

    // Best effort: any failure to obtain the MapWork is treated the same as
    // "no MapWork available" and handled by the null check below.
    MapWork mapWork;
    try {
      mapWork = Utilities.getMapWork( job );
    } catch ( Exception ex ) {
      mapWork = null;
    }

    String expandSetting = job.get( "yosegi.expand" );
    if ( expandSetting != null ) {
      config.set( "spread.reader.expand.column" , expandSetting );
    }

    for ( Map.Entry<String,String> keyValue : job ) {
      String jobKey = keyValue.getKey();
      if ( jobKey.startsWith( "yosegi.flatten" ) ) {
        String yosegiKeyName = jobKey.replace(
            "yosegi.flatten" , "spread.reader.flatten.column" );
        config.set( yosegiKeyName , keyValue.getValue() );
      }
    }

    node = createExpressionNode( filterExprs );

    if ( mapWork == null ) {
      // Without a MapWork the column projection is not configured and
      // vectorized mode is reported as off.
      isVectorModeFlag = false;
      return;
    }

    config.set( "spread.reader.read.column.names" , createReadColumnNames(
        job.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , null ) ) );

    isVectorModeFlag = Utilities.getIsVectorized( job );
  }

  /**
   * Create the setting of the column to be read by Yosegi.
   *
   * <p>Turns a comma-separated list such as {@code a,b} into the string
   * {@code [["a"],["b"]]}. Empty entries between commas are skipped.</p>
   *
   * @param readColumnNames comma-separated column names, may be {@code null}
   * @return the bracketed, quoted column list, or {@code null} when the input is
   *     {@code null} or empty
   */
  public String createReadColumnNames( final String readColumnNames ) {
    if ( readColumnNames == null || readColumnNames.isEmpty() ) {
      return null;
    }
    StringBuilder jsonStringBuilder = new StringBuilder();
    jsonStringBuilder.append( "[" );
    int addCount = 0;
    for ( String readColumnName : readColumnNames.split( "," ) ) {
      if ( readColumnName.isEmpty() ) {
        continue;
      }
      if ( addCount != 0 ) {
        jsonStringBuilder.append( "," );
      }
      // NOTE(review): the name is inserted verbatim, without escaping quotes
      // or backslashes — confirm column names can never contain them.
      jsonStringBuilder.append( "[\"" );
      jsonStringBuilder.append( readColumnName );
      jsonStringBuilder.append( "\"]" );
      addCount++;
    }
    jsonStringBuilder.append( "]" );
    return jsonStringBuilder.toString();
  }

  /**
   * Convert Hive filter condition to Yosegi filter condition.
   *
   * <p>Every non-null expression is added as a child of a single
   * {@link HiveExprOrNode}; null entries are ignored.</p>
   *
   * @param filterExprs Hive filter expressions, possibly empty
   * @return the push-down filter node produced by the {@link HiveExprOrNode}
   */
  public IExpressionNode createExpressionNode(
      final List<ExprNodeGenericFuncDesc> filterExprs ) {
    HiveExprOrNode hiveOrNode = new HiveExprOrNode();
    for ( ExprNodeGenericFuncDesc filterExpr : filterExprs ) {
      if ( filterExpr != null ) {
        hiveOrNode.addChildNode( filterExpr );
      }
    }

    return hiveOrNode.getPushDownFilterNode();
  }

  /**
   * Create a Set containing path candidates.
   *
   * <p>The set holds the path's string form, its URI string, and the URI string
   * of its parent. The parent entry is omitted when the path has no parent.</p>
   *
   * @param target the path to build candidates for
   * @return the candidate strings
   */
  public Set<String> createPathSet( final Path target ) {
    Set<String> result = new HashSet<String>();
    result.add( target.toString() );
    result.add( target.toUri().toString() );

    // getParent() yields null for a root path; skip it instead of failing.
    Path parent = target.getParent();
    if ( parent != null ) {
      result.add( parent.toUri().toString() );
    }

    return result;
  }

  @Override
  public boolean isVectorMode() {
    return isVectorModeFlag;
  }

  @Override
  public boolean isDisableSkipBlock() {
    return disableSkipBlock;
  }

  @Override
  public boolean isDisableFilterPushdown() {
    return disableFilterPushdown;
  }

  @Override
  public Configuration getReaderConfig() {
    return config;
  }

  @Override
  public IExpressionNode getExpressionNode() {
    return node;
  }

}