All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.parse.DruidSqlOperatorConverter Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import io.prestosql.hive.$internal.com.google.common.base.Function;
import io.prestosql.hive.$internal.com.google.common.base.Preconditions;
import io.prestosql.hive.$internal.com.google.common.collect.ImmutableList;
import io.prestosql.hive.$internal.com.google.common.collect.Iterables;
import io.prestosql.hive.$internal.com.google.common.collect.Maps;
import org.apache.calcite.adapter.druid.DirectOperatorConversion;
import org.apache.calcite.adapter.druid.DruidExpressions;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.adapter.druid.ExtractOperatorConversion;
import org.apache.calcite.adapter.druid.FloorOperatorConversion;
import org.apache.calcite.adapter.druid.UnarySuffixOperatorConversion;
import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveDateAddSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveDateSubSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFromUnixTimeSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTruncSqlOperator;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnixTimestampSqlOperator;
import org.joda.time.Period;

import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

/**
 * Contains custom Druid SQL operator converter classes. Each converter is either
 * Hive-specific OperatorConversion logic that cannot be part of Calcite, or a
 * temporary OperatorConversion that has not yet been released by Calcite.
 */
public class DruidSqlOperatorConverter {

  // Date pattern applied to the output of trunc()/to_date() conversions.
  private static final String YYYY_MM_DD = "yyyy-MM-dd";
  // Default timestamp pattern assumed for string arguments of unix_timestamp()/from_unixtime().
  public static final String DEFAULT_TS_FORMAT = "yyyy-MM-dd HH:mm:ss";

  // Utility holder class; all members are static nested classes or static helpers.
  private DruidSqlOperatorConverter() {
  }

  // Lazily-built operator conversion table, populated on first call to getDefaultMap().
  // NOTE(review): raw Map type is kept for source compatibility with existing callers.
  private static Map druidOperatorMap = null;

  /**
   * Returns the operator conversion table used to translate Calcite/Hive operators into
   * Druid expressions. The table starts from Calcite's default Druid operator list and is
   * then overridden/extended with Hive-specific conversions.
   *
   * <p>The map is built lazily exactly once; the build happens inside a synchronized
   * block so concurrent planner threads never observe a partially-populated map (the
   * original unsynchronized lazy init could publish the map mid-population).
   *
   * @return map of Calcite {@code SqlOperator} to Druid operator conversion
   */
  public static final Map getDefaultMap() {
    synchronized (DruidSqlOperatorConverter.class) {
      if (druidOperatorMap == null) {
        // Build into a local map and publish with a single assignment at the end.
        final Map operatorMap = new HashMap();
        DruidQuery.DEFAULT_OPERATORS_LIST.stream().forEach(op -> operatorMap.put(op.calciteOperator(), op));

        //Override Hive specific operators
        operatorMap.putAll(Maps.asMap(HiveFloorDate.ALL_FUNCTIONS,
            (Function) input -> new FloorOperatorConversion()
        ));
        operatorMap.putAll(Maps.asMap(HiveExtractDate.ALL_FUNCTIONS,
            (Function) input -> new ExtractOperatorConversion()
        ));
        operatorMap.put(HiveConcat.INSTANCE, new DirectOperatorConversion(HiveConcat.INSTANCE, "concat"));
        operatorMap
            .put(SqlStdOperatorTable.SUBSTRING, new DruidSqlOperatorConverter.DruidSubstringOperatorConversion());
        operatorMap
            .put(SqlStdOperatorTable.IS_NULL, new UnaryFunctionOperatorConversion(SqlStdOperatorTable.IS_NULL, "isnull"));
        operatorMap.put(SqlStdOperatorTable.IS_NOT_NULL,
            new UnaryFunctionOperatorConversion(SqlStdOperatorTable.IS_NOT_NULL, "notnull")
        );
        operatorMap.put(HiveTruncSqlOperator.INSTANCE, new DruidDateTruncOperatorConversion());
        operatorMap.put(HiveToDateSqlOperator.INSTANCE, new DruidToDateOperatorConversion());
        operatorMap.put(HiveFromUnixTimeSqlOperator.INSTANCE, new DruidFormUnixTimeOperatorConversion());
        operatorMap.put(HiveUnixTimestampSqlOperator.INSTANCE, new DruidUnixTimestampOperatorConversion());
        operatorMap.put(HiveDateAddSqlOperator.INSTANCE,
            new DruidDateArithmeticOperatorConversion(1, HiveDateAddSqlOperator.INSTANCE)
        );
        operatorMap.put(HiveDateSubSqlOperator.INSTANCE,
            new DruidDateArithmeticOperatorConversion(-1, HiveDateSubSqlOperator.INSTANCE)
        );
        druidOperatorMap = operatorMap;
      }
      return druidOperatorMap;
    }
  }

  /**
   * Druid operator converter from Hive Substring to the Druid {@code substring} expression.
   * Temporary workaround that can be removed once Calcite ships
   * https://issues.apache.org/jira/browse/CALCITE-2226
   */
  public static class DruidSubstringOperatorConversion
      extends org.apache.calcite.adapter.druid.SubstringOperatorConversion {
    @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query
    ) {
      final RexCall call = (RexCall) rexNode;
      final String input = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
      if (input == null) {
        return null;
      }

      // SQL substring is 1-indexed while Druid's is 0-indexed, hence the "- 1" shift.
      final RexNode startNode = call.getOperands().get(1);
      final String startIndex;
      if (startNode.isA(SqlKind.LITERAL)) {
        startIndex = DruidExpressions.numberLiteral(RexLiteral.intValue(startNode) - 1);
      } else {
        final String startExpression = DruidExpressions.toDruidExpression(startNode, rowType, query);
        if (startExpression == null) {
          return null;
        }
        startIndex = DruidQuery.format("(%s - 1)", startExpression);
      }

      final String lengthExpression;
      if (call.getOperands().size() > 2) {
        // substring(input, from, length)
        final RexNode lengthNode = call.getOperands().get(2);
        if (lengthNode.isA(SqlKind.LITERAL)) {
          lengthExpression = DruidExpressions.numberLiteral(RexLiteral.intValue(lengthNode));
        } else {
          lengthExpression = DruidExpressions.toDruidExpression(lengthNode, rowType, query);
          if (lengthExpression == null) {
            return null;
          }
        }
      } else {
        // substring(input, from): Druid takes -1 to mean "until the end of the string".
        lengthExpression = DruidExpressions.numberLiteral(-1);
      }
      return DruidQuery.format("substring(%s, %s, %s)", input, startIndex, lengthExpression);
    }
  }

  /**
   * Operator conversion from the Hive TRUNC UDF to Druid date-time expressions.
   * Only date truncation is supported: day (single-argument form), and
   * month/quarter/year granularities (two-argument form).
   */
  public static class DruidDateTruncOperatorConversion
      implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

    @Override public SqlOperator calciteOperator() {
      return HiveTruncSqlOperator.INSTANCE;
    }

    /**
     * Maps a Hive TRUNC granularity literal (already rendered as a quoted Druid
     * string) to an ISO-8601 period string, or null when the granularity is not
     * expressible in Druid.
     */
    private static String granularityToPeriod(String granularity) {
      switch (granularity) {
        case "'MONTH'":
        case "'MON'":
        case "'MM'":
          return Period.months(1).toString();
        case "'YEAR'":
        case "'YYYY'":
        case "'YY'":
          return Period.years(1).toString();
        case "'QUARTER'":
        case "'Q'":
          return Period.months(3).toString();
        default:
          return null;
      }
    }

    @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query
    ) {
      final RexCall call = (RexCall) rexNode;
      //can handle only case trunc date type
      if (call.getOperands().size() < 1) {
        throw new IllegalStateException("trunc() requires at least 1 argument, got " + call.getOperands().size());
      }
      if (call.getOperands().size() == 1) {
        // trunc(ts): floor the datetime to day and format it as yyyy-MM-dd.
        final String input = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
        if (input == null || !SqlTypeUtil.isDatetime(call.getOperands().get(0).getType())) {
          return null;
        }
        return applyTimestampFormat(
            DruidExpressions.applyTimestampFloor(input, Period.days(1).toString(), "", timezoneId(query)), YYYY_MM_DD,
            timezoneId(query)
        );
      } else if (call.getOperands().size() == 2) {
        // trunc(ts, granularity): floor to the requested period, then format.
        final String input = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
        if (input == null) {
          return null;
        }
        String granularity = DruidExpressions.toDruidExpression(call.getOperands().get(1), rowType, query);
        if (granularity == null) {
          return null;
        }
        final String unit = granularityToPeriod(granularity);
        if (unit == null) {
          //bail out can not infer unit
          return null;
        }
        return applyTimestampFormat(DruidExpressions.applyTimestampFloor(input, unit, "", timezoneId(query)),
            YYYY_MM_DD, timezoneId(query)
        );
      }
      return null;
    }
  }

  /**
   * Expression operator conversion from the Hive TO_DATE operator to a Druid
   * day-granularity timestamp floor.
   */
  public static class DruidToDateOperatorConversion
      implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

    @Override public SqlOperator calciteOperator() {
      return HiveToDateSqlOperator.INSTANCE;
    }

    @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query
    ) {
      final RexCall call = (RexCall) rexNode;
      if (call.getOperands().size() != 1) {
        throw new IllegalStateException("to_date() requires 1 argument, got " + call.getOperands().size());
      }
      final String input = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
      // Bail out when the operand itself cannot be expressed as a Druid expression.
      return input == null
          ? null
          : DruidExpressions.applyTimestampFloor(input, Period.days(1).toString(), "", timezoneId(query));
    }
  }

  /**
   * Conversion from Hive's unix_timestamp() to Druid expressions.
   * Datetime inputs are converted with a division by 1000 (epoch millis to epoch
   * seconds, per the {@code div} call below); string inputs are parsed via Druid's
   * {@code unix_timestamp} function with an explicit or default format pattern.
   */
  public static class DruidUnixTimestampOperatorConversion
      implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

    @Override public SqlOperator calciteOperator() {
      return HiveUnixTimestampSqlOperator.INSTANCE;
    }

    @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query
    ) {
      final RexCall call = (RexCall) rexNode;
      final String arg0 = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
      if (arg0 == null) {
        return null;
      }
      if (SqlTypeUtil.isDatetime((call.getOperands().get(0).getType()))) {
        // Timestamp is represented as long internally no need to any thing here
        return DruidExpressions.functionCall("div", ImmutableList.of(arg0, DruidExpressions.numberLiteral(1000)));
      }
      // Dealing with String type. When an explicit format operand is present,
      // toDruidExpression already yields a valid Druid expression (e.g. a quoted
      // string literal), so it must NOT be wrapped in stringLiteral() again --
      // the original code double-quoted it. Only the implicit default pattern
      // needs quoting here.
      final String format;
      if (call.getOperands().size() == 2) {
        format = DruidExpressions.toDruidExpression(call.getOperands().get(1), rowType, query);
        if (format == null) {
          // Format operand could not be translated; bail out.
          return null;
        }
      } else {
        format = DruidExpressions.stringLiteral(DEFAULT_TS_FORMAT);
      }
      return DruidExpressions.functionCall("unix_timestamp", ImmutableList.of(arg0, format));
    }
  }

  /**
   * Conversion from Hive's from_unixtime(unixtime[, format]) to Druid's
   * {@code timestamp_format}. The epoch-seconds input is scaled to millis
   * before formatting in the query's time zone.
   */
  public static class DruidFormUnixTimeOperatorConversion
      implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

    @Override public SqlOperator calciteOperator() {
      return HiveFromUnixTimeSqlOperator.INSTANCE;
    }

    @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query
    ) {
      final RexCall call = (RexCall) rexNode;
      if (call.getOperands().size() < 1 || call.getOperands().size() > 2) {
        // Message fixed: original said "form_unixtime() requires 1 or 2 argument".
        throw new IllegalStateException("from_unixtime() requires 1 or 2 arguments, got " + call.getOperands().size());
      }
      final String arg = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
      if (arg == null) {
        return null;
      }

      // from_unixtime takes epoch seconds; Druid's timestamp_format expects millis.
      final String numMillis = DruidQuery.format("(%s * '1000')", arg);
      final String format =
          call.getOperands().size() == 1 ? DruidExpressions.stringLiteral(DEFAULT_TS_FORMAT) : DruidExpressions
              .toDruidExpression(call.getOperands().get(1), rowType, query);
      if (format == null) {
        // The explicit format operand could not be translated to a Druid
        // expression; bail out instead of emitting a malformed call with "null".
        return null;
      }
      return DruidExpressions.functionCall("timestamp_format",
          ImmutableList.of(numMillis, format, DruidExpressions.stringLiteral(timezoneId(query).getID()))
      );
    }
  }

  /**
   * Conversion for Hive date_add/date_sub to Druid's {@code timestamp_shift}
   * with a one-day ("P1D") period.
   */
  public static class DruidDateArithmeticOperatorConversion
      implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

    // +1 shifts forward (date_add), -1 shifts backward (date_sub).
    private final int direction;
    private final SqlOperator operator;

    public DruidDateArithmeticOperatorConversion(int direction, SqlOperator operator) {
      this.direction = direction;
      this.operator = operator;
      Preconditions.checkArgument(direction == 1 || direction == -1);
    }

    @Override public SqlOperator calciteOperator() {
      return operator;
    }

    @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query
    ) {
      final RexCall call = (RexCall) rexNode;
      if (call.getOperands().size() != 2) {
        throw new IllegalStateException("date_add/date_sub() requires 2 arguments, got " + call.getOperands().size());
      }
      final String dateExpression = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query);
      final String countExpression = DruidExpressions.toDruidExpression(call.getOperands().get(1), rowType, query);
      if (dateExpression == null || countExpression == null) {
        return null;
      }

      // date_sub shifts by the negated day count.
      final String steps;
      if (direction == -1) {
        steps = DruidQuery.format("-( %s )", countExpression);
      } else {
        steps = countExpression;
      }
      return DruidExpressions.functionCall("timestamp_shift",
          ImmutableList.of(dateExpression, DruidExpressions.stringLiteral("P1D"), steps,
              DruidExpressions.stringLiteral(timezoneId(query).getID())
          ));
    }
  }

  /**
   * Utility to extract the session time zone from a Druid query's planner context.
   *
   * @param query Druid rel node
   * @return the time zone configured on the Calcite connection
   */
  private static TimeZone timezoneId(final DruidQuery query) {
    final CalciteConnectionConfig connectionConfig =
        query.getTopNode().getCluster().getPlanner().getContext().unwrap(CalciteConnectionConfig.class);
    return TimeZone.getTimeZone(connectionConfig.timeZone());
  }

  /**
   * Wraps a Druid expression in a {@code timestamp_format} call rendering it with
   * the given pattern in the given time zone.
   */
  private static String applyTimestampFormat(String expression, String pattern, TimeZone timeZone) {
    return DruidExpressions.functionCall("timestamp_format", ImmutableList
        .of(expression, DruidExpressions.stringLiteral(pattern), DruidExpressions.stringLiteral(timeZone.getID())));
  }

  /**
   * Conversion for unary Druid functions (e.g. isnull/notnull): emits
   * {@code druidOperator(arg)} for a single-operand call.
   */
  public static class UnaryFunctionOperatorConversion
      implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter {

    private final SqlOperator operator;
    private final String druidOperator;

    /**
     * @param operator      Calcite operator this conversion handles
     * @param druidOperator name of the corresponding unary Druid function
     */
    public UnaryFunctionOperatorConversion(SqlOperator operator, String druidOperator) {
      this.operator = operator;
      this.druidOperator = druidOperator;
    }

    @Override public SqlOperator calciteOperator() {
      return operator;
    }

    @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType,
        DruidQuery druidQuery) {
      final RexCall call = (RexCall) rexNode;

      // Parameterized List<String> replaces the raw List of the original.
      final List<String> druidExpressions = DruidExpressions.toDruidExpressions(
          druidQuery, rowType,
          call.getOperands());

      // Null means at least one operand could not be expressed in Druid.
      if (druidExpressions == null) {
        return null;
      }

      // Exactly one operand expected; getOnlyElement fails fast otherwise.
      return DruidQuery.format("%s(%s)", druidOperator, Iterables.getOnlyElement(druidExpressions));
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy