Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.druid;
import org.apache.calcite.DataContext;
import org.apache.calcite.avatica.ColumnMetaData;
import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.interpreter.BindableRel;
import org.apache.calcite.interpreter.Bindables;
import org.apache.calcite.interpreter.Compiler;
import org.apache.calcite.interpreter.Node;
import org.apache.calcite.interpreter.Sink;
import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.AbstractRelNode;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelFieldCollation.Direction;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.RelMdUtil;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.schema.ScannableTable;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Litmus;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.joda.time.Interval;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
* Relational expression representing a scan of a Druid data set.
*/
public class DruidQuery extends AbstractRelNode implements BindableRel {
/**
 * Provides a standard list of supported Calcite operators that can be converted to
 * Druid Expressions. This can be used as is or re-adapted based on the underlying
 * engine operator syntax.
 *
 * <p>The element type is declared explicitly so the list can be iterated as
 * {@link DruidSqlOperatorConverter} instances without casts.
 */
public static final List<DruidSqlOperatorConverter> DEFAULT_OPERATORS_LIST =
    ImmutableList.<DruidSqlOperatorConverter>builder()
        // Functions whose Druid expression is a plain function call.
        .add(new DirectOperatorConversion(SqlStdOperatorTable.EXP, "exp"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.CONCAT, "concat"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.DIVIDE_INTEGER, "div"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.LIKE, "like"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.LN, "log"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.SQRT, "sqrt"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.LOWER, "lower"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.LOG10, "log10"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.REPLACE, "replace"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.UPPER, "upper"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.POWER, "pow"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.ABS, "abs"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.SIN, "sin"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.COS, "cos"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.TAN, "tan"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.CASE, "case_searched"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.CHAR_LENGTH, "strlen"))
        .add(new DirectOperatorConversion(SqlStdOperatorTable.CHARACTER_LENGTH, "strlen"))
        // Comparison, logical and arithmetic operators.
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.EQUALS, "=="))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.NOT_EQUALS, "!="))
        .add(new NaryOperatorConverter(SqlStdOperatorTable.OR, "||"))
        .add(new NaryOperatorConverter(SqlStdOperatorTable.AND, "&&"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.LESS_THAN, "<"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, "<="))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.GREATER_THAN, ">"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, ">="))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.PLUS, "+"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.MINUS, "-"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.MULTIPLY, "*"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.DIVIDE, "/"))
        .add(new BinaryOperatorConversion(SqlStdOperatorTable.MOD, "%"))
        .add(new DruidSqlCastConverter())
        .add(new ExtractOperatorConversion())
        // Unary prefix and suffix operators.
        .add(new UnaryPrefixOperatorConversion(SqlStdOperatorTable.NOT, "!"))
        .add(new UnaryPrefixOperatorConversion(SqlStdOperatorTable.UNARY_MINUS, "-"))
        .add(new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_FALSE, "<= 0"))
        .add(new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_NOT_TRUE, "<= 0"))
        .add(new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_TRUE, "> 0"))
        .add(new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_NOT_FALSE, "> 0"))
        .add(new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_NULL, "== null"))
        .add(new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_NOT_NULL, "!= null"))
        .add(new FloorOperatorConversion())
        .add(new CeilOperatorConversion())
        .add(new SubstringOperatorConversion())
        .build();
/** Druid query specification. It is not assigned by the constructor. */
protected QuerySpec querySpec;
/** Calcite table this query is based on; the first rel must scan it. */
final RelOptTable table;
/** Druid table backing this query. */
final DruidTable druidTable;
/** Immutable snapshot of the time intervals covered by the query. */
final ImmutableList<Interval> intervals;
/** Immutable snapshot of the relational expressions folded into this query, scan first. */
final ImmutableList<RelNode> rels;
/**
 * This operator map provides DruidSqlOperatorConverter instance to convert a Calcite RexNode to
 * Druid Expression when possible.
 */
final Map<SqlOperator, DruidSqlOperatorConverter> converterOperatorMap;
/** Pattern of operator signatures (see {@code signature()}) that are accepted. */
private static final Pattern VALID_SIG = Pattern.compile("sf?p?(a?|ah|ah?o)l?");
private static final String EXTRACT_COLUMN_NAME_PREFIX = "extract";
private static final String FLOOR_COLUMN_NAME_PREFIX = "floor";
protected static final String DRUID_QUERY_FETCH = "druid.query.fetch";
/** Upper bound, in days, used when scaling cost by queried interval width. */
private static final int DAYS_IN_TEN_YEARS = 10 * 365;
/**
 * Creates a DruidQuery.
 *
 * <p>The interval and rel lists are copied into immutable lists, the operator map
 * must be non-null, and (when assertions are enabled) the resulting query is
 * validated with {@link Litmus#THROW}.
 *
 * @param cluster Cluster
 * @param traitSet Traits
 * @param table Table
 * @param druidTable Druid table
 * @param intervals Intervals for the query
 * @param rels Internal relational expressions
 * @param converterOperatorMap mapping of Calcite Sql Operator to Druid Expression API.
 * @throws NullPointerException if {@code converterOperatorMap} is null
 */
protected DruidQuery(RelOptCluster cluster, RelTraitSet traitSet,
    RelOptTable table, DruidTable druidTable,
    List intervals, List rels,
    Map converterOperatorMap) {
  super(cluster, traitSet);
  this.table = table;
  this.druidTable = druidTable;
  // Snapshot the caller's lists so later mutation cannot affect this query.
  this.intervals = ImmutableList.copyOf(intervals);
  this.rels = ImmutableList.copyOf(rels);
  this.converterOperatorMap =
      Objects.requireNonNull(converterOperatorMap, "Operator map can not be null");
  assert isValid(Litmus.THROW, null);
}
/**
 * Returns whether a signature represents a sequence of relational operators
 * that can be translated into a valid Druid query.
 *
 * @param signature operator signature string, one character per operator
 * @return true if the entire signature matches the accepted pattern
 */
static boolean isValidSignature(String signature) {
  final boolean accepted = VALID_SIG.matcher(signature).matches();
  return accepted;
}
/**
 * Creates a DruidQuery that uses the table's own intervals and the converters from
 * {@link #DEFAULT_OPERATORS_LIST}, keyed by the Calcite operator each one handles.
 *
 * @param cluster Cluster
 * @param traitSet Traits
 * @param table Table
 * @param druidTable Druid table
 * @param rels Internal relational expressions
 * @return the new query
 */
public static DruidQuery create(RelOptCluster cluster, RelTraitSet traitSet,
    RelOptTable table, DruidTable druidTable, List rels) {
  final ImmutableMap.Builder defaultConverters = ImmutableMap.builder();
  for (DruidSqlOperatorConverter operatorConverter : DEFAULT_OPERATORS_LIST) {
    defaultConverters.put(operatorConverter.calciteOperator(), operatorConverter);
  }
  final Map defaultConverterMap = defaultConverters.build();
  return create(cluster, traitSet, table, druidTable, druidTable.intervals, rels,
      defaultConverterMap);
}
/**
 * Creates a DruidQuery that uses the table's own intervals and a caller-supplied
 * operator conversion map.
 *
 * @param cluster Cluster
 * @param traitSet Traits
 * @param table Table
 * @param druidTable Druid table; its {@code intervals} are used for the query
 * @param rels Internal relational expressions
 * @param converterOperatorMap mapping of Calcite Sql Operator to Druid converter
 * @return the new query
 */
public static DruidQuery create(RelOptCluster cluster, RelTraitSet traitSet,
    RelOptTable table, DruidTable druidTable, List rels,
    Map converterOperatorMap) {
  final List tableIntervals = druidTable.intervals;
  return create(cluster, traitSet, table, druidTable, tableIntervals, rels,
      converterOperatorMap);
}
/**
 * Creates a DruidQuery with explicit intervals; every other factory method in this
 * class funnels into this one.
 *
 * @param cluster Cluster
 * @param traitSet Traits
 * @param table Table
 * @param druidTable Druid table
 * @param intervals Intervals for the query
 * @param rels Internal relational expressions
 * @param converterOperatorMap mapping of Calcite Sql Operator to Druid converter
 * @return the new query
 */
private static DruidQuery create(RelOptCluster cluster, RelTraitSet traitSet,
    RelOptTable table, DruidTable druidTable, List intervals,
    List rels, Map converterOperatorMap) {
  final DruidQuery created = new DruidQuery(cluster, traitSet, table, druidTable,
      intervals, rels, converterOperatorMap);
  return created;
}
/**
 * Extends a DruidQuery by appending one more relational expression.
 *
 * <p>The new query takes its traits from {@code r}, with the convention replaced by
 * that of {@code query}; table, intervals and operator map are carried over.
 *
 * @param query query to extend
 * @param r relational expression to append after the query's current rels
 * @return a new query containing the existing rels followed by {@code r}
 */
public static DruidQuery extendQuery(DruidQuery query, RelNode r) {
  final ImmutableList.Builder extendedRels = ImmutableList.builder();
  extendedRels.addAll(query.rels);
  extendedRels.add(r);
  return create(query.getCluster(), r.getTraitSet().replace(query.getConvention()),
      query.getTable(), query.druidTable, query.intervals,
      extendedRels.build(), query.getOperatorConversionMap());
}
/**
 * Extends a DruidQuery by replacing its intervals.
 *
 * @param query query to copy cluster, traits, table, rels and operator map from
 * @param intervals intervals the new query should cover
 * @return a new query identical to {@code query} except for its intervals
 */
public static DruidQuery extendQuery(DruidQuery query,
    List intervals) {
  return create(
      query.getCluster(),
      query.getTraitSet(),
      query.getTable(),
      query.druidTable,
      intervals,
      query.rels,
      query.getOperatorConversionMap());
}
/**
 * Translates a leaf expression into a Druid column reference, optionally paired with
 * an extraction function. Handles input refs, EXTRACT, FLOOR and CAST; any other
 * kind is rejected.
 *
 * @param rexNode leaf Input Ref to Druid Column
 * @param rowType row type
 * @param druidQuery druid query
 *
 * @return {@link Pair} of Column name and Extraction Function on the top of the input ref or
 * {@code Pair.of(null, null)} when can not translate to valid Druid column
 */
protected static Pair toDruidColumn(RexNode rexNode,
    RelDataType rowType, DruidQuery druidQuery) {
  switch (rexNode.getKind()) {
  case INPUT_REF: {
    final String refColumn = extractColumnName(rexNode, rowType, druidQuery);
    //@TODO we can remove this ugly check by treating druid time columns as LONG
    final boolean isTimeTyped = rexNode.getType().getFamily() == SqlTypeFamily.DATE
        || rexNode.getType().getFamily() == SqlTypeFamily.TIMESTAMP;
    final ExtractionFunction refFn;
    if (isTimeTyped) {
      refFn = TimeExtractionFunction
          .createDefault(druidQuery.getConnectionConfig().timeZone());
    } else {
      refFn = null;
    }
    return Pair.of(refColumn, refFn);
  }
  case EXTRACT: {
    final Granularity extractGranularity = DruidDateTimeUtils
        .extractGranularity(rexNode, druidQuery.getConnectionConfig().timeZone());
    // Bail out on an unknown granularity or an EXTRACT that is not a valid time extract.
    if (extractGranularity == null
        || !TimeExtractionFunction.isValidTimeExtract((RexCall) rexNode)) {
      return Pair.of(null, null);
    }
    final ExtractionFunction extractFn =
        TimeExtractionFunction.createExtractFromGranularity(extractGranularity,
            druidQuery.getConnectionConfig().timeZone());
    // The column is read from the second operand of the EXTRACT call.
    final String extractColumn =
        extractColumnName(((RexCall) rexNode).getOperands().get(1), rowType, druidQuery);
    return Pair.of(extractColumn, extractFn);
  }
  case FLOOR: {
    final Granularity floorGranularity = DruidDateTimeUtils
        .extractGranularity(rexNode, druidQuery.getConnectionConfig().timeZone());
    // Bail out on an unknown granularity or a FLOOR that is not a valid time floor.
    if (floorGranularity == null
        || !TimeExtractionFunction.isValidTimeFloor((RexCall) rexNode)) {
      return Pair.of(null, null);
    }
    final ExtractionFunction floorFn =
        TimeExtractionFunction.createFloorFromGranularity(floorGranularity,
            druidQuery.getConnectionConfig().timeZone());
    // The column is read from the first operand of the FLOOR call.
    final String floorColumn =
        extractColumnName(((RexCall) rexNode).getOperands().get(0), rowType, druidQuery);
    return Pair.of(floorColumn, floorFn);
  }
  case CAST: {
    // Only a cast directly over an input ref, to a supported type, is accepted.
    if (!isValidLeafCast(rexNode)) {
      return Pair.of(null, null);
    }
    final String castColumn =
        extractColumnName(((RexCall) rexNode).getOperands().get(0), rowType, druidQuery);
    final SqlTypeName targetType = rexNode.getType().getSqlTypeName();
    final boolean needsTimeExtraction = targetType.getFamily() == SqlTypeFamily.TIMESTAMP
        || targetType.getFamily() == SqlTypeFamily.DATETIME;
    final ExtractionFunction castFn;
    if (needsTimeExtraction) {
      castFn = TimeExtractionFunction.translateCastToTimeExtract(rexNode,
          TimeZone.getTimeZone(druidQuery.getConnectionConfig().timeZone()));
      if (castFn == null) {
        // no extraction Function means cast is not valid thus bail out
        return Pair.of(null, null);
      }
    } else {
      castFn = null;
    }
    return Pair.of(castColumn, castFn);
  }
  default:
    return Pair.of(null, null);
  }
}
/**
 * Checks whether a CAST sits directly on an input ref and targets a type family that
 * can be pushed to Druid.
 *
 * <p>Accepted targets are the CHARACTER, NUMERIC, TIMESTAMP and DATETIME families, or
 * any family that already contains the operand's type. All other casts are rejected.
 *
 * @param rexNode rexNode; asserted to be of kind CAST
 *
 * @return true if the operand is an inputRef and it is a valid Druid Cast operation
 */
private static boolean isValidLeafCast(RexNode rexNode) {
  assert rexNode.isA(SqlKind.CAST);
  final RexNode operand = ((RexCall) rexNode).getOperands().get(0);
  if (!operand.isA(SqlKind.INPUT_REF)) {
    // it is not a leaf cast don't bother going further.
    return false;
  }
  final SqlTypeFamily targetFamily = rexNode.getType().getSqlTypeName().getFamily();
  return targetFamily == SqlTypeFamily.CHARACTER
      || targetFamily == SqlTypeFamily.NUMERIC
      || targetFamily == SqlTypeFamily.TIMESTAMP
      || targetFamily == SqlTypeFamily.DATETIME
      // same type it is okay to push it
      || targetFamily.contains(operand.getType());
}
/**
 * Resolves an input ref to the name of the Druid column it reads.
 *
 * <p>The table's configured timestamp field name is mapped to
 * {@link DruidTable#DEFAULT_TIMESTAMP_COLUMN}.
 *
 * @param rexNode Druid input ref node
 * @param rowType rowType
 * @param query Druid Query
 *
 * @return Druid column name or null when not possible to translate.
 */
@Nullable
protected static String extractColumnName(RexNode rexNode, RelDataType rowType,
    DruidQuery query) {
  if (rexNode.getKind() != SqlKind.INPUT_REF) {
    return null;
  }
  final int fieldIndex = ((RexInputRef) rexNode).getIndex();
  final String fieldName = rowType.getFieldNames().get(fieldIndex);
  if (fieldName == null) {
    return null;
  }
  // calcite has this un-direct renaming of timestampFieldName to native druid `__time`
  final boolean isTimestampField =
      query.getDruidTable().timestampFieldName.equals(fieldName);
  return isTimestampField ? DruidTable.DEFAULT_TIMESTAMP_COLUMN : fieldName;
}
/**
 * Formats a message using the {@link Locale#ENGLISH} locale, so the output does not
 * depend on the JVM's default locale.
 *
 * @param message format string, as accepted by {@link String#format}
 * @param formatArgs arguments referenced by the format string
 * @return the formatted string
 */
public static String format(String message, Object... formatArgs) {
  final Locale fixedLocale = Locale.ENGLISH;
  return String.format(fixedLocale, message, formatArgs);
}
/** Returns a string describing the operations inside this query.
 *
 * <p>For example, "sfpahol" means {@link TableScan} (s)
 * followed by {@link Filter} (f)
 * followed by {@link Project} (p)
 * followed by {@link Aggregate} (a)
 * followed by {@link Filter} (h)
 * followed by {@link Project} (o)
 * followed by {@link Sort} (l).
 *
 * <p>A rel of any other type is encoded as '!'.
 *
 * @see #isValidSignature(String)
 */
String signature() {
  final StringBuilder encoded = new StringBuilder();
  // Set once an Aggregate has been seen; later filters/projects get 'h'/'o'.
  boolean afterAggregate = false;
  for (RelNode rel : rels) {
    final char code;
    if (rel instanceof TableScan) {
      code = 's';
    } else if (rel instanceof Project && afterAggregate) {
      code = 'o';
    } else if (rel instanceof Filter && afterAggregate) {
      code = 'h';
    } else if (rel instanceof Aggregate) {
      code = 'a';
    } else if (rel instanceof Filter) {
      code = 'f';
    } else if (rel instanceof Sort) {
      code = 'l';
    } else if (rel instanceof Project) {
      code = 'p';
    } else {
      code = '!';
    }
    encoded.append(code);
    if (rel instanceof Aggregate) {
      afterAggregate = true;
    }
  }
  return encoded.toString();
}
/**
 * Checks that the rels held by this query form a chain that can be sent to Druid.
 *
 * <p>Beyond the superclass check, the query must contain at least one rel, its
 * {@link #signature()} must be accepted by {@link #isValidSignature(String)}, the
 * first rel must be a {@link TableScan} of this query's table, and every later rel
 * must have the previous rel as its only input. In addition, aggregates must have a
 * single grouping set and no indicator, filters must be convertible to a Druid
 * filter, and sorts must not carry a non-zero offset.
 *
 * @param litmus receives the failure message; decides whether to throw or return false
 * @param context validation context, passed to the superclass check only
 * @return true if valid; otherwise whatever {@code litmus.fail} returns
 */
@Override public boolean isValid(Litmus litmus, Context context) {
  if (!super.isValid(litmus, context)) {
    return false;
  }
  // Check emptiness before the signature: an empty signature never matches, so the
  // more specific message would otherwise be unreachable.
  if (rels.isEmpty()) {
    return litmus.fail("must have at least one rel");
  }
  final String signature = signature();
  if (!isValidSignature(signature)) {
    return litmus.fail("invalid signature [{}]", signature);
  }
  for (int i = 0; i < rels.size(); i++) {
    final RelNode r = rels.get(i);
    if (i == 0) {
      if (!(r instanceof TableScan)) {
        // The placeholder is required for the offending rel to appear in the message.
        return litmus.fail("first rel must be TableScan, was {}", r);
      }
      if (r.getTable() != table) {
        return litmus.fail("first rel must be based on table {}", table);
      }
    } else {
      final List inputs = r.getInputs();
      if (inputs.size() != 1 || inputs.get(0) != rels.get(i - 1)) {
        return litmus.fail("each rel must have a single input");
      }
      if (r instanceof Aggregate) {
        final Aggregate aggregate = (Aggregate) r;
        if (aggregate.getGroupSets().size() != 1
            || aggregate.indicator) {
          return litmus.fail("no grouping sets");
        }
      }
      if (r instanceof Filter) {
        final Filter filter = (Filter) r;
        // A filter is only acceptable if it can be translated to a Druid filter.
        final DruidJsonFilter druidJsonFilter = DruidJsonFilter
            .toDruidFilters(filter.getCondition(), filter.getInput().getRowType(), this);
        if (druidJsonFilter == null) {
          return litmus.fail("invalid filter [{}]", filter.getCondition());
        }
      }
      if (r instanceof Sort) {
        final Sort sort = (Sort) r;
        if (sort.offset != null && RexLiteral.intValue(sort.offset) != 0) {
          return litmus.fail("offset not supported");
        }
      }
    }
  }
  return true;
}
/**
 * Returns the operator conversion map this query was created with.
 *
 * @return the map held in {@code converterOperatorMap}; never null, as enforced by
 * the constructor
 */
protected Map getOperatorConversionMap() {
  return this.converterOperatorMap;
}
/**
 * Returns this same instance rather than a copy.
 *
 * <p>The requested {@code traitSet} is not applied. {@code inputs} is asserted to be
 * empty.
 *
 * @param traitSet requested traits (ignored)
 * @param inputs requested inputs; must be empty
 * @return this query
 */
@Override public RelNode copy(RelTraitSet traitSet, List inputs) {
assert inputs.isEmpty();
return this;
}
/**
 * Derives the row type of this query: the field types come from the last rel, and
 * the field names come from the query spec.
 *
 * @return struct type built by the cluster's type factory
 */
@Override public RelDataType deriveRowType() {
  final List<RelDataType> fieldTypes =
      Pair.right(Util.last(rels).getRowType().getFieldList());
  return getCluster().getTypeFactory()
      .createStructType(fieldTypes, getQuerySpec().fieldNames);
}
/**
 * Returns the first rel of this query, cast to {@link TableScan}.
 *
 * @return the table scan at the bottom of the rel chain
 */
public TableScan getTableScan() {
  return (TableScan) this.rels.get(0);
}
/**
 * Returns the last rel of this query.
 *
 * @return the rel at the top of the rel chain
 */
public RelNode getTopNode() {
  return Util.last(this.rels);
}
/**
 * Returns the Calcite table this query was created with.
 *
 * @return the table passed to the constructor
 */
@Override public RelOptTable getTable() {
  return this.table;
}
/**
 * Returns the Druid table this query was created with.
 *
 * @return the Druid table passed to the constructor
 */
public DruidTable getDruidTable() {
  return this.druidTable;
}
/**
 * Writes one or more explain items for each rel of this query, in rel order.
 *
 * <p>Scans emit "table" and "intervals"; filters emit "filter"; projects emit
 * "post_projects" when their input is an {@link Aggregate} and "projects" otherwise;
 * aggregates emit "groups" and "aggs"; sorts emit "sortN" and "dirN" per collation
 * field, plus "fetch" when a fetch is present.
 *
 * @param pw writer to add the items to
 * @return {@code pw}
 * @throws AssertionError if a rel is of a type not listed above
 */
@Override public RelWriter explainTerms(RelWriter pw) {
  for (RelNode rel : rels) {
    if (rel instanceof TableScan) {
      pw.item("table", ((TableScan) rel).getTable().getQualifiedName());
      pw.item("intervals", intervals);
      continue;
    }
    if (rel instanceof Filter) {
      pw.item("filter", ((Filter) rel).getCondition());
      continue;
    }
    if (rel instanceof Project) {
      final Project project = (Project) rel;
      final String key =
          project.getInput() instanceof Aggregate ? "post_projects" : "projects";
      pw.item(key, project.getProjects());
      continue;
    }
    if (rel instanceof Aggregate) {
      final Aggregate aggregate = (Aggregate) rel;
      pw.item("groups", aggregate.getGroupSet())
          .item("aggs", aggregate.getAggCallList());
      continue;
    }
    if (rel instanceof Sort) {
      final Sort sort = (Sort) rel;
      final int collationCount = sort.collation.getFieldCollations().size();
      // All sort keys are written first, then all directions.
      for (int i = 0; i < collationCount; i++) {
        pw.item("sort" + i, sort.collation.getFieldCollations().get(i).getFieldIndex());
      }
      for (int i = 0; i < collationCount; i++) {
        pw.item("dir" + i, sort.collation.getFieldCollations().get(i).shortString());
      }
      pw.itemIf("fetch", sort.fetch, sort.fetch != null);
      continue;
    }
    throw new AssertionError("rel type not supported in Druid query "
        + rel);
  }
  return pw;
}
/**
 * Computes the cost of this query as the self cost of its last rel, scaled by a
 * series of multipliers.
 *
 * <p>NOTE(review): this reads the {@code querySpec} field directly, so it assumes
 * the field has already been populated - confirm against callers.
 *
 * @param planner planner, passed to the last rel's cost computation
 * @param mq metadata query, passed to the last rel's cost computation
 * @return the scaled cost
 */
@Override public RelOptCost computeSelfCost(RelOptPlanner planner,
    RelMetadataQuery mq) {
  RelOptCost cost = Util.last(rels).computeSelfCost(planner, mq);

  // Cost increases with the number of fields queried.
  // A plan returning 100 or more columns will have 2x the cost of a
  // plan returning 2 columns.
  // A plan where all extra columns are pruned will be preferred.
  final double fieldCountFactor =
      RelMdUtil.linear(querySpec.fieldNames.size(), 2, 100, 1d, 2d);
  cost = cost.multiplyBy(fieldCountFactor);

  cost = cost.multiplyBy(getQueryTypeCostMultiplier());

  // A Scan leaf filter is better than having filter spec if possible.
  final double leafFilterFactor =
      rels.size() > 1 && rels.get(1) instanceof Filter ? 0.5 : 1.0;
  cost = cost.multiplyBy(leafFilterFactor);

  // a plan with sort pushed to druid is better than doing sort outside of druid
  final double pushedSortFactor = Util.last(rels) instanceof Sort ? 0.1 : 1.0;
  cost = cost.multiplyBy(pushedSortFactor);

  return cost.multiplyBy(getIntervalCostMultiplier());
}
/**
 * Returns a cost multiplier that grows with the total number of days covered by
 * this query's intervals.
 *
 * <p>Days are summed in a {@code long} and saturated at {@link Integer#MAX_VALUE}
 * before being handed to {@code RelMdUtil.linear}. Summing straight into an
 * {@code int} would silently wrap for very wide intervals and could make the widest
 * query look like the cheapest one.
 *
 * @return multiplier computed by {@code RelMdUtil.linear} over 1 to
 * {@code DAYS_IN_TEN_YEARS} days, with output bounds 0.1 and 1
 */
private double getIntervalCostMultiplier() {
  long totalDays = 0;
  for (Interval interval : intervals) {
    totalDays += interval.toDuration().getStandardDays();
  }
  // Saturate instead of wrapping when narrowing to the int the cost function takes.
  final int days = (int) Math.min(totalDays, (long) Integer.MAX_VALUE);
  // Cost increases with the width of the interval being queried.
  // A plan querying 10 or more years of data will have 10x the cost of a
  // plan returning 1 day of data.
  // A plan where the narrowest interval is queried will be preferred.
  return RelMdUtil.linear(days, 1, DAYS_IN_TEN_YEARS, 0.1d, 1d);
}
/**
 * Returns a cost multiplier based on the query type held in {@code querySpec}.
 *
 * @return .1 for SELECT, .08 for GROUP_BY, .06 for TIMESERIES, .04 for TOP_N, and
 * .2 for any other type
 */
private double getQueryTypeCostMultiplier() {
  // Cost of Select > GroupBy > Timeseries > TopN
  final double multiplier;
  switch (querySpec.queryType) {
  case SELECT:
    multiplier = .1;
    break;
  case GROUP_BY:
    multiplier = .08;
    break;
  case TIMESERIES:
    multiplier = .06;
    break;
  case TOP_N:
    multiplier = .04;
    break;
  default:
    multiplier = .2;
    break;
  }
  return multiplier;
}
/**
 * Adds every rule in {@code DruidRules.RULES}, then every rule in
 * {@code Bindables.RULES}, to the planner.
 *
 * @param planner planner to add the rules to
 */
@Override public void register(RelOptPlanner planner) {
  for (RelOptRule druidRule : DruidRules.RULES) {
    planner.addRule(druidRule);
  }
  for (RelOptRule bindableRule : Bindables.RULES) {
    planner.addRule(bindableRule);
  }
}
@Override public Class