All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.cdap.plugin.gcp.bigquery.sqlengine.builder.BigQueryJoinSQLBuilder Maven / Gradle / Ivy

/*
 * Copyright © 2021 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.gcp.bigquery.sqlengine.builder;

import com.google.cloud.bigquery.DatasetId;
import com.google.common.annotations.VisibleForTesting;
import io.cdap.cdap.etl.api.engine.sql.SQLEngineException;
import io.cdap.cdap.etl.api.join.JoinCondition;
import io.cdap.cdap.etl.api.join.JoinDefinition;
import io.cdap.cdap.etl.api.join.JoinField;
import io.cdap.cdap.etl.api.join.JoinKey;
import io.cdap.cdap.etl.api.join.JoinStage;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Helper class used to generate BigQuery SQL Statements for Joins.
 * 

* Many methods in this class have proteced visibility for the purposes of testing. */ public class BigQueryJoinSQLBuilder extends BigQueryBaseSQLBuilder { private final JoinDefinition joinDefinition; private final StringBuilder builder; private final DatasetId dataset; private final Map stageToBQTableNameMap; private final Map stageToFullTableNameMap; private final Map stageToTableAliasMap; public BigQueryJoinSQLBuilder(JoinDefinition joinDefinition, DatasetId dataset, Map stageToBQTableNameMap) { this(joinDefinition, dataset, stageToBQTableNameMap, new HashMap<>(), new HashMap<>(), new StringBuilder()); } @VisibleForTesting protected BigQueryJoinSQLBuilder(JoinDefinition joinDefinition, DatasetId dataset, Map stageToBQTableNameMap, Map stageToFullTableNameMap, Map stageToTableAliasMap, StringBuilder builder) { this.joinDefinition = joinDefinition; this.builder = builder; this.dataset = dataset; this.stageToBQTableNameMap = stageToBQTableNameMap; this.stageToFullTableNameMap = stageToFullTableNameMap; this.stageToTableAliasMap = stageToTableAliasMap; } @Override public String getQuery() { if (joinDefinition.getCondition().getOp() == JoinCondition.Op.KEY_EQUALITY) { return getFieldEqualityQuery(); } else { return getExpressionQuery(); } } private String getFieldEqualityQuery() { // Build aliases for all tables. addTableNamesAndAliasesForJoinDefinition(); appendSelectStatement(); appendFieldEqualityClause(); return builder.toString(); } private String getExpressionQuery() { JoinCondition.OnExpression onExpression = (JoinCondition.OnExpression) joinDefinition.getCondition(); // Build aliases for all tables. addTableNamesAndAliasesForJoinDefinition(onExpression.getDatasetAliases()); appendSelectStatement(); appendOnExpressionClause(); return builder.toString(); } /** * Appends Select columns and From statement *

* SELECT FROM ... */ private void appendSelectStatement() { builder.append(SELECT).append(getSelectedFields()).append(FROM); } @VisibleForTesting protected String getSelectedFields() { return joinDefinition.getSelectedFields().stream() .map(this::buildSelectedField) .collect(Collectors.joining(COMMA)); } @VisibleForTesting protected String buildSelectedField(JoinField joinField) { StringBuilder builder = new StringBuilder(); builder.append(getTableAlias(joinField.getStageName())); builder.append(DOT); builder.append(joinField.getFieldName()); if (joinField.getAlias() != null) { builder.append(AS); builder.append(quoteAlias(joinField.getAlias())); } return builder.toString(); } private void appendFieldEqualityClause() { List stages = joinDefinition.getStages(); Map stageNameToJoinKeyMap = new HashMap<>(); for (JoinKey joinKey : ((JoinCondition.OnKeys) joinDefinition.getCondition()).getKeys()) { stageNameToJoinKeyMap.put(joinKey.getStageName(), joinKey); } boolean joinOnNullKeys = ((JoinCondition.OnKeys) joinDefinition.getCondition()).isNullSafe(); appendFieldEqualityClause(stages, stageNameToJoinKeyMap, joinOnNullKeys); } @VisibleForTesting protected void appendFieldEqualityClause(List stages, Map stageNameToJoinKeyMap, boolean joinOnNullKeys) { JoinStage prev = null; for (JoinStage curr : stages) { if (prev == null) { appendFullTableNameAndAlias(curr.getStageName()); } else { builder.append(SPACE); appendJoinOnKeyOperation(prev, curr, stageNameToJoinKeyMap, joinOnNullKeys); } prev = curr; } } private void appendOnExpressionClause() { JoinCondition.OnExpression onExpression = (JoinCondition.OnExpression) joinDefinition.getCondition(); JoinStage left = joinDefinition.getStages().get(0); JoinStage right = joinDefinition.getStages().get(1); // Append Join Statement for these 2 stages // ... JOIN ON ... appendFullTableNameAndAlias(left.getStageName()); builder.append(SPACE); appendJoinStatement(left, right); builder.append(onExpression.getExpression()); } /** * Appends join on key operation * * @param left left stage in the join * @param right right stage in the join * @param stageNameToJoinKeyMap Map containing all stage names and join keys * @param joinOnNullKeys whether the join should include null kets */ @VisibleForTesting protected void appendJoinOnKeyOperation(JoinStage left, JoinStage right, Map stageNameToJoinKeyMap, boolean joinOnNullKeys) { // Append Join Statement for these 2 stages appendJoinStatement(left, right); String leftAlias = getTableAlias(left.getStageName()); String rightAlias = getTableAlias(right.getStageName()); JoinKey leftKey = stageNameToJoinKeyMap.get(left.getStageName()); JoinKey rightKey = stageNameToJoinKeyMap.get(right.getStageName()); // Append Join on key conditions appendJoinOnKeyClause(leftAlias, leftKey, rightAlias, rightKey, joinOnNullKeys); } /** * Appends join statement between 2 tables *

* ... [JOIN_TYPE] JOIN right ON ... * * @param left left stage in the join * @param right right stage in the join */ private void appendJoinStatement(JoinStage left, JoinStage right) { String joinType; if (left.isRequired() && right.isRequired()) { joinType = "INNER"; } else if (left.isRequired() && !right.isRequired()) { joinType = "LEFT OUTER"; } else if (!left.isRequired() && right.isRequired()) { joinType = "RIGHT OUTER"; } else { joinType = "FULL OUTER"; } // ... JOIN ON ... builder.append(joinType); builder.append(JOIN); appendFullTableNameAndAlias(right.getStageName()); builder.append(ON); } /** * Appends a table name and alias for a given stage. *

* ...`project.dataset.bqtable` AS `somealias`... * * @param stageName name of the stage to use. */ private void appendFullTableNameAndAlias(String stageName) { builder.append(getFullTableName(stageName)).append(AS).append(getTableAlias(stageName)); } /** * Append join on key statement *

* When not joining on null keys, the result is: * ... left.l1 = right.r1 [AND left.l2 = right.r2 ...] *

* When joining on null keys, the result is: * ... (left.l1 = right.r1 OR (left.l1 IS NULL AND right.r1 IS NULL)) * [AND (left.l2 = right.r2 OR (left.l2 IS NULL AND right.r2 IS NULL)) ...] * * @param leftAlias * @param leftKey * @param rightAlias * @param rightKey * @param joinOnNullKeys */ @VisibleForTesting protected void appendJoinOnKeyClause(String leftAlias, JoinKey leftKey, String rightAlias, JoinKey rightKey, boolean joinOnNullKeys) { // ... ON [left.l1 = right.r1] appendEquals(leftAlias, leftKey.getFields().get(0), rightAlias, rightKey.getFields().get(0), joinOnNullKeys); for (int i = 1; i < leftKey.getFields().size(); i++) { // ... [AND left.rN = right.rN] builder.append(AND); appendEquals(leftAlias, leftKey.getFields().get(i), rightAlias, rightKey.getFields().get(i), joinOnNullKeys); } } /** * Appends Equality clause *

* When not joining on null keys, the result is: * ... left.l1 = right.r1 ... *

* When joining on null keys, the result is: * ... (left.l1 = right.r1 OR (left.l1 IS NULL AND right.r1 IS NULL)) ... * * @param leftTable Alias for the left table * @param leftField Alias for the left field * @param rightTable Alias for the right table * @param rightField Alias for the right field * @param joinOnNullKeys if null kets should be included */ @VisibleForTesting protected void appendEquals(String leftTable, String leftField, String rightTable, String rightField, boolean joinOnNullKeys) { if (joinOnNullKeys) { builder.append(OPEN_GROUP); } // ...table1.column1 = table2.column2... builder.append(leftTable).append(DOT).append(leftField); builder.append(EQ); builder.append(rightTable).append(DOT).append(rightField); if (joinOnNullKeys) { // ... OR (table1.column1 IS NULL AND table2.column2 IS NULL))... builder.append(OR).append(OPEN_GROUP); builder.append(leftTable).append(DOT).append(leftField).append(IS_NULL); builder.append(AND); builder.append(rightTable).append(DOT).append(rightField).append(IS_NULL); builder.append(CLOSE_GROUP).append(CLOSE_GROUP); } } @VisibleForTesting protected String getBQTableName(String stageName) { String result = stageToBQTableNameMap.get(stageName); if (result == null) { throw new SQLEngineException(String.format("Unable to determine BQ table name for stage '%s'", stageName)); } return result; } private void addTableNamesAndAliasesForJoinDefinition() { addTableNamesAndAliasesForJoinDefinition(Collections.emptyMap()); } private void addTableNamesAndAliasesForJoinDefinition(Map aliasOverrides) { for (JoinStage stage : joinDefinition.getStages()) { String stageName = stage.getStageName(); if (!stageToFullTableNameMap.containsKey(stageName)) { addFullTableName(stageName); } if (!stageToTableAliasMap.containsKey(stageName)) { addTableAlias(stageName, aliasOverrides.getOrDefault(stageName, stageName)); } } } /** * Aad the full table name for this table * * @param stageName */ @VisibleForTesting protected void addFullTableName(String stageName) { String bqTableName = getBQTableName(stageName); stageToFullTableNameMap.put(stageName, String.format("`%s.%s.%s`", dataset.getProject(), dataset.getDataset(), bqTableName)); } private void addTableAlias(String stageName, String alias) { stageToTableAliasMap.put(stageName, quoteAlias(alias)); } /** * Add backticks to quote aliases for a given table. *

* This ensures aliases are escaped and support full unicode characters. * * @param alias alias to escape * @return quoted alias */ @VisibleForTesting protected static String quoteAlias(String alias) { return String.format("`%s`", alias); } /** * Get fully qualified (quoted) table name for a supplied stage. */ @VisibleForTesting protected String getFullTableName(String stageName) { String fullTableName = stageToFullTableNameMap.get(stageName); if (fullTableName == null) { throw new SQLEngineException(String.format("Unable to determine full table name for stage '%s'", stageName)); } return fullTableName; } /** * Get table alias for a supplied stage. */ private String getTableAlias(String stageName) { String tableAlias = stageToTableAliasMap.get(stageName); if (tableAlias == null) { throw new SQLEngineException(String.format("Unable to determine table alias for stage '%s'", stageName)); } return tableAlias; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy