All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.paimon.flink.action.MergeIntoAction Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.action;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.flink.LogicalTypeConversion;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.conversion.DataStructureConverter;
import org.apache.flink.table.data.conversion.DataStructureConverters;
import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.RowKind;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.paimon.CoreOptions.MergeEngine.DEDUPLICATE;
import static org.apache.paimon.CoreOptions.MergeEngine.PARTIAL_UPDATE;
import static org.apache.paimon.utils.ParameterUtils.parseCommaSeparatedKeyValues;

/**
 * Flink action for 'MERGE INTO', which references the syntax as follows (we use 'upsert' semantics
 * instead of 'update'):
 *
 * 

 *  MERGE INTO target-table
 *  USING source-table | source-expr AS source-alias
 *  ON merge-condition
 *  WHEN MATCHED [AND matched-condition]
 *    THEN UPDATE SET xxx
 *  WHEN MATCHED [AND matched-condition]
 *    THEN DELETE
 *  WHEN NOT MATCHED [AND not-matched-condition]
 *    THEN INSERT VALUES (xxx)
 *  WHEN NOT MATCHED BY SOURCE [AND not-matched-by-source-condition]
 *    THEN UPDATE SET xxx
 *  WHEN NOT MATCHED BY SOURCE [AND not-matched-by-source-condition]
 *    THEN DELETE
 * 
* *

It builds a query to find the rows to be changed. INNER JOIN with merge-condition is used to * find MATCHED rows, and NOT EXISTS with merge-condition is used to find NOT MATCHED rows, then the * condition of each action is used to filter the rows. */ public class MergeIntoAction extends TableActionBase { private static final Logger LOG = LoggerFactory.getLogger(MergeIntoAction.class); // primary keys of target table private final List primaryKeys; // converters for Row to RowData private final List> converters; // field names of target table private final List targetFieldNames; // target table @Nullable private String targetAlias; // source table private String sourceTable; // sqls to config environment and create source table @Nullable private String[] sourceSqls; // merge condition private String mergeCondition; // actions to be taken private boolean matchedUpsert; private boolean notMatchedUpsert; private boolean matchedDelete; private boolean notMatchedDelete; private boolean insert; // upsert @Nullable String matchedUpsertCondition; @Nullable private String matchedUpsertSet; @Nullable String notMatchedBySourceUpsertCondition; @Nullable String notMatchedBySourceUpsertSet; // delete @Nullable String matchedDeleteCondition; @Nullable String notMatchedBySourceDeleteCondition; // insert @Nullable private String notMatchedInsertCondition; @Nullable private String notMatchedInsertValues; public MergeIntoAction(String warehouse, String database, String tableName) { this(warehouse, database, tableName, Collections.emptyMap()); } public MergeIntoAction( String warehouse, String database, String tableName, Map catalogConfig) { super(warehouse, database, tableName, catalogConfig); if (!(table instanceof FileStoreTable)) { throw new UnsupportedOperationException( String.format( "Only FileStoreTable supports merge-into action. The table type is '%s'.", table.getClass().getName())); } // init primaryKeys of target table primaryKeys = ((FileStoreTable) table).schema().primaryKeys(); if (primaryKeys.isEmpty()) { throw new UnsupportedOperationException( "merge-into action doesn't support table with no primary keys defined."); } // init DataStructureConverters converters = table.rowType().getFieldTypes().stream() .map(LogicalTypeConversion::toLogicalType) .map(TypeConversions::fromLogicalToDataType) .map(DataStructureConverters::getConverter) .collect(Collectors.toList()); // init field names of target table targetFieldNames = table.rowType().getFields().stream() .map(DataField::name) .collect(Collectors.toList()); } public MergeIntoAction withTargetAlias(String targetAlias) { this.targetAlias = targetAlias; return this; } public MergeIntoAction withSourceTable(String sourceTable) { this.sourceTable = sourceTable; return this; } public MergeIntoAction withSourceSqls(String... sourceSqls) { this.sourceSqls = sourceSqls; return this; } public MergeIntoAction withMergeCondition(String mergeCondition) { this.mergeCondition = mergeCondition; return this; } public MergeIntoAction withMatchedUpsert( @Nullable String matchedUpsertCondition, String matchedUpsertSet) { this.matchedUpsert = true; this.matchedUpsertCondition = matchedUpsertCondition; this.matchedUpsertSet = matchedUpsertSet; return this; } public MergeIntoAction withNotMatchedBySourceUpsert( @Nullable String notMatchedBySourceUpsertCondition, String notMatchedBySourceUpsertSet) { this.notMatchedUpsert = true; this.notMatchedBySourceUpsertCondition = notMatchedBySourceUpsertCondition; this.notMatchedBySourceUpsertSet = notMatchedBySourceUpsertSet; return this; } public MergeIntoAction withMatchedDelete(@Nullable String matchedDeleteCondition) { this.matchedDelete = true; this.matchedDeleteCondition = matchedDeleteCondition; return this; } public MergeIntoAction withNotMatchedBySourceDelete( @Nullable String notMatchedBySourceDeleteCondition) { this.notMatchedDelete = true; this.notMatchedBySourceDeleteCondition = notMatchedBySourceDeleteCondition; return this; } public MergeIntoAction withNotMatchedInsert( @Nullable String notMatchedInsertCondition, String notMatchedInsertValues) { this.insert = true; this.notMatchedInsertCondition = notMatchedInsertCondition; this.notMatchedInsertValues = notMatchedInsertValues; return this; } public void validate() { if (!matchedUpsert && !notMatchedUpsert && !matchedDelete && !notMatchedDelete && !insert) { throw new IllegalArgumentException( "Must specify at least one merge action. Run 'merge_into --help' for help."); } CoreOptions.MergeEngine mergeEngine = CoreOptions.fromMap(table.options()).mergeEngine(); boolean supportMergeInto = mergeEngine == DEDUPLICATE || mergeEngine == PARTIAL_UPDATE; if (!supportMergeInto) { throw new UnsupportedOperationException( String.format("Merge engine %s can not support merge-into.", mergeEngine)); } if ((matchedUpsert && matchedDelete) && (matchedUpsertCondition == null || matchedDeleteCondition == null)) { throw new IllegalArgumentException( "If both matched-upsert and matched-delete actions are present, their conditions must both be present too."); } if ((notMatchedUpsert && notMatchedDelete) && (notMatchedBySourceUpsertCondition == null || notMatchedBySourceDeleteCondition == null)) { throw new IllegalArgumentException( "If both not-matched-by-source-upsert and not-matched-by--source-delete actions are present, " + "their conditions must both be present too.\n"); } if (notMatchedBySourceUpsertSet != null && notMatchedBySourceUpsertSet.equals("*")) { throw new IllegalArgumentException( "The '*' cannot be used in not_matched_by_source_upsert_set"); } } @Override public void run() throws Exception { DataStream dataStream = buildDataStream(); batchSink(dataStream).await(); } public DataStream buildDataStream() { // handle aliases handleTargetAlias(); // handle sqls handleSqls(); // get data streams for all actions List> dataStreams = Stream.of( getMatchedUpsertDataStream(), getNotMatchedUpsertDataStream(), getMatchedDeleteDataStream(), getNotMatchedDeleteDataStream(), getInsertDataStream()) .filter(Optional::isPresent) .map(Optional::get) .collect(Collectors.toList()); DataStream firstDs = dataStreams.get(0); return firstDs.union(dataStreams.stream().skip(1).toArray(DataStream[]::new)); } private void handleTargetAlias() { if (targetAlias != null) { // create a view 'targetAlias' in the path of target table, then we can find it with the // qualified name batchTEnv.createTemporaryView( escapedTargetName(), batchTEnv.from(identifier.getFullName())); } } private void handleSqls() { // NOTE: sql may change current catalog and database if (sourceSqls != null) { for (String sql : sourceSqls) { try { batchTEnv.executeSql(sql).await(); } catch (Throwable t) { String errMsg = "Error occurs when executing sql:\n%s"; LOG.error(String.format(errMsg, sql), t); throw new RuntimeException(String.format(errMsg, sql), t); } } } } private Optional> getMatchedUpsertDataStream() { if (!matchedUpsert) { return Optional.empty(); } List project; // extract project if (matchedUpsertSet.equals("*")) { // if sourceName is qualified like 'default.S', we should build a project like S.* String[] splits = sourceTable.split("\\."); project = Collections.singletonList(splits[splits.length - 1] + ".*"); } else { // validate upsert changes // no need to check primary keys changes because merge condition must contain all pks // of the target table Map changes = parseCommaSeparatedKeyValues(matchedUpsertSet); for (String targetField : changes.keySet()) { if (!targetFieldNames.contains(targetField)) { throw new RuntimeException( String.format( "Invalid column reference '%s' of table '%s' at matched-upsert action.", targetField, identifier.getFullName())); } } // replace field names // the table name is added before column name to avoid ambiguous column reference project = targetFieldNames.stream() .map(name -> changes.getOrDefault(name, targetTableName() + "." + name)) .collect(Collectors.toList()); } // use inner join to find matched records String query = String.format( "SELECT %s FROM %s INNER JOIN %s ON %s %s", String.join(",", project), escapedTargetName(), escapedSourceName(), mergeCondition, matchedUpsertCondition == null ? "" : "WHERE " + matchedUpsertCondition); LOG.info("Query used for matched-update:\n{}", query); Table source = batchTEnv.sqlQuery(query); checkSchema("matched-upsert", source); return Optional.of(toDataStream(source, RowKind.UPDATE_AFTER, converters)); } private Optional> getNotMatchedUpsertDataStream() { if (!notMatchedUpsert) { return Optional.empty(); } // validate upsert change Map changes = parseCommaSeparatedKeyValues(notMatchedBySourceUpsertSet); for (String targetField : changes.keySet()) { if (!targetFieldNames.contains(targetField)) { throw new RuntimeException( String.format( "Invalid column reference '%s' of table '%s' at not-matched-by-source-upsert action.\nRun --help for help.", targetField, identifier.getFullName())); } if (primaryKeys.contains(targetField)) { throw new RuntimeException( "Not allowed to change primary key in not-matched-by-source-upsert-set.\nRun --help for help."); } } // replace field names (won't be ambiguous here) List project = targetFieldNames.stream() .map(name -> changes.getOrDefault(name, name)) .collect(Collectors.toList()); // use not exists to find not matched records String query = String.format( "SELECT %s FROM %s WHERE NOT EXISTS (SELECT * FROM %s WHERE %s) %s", String.join(",", project), escapedTargetName(), escapedSourceName(), mergeCondition, notMatchedBySourceUpsertCondition == null ? "" : String.format("AND (%s)", notMatchedBySourceUpsertCondition)); LOG.info("Query used for not-matched-by-source-upsert:\n{}", query); Table source = batchTEnv.sqlQuery(query); checkSchema("not-matched-by-source-upsert", source); return Optional.of(toDataStream(source, RowKind.UPDATE_AFTER, converters)); } private Optional> getMatchedDeleteDataStream() { if (!matchedDelete) { return Optional.empty(); } // the table name is added before column name to avoid ambiguous column reference List project = targetFieldNames.stream() .map(name -> targetTableName() + "." + name) .collect(Collectors.toList()); // use inner join to find matched records String query = String.format( "SELECT %s FROM %s INNER JOIN %s ON %s %s", String.join(",", project), escapedTargetName(), escapedSourceName(), mergeCondition, matchedDeleteCondition == null ? "" : "WHERE " + matchedDeleteCondition); LOG.info("Query used by matched-delete:\n{}", query); Table source = batchTEnv.sqlQuery(query); checkSchema("matched-delete", source); return Optional.of(toDataStream(source, RowKind.DELETE, converters)); } private Optional> getNotMatchedDeleteDataStream() { if (!notMatchedDelete) { return Optional.empty(); } // use not exists to find not matched records String query = String.format( "SELECT %s FROM %s WHERE NOT EXISTS (SELECT * FROM %s WHERE %s) %s", String.join(",", targetFieldNames), escapedTargetName(), escapedSourceName(), mergeCondition, notMatchedBySourceDeleteCondition == null ? "" : String.format("AND (%s)", notMatchedBySourceDeleteCondition)); LOG.info("Query used by not-matched-by-source-delete:\n{}", query); Table source = batchTEnv.sqlQuery(query); checkSchema("not-matched-by-source-delete", source); return Optional.of(toDataStream(source, RowKind.DELETE, converters)); } private Optional> getInsertDataStream() { if (!insert) { return Optional.empty(); } // use not exist to find rows to insert String query = String.format( "SELECT %s FROM %s WHERE NOT EXISTS (SELECT * FROM %s WHERE %s) %s", notMatchedInsertValues, escapedSourceName(), escapedTargetName(), mergeCondition, notMatchedInsertCondition == null ? "" : String.format("AND (%s)", notMatchedInsertCondition)); LOG.info("Query used by not-matched-insert:\n{}", query); Table source = batchTEnv.sqlQuery(query); checkSchema("not-matched-insert", source); return Optional.of(toDataStream(source, RowKind.INSERT, converters)); } private void checkSchema(String action, Table source) { List actualTypes = toPaimonTypes(source.getResolvedSchema().getColumnDataTypes()); List expectedTypes = this.table.rowType().getFieldTypes(); if (!compatibleCheck(actualTypes, expectedTypes)) { throw new IllegalStateException( String.format( "The schema of result in action '%s' is invalid.\n" + "Result schema: [%s]\n" + "Expected schema: [%s]", action, actualTypes.stream() .map(DataType::asSQLString) .collect(Collectors.joining(", ")), expectedTypes.stream() .map(DataType::asSQLString) .collect(Collectors.joining(", ")))); } } // pass converters to avoid "not serializable" exception private DataStream toDataStream( Table source, RowKind kind, List> converters) { return batchTEnv .toChangelogStream(source) .map( row -> { int arity = row.getArity(); GenericRowData rowData = new GenericRowData(kind, arity); for (int i = 0; i < arity; i++) { rowData.setField( i, converters.get(i).toInternalOrNull(row.getField(i))); } return rowData; }); } private String targetTableName() { return targetAlias == null ? identifier.getObjectName() : targetAlias; } private String escapedTargetName() { return String.format( "`%s`.`%s`.`%s`", catalogName, identifier.getDatabaseName(), targetTableName()); } private String escapedSourceName() { return Arrays.stream(sourceTable.split("\\.")) .map(s -> String.format("`%s`", s)) .collect(Collectors.joining(".")); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy