/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.flink;
import org.apache.paimon.CoreOptions.LogChangelogMode;
import org.apache.paimon.CoreOptions.LogConsistency;
import org.apache.paimon.CoreOptions.StreamingReadMode;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.flink.log.LogStoreTableFactory;
import org.apache.paimon.flink.sink.FlinkTableSink;
import org.apache.paimon.flink.source.DataTableSource;
import org.apache.paimon.flink.source.SystemTableSource;
import org.apache.paimon.options.Options;
import org.apache.paimon.options.OptionsUtils;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.FileStoreTableFactory;
import org.apache.paimon.table.Table;
import org.apache.paimon.utils.Preconditions;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ExecutionOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.ValidationException;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableFactory;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
import org.apache.flink.table.types.logical.RowType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import static org.apache.paimon.CoreOptions.LOG_CHANGELOG_MODE;
import static org.apache.paimon.CoreOptions.LOG_CONSISTENCY;
import static org.apache.paimon.CoreOptions.SCAN_MODE;
import static org.apache.paimon.CoreOptions.STREAMING_READ_MODE;
import static org.apache.paimon.CoreOptions.StartupMode.FROM_SNAPSHOT;
import static org.apache.paimon.CoreOptions.StartupMode.FROM_SNAPSHOT_FULL;
import static org.apache.paimon.flink.FlinkConnectorOptions.LOG_SYSTEM;
import static org.apache.paimon.flink.FlinkConnectorOptions.NONE;
import static org.apache.paimon.flink.LogicalTypeConversion.toLogicalType;
import static org.apache.paimon.flink.log.LogStoreTableFactory.discoverLogStoreFactory;
/** Abstract paimon factory to create table source and table sink. */
public abstract class AbstractFlinkTableFactory
implements DynamicTableSourceFactory, DynamicTableSinkFactory {
private static final Logger LOG = LoggerFactory.getLogger(AbstractFlinkTableFactory.class);
@Nullable private final FlinkCatalog flinkCatalog;
public AbstractFlinkTableFactory(@Nullable FlinkCatalog flinkCatalog) {
this.flinkCatalog = flinkCatalog;
}
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
CatalogTable origin = context.getCatalogTable().getOrigin();
boolean isStreamingMode =
context.getConfiguration().get(ExecutionOptions.RUNTIME_MODE)
== RuntimeExecutionMode.STREAMING;
if (origin instanceof SystemCatalogTable) {
return new SystemTableSource(
((SystemCatalogTable) origin).table(),
isStreamingMode,
context.getObjectIdentifier());
} else {
return new DataTableSource(
context.getObjectIdentifier(),
buildPaimonTable(context),
isStreamingMode,
context,
createOptionalLogStoreFactory(context).orElse(null));
}
}
@Override
public DynamicTableSink createDynamicTableSink(Context context) {
return new FlinkTableSink(
context.getObjectIdentifier(),
buildPaimonTable(context),
context,
createOptionalLogStoreFactory(context).orElse(null));
}
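// Illustrative context, not from this file: Flink resolves this factory when a
// table is backed by the Paimon connector, e.g. (DDL sketch, option names assumed)
//   CREATE TABLE t (a INT, b STRING) WITH ('connector' = 'paimon', 'path' = '/tmp/t');
// and then invokes createDynamicTableSource/createDynamicTableSink above to plan
// reads and writes against that table.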
@Override
public Set<ConfigOption<?>> requiredOptions() {
return Collections.emptySet();
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
return new HashSet<>();
}
// ~ Tools ------------------------------------------------------------------
public static Optional<LogStoreTableFactory> createOptionalLogStoreFactory(
DynamicTableFactory.Context context) {
return createOptionalLogStoreFactory(
context.getClassLoader(), context.getCatalogTable().getOptions());
}
static Optional<LogStoreTableFactory> createOptionalLogStoreFactory(
ClassLoader classLoader, Map<String, String> options) {
Options configOptions = new Options();
options.forEach(configOptions::setString);
if (configOptions.get(LOG_SYSTEM).equalsIgnoreCase(NONE)) {
// Use file store continuous reading
validateFileStoreContinuous(configOptions);
return Optional.empty();
} else if (configOptions.get(SCAN_MODE) == FROM_SNAPSHOT
|| configOptions.get(SCAN_MODE) == FROM_SNAPSHOT_FULL) {
throw new ValidationException(
String.format(
"Log system does not support %s and %s scan mode",
FROM_SNAPSHOT, FROM_SNAPSHOT_FULL));
}
return Optional.of(discoverLogStoreFactory(classLoader, configOptions.get(LOG_SYSTEM)));
}
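// Behavior sketch for the method above (option values assumed for illustration):
//   {"log.system" -> "none"}                                  -> Optional.empty(), file store continuous reads
//   {"log.system" -> "kafka", "scan.mode" -> "from-snapshot"} -> ValidationException
//   {"log.system" -> "kafka"}                                 -> Optional.of(factory), assuming a matching
//                                                                log store factory is on the classpath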
private static void validateFileStoreContinuous(Options options) {
LogChangelogMode changelogMode = options.get(LOG_CHANGELOG_MODE);
StreamingReadMode streamingReadMode = options.get(STREAMING_READ_MODE);
if (changelogMode == LogChangelogMode.UPSERT) {
throw new ValidationException(
"File store continuous reading does not support upsert changelog mode.");
}
LogConsistency consistency = options.get(LOG_CONSISTENCY);
if (consistency == LogConsistency.EVENTUAL) {
throw new ValidationException(
"File store continuous reading does not support eventual consistency mode.");
}
if (streamingReadMode == StreamingReadMode.LOG) {
throw new ValidationException(
"File store continuous reading does not support the log streaming read mode.");
}
}
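// For example (values assumed), a table with {"log.system" -> "none",
// "log.changelog-mode" -> "upsert"} is rejected by the first check above:
// without a log system, continuous reads are served by the file store, which
// does not support the upsert changelog mode.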
static CatalogContext createCatalogContext(DynamicTableFactory.Context context) {
return CatalogContext.create(
Options.fromMap(context.getCatalogTable().getOptions()), new FlinkFileIOLoader());
}
Table buildPaimonTable(DynamicTableFactory.Context context) {
CatalogTable origin = context.getCatalogTable().getOrigin();
Table table;
Map<String, String> dynamicOptions = getDynamicConfigOptions(context);
dynamicOptions.forEach(
(key, newValue) -> {
String oldValue = origin.getOptions().get(key);
if (!Objects.equals(oldValue, newValue)) {
SchemaManager.checkAlterTableOption(key, oldValue, newValue, true);
}
});
Map<String, String> newOptions = new HashMap<>();
newOptions.putAll(origin.getOptions());
// dynamic options should override origin options
newOptions.putAll(dynamicOptions);
FileStoreTable fileStoreTable;
if (origin instanceof DataCatalogTable) {
fileStoreTable = (FileStoreTable) ((DataCatalogTable) origin).table();
} else if (flinkCatalog == null) {
// In case Paimon is used directly as a Flink connector, not through a catalog.
fileStoreTable = FileStoreTableFactory.create(createCatalogContext(context));
} else {
// In cases like materialized tables, the Paimon table might not be a DataCatalogTable,
// but it can still be acquired through the catalog.
Identifier identifier =
Identifier.create(
context.getObjectIdentifier().getDatabaseName(),
context.getObjectIdentifier().getObjectName());
try {
fileStoreTable = (FileStoreTable) flinkCatalog.catalog().getTable(identifier);
} catch (Catalog.TableNotExistException e) {
throw new RuntimeException(e);
}
}
table = fileStoreTable.copyWithoutTimeTravel(newOptions);
// notice that the Paimon table schema must be the same as Flink's
Schema schema = FlinkCatalog.fromCatalogTable(context.getCatalogTable());
RowType rowType = toLogicalType(schema.rowType());
List<String> partitionKeys = schema.partitionKeys();
List<String> primaryKeys = schema.primaryKeys();
// compare fields only, ignoring the outer row nullability and nested fields' comments
Preconditions.checkArgument(
schemaEquals(toLogicalType(table.rowType()), rowType),
"Flink schema and store schema are not the same, "
+ "store schema is %s, Flink schema is %s",
table.rowType(),
rowType);
Preconditions.checkArgument(
table.partitionKeys().equals(partitionKeys),
"Flink partitionKeys and store partitionKeys are not the same, "
+ "store partitionKeys is %s, Flink partitionKeys is %s",
table.partitionKeys(),
partitionKeys);
Preconditions.checkArgument(
table.primaryKeys().equals(primaryKeys),
"Flink primaryKeys and store primaryKeys are not the same, "
+ "store primaryKeys is %s, Flink primaryKeys is %s",
table.primaryKeys(),
primaryKeys);
return table;
}
@VisibleForTesting
static boolean schemaEquals(RowType rowType1, RowType rowType2) {
List<RowType.RowField> fieldList1 = rowType1.getFields();
List<RowType.RowField> fieldList2 = rowType2.getFields();
if (fieldList1.size() != fieldList2.size()) {
return false;
}
for (int i = 0; i < fieldList1.size(); i++) {
RowType.RowField f1 = fieldList1.get(i);
RowType.RowField f2 = fieldList2.get(i);
if (!f1.getName().equals(f2.getName()) || !f1.getType().equals(f2.getType())) {
return false;
}
}
return true;
}
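// A minimal sketch of what schemaEquals ignores (types chosen for illustration):
// two RowTypes with identical fields [a INT, b STRING] compare equal even if only
// one of the outer rows is declared NOT NULL, because the loop above inspects the
// per-field names and types and never the row type's own nullability.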
/**
* The dynamic option's format is:
*
* <p>Global Options: key = value.
*
* <p>Table Options: {@link
* FlinkConnectorOptions#TABLE_DYNAMIC_OPTION_PREFIX}${catalog}.${database}.${tableName}.key =
* value.
*
* <p>These job level options will be extracted and injected into the target table option. Table
* options will override global options if there are conflicts.
*
* @param context The table factory context.
* @return The dynamic options of this target table.
*/
static Map<String, String> getDynamicConfigOptions(DynamicTableFactory.Context context) {
ReadableConfig config = context.getConfiguration();
Map<String, String> conf;
if (config instanceof Configuration) {
conf = ((Configuration) config).toMap();
} else if (config instanceof TableConfig) {
conf = ((TableConfig) config).getConfiguration().toMap();
} else {
throw new IllegalArgumentException("Unexpected config: " + config.getClass());
}
String template =
String.format(
"(%s)(%s|\\*)\\.(%s|\\*)\\.(%s|\\*)\\.(.+)",
FlinkConnectorOptions.TABLE_DYNAMIC_OPTION_PREFIX,
context.getObjectIdentifier().getCatalogName(),
context.getObjectIdentifier().getDatabaseName(),
context.getObjectIdentifier().getObjectName());
Pattern pattern = Pattern.compile(template);
Map<String, String> optionsFromTableConfig =
OptionsUtils.convertToDynamicTableProperties(conf, "", pattern, 5);
if (!optionsFromTableConfig.isEmpty()) {
LOG.info(
"Loading dynamic table options for {} in table config: {}",
context.getObjectIdentifier().getObjectName(),
optionsFromTableConfig);
}
return optionsFromTableConfig;
}
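// Illustrative usage (the prefix literal and option values are assumed): with a
// table identified as my_catalog.my_db.my_table, a session-level setting such as
//   SET 'paimon.my_catalog.my_db.my_table.scan.snapshot-id' = '2';
// or, using wildcards for the catalog and database parts,
//   SET 'paimon.*.*.my_table.scan.snapshot-id' = '2';
// matches the pattern above and is injected into the table options as
// scan.snapshot-id = 2, overriding the value stored in the table schema.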
}