/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.flink;
import org.apache.paimon.CoreOptions.LogChangelogMode;
import org.apache.paimon.CoreOptions.LogConsistency;
import org.apache.paimon.CoreOptions.StreamingReadMode;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.flink.log.LogStoreTableFactory;
import org.apache.paimon.flink.sink.FlinkTableSink;
import org.apache.paimon.flink.source.DataTableSource;
import org.apache.paimon.flink.source.SystemTableSource;
import org.apache.paimon.flink.source.table.PushedRichTableSource;
import org.apache.paimon.flink.source.table.PushedTableSource;
import org.apache.paimon.flink.source.table.RichTableSource;
import org.apache.paimon.lineage.LineageMeta;
import org.apache.paimon.lineage.LineageMetaFactory;
import org.apache.paimon.lineage.TableLineageEntity;
import org.apache.paimon.lineage.TableLineageEntityImpl;
import org.apache.paimon.options.Options;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.FileStoreTableFactory;
import org.apache.paimon.table.Table;
import org.apache.paimon.utils.Preconditions;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ExecutionOptions;
import org.apache.flink.configuration.PipelineOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.ValidationException;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableFactory;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
import org.apache.flink.table.types.logical.RowType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.paimon.CoreOptions.LOG_CHANGELOG_MODE;
import static org.apache.paimon.CoreOptions.LOG_CONSISTENCY;
import static org.apache.paimon.CoreOptions.SCAN_MODE;
import static org.apache.paimon.CoreOptions.STREAMING_READ_MODE;
import static org.apache.paimon.CoreOptions.StartupMode.FROM_SNAPSHOT;
import static org.apache.paimon.CoreOptions.StartupMode.FROM_SNAPSHOT_FULL;
import static org.apache.paimon.flink.FlinkConnectorOptions.LOG_SYSTEM;
import static org.apache.paimon.flink.FlinkConnectorOptions.NONE;
import static org.apache.paimon.flink.FlinkConnectorOptions.SCAN_PUSH_DOWN;
import static org.apache.paimon.flink.LogicalTypeConversion.toLogicalType;
import static org.apache.paimon.flink.log.LogStoreTableFactory.discoverLogStoreFactory;
/** Abstract Paimon factory to create table sources and table sinks. */
public abstract class AbstractFlinkTableFactory
implements DynamicTableSourceFactory, DynamicTableSinkFactory {
private static final Logger LOG = LoggerFactory.getLogger(AbstractFlinkTableFactory.class);
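    /**
     * Creates the table source. System tables get a {@link SystemTableSource}; regular data
     * tables get a {@link DataTableSource}, wrapped as a push-down capable source when {@link
     * FlinkConnectorOptions#SCAN_PUSH_DOWN} is enabled.
     *
     * <p>An illustrative SQL session (table names are hypothetical):
     *
     * <pre>{@code
     * SET 'execution.runtime-mode' = 'streaming';
     * SELECT * FROM my_paimon_table;              -- data table, continuous read
     * SELECT * FROM `my_paimon_table$snapshots`;  -- system table
     * }</pre>
     */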
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
CatalogTable origin = context.getCatalogTable().getOrigin();
boolean isStreamingMode =
context.getConfiguration().get(ExecutionOptions.RUNTIME_MODE)
== RuntimeExecutionMode.STREAMING;
if (origin instanceof SystemCatalogTable) {
return new PushedTableSource(
new SystemTableSource(
((SystemCatalogTable) origin).table(),
isStreamingMode,
context.getObjectIdentifier()));
} else {
Table table = buildPaimonTable(context);
if (table instanceof FileStoreTable) {
storeTableLineage(
((FileStoreTable) table).catalogEnvironment().lineageMetaFactory(),
context,
(entity, lineageFactory) -> {
try (LineageMeta lineage =
lineageFactory.create(() -> Options.fromMap(table.options()))) {
lineage.saveSourceTableLineage(entity);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
DataTableSource source =
new DataTableSource(
context.getObjectIdentifier(),
table,
isStreamingMode,
context,
createOptionalLogStoreFactory(context).orElse(null));
return new Options(table.options()).get(SCAN_PUSH_DOWN)
? new PushedRichTableSource(source)
: new RichTableSource(source);
}
}
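    /**
     * Creates the table sink, recording sink lineage when a {@link LineageMetaFactory} is
     * configured and attaching a log store factory (such as Kafka) when {@code log.system} is
     * set to something other than {@code none}.
     */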
@Override
public DynamicTableSink createDynamicTableSink(Context context) {
Table table = buildPaimonTable(context);
if (table instanceof FileStoreTable) {
storeTableLineage(
((FileStoreTable) table).catalogEnvironment().lineageMetaFactory(),
context,
(entity, lineageFactory) -> {
try (LineageMeta lineage =
lineageFactory.create(() -> Options.fromMap(table.options()))) {
lineage.saveSinkTableLineage(entity);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
return new FlinkTableSink(
context.getObjectIdentifier(),
table,
context,
createOptionalLogStoreFactory(context).orElse(null));
}
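    /**
     * Saves the source or sink lineage entity for the current job. {@link PipelineOptions#NAME}
     * must be set, since lineage entries are keyed by pipeline name.
     */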
private void storeTableLineage(
@Nullable LineageMetaFactory lineageMetaFactory,
Context context,
            BiConsumer<TableLineageEntity, LineageMetaFactory> tableLineage) {
if (lineageMetaFactory != null) {
String pipelineName = context.getConfiguration().get(PipelineOptions.NAME);
if (pipelineName == null) {
throw new ValidationException("Cannot get pipeline name for lineage meta.");
}
tableLineage.accept(
new TableLineageEntityImpl(
context.getObjectIdentifier().getDatabaseName(),
context.getObjectIdentifier().getObjectName(),
pipelineName,
Timestamp.fromEpochMillis(System.currentTimeMillis())),
lineageMetaFactory);
}
}
@Override
    public Set<ConfigOption<?>> requiredOptions() {
return Collections.emptySet();
}
@Override
    public Set<ConfigOption<?>> optionalOptions() {
return new HashSet<>();
}
// ~ Tools ------------------------------------------------------------------
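    /**
     * Discovers the log store factory configured via {@code log.system}, or returns {@link
     * Optional#empty()} when it is {@code none}, i.e. pure file store reading.
     */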
    public static Optional<LogStoreTableFactory> createOptionalLogStoreFactory(
DynamicTableFactory.Context context) {
return createOptionalLogStoreFactory(
context.getClassLoader(), context.getCatalogTable().getOptions());
}
    static Optional<LogStoreTableFactory> createOptionalLogStoreFactory(
            ClassLoader classLoader, Map<String, String> options) {
Options configOptions = new Options();
options.forEach(configOptions::setString);
if (configOptions.get(LOG_SYSTEM).equalsIgnoreCase(NONE)) {
// Use file store continuous reading
validateFileStoreContinuous(configOptions);
return Optional.empty();
} else if (configOptions.get(SCAN_MODE) == FROM_SNAPSHOT
|| configOptions.get(SCAN_MODE) == FROM_SNAPSHOT_FULL) {
throw new ValidationException(
String.format(
"Log system does not support %s and %s scan mode",
FROM_SNAPSHOT, FROM_SNAPSHOT_FULL));
}
return Optional.of(discoverLogStoreFactory(classLoader, configOptions.get(LOG_SYSTEM)));
}
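    /**
     * Rejects option combinations that require an external log system: upsert changelog mode,
     * eventual consistency, and the log streaming read mode.
     */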
private static void validateFileStoreContinuous(Options options) {
LogChangelogMode changelogMode = options.get(LOG_CHANGELOG_MODE);
StreamingReadMode streamingReadMode = options.get(STREAMING_READ_MODE);
if (changelogMode == LogChangelogMode.UPSERT) {
throw new ValidationException(
"File store continuous reading does not support upsert changelog mode.");
}
LogConsistency consistency = options.get(LOG_CONSISTENCY);
if (consistency == LogConsistency.EVENTUAL) {
throw new ValidationException(
"File store continuous reading does not support eventual consistency mode.");
}
if (streamingReadMode == StreamingReadMode.LOG) {
throw new ValidationException(
"File store continuous reading does not support the log streaming read mode.");
}
}
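    /**
     * Builds a Paimon {@link CatalogContext} from the catalog table options, using {@link
     * FlinkFileIOLoader} so that table file access goes through Flink's file systems.
     */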
static CatalogContext createCatalogContext(DynamicTableFactory.Context context) {
return CatalogContext.create(
Options.fromMap(context.getCatalogTable().getOptions()), new FlinkFileIOLoader());
}
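    /**
     * Builds the Paimon {@link Table} behind a Flink catalog table: job-level dynamic options
     * are merged on top of the persisted table options, and the resulting schema, partition keys
     * and primary keys are validated against what Flink sees.
     */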
static Table buildPaimonTable(DynamicTableFactory.Context context) {
CatalogTable origin = context.getCatalogTable().getOrigin();
Table table;
        Map<String, String> dynamicOptions = getDynamicTableConfigOptions(context);
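        // A dynamic option may only differ from the persisted value if the key is mutable;
        // checkAlterTableOption throws for options that must not be altered.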
dynamicOptions.forEach(
(key, value) -> {
if (origin.getOptions().get(key) == null
|| !origin.getOptions().get(key).equals(value)) {
SchemaManager.checkAlterTableOption(key);
}
});
        Map<String, String> newOptions = new HashMap<>();
newOptions.putAll(origin.getOptions());
newOptions.putAll(dynamicOptions);
        // note that the Paimon table schema must be the same as Flink's
if (origin instanceof DataCatalogTable) {
FileStoreTable fileStoreTable = (FileStoreTable) ((DataCatalogTable) origin).table();
table = fileStoreTable.copyWithoutTimeTravel(newOptions);
} else {
table =
FileStoreTableFactory.create(createCatalogContext(context))
.copyWithoutTimeTravel(newOptions);
}
Schema schema = FlinkCatalog.fromCatalogTable(context.getCatalogTable());
RowType rowType = toLogicalType(schema.rowType());
        List<String> partitionKeys = schema.partitionKeys();
        List<String> primaryKeys = schema.primaryKeys();
        // compare fields only, ignoring the outer row nullability and nested fields' comments
Preconditions.checkArgument(
schemaEquals(toLogicalType(table.rowType()), rowType),
"Flink schema and store schema are not the same, "
+ "store schema is %s, Flink schema is %s",
table.rowType(),
rowType);
Preconditions.checkArgument(
table.partitionKeys().equals(partitionKeys),
"Flink partitionKeys and store partitionKeys are not the same, "
+ "store partitionKeys is %s, Flink partitionKeys is %s",
table.partitionKeys(),
partitionKeys);
Preconditions.checkArgument(
table.primaryKeys().equals(primaryKeys),
"Flink primaryKeys and store primaryKeys are not the same, "
+ "store primaryKeys is %s, Flink primaryKeys is %s",
table.primaryKeys(),
primaryKeys);
return table;
}
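    /**
     * Compares two row types field by field on name and type only, ignoring the outer row
     * nullability and field descriptions.
     */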
@VisibleForTesting
static boolean schemaEquals(RowType rowType1, RowType rowType2) {
        List<RowType.RowField> fieldList1 = rowType1.getFields();
        List<RowType.RowField> fieldList2 = rowType2.getFields();
if (fieldList1.size() != fieldList2.size()) {
return false;
}
for (int i = 0; i < fieldList1.size(); i++) {
RowType.RowField f1 = fieldList1.get(i);
RowType.RowField f2 = fieldList2.get(i);
if (!f1.getName().equals(f2.getName()) || !f1.getType().equals(f2.getType())) {
return false;
}
}
return true;
}
/**
* The dynamic option's format is:
*
     * <p>{@link
     * FlinkConnectorOptions#TABLE_DYNAMIC_OPTION_PREFIX}.${catalog}.${database}.${tableName}.key =
     * value. These job-level configs are extracted and injected into the target table's options.
*
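     * <p>A hypothetical example, assuming the prefix resolves to {@code paimon} and a table
     * {@code my_catalog.my_db.my_table} (each identifier segment may also be the {@code *}
     * wildcard):
     *
     * <pre>{@code
     * SET 'paimon.my_catalog.my_db.my_table.scan.snapshot-id' = '42';
     * SET 'paimon.*.*.*.sink.parallelism' = '4';
     * }</pre>
     *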
* @param context The table factory context.
* @return The dynamic options of this target table.
*/
    static Map<String, String> getDynamicTableConfigOptions(DynamicTableFactory.Context context) {
        Map<String, String> optionsFromTableConfig = new HashMap<>();
        ReadableConfig config = context.getConfiguration();
        Map<String, String> conf;
if (config instanceof Configuration) {
conf = ((Configuration) config).toMap();
} else if (config instanceof TableConfig) {
conf = ((TableConfig) config).getConfiguration().toMap();
} else {
throw new IllegalArgumentException("Unexpected config: " + config.getClass());
}
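        // Groups 1-4 match the prefix and the catalog/database/table identifiers (each of the
        // three identifiers may also be the literal "*" wildcard); group 5 is the option key.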
String template =
String.format(
"(%s)\\.(%s|\\*)\\.(%s|\\*)\\.(%s|\\*)\\.(.+)",
FlinkConnectorOptions.TABLE_DYNAMIC_OPTION_PREFIX,
context.getObjectIdentifier().getCatalogName(),
context.getObjectIdentifier().getDatabaseName(),
context.getObjectIdentifier().getObjectName());
Pattern pattern = Pattern.compile(template);
conf.keySet()
.forEach(
(key) -> {
if (key.startsWith(FlinkConnectorOptions.TABLE_DYNAMIC_OPTION_PREFIX)) {
Matcher matcher = pattern.matcher(key);
if (matcher.find()) {
optionsFromTableConfig.put(matcher.group(5), conf.get(key));
}
}
});
if (!optionsFromTableConfig.isEmpty()) {
LOG.info(
"Loading dynamic table options for {} in table config: {}",
context.getObjectIdentifier().getObjectName(),
optionsFromTableConfig);
}
return optionsFromTableConfig;
}
}