
io.delta.flink.internal.table.DeltaDynamicTableFactory

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.delta.flink.internal.table;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.factories.FactoryUtil.TableFactoryHelper;
import org.apache.flink.table.types.logical.RowType;

/**
 * Creates a {@link DynamicTableSink} and {@link DynamicTableSource} instance representing a Delta
 * Lake table.
 *
 * <p>This implementation automatically resolves all necessary objects for creating an instance of
 * {@link io.delta.flink.sink.DeltaSink} and {@link io.delta.flink.source.DeltaSource}, except the
 * Delta table's path, which needs to be provided explicitly.
 */
public class DeltaDynamicTableFactory implements DynamicTableSinkFactory, DynamicTableSourceFactory {

    public static final String DELTA_CONNECTOR_IDENTIFIER = "delta";

    /**
     * Flag that will be set to true only when this factory is created from the Delta Catalog. If
     * this flag is set to false, the createDynamicTableSink and createDynamicTableSource methods
     * will throw an exception when called.
     */
    public final boolean isFromCatalog;

    /**
     * This constructor is meant to be used by Flink's factory discovery mechanism. It sets the
     * "isFromCatalog" field to false, which makes the createDynamicTableSink and
     * createDynamicTableSource methods throw an exception informing that Flink SQL support for
     * Delta tables must be used with the Delta Catalog only.
     *
     * @implNote In order to support
     * {@link org.apache.flink.table.api.bridge.java.StreamTableEnvironment}, the factory must not
     * throw an exception from its constructor. The StreamTableEnvironment, when initialized, loads
     * and caches all factories defined in the
     * META-INF/services/org.apache.flink.table.factories.Factory file before executing any
     * SQL/Table call.
     */
    public DeltaDynamicTableFactory() {
        this.isFromCatalog = false;
    }

    private DeltaDynamicTableFactory(boolean isFromCatalog) {
        if (!isFromCatalog) {
            throw new RuntimeException("FromCatalog parameter must be set to true.");
        }
        this.isFromCatalog = true;
    }

    static DeltaDynamicTableFactory fromCatalog() {
        return new DeltaDynamicTableFactory(true);
    }

    @Override
    public String factoryIdentifier() {
        return DELTA_CONNECTOR_IDENTIFIER;
    }

    @Override
    public DynamicTableSink createDynamicTableSink(Context context) {
        if (!isFromCatalog) {
            throw notFromDeltaCatalogException();
        }

        // Check whether the requested table is a Delta table or not.
        FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
        org.apache.flink.configuration.Configuration options = getQueryOptions(helper);

        String connectorType = options.get(FactoryUtil.CONNECTOR);
        if (!DELTA_CONNECTOR_IDENTIFIER.equals(connectorType)) {
            // Look for the proper table factory for this table type.
            DynamicTableSinkFactory sinkFactory = FactoryUtil.discoverFactory(
                this.getClass().getClassLoader(),
                DynamicTableSinkFactory.class,
                connectorType);
            return sinkFactory.createDynamicTableSink(context);
        }

        // This must be a Delta table, so continue with this factory.
        DeltaTableFactoryHelper.validateSinkQueryOptions(options);

        ResolvedSchema tableSchema = context.getCatalogTable().getResolvedSchema();

        org.apache.hadoop.conf.Configuration conf =
            HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration());

        RowType rowType = (RowType) tableSchema.toSinkRowDataType().getLogicalType();

        return new DeltaDynamicTableSink(
            new Path(options.get(DeltaTableConnectorOptions.TABLE_PATH)),
            conf,
            rowType,
            context.getCatalogTable()
        );
    }

    @Override
    public DynamicTableSource createDynamicTableSource(Context context) {
        if (!isFromCatalog) {
            throw notFromDeltaCatalogException();
        }

        // Check whether the requested table is a Delta table or not.
        FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
        org.apache.flink.configuration.Configuration options = getQueryOptions(helper);

        String connectorType = options.get(FactoryUtil.CONNECTOR);
        if (!DELTA_CONNECTOR_IDENTIFIER.equals(connectorType)) {
            // Look for the proper table factory for this table type.
            DynamicTableSourceFactory sourceFactory = FactoryUtil.discoverFactory(
                this.getClass().getClassLoader(),
                DynamicTableSourceFactory.class,
                connectorType);
            return sourceFactory.createDynamicTableSource(context);
        }

        // This must be a Delta table, so continue with this factory.
        QueryOptions queryOptions = DeltaTableFactoryHelper.validateSourceQueryOptions(options);

        org.apache.hadoop.conf.Configuration hadoopConf =
            HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration());

        List<String> columns = ((RowType) context
            .getCatalogTable()
            .getResolvedSchema()
            .toPhysicalRowDataType()
            .getLogicalType()
        ).getFieldNames();

        return new DeltaDynamicTableSource(
            hadoopConf,
            queryOptions,
            columns
        );
    }

    @Override
    public Set<ConfigOption<?>> forwardOptions() {
        final Set<ConfigOption<?>> options = new HashSet<>();
        options.add(DeltaTableConnectorOptions.TABLE_PATH);
        return options;
    }

    @Override
    public Set<ConfigOption<?>> requiredOptions() {
        // We do not use Flink's helper validation logic; we use our own instead.
        return Collections.emptySet();
    }

    @Override
    public Set<ConfigOption<?>> optionalOptions() {
        // We do not use Flink's helper validation logic; we use our own instead.
        return Collections.emptySet();
    }

    private RuntimeException notFromDeltaCatalogException() {
        return new RuntimeException("Delta Table SQL/Table API was used without Delta Catalog. "
            + "It is required to use Delta Catalog with all Flink SQL operations that involve "
            + "Delta table. Please see documentation for details -> TODO DC add link to docs");
    }

    private Configuration getQueryOptions(TableFactoryHelper helper) {
        // This cast is generally safe because FactoryUtil.TableFactoryHelper::getOptions()
        // will always return an instance of org.apache.flink.configuration.Configuration.
        // The Configuration type we cast to provides a 'toMap()' method that exposes all
        // properties passed to the factory by the Catalog and by query hints, whereas the
        // original 'ReadableConfig' type only provides a 'get(propertyName)' method.
        // With this cast we can validate and throw an exception if any disallowed options were
        // used; without it, we could only verify known properties and silently ignore unknown
        // ones.
        return (Configuration) helper.getOptions();
    }
}
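
Because the factory refuses to create sinks or sources unless it was obtained through the Delta Catalog, the typical entry point is a Flink SQL job that registers that catalog first. The following sketch is not part of this file: the catalog name, table name, schema, and table path are hypothetical, and the catalog/connector option keys are taken from the Delta connector documentation as assumptions rather than verified here.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

// Hypothetical usage sketch: route all Delta SQL through a Delta Catalog so that
// DeltaDynamicTableFactory is created via fromCatalog() and isFromCatalog == true.
public final class DeltaSqlUsageSketch {

    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // Register and switch to the Delta Catalog (option keys assumed from the Delta docs).
        tableEnv.executeSql(
            "CREATE CATALOG delta_catalog WITH ("
                + " 'type' = 'delta-catalog',"
                + " 'catalog-type' = 'in-memory')");
        tableEnv.executeSql("USE CATALOG delta_catalog");

        // 'connector' = 'delta' matches DELTA_CONNECTOR_IDENTIFIER; 'table-path' is the
        // explicitly provided Delta table location (a hypothetical path here).
        tableEnv.executeSql(
            "CREATE TABLE example_table (id BIGINT, data STRING) WITH ("
                + " 'connector' = 'delta',"
                + " 'table-path' = 'file:///tmp/delta/example_table')");

        // The INSERT exercises createDynamicTableSink, the SELECT createDynamicTableSource.
        tableEnv.executeSql("INSERT INTO example_table VALUES (1, 'a')");
        tableEnv.executeSql("SELECT * FROM example_table").print();
    }
}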
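
The @implNote on the public constructor hinges on how Flink discovers table factories: the table environment uses Java's ServiceLoader over META-INF/services/org.apache.flink.table.factories.Factory and instantiates every listed factory up front, which is why the no-arg constructor only records the flag instead of throwing. The standalone sketch below (the class name is hypothetical, not part of the connector) lists what that discovery sees on the classpath.

import java.util.ServiceLoader;

import org.apache.flink.table.factories.Factory;

// Hypothetical diagnostic: enumerate every Factory registered via META-INF/services, the same
// set Flink's FactoryUtil discovers and the StreamTableEnvironment caches. Each factory is
// instantiated during iteration, so a constructor that threw would abort discovery for all.
public final class FactoryDiscoverySketch {

    public static void main(String[] args) {
        for (Factory factory : ServiceLoader.load(Factory.class)) {
            System.out.println(factory.factoryIdentifier() + " -> " + factory.getClass().getName());
        }
    }
}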




