/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.api;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigurationUtils;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.TableDistribution;
import org.apache.flink.table.connector.format.Format;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.utils.EncodingUtils;
import org.apache.flink.util.Preconditions;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* Describes a {@link CatalogTable} representing a source or sink.
*
* <p>A {@link TableDescriptor} is a template for creating a {@link CatalogTable} instance. It
* closely resembles the "CREATE TABLE" SQL DDL statement, containing schema, connector options,
* and other characteristics. Since tables in Flink are typically backed by external systems, the
* descriptor describes how a connector (and possibly its format) is configured.
*
* <p>This can be used to register a table in the Table API, see {@link
* TableEnvironment#createTemporaryTable(String, TableDescriptor)}.
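*
* <p>For example (a minimal sketch; the connector name, option key, and schema are illustrative
* and depend on the factories available on the classpath):
*
* <pre>{@code
* TableDescriptor descriptor = TableDescriptor.forConnector("datagen")
*     .schema(Schema.newBuilder()
*         .column("f0", DataTypes.STRING())
*         .build())
*     .option("rows-per-second", "10")
*     .build();
*
* tableEnv.createTemporaryTable("MyTable", descriptor);
* }</pre>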
*/
@PublicEvolving
public class TableDescriptor {
private final @Nullable Schema schema;
private final Map<String, String> options;
private final @Nullable TableDistribution distribution;
private final List<String> partitionKeys;
private final @Nullable String comment;
protected TableDescriptor(
@Nullable Schema schema,
Map<String, String> options,
@Nullable TableDistribution distribution,
List<String> partitionKeys,
@Nullable String comment) {
this.schema = schema;
this.options = Collections.unmodifiableMap(options);
this.distribution = distribution;
this.partitionKeys = Collections.unmodifiableList(partitionKeys);
this.comment = comment;
}
/**
* Creates a new {@link Builder} for a table using the given connector.
*
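* <p>For example (a sketch; the "kafka" connector and "topic" option are illustrative and
* require the corresponding connector factory on the classpath):
*
* <pre>{@code
* TableDescriptor.forConnector("kafka")
*     .schema(schema)
*     .option("topic", "my-topic")
*     .build();
* }</pre>
*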
* @param connector The factory identifier for the connector.
*/
public static Builder forConnector(String connector) {
Preconditions.checkNotNull(connector, "Table descriptors require a connector identifier.");
final Builder descriptorBuilder = new Builder();
descriptorBuilder.option(FactoryUtil.CONNECTOR, connector);
return descriptorBuilder;
}
/**
* Creates a new {@link Builder} for a managed table.
*
* @deprecated This method will be removed soon. Please see FLIP-346 for more details.
*/
@Deprecated
public static Builder forManaged() {
return new Builder();
}
// ---------------------------------------------------------------------------------------------
public Optional<Schema> getSchema() {
return Optional.ofNullable(schema);
}
public Map<String, String> getOptions() {
return options;
}
public Optional<TableDistribution> getDistribution() {
return Optional.ofNullable(distribution);
}
public List<String> getPartitionKeys() {
return partitionKeys;
}
public Optional<String> getComment() {
return Optional.ofNullable(comment);
}
// ---------------------------------------------------------------------------------------------
/** Converts this descriptor into a {@link CatalogTable}. */
public CatalogTable toCatalogTable() {
final Schema schema =
getSchema()
.orElseThrow(
() ->
new ValidationException(
"Missing schema in TableDescriptor. "
+ "A schema is typically required. "
+ "It can only be omitted at certain "
+ "documented locations."));
return CatalogTable.newBuilder()
.schema(schema)
.options(getOptions())
.distribution(distribution)
.partitionKeys(partitionKeys)
.comment(getComment().orElse(null))
.build();
}
/**
* Converts this immutable instance into a mutable {@link Builder}.
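*
* <p>A minimal sketch of the copy-and-modify pattern this enables (the option key is
* illustrative):
*
* <pre>{@code
* TableDescriptor copy = descriptor.toBuilder()
*     .option("some-option", "new-value")
*     .build();
* }</pre>
*/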
public Builder toBuilder() {
return new Builder(this);
}
// ---------------------------------------------------------------------------------------------
@Override
public String toString() {
final String escapedPartitionKeys =
partitionKeys.stream()
.map(EncodingUtils::escapeIdentifier)
.collect(Collectors.joining(", "));
final String distributedBy = distribution == null ? "" : distribution.toString();
final String partitionedBy =
!partitionKeys.isEmpty()
? String.format("PARTITIONED BY (%s)", escapedPartitionKeys)
: "";
final String serializedOptions =
options.entrySet().stream()
.map(
entry ->
String.format(
" '%s' = '%s'",
EncodingUtils.escapeSingleQuotes(entry.getKey()),
EncodingUtils.escapeSingleQuotes(entry.getValue())))
.collect(Collectors.joining(String.format(",%n")));
return String.format(
"%s%nCOMMENT '%s'%n%s%s%nWITH (%n%s%n)",
schema != null ? schema : "",
comment != null ? comment : "",
distributedBy,
partitionedBy,
serializedOptions);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
TableDescriptor that = (TableDescriptor) obj;
return Objects.equals(schema, that.schema)
&& options.equals(that.options)
&& Objects.equals(distribution, that.distribution)
&& partitionKeys.equals(that.partitionKeys)
&& Objects.equals(comment, that.comment);
}
@Override
public int hashCode() {
return Objects.hash(schema, options, distribution, partitionKeys, comment);
}
// ---------------------------------------------------------------------------------------------
/** Builder for {@link TableDescriptor}. */
@PublicEvolving
public static class Builder {
private @Nullable Schema schema;
private final Map<String, String> options;
private @Nullable TableDistribution distribution;
private final List<String> partitionKeys;
private @Nullable String comment;
protected Builder() {
this.options = new HashMap<>();
this.partitionKeys = new ArrayList<>();
}
protected Builder(TableDescriptor descriptor) {
this.schema = descriptor.getSchema().orElse(null);
this.options = new HashMap<>(descriptor.getOptions());
this.distribution = descriptor.getDistribution().orElse(null);
this.partitionKeys = new ArrayList<>(descriptor.getPartitionKeys());
this.comment = descriptor.getComment().orElse(null);
}
/**
* Defines the schema of the {@link TableDescriptor}.
*
* <p>The schema is typically required. It is optional only in cases where the schema can be
* inferred, e.g. {@link Table#insertInto(TableDescriptor)}.
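*
* <p>A minimal sketch (column names and types are illustrative):
*
* <pre>{@code
* descriptorBuilder.schema(Schema.newBuilder()
*     .column("id", DataTypes.BIGINT())
*     .column("name", DataTypes.STRING())
*     .build());
* }</pre>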
*/
public Builder schema(@Nullable Schema schema) {
this.schema = schema;
return this;
}
/**
* Sets the given option on the table.
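*
* <p>A sketch using a typed option; {@link FactoryUtil#SINK_PARALLELISM} is just one example
* of a {@link ConfigOption} that can be passed here:
*
* <pre>{@code
* descriptorBuilder.option(FactoryUtil.SINK_PARALLELISM, 4);
* }</pre>
*/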
public <T> Builder option(ConfigOption<T> configOption, T value) {
Preconditions.checkNotNull(configOption, "Config option must not be null.");
Preconditions.checkNotNull(value, "Value must not be null.");
options.put(configOption.key(), ConfigurationUtils.convertValue(value, String.class));
return this;
}
/**
* Sets the given option on the table.
*
* <p>Option keys must be fully specified. When defining options for a {@link Format
* format}, use {@link #format(FormatDescriptor)} instead.
*
* <p>Example:
*
* <pre>{@code
* TableDescriptor.forConnector("kafka")
*     .option("scan.startup.mode", "latest-offset")
*     .build();
* }</pre>
*/
public Builder option(String key, String value) {
Preconditions.checkNotNull(key, "Key must not be null.");
Preconditions.checkNotNull(value, "Value must not be null.");
options.put(key, value);
return this;
}
/**
* Defines the {@link Format format} to be used for this table.
*
* <p>Note that not every connector requires a format to be specified, while others may use
* multiple formats. In the latter case, use {@link #format(ConfigOption, FormatDescriptor)}
* instead to specify for which option the format should be configured.
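*
* <p>A sketch; "json" must match a format factory on the classpath:
*
* <pre>{@code
* // equivalent to setting the option 'format' = 'json'
* descriptorBuilder.format("json");
* }</pre>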
*/
public Builder format(String format) {
return format(FactoryUtil.FORMAT, FormatDescriptor.forFormat(format).build());
}
/**
* Defines the format to be used for this table.
*
* <p>Note that not every connector requires a format to be specified, while others may use
* multiple formats.
*
* <p>Options of the provided {@code formatDescriptor} are automatically prefixed. For
* example,
*
* <pre>{@code
* descriptorBuilder.format(FormatDescriptor.forFormat("json")
*     .option(JsonOptions.IGNORE_PARSE_ERRORS, true)
*     .build())
* }</pre>
*
* <p>will result in the options
*
* <pre>{@code
* 'format' = 'json'
* 'json.ignore-parse-errors' = 'true'
* }</pre>
*/
public Builder format(FormatDescriptor formatDescriptor) {
return format(FactoryUtil.FORMAT, formatDescriptor);
}
/**
* Defines the format to be used for this table.
*
* <p>Note that not every connector requires a format to be specified, while others may use
* multiple formats.
*
* <p>Options of the provided {@code formatDescriptor} are automatically prefixed. For
* example,
*
* <pre>{@code
* descriptorBuilder.format(KafkaOptions.KEY_FORMAT, FormatDescriptor.forFormat("json")
*     .option(JsonOptions.IGNORE_PARSE_ERRORS, true)
*     .build())
* }</pre>
*
* <p>will result in the options
*
* <pre>{@code
* 'key.format' = 'json'
* 'key.json.ignore-parse-errors' = 'true'
* }</pre>
*/
public Builder format(
ConfigOption<String> formatOption, FormatDescriptor formatDescriptor) {
Preconditions.checkNotNull(formatOption, "Format option must not be null.");
Preconditions.checkNotNull(formatDescriptor, "Format descriptor must not be null.");
option(formatOption, formatDescriptor.getFormat());
final String optionPrefix =
FactoryUtil.getFormatPrefix(formatOption, formatDescriptor.getFormat());
formatDescriptor
.getOptions()
.forEach(
(key, value) -> {
if (key.startsWith(optionPrefix)) {
throw new ValidationException(
String.format(
"Format options set using #format(FormatDescriptor) should not contain the prefix '%s', but found '%s'.",
optionPrefix, key));
}
final String prefixedKey = optionPrefix + key;
option(prefixedKey, value);
});
return this;
}
/**
* Defines that the table should be distributed into buckets using a hash algorithm over the
* given columns. The number of buckets is connector-defined.
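*
* <p>A sketch, assuming a column {@code user_id} exists in the schema:
*
* <pre>{@code
* // corresponds to: DISTRIBUTED BY HASH(user_id)
* descriptorBuilder.distributedByHash("user_id");
* }</pre>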
*/
public Builder distributedByHash(String... bucketKeys) {
validateBucketKeys(bucketKeys);
this.distribution = TableDistribution.ofHash(Arrays.asList(bucketKeys), null);
return this;
}
/**
* Defines that the table should be distributed into the given number of buckets using a
* hash algorithm over the given columns.
*/
public Builder distributedByHash(int numberOfBuckets, String... bucketKeys) {
validateBucketKeys(bucketKeys);
this.distribution =
TableDistribution.ofHash(Arrays.asList(bucketKeys), numberOfBuckets);
return this;
}
/**
* Defines that the table should be distributed into buckets using a range algorithm over
* the given columns. The number of buckets is connector-defined.
*/
public Builder distributedByRange(String... bucketKeys) {
validateBucketKeys(bucketKeys);
this.distribution = TableDistribution.ofRange(Arrays.asList(bucketKeys), null);
return this;
}
/**
* Defines that the table should be distributed into the given number of buckets using a
* range algorithm over the given columns.
*/
public Builder distributedByRange(int numberOfBuckets, String... bucketKeys) {
validateBucketKeys(bucketKeys);
this.distribution =
TableDistribution.ofRange(Arrays.asList(bucketKeys), numberOfBuckets);
return this;
}
/**
* Defines that the table should be distributed into buckets over the given columns. The
* number of buckets and used algorithm are connector-defined.
*/
public Builder distributedBy(String... bucketKeys) {
validateBucketKeys(bucketKeys);
this.distribution = TableDistribution.ofUnknown(Arrays.asList(bucketKeys), null);
return this;
}
/**
* Defines that the table should be distributed into the given number of buckets by the
* given columns. The used algorithm is connector-defined.
*/
public Builder distributedBy(int numberOfBuckets, String... bucketKeys) {
validateBucketKeys(bucketKeys);
this.distribution =
TableDistribution.ofUnknown(Arrays.asList(bucketKeys), numberOfBuckets);
return this;
}
/**
* Defines that the table should be distributed into the given number of buckets. The
* algorithm is connector-defined.
*/
public Builder distributedInto(int numberOfBuckets) {
this.distribution = TableDistribution.ofUnknown(numberOfBuckets);
return this;
}
private static void validateBucketKeys(String[] bucketKeys) {
Preconditions.checkNotNull(bucketKeys, "Bucket keys must not be null.");
if (bucketKeys.length == 0) {
throw new ValidationException(
"At least one bucket key must be defined for a distribution.");
}
}
/**
* Defines which columns this table is partitioned by.
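*
* <p>A sketch, assuming columns {@code region} and {@code day} exist in the schema:
*
* <pre>{@code
* // corresponds to: PARTITIONED BY (region, day)
* descriptorBuilder.partitionedBy("region", "day");
* }</pre>
*/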
public Builder partitionedBy(String... partitionKeys) {
this.partitionKeys.addAll(Arrays.asList(partitionKeys));
return this;
}
/** Defines the comment for this table. */
public Builder comment(@Nullable String comment) {
this.comment = comment;
return this;
}
/** Returns an immutable instance of {@link TableDescriptor}. */
public TableDescriptor build() {
return new TableDescriptor(schema, options, distribution, partitionKeys, comment);
}
}
}