
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.sinks;

import org.apache.flink.annotation.Experimental;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning;

import java.util.Map;

/**
 * An interface for partitionable {@link TableSink}. A partitionable sink can write query results
 * to partitions.
*
 * <p>Partition columns are defined via the catalog table.
*
 * <p>For example, a partitioned table named {@code my_table} with a table schema {@code [a INT, b
* VARCHAR, c DOUBLE, dt VARCHAR, country VARCHAR]} is partitioned on columns {@code dt, country}.
* Then {@code dt} is the first partition column, and {@code country} is the secondary partition
* column.
*
 * <p>We can insert data into table partitions using INSERT INTO PARTITION syntax, for example:
 *
 * <pre>{@code
 * INSERT INTO my_table PARTITION (dt='2019-06-20', country='bar') select a, b, c from my_view
 * }</pre>
*
 * <p>When all partition columns are assigned a value in the PARTITION clause, the statement
 * inserts into a static partition: the query result is written into the static partition {@code
 * dt='2019-06-20', country='bar'}. The user-specified static partition is passed to the sink
 * via {@link #setStaticPartition(Map)}.
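 *
 * <p>As an illustrative sketch (not the planner's exact call sequence), the static partition
 * from the example above would reach the sink roughly as follows, assuming {@code sink} is the
 * {@code PartitionableTableSink} instance:
 *
 * <pre>{@code
 * Map<String, String> staticPartition = new LinkedHashMap<>();
 * staticPartition.put("dt", "2019-06-20");
 * staticPartition.put("country", "bar");
 * sink.setStaticPartition(staticPartition);
 * }</pre>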
*
 * <p>The INSERT INTO PARTITION syntax also supports dynamic partition inserts.
 *
 * <pre>{@code
 * INSERT INTO my_table PARTITION (dt='2019-06-20') select a, b, c, country from another_view
 * }</pre>
*
 * <p>When only a prefix of the partition columns is assigned a value in the PARTITION clause,
 * the query result is written into dynamic partitions. In the above example, the static
 * partition part is {@code dt='2019-06-20'}, which is passed to the sink via {@link
 * #setStaticPartition(Map)}, while {@code country} is the dynamic partition column whose value
 * is taken from each record.
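 *
 * <p>A minimal sketch of the dynamic case above, under the same assumptions: only the static
 * prefix is declared on the sink, while the value of {@code country} is taken from each record
 * at write time.
 *
 * <pre>{@code
 * Map<String, String> staticPartition = new LinkedHashMap<>();
 * staticPartition.put("dt", "2019-06-20");
 * sink.setStaticPartition(staticPartition);
 * // "country" is the dynamic partition column; its value comes from every written record
 * }</pre>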
*
* @deprecated This interface will not be supported in the new sink design around {@link
* DynamicTableSink}. Use {@link SupportsPartitioning} instead. See FLIP-95 for more
* information.
*/
@Deprecated
@Experimental
public interface PartitionableTableSink {
/**
 * Sets the static partition into the {@link TableSink}. The static partition may cover only a
 * subset of the partition columns. See the class Javadoc for more details.
*
 * <p>The static partition is represented as a {@code Map<String, String>} which maps from
 * partition field name to partition value. The partition values are all encoded as strings,
 * i.e. encoded via {@code String.valueOf(...)}. For example, for a static partition
 * {@code f0=1024, f1="foo", f2="bar"} where f0 is an integer type and f1, f2 are string types,
 * the values are encoded as the strings "1024", "foo" and "bar", and can be decoded back to the
 * original literals based on the field types.
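 *
 * <p>A minimal sketch of how the example above could be encoded (the {@code sink} variable is
 * an assumption, standing for the sink instance):
 *
 * <pre>{@code
 * Map<String, String> partitions = new LinkedHashMap<>();
 * partitions.put("f0", String.valueOf(1024)); // integer literal encoded as "1024"
 * partitions.put("f1", "foo");
 * partitions.put("f2", "bar");
 * sink.setStaticPartition(partitions);
 * }</pre>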
*
 * @param partitions the user-specified static partition
*/
void setStaticPartition(Map<String, String> partitions);
/**
 * If this method returns true, the sink can trust that all records will be grouped by the
 * partition fields before being consumed by the {@link TableSink}, i.e. the sink receives all
 * elements of one partition and then all elements of the next; elements of different partitions
 * are never mixed. For some sinks, this can be used to reduce the number of partition writers
 * and improve writing performance.
*
 * <p>This method configures whether the input should be grouped by partition. If it returns
 * true, the sink should also configure itself accordingly, e.g. set an internal flag that
 * changes the writing behavior to write one partition at a time.
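 *
 * <p>A minimal sketch of how an implementation might honor this contract (the field name is
 * hypothetical):
 *
 * <pre>{@code
 * public boolean configurePartitionGrouping(boolean supportsGrouping) {
 *     // remember the decision so the writer keeps only one partition open at a time
 *     this.writeOnePartitionAtATime = supportsGrouping;
 *     return supportsGrouping;
 * }
 * }</pre>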
*
 * @param supportsGrouping whether the execution mode supports grouping, e.g. grouping (usually
 * implemented via sorting) is only supported in batch mode, not in streaming mode.
 * @return whether data needs to be grouped by partition before being consumed by the sink. The
 * default is false. If {@code supportsGrouping} is false, this method must never return true
 * (i.e. require grouping), otherwise it will fail.
*/
default boolean configurePartitionGrouping(boolean supportsGrouping) {
return false;
}
}