All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.delta.flink.sink.internal.DeltaBucketAssigner Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.delta.flink.sink.internal;

import java.util.LinkedHashMap;

import io.delta.flink.sink.RowDataDeltaSinkBuilder;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner;
import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer;
import org.apache.flink.table.utils.PartitionPathUtils;

/**
 * Custom implementation of {@link BucketAssigner} class required to provide behaviour on how
 * to map particular events to buckets (aka partitions).
 * <p>
 * This implementation can be perceived as a utility class for complying with DeltaLake's
 * partitioning style (which follows Apache Hive's partitioning style by providing the
 * partitioning columns and their values as FS directory paths, e.g.
 * "/some_path/table_1/date=2020-01-01").
 * It's still possible for users to roll out their own version of {@link BucketAssigner}
 * and pass it to the {@link DeltaSinkBuilder} during creation of the sink.
 * <p>
 * This {@link DeltaBucketAssigner} is applicable only to {@link DeltaSinkBuilder} and not to
 * {@link RowDataDeltaSinkBuilder}. The former lets you use this
 * {@link DeltaBucketAssigner} to provide the required custom bucketing behaviour, while the latter
 * doesn't expose a custom bucketing API, and you can provide the partition column keys only.
 * <p>
 * Thus, this {@link DeltaBucketAssigner} is currently not exposed to the user through any public
 * API.
 * <p>
 * In the future, if you'd like to implement your own custom bucketing...
 * <pre>{@code
 *     /////////////////////////////////////////////////////////////////////////////////
 *     // implements a custom partition computer
 *     /////////////////////////////////////////////////////////////////////////////////
 *     static class CustomPartitionColumnComputer implements DeltaPartitionComputer<RowData> {
 *
 *         @Override
 *         public LinkedHashMap<String, String> generatePartitionValues(
 *                 RowData element, BucketAssigner.Context context) {
 *             String f1 = element.getString(0).toString();
 *             int f3 = element.getInt(2);
 *             LinkedHashMap<String, String> partitionSpec = new LinkedHashMap<>();
 *             partitionSpec.put("f1", f1);
 *             partitionSpec.put("f3", Integer.toString(f3));
 *             return partitionSpec;
 *         }
 *     }
 *     ...
 *     /////////////////////////////////////////
 *     // creates partition assigner for a custom partition computer
 *     /////////////////////////////////////////
 *     DeltaBucketAssigner<RowData> partitionAssigner =
 *                 new DeltaBucketAssigner<>(new CustomPartitionColumnComputer());
 *
 *     ...
 *
 *     /////////////////////////////////////////////////////////////////////////////////
 *     // create the builder
 *     /////////////////////////////////////////////////////////////////////////////////
 *
 *     DeltaSinkBuilder<RowData> foo =
 *      new DeltaSinkBuilder.DefaultDeltaFormatBuilder<>(
 *         ...,
 *         partitionAssigner,
 *         ...)
 * }</pre>
 *
 * @param <T> The type of input elements.
 */
public class DeltaBucketAssigner<T> implements BucketAssigner<T, String> {

    private static final long serialVersionUID = -6033643154550226022L;

    /** Computes the partition column names and values for each incoming element. */
    private final DeltaPartitionComputer<T> partitionComputer;

    /**
     * Creates a bucket assigner that delegates partition value computation to the given
     * partition computer.
     *
     * @param partitionComputer computer used to derive the partition values of every element
     */
    public DeltaBucketAssigner(DeltaPartitionComputer<T> partitionComputer) {
        this.partitionComputer = partitionComputer;
    }

    /**
     * Resolves the bucket id of an element by asking the partition computer for the element's
     * partition values and turning them into a partition path with
     * {@link PartitionPathUtils#generatePartitionPath(LinkedHashMap)}.
     *
     * @param element the element to assign to a bucket
     * @param context the bucket assigner context, forwarded as-is to the partition computer
     * @return the partition path generated from the element's partition values
     */
    @Override
    public String getBucketId(T element, BucketAssigner.Context context) {
        LinkedHashMap<String, String> partitionValues =
            this.partitionComputer.generatePartitionValues(element, context);
        return PartitionPathUtils.generatePartitionPath(partitionValues);
    }

    /**
     * Returns the serializer used for the {@code String} bucket ids.
     *
     * @return the shared {@link SimpleVersionedStringSerializer} instance
     */
    @Override
    public SimpleVersionedSerializer<String> getSerializer() {
        return SimpleVersionedStringSerializer.INSTANCE;
    }

    /**
     * Returns a fixed, human-readable name for this assigner.
     *
     * @return the constant string {@code "DeltaBucketAssigner"}
     */
    @Override
    public String toString() {
        return "DeltaBucketAssigner";
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy