All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.paimon.flink.sink.StoreMultiCommitter Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.sink;

import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogLoader;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.manifest.ManifestCommittable;
import org.apache.paimon.manifest.WrappedManifestCommittable;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.sink.CommitMessage;

import org.apache.flink.api.java.tuple.Tuple2;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * {@link StoreMultiCommitter} for multiple dynamic store. During the commit process, it will group
 * the WrappedManifestCommittables by their table identifier and use different committers to commit
 * to different tables.
 */
public class StoreMultiCommitter
        implements Committer {

    private final Catalog catalog;
    private final Context context;

    // To make the commit behavior consistent with that of Committer,
    //    StoreMultiCommitter manages multiple committers which are
    //    referenced by table id.
    private final Map tableCommitters;

    // Currently, only compact_database job needs to ignore empty commit and set dynamic options
    private final boolean ignoreEmptyCommit;
    private final Map dynamicOptions;

    public StoreMultiCommitter(CatalogLoader catalogLoader, Context context) {
        this(catalogLoader, context, false, Collections.emptyMap());
    }

    public StoreMultiCommitter(
            CatalogLoader catalogLoader,
            Context context,
            boolean ignoreEmptyCommit,
            Map dynamicOptions) {
        this.catalog = catalogLoader.load();
        this.context = context;
        this.ignoreEmptyCommit = ignoreEmptyCommit;
        this.dynamicOptions = dynamicOptions;
        this.tableCommitters = new HashMap<>();
    }

    @Override
    public boolean forceCreatingSnapshot() {
        return true;
    }

    @Override
    public WrappedManifestCommittable combine(
            long checkpointId, long watermark, List committables) {
        WrappedManifestCommittable wrappedManifestCommittable =
                new WrappedManifestCommittable(checkpointId, watermark);
        return combine(checkpointId, watermark, wrappedManifestCommittable, committables);
    }

    @Override
    public WrappedManifestCommittable combine(
            long checkpointId,
            long watermark,
            WrappedManifestCommittable wrappedManifestCommittable,
            List committables) {
        for (MultiTableCommittable committable : committables) {
            Identifier identifier =
                    Identifier.create(committable.getDatabase(), committable.getTable());
            ManifestCommittable manifestCommittable =
                    wrappedManifestCommittable.computeCommittableIfAbsent(
                            identifier, checkpointId, watermark);

            switch (committable.kind()) {
                case FILE:
                    CommitMessage file = (CommitMessage) committable.wrappedCommittable();
                    manifestCommittable.addFileCommittable(file);
                    break;
                case LOG_OFFSET:
                    LogOffsetCommittable offset =
                            (LogOffsetCommittable) committable.wrappedCommittable();
                    StoreCommitter committer = tableCommitters.get(identifier);
                    manifestCommittable.addLogOffset(
                            offset.bucket(), offset.offset(), committer.allowLogOffsetDuplicate());
                    break;
            }
        }
        return wrappedManifestCommittable;
    }

    @Override
    public void commit(List committables)
            throws IOException, InterruptedException {
        if (committables.isEmpty()) {
            return;
        }

        // key by table id
        Map> committableMap = groupByTable(committables);
        committableMap.keySet().forEach(this::getStoreCommitter);

        long checkpointId = committables.get(0).checkpointId();
        long watermark = committables.get(0).watermark();
        for (Map.Entry entry : tableCommitters.entrySet()) {
            List committableList = committableMap.get(entry.getKey());
            StoreCommitter committer = entry.getValue();
            if (committableList != null) {
                committer.commit(committableList);
            } else {
                // try best to commit empty snapshot, but tableCommitters may not contain all tables
                if (committer.forceCreatingSnapshot()) {
                    ManifestCommittable combine =
                            committer.combine(checkpointId, watermark, Collections.emptyList());
                    committer.commit(Collections.singletonList(combine));
                }
            }
        }
    }

    @Override
    public int filterAndCommit(
            List globalCommittables, boolean checkAppendFiles)
            throws IOException {
        int result = 0;
        for (Map.Entry> entry :
                groupByTable(globalCommittables).entrySet()) {
            result +=
                    getStoreCommitter(entry.getKey())
                            .filterAndCommit(entry.getValue(), checkAppendFiles);
        }
        return result;
    }

    private Map> groupByTable(
            List committables) {
        return committables.stream()
                .flatMap(
                        wrapped -> {
                            Map manifestCommittables =
                                    wrapped.manifestCommittables();
                            return manifestCommittables.entrySet().stream()
                                    .map(entry -> Tuple2.of(entry.getKey(), entry.getValue()));
                        })
                .collect(
                        Collectors.groupingBy(
                                t -> t.f0, Collectors.mapping(t -> t.f1, Collectors.toList())));
    }

    @Override
    public Map> groupByCheckpoint(
            Collection committables) {
        Map> grouped = new HashMap<>();
        for (MultiTableCommittable c : committables) {
            grouped.computeIfAbsent(c.checkpointId(), k -> new ArrayList<>()).add(c);
        }
        return grouped;
    }

    private StoreCommitter getStoreCommitter(Identifier tableId) {
        StoreCommitter committer = tableCommitters.get(tableId);

        if (committer == null) {
            FileStoreTable table;
            try {
                table = (FileStoreTable) catalog.getTable(tableId).copy(dynamicOptions);
            } catch (Catalog.TableNotExistException e) {
                throw new RuntimeException(
                        String.format(
                                "Failed to get committer for table %s", tableId.getFullName()),
                        e);
            }
            committer =
                    new StoreCommitter(
                            table,
                            table.newCommit(context.commitUser())
                                    .ignoreEmptyCommit(ignoreEmptyCommit),
                            context);
            tableCommitters.put(tableId, committer);
        }

        return committer;
    }

    @Override
    public void close() throws Exception {
        for (StoreCommitter committer : tableCommitters.values()) {
            committer.close();
        }
        if (catalog != null) {
            catalog.close();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy