All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.paimon.flink.sink.partition.PartitionMarkDoneTrigger Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.sink.partition;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.fs.Path;
import org.apache.paimon.options.Options;
import org.apache.paimon.partition.PartitionTimeExtractor;
import org.apache.paimon.utils.StringUtils;

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.OperatorStateStore;
import org.apache.flink.api.common.typeutils.base.ListSerializer;
import org.apache.flink.api.common.typeutils.base.StringSerializer;

import javax.annotation.Nullable;

import java.time.Duration;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import static org.apache.paimon.CoreOptions.PARTITION_MARK_DONE_WHEN_END_INPUT;
import static org.apache.paimon.flink.FlinkConnectorOptions.PARTITION_IDLE_TIME_TO_DONE;
import static org.apache.paimon.flink.FlinkConnectorOptions.PARTITION_TIME_INTERVAL;
import static org.apache.paimon.utils.PartitionPathUtils.extractPartitionSpecFromPath;

/** Trigger to mark partitions done with streaming job. */
public class PartitionMarkDoneTrigger {

    private static final ListStateDescriptor> PENDING_PARTITIONS_STATE_DESC =
            new ListStateDescriptor<>(
                    "mark-done-pending-partitions",
                    new ListSerializer<>(StringSerializer.INSTANCE));

    private final State state;
    private final PartitionTimeExtractor timeExtractor;
    // can be null when markDoneWhenEndInput is true
    @Nullable private final Long timeInterval;
    // can be null when markDoneWhenEndInput is true
    @Nullable private final Long idleTime;
    private final boolean markDoneWhenEndInput;
    private final Map pendingPartitions;

    public PartitionMarkDoneTrigger(
            State state,
            PartitionTimeExtractor timeExtractor,
            @Nullable Duration timeInterval,
            @Nullable Duration idleTime,
            boolean markDoneWhenEndInput)
            throws Exception {
        this(
                state,
                timeExtractor,
                timeInterval,
                idleTime,
                System.currentTimeMillis(),
                markDoneWhenEndInput);
    }

    public PartitionMarkDoneTrigger(
            State state,
            PartitionTimeExtractor timeExtractor,
            @Nullable Duration timeInterval,
            @Nullable Duration idleTime,
            long currentTimeMillis,
            boolean markDoneWhenEndInput)
            throws Exception {
        this.pendingPartitions = new HashMap<>();
        this.state = state;
        this.timeExtractor = timeExtractor;
        this.timeInterval = timeInterval == null ? null : timeInterval.toMillis();
        this.idleTime = idleTime == null ? null : idleTime.toMillis();
        this.markDoneWhenEndInput = markDoneWhenEndInput;
        state.restore().forEach(p -> pendingPartitions.put(p, currentTimeMillis));
    }

    public void notifyPartition(String partition) {
        notifyPartition(partition, System.currentTimeMillis());
    }

    @VisibleForTesting
    void notifyPartition(String partition, long currentTimeMillis) {
        if (!StringUtils.isNullOrWhitespaceOnly(partition)) {
            this.pendingPartitions.put(partition, currentTimeMillis);
        }
    }

    public List donePartitions(boolean endInput) {
        return donePartitions(endInput, System.currentTimeMillis());
    }

    @VisibleForTesting
    List donePartitions(boolean endInput, long currentTimeMillis) {
        if (endInput && markDoneWhenEndInput) {
            return new ArrayList<>(pendingPartitions.keySet());
        }

        if (timeInterval == null || idleTime == null) {
            return Collections.emptyList();
        }

        List needDone = new ArrayList<>();
        Iterator> iter = pendingPartitions.entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry entry = iter.next();
            String partition = entry.getKey();

            long lastUpdateTime = entry.getValue();
            long partitionStartTime =
                    extractDateTime(partition)
                            .atZone(ZoneId.systemDefault())
                            .toInstant()
                            .toEpochMilli();
            long partitionEndTime = partitionStartTime + timeInterval;
            lastUpdateTime = Math.max(lastUpdateTime, partitionEndTime);

            if (currentTimeMillis - lastUpdateTime > idleTime) {
                needDone.add(partition);
                iter.remove();
            }
        }
        return needDone;
    }

    @VisibleForTesting
    LocalDateTime extractDateTime(String partition) {
        try {
            return timeExtractor.extract(extractPartitionSpecFromPath(new Path(partition)));
        } catch (DateTimeParseException e) {
            throw new RuntimeException("Can't extract datetime from partition " + partition, e);
        }
    }

    public void snapshotState() throws Exception {
        state.update(new ArrayList<>(pendingPartitions.keySet()));
    }

    /** State to store partitions. */
    public interface State {
        List restore() throws Exception;

        void update(List partitions) throws Exception;
    }

    /** State to store partitions with streaming job. */
    private static class PartitionMarkDoneTriggerState implements State {

        private final boolean isRestored;
        private final ListState> pendingPartitionsState;

        public PartitionMarkDoneTriggerState(boolean isRestored, OperatorStateStore stateStore)
                throws Exception {
            this.isRestored = isRestored;
            this.pendingPartitionsState = stateStore.getListState(PENDING_PARTITIONS_STATE_DESC);
        }

        @Override
        public List restore() throws Exception {
            List pendingPartitions = new ArrayList<>();
            if (isRestored) {
                Iterator> state = pendingPartitionsState.get().iterator();
                if (state.hasNext()) {
                    pendingPartitions.addAll(state.next());
                }
            }
            return pendingPartitions;
        }

        @Override
        public void update(List partitions) throws Exception {
            pendingPartitionsState.update(Collections.singletonList(partitions));
        }
    }

    public static PartitionMarkDoneTrigger create(
            CoreOptions coreOptions, boolean isRestored, OperatorStateStore stateStore)
            throws Exception {
        Options options = coreOptions.toConfiguration();
        return new PartitionMarkDoneTrigger(
                new PartitionMarkDoneTrigger.PartitionMarkDoneTriggerState(isRestored, stateStore),
                new PartitionTimeExtractor(
                        coreOptions.partitionTimestampPattern(),
                        coreOptions.partitionTimestampFormatter()),
                options.get(PARTITION_TIME_INTERVAL),
                options.get(PARTITION_IDLE_TIME_TO_DONE),
                options.get(PARTITION_MARK_DONE_WHEN_END_INPUT));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy