All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.paimon.flink.clone.CopyFileOperator Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.clone;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.flink.FlinkCatalogFactory;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.options.Options;
import org.apache.paimon.table.Table;
import org.apache.paimon.utils.IOUtils;

import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Map;

/** A Operator to copy files. */
public class CopyFileOperator extends AbstractStreamOperator
        implements OneInputStreamOperator {

    private static final Logger LOG = LoggerFactory.getLogger(CopyFileOperator.class);

    private final Map sourceCatalogConfig;
    private final Map targetCatalogConfig;

    private transient Catalog sourceCatalog;
    private transient Catalog targetCatalog;

    private transient Map srcLocations;
    private transient Map targetLocations;

    public CopyFileOperator(
            Map sourceCatalogConfig, Map targetCatalogConfig) {
        this.sourceCatalogConfig = sourceCatalogConfig;
        this.targetCatalogConfig = targetCatalogConfig;
    }

    @Override
    public void open() throws Exception {
        sourceCatalog =
                FlinkCatalogFactory.createPaimonCatalog(Options.fromMap(sourceCatalogConfig));
        targetCatalog =
                FlinkCatalogFactory.createPaimonCatalog(Options.fromMap(targetCatalogConfig));
        srcLocations = new HashMap<>();
        targetLocations = new HashMap<>();
    }

    @Override
    public void processElement(StreamRecord streamRecord) throws Exception {
        CloneFileInfo cloneFileInfo = streamRecord.getValue();

        FileIO sourceTableFileIO = sourceCatalog.fileIO();
        FileIO targetTableFileIO = targetCatalog.fileIO();

        Path sourceTableRootPath =
                srcLocations.computeIfAbsent(
                        cloneFileInfo.getSourceIdentifier(),
                        key -> {
                            try {
                                return pathOfTable(
                                        sourceCatalog.getTable(Identifier.fromString(key)));
                            } catch (Catalog.TableNotExistException e) {
                                throw new RuntimeException(e);
                            }
                        });
        Path targetTableRootPath =
                targetLocations.computeIfAbsent(
                        cloneFileInfo.getTargetIdentifier(),
                        key -> {
                            try {
                                return pathOfTable(
                                        targetCatalog.getTable(Identifier.fromString(key)));
                            } catch (Catalog.TableNotExistException e) {
                                throw new RuntimeException(e);
                            }
                        });

        String filePathExcludeTableRoot = cloneFileInfo.getFilePathExcludeTableRoot();
        Path sourcePath = new Path(cloneFileInfo.getSourceFilePath());
        Path targetPath = new Path(targetTableRootPath + filePathExcludeTableRoot);

        if (targetTableFileIO.exists(targetPath)
                && targetTableFileIO.getFileSize(targetPath)
                        == sourceTableFileIO.getFileSize(sourcePath)) {

            if (LOG.isDebugEnabled()) {
                LOG.debug(
                        "Skipping clone target file {} because it already exists and has the same size.",
                        targetPath);
            }

            // We still send record to SnapshotHintOperator to avoid the following corner case:
            //
            // When cloning two tables under a catalog, after clone table A is completed,
            // the job fails due to snapshot expiration when cloning table B.
            // If we don't re-send file information of table A to SnapshotHintOperator,
            // the snapshot hint file of A will not be created after the restart.
            output.collect(streamRecord);
            return;
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Begin copy file from {} to {}.", sourcePath, targetPath);
        }
        IOUtils.copyBytes(
                sourceTableFileIO.newInputStream(sourcePath),
                targetTableFileIO.newOutputStream(targetPath, true));
        if (LOG.isDebugEnabled()) {
            LOG.debug("End copy file from {} to {}.", sourcePath, targetPath);
        }

        output.collect(streamRecord);
    }

    private Path pathOfTable(Table table) {
        return new Path(table.options().get(CoreOptions.PATH.key()));
    }

    @Override
    public void close() throws Exception {
        if (sourceCatalog != null) {
            sourceCatalog.close();
        }
        if (targetCatalog != null) {
            targetCatalog.close();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy