All downloads are free. The search and download features use the official Maven repository.

org.apache.paimon.flink.action.CompactDatabaseActionFactory Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink.action;

import org.apache.paimon.utils.TimeUtils;

import java.util.Optional;

import static org.apache.paimon.flink.action.CompactActionFactory.checkCompactStrategy;

/**
 * Factory to create {@link CompactDatabaseAction}.
 *
 * <p>Reports {@value #IDENTIFIER} as its identifier, builds the action from the supplied
 * parameters in {@link #create}, and prints a usage summary in {@link #printHelp()}.
 */
public class CompactDatabaseActionFactory implements ActionFactory {

    /** Action name reported by {@link #identifier()}. */
    public static final String IDENTIFIER = "compact_database";

    // Parameter keys read from the supplied arguments in create().
    private static final String INCLUDING_DATABASES = "including_databases";
    private static final String INCLUDING_TABLES = "including_tables";
    private static final String EXCLUDING_TABLES = "excluding_tables";
    private static final String MODE = "mode";
    private static final String PARTITION_IDLE_TIME = "partition_idle_time";

    /** Returns the action name, {@value #IDENTIFIER}. */
    @Override
    public String identifier() {
        return IDENTIFIER;
    }

    /**
     * Builds a {@link CompactDatabaseAction} from the given parameters.
     *
     * <p>The database/table filters, the mode and the table options are always forwarded to the
     * action exactly as read from {@code params}. The partition idle time is parsed with {@link
     * TimeUtils#parseDuration} and applied only when the parameter is present. Full compaction is
     * configured only when {@code checkCompactStrategy} accepts the supplied strategy; it is then
     * enabled if the trimmed strategy equals {@code FULL} ignoring case.
     *
     * <p>NOTE(review): the return type is a raw {@code Optional} here; presumably {@code
     * ActionFactory} declares a parameterized return type that should be mirrored -- confirm
     * against the interface.
     *
     * @param params parameters to read the action settings from
     * @return an {@code Optional} that always contains the configured action
     */
    @Override
    public Optional create(MultipleParameterToolAdapter params) {
        CompactDatabaseAction action = new CompactDatabaseAction(catalogConfigMap(params));

        action.includingDatabases(params.get(INCLUDING_DATABASES))
                .includingTables(params.get(INCLUDING_TABLES))
                .excludingTables(params.get(EXCLUDING_TABLES))
                .withDatabaseCompactMode(params.get(MODE))
                .withTableOptions(optionalConfigMap(params, TABLE_CONF));

        // The idle time is optional; leave the action untouched when it was not supplied.
        String partitionIdleTime = params.get(PARTITION_IDLE_TIME);
        if (partitionIdleTime != null) {
            action.withPartitionIdleTime(TimeUtils.parseDuration(partitionIdleTime));
        }

        // Presumably checkCompactStrategy returns false for an absent strategy, which keeps the
        // trim() below safe -- confirm in CompactActionFactory.
        String compactStrategy = params.get(COMPACT_STRATEGY);
        if (checkCompactStrategy(compactStrategy)) {
            action.withFullCompaction(compactStrategy.trim().equalsIgnoreCase(FULL));
        }

        return Optional.of(action);
    }

    /** Prints the syntax, option descriptions and examples for this action to standard output. */
    @Override
    public void printHelp() {
        System.out.println(
                "Action \"compact_database\" runs a dedicated job for compacting one or multiple databases.");
        System.out.println();

        System.out.println("Syntax:");
        // Every option fragment ends with a space so the concatenated usage line stays readable.
        System.out.println(
                "  compact_database --warehouse <warehouse-path> --including_databases <database-name|name-regular-expr> "
                        + "[--including_tables <paimon-table-name|name-regular-expr>] "
                        + "[--excluding_tables <paimon-table-name|name-regular-expr>] "
                        + "[--mode <compact-mode>] "
                        + "[--partition_idle_time <partition-idle-time>] "
                        + "[--compact_strategy <minor / full>]");
        System.out.println(
                "  compact_database --warehouse s3://path/to/warehouse --including_databases <database-name> "
                        + "[--catalog_conf <paimon-catalog-conf> [--catalog_conf <paimon-catalog-conf> ...]]");
        System.out.println();

        System.out.println(
                "--including_databases is used to specify which databases are to be compacted. "
                        + "You must use '|' to separate multiple databases, Regular expression is supported.");

        System.out.println(
                "--including_tables is used to specify which source tables are to be compacted. "
                        + "You must use '|' to separate multiple tables, the format is `databaseName.tableName`, Regular expression is supported.");
        System.out.println(
                "--excluding_tables is used to specify which source tables are not to be compacted. "
                        + "The usage is same as --including_tables.");
        System.out.println(
                "--excluding_tables has higher priority than --including_tables if you specified both.");
        System.out.println(
                "--mode is used to specify compaction mode. Possible values: divided, combined.");
        System.out.println(
                "--partition_idle_time is used to do a full compaction for partition which has not received any new data for 'partition_idle_time' time. And only these partitions will be compacted.");
        System.out.println("--partition_idle_time is only supported in batch mode. ");
        System.out.println(
                "--compact_strategy determines how to pick files to be merged, the default is determined by the runtime execution mode. "
                        + "`full` : Only supports batch mode. All files will be selected for merging. "
                        + "`minor`: Pick the set of files that need to be merged based on specified conditions.");

        System.out.println();

        System.out.println("Examples:");
        System.out.println(
                "  compact_database --warehouse hdfs:///path/to/warehouse --including_databases test_db");
        System.out.println(
                "  compact_database --warehouse s3:///path/to/warehouse "
                        + "--including_databases test_db "
                        + "--catalog_conf s3.endpoint=https://****.com "
                        + "--catalog_conf s3.access-key=***** "
                        + "--catalog_conf s3.secret-key=***** ");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy