All downloads are free. The search and download features use the official Maven repository.

org.apache.flink.lakesoul.metadata.LakeSoulCatalog Maven / Gradle / Ivy

There is a newer version: 2.5.1-flink-1.17
Show newest version
// SPDX-FileCopyrightText: 2023 LakeSoul Contributors
//
// SPDX-License-Identifier: Apache-2.0

package org.apache.flink.lakesoul.metadata;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.dmetasoul.lakesoul.meta.DBManager;
import com.dmetasoul.lakesoul.meta.DBUtil;
import com.dmetasoul.lakesoul.meta.entity.Namespace;
import com.dmetasoul.lakesoul.meta.entity.PartitionInfo;
import com.dmetasoul.lakesoul.meta.entity.TableInfo;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.lakesoul.table.LakeSoulDynamicTableFactory;
import org.apache.flink.lakesoul.tool.FlinkUtil;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.constraints.UniqueConstraint;
import org.apache.flink.table.catalog.*;
import org.apache.flink.table.catalog.exceptions.*;
import org.apache.flink.table.catalog.stats.CatalogColumnStatistics;
import org.apache.flink.table.catalog.stats.CatalogTableStatistics;
import org.apache.flink.table.expressions.CallExpression;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.factories.Factory;

import java.io.IOException;
import java.util.*;

import static com.dmetasoul.lakesoul.meta.DBConfig.LAKESOUL_HASH_PARTITION_SPLITTER;
import static com.dmetasoul.lakesoul.meta.DBConfig.LAKESOUL_PARTITION_SPLITTER_OF_RANGE_AND_HASH;
import static org.apache.flink.lakesoul.tool.LakeSoulSinkOptions.*;
import static org.apache.flink.util.Preconditions.checkNotNull;

public class LakeSoulCatalog implements Catalog {

    /** Catalog name returned by {@code getName()} and passed to the catalog exceptions thrown by this class. */
    public static final String CATALOG_NAME = "lakesoul";
    /** Prefix placed in front of a random UUID when {@code createTable} generates a table id. */
    public static final String TABLE_ID_PREFIX = "table_";
    /** Table option key that {@code createTable} checks for an explicit table location. */
    private static final String TABLE_PATH = "path";
    /** Metadata handle through which every read and write of this catalog goes. */
    private final DBManager dbManager;


    public LakeSoulCatalog() {
        dbManager = new DBManager();
        createDatabase("default", new LakesoulCatalogDatabase(), true);
    }

    /** No-op: this method performs no initialization. */
    @Override
    public void open() throws CatalogException {
        // nothing to set up here
    }

    /** No-op: this method releases nothing. */
    @Override
    public void close() throws CatalogException {
        // nothing to tear down here
    }

    /**
     * Supplies the table factory used for tables of this catalog.
     *
     * @return an {@link Optional} holding a new {@link LakeSoulDynamicTableFactory}; never empty
     */
    @Override
    public Optional<Factory> getFactory() {
        return Optional.of(new LakeSoulDynamicTableFactory());
    }

    /**
     * Names the database used when none is given explicitly.
     *
     * @return always {@code "default"}, the database the constructor registers
     */
    @Override
    public String getDefaultDatabase() throws CatalogException {
        final String defaultDatabaseName = "default";
        return defaultDatabaseName;
    }

    /**
     * Lists the databases of this catalog.
     *
     * @return whatever {@code dbManager.listNamespaces()} reports
     */
    @Override
    public List<String> listDatabases() throws CatalogException {
        return dbManager.listNamespaces();
    }

    /**
     * Loads a database definition from the metadata store.
     *
     * @param databaseName name of the database (LakeSoul namespace) to load
     * @return a {@link LakesoulCatalogDatabase} built from the stored properties JSON and comment
     * @throws DatabaseNotExistException if no namespace with that name is stored
     */
    @Override
    public CatalogDatabase getDatabase(String databaseName) throws DatabaseNotExistException, CatalogException {
        Namespace namespaceEntity = dbManager.getNamespaceByNamespace(databaseName);
        if (namespaceEntity == null) {
            throw new DatabaseNotExistException(CATALOG_NAME, databaseName);
        }
        // Properties are stored as a JSON string; turn them back into a plain string map.
        Map<String, String> properties =
                DBUtil.jsonToStringMap(JSON.parseObject(namespaceEntity.getProperties()));
        return new LakesoulCatalogDatabase(properties, namespaceEntity.getComment());
    }

    /**
     * Tells whether a database is known to the metadata store.
     *
     * @param databaseName name of the database (LakeSoul namespace) to look up
     * @return {@code true} when the lookup yields a namespace entity, {@code false} when it yields {@code null}
     */
    @Override
    public boolean databaseExists(String databaseName) throws CatalogException {
        return dbManager.getNamespaceByNamespace(databaseName) != null;
    }

    /**
     * Registers a new database (LakeSoul namespace) with the given properties and comment.
     *
     * @param databaseName    name of the database to create
     * @param catalogDatabase source of the properties and the comment to store
     * @param ignoreIfExists  when {@code true}, an already existing database is silently kept
     * @throws CatalogException if the database already exists and {@code ignoreIfExists} is {@code false}
     */
    @Override
    public void createDatabase(String databaseName, CatalogDatabase catalogDatabase, boolean ignoreIfExists)
            throws CatalogException {
        if (databaseExists(databaseName)) {
            if (ignoreIfExists) {
                return;
            }
            throw new CatalogException(String.format("database %s already exists", databaseName));
        }
        // Failures of the metadata store propagate unchanged to the caller; nothing is printed to
        // stderr here because the exception already carries the full stack trace.
        String propertiesJson = DBUtil.stringMapToJson(catalogDatabase.getProperties()).toJSONString();
        dbManager.createNewNamespace(databaseName, propertiesJson, catalogDatabase.getComment());
    }

    /**
     * Removes a database, optionally together with everything stored in it.
     *
     * <p>Both tables and views count as content: {@code listTables} only reports entries accepted by
     * {@code FlinkUtil.isTable}, so views are collected separately to avoid leaving them behind once
     * the namespace is deleted.
     *
     * @param databaseName      name of the database to drop
     * @param ignoreIfNotExists when {@code true}, a missing database is not an error
     * @param cascade           when {@code true}, contained tables and views are dropped first
     * @throws DatabaseNotExistException if the database is missing and {@code ignoreIfNotExists} is {@code false}
     * @throws DatabaseNotEmptyException if the database has content and {@code cascade} is {@code false}
     */
    @Override
    public void dropDatabase(String databaseName, boolean ignoreIfNotExists, boolean cascade) throws
            DatabaseNotExistException, DatabaseNotEmptyException, CatalogException {
        if (!databaseExists(databaseName)) {
            if (ignoreIfNotExists) {
                return;
            }
            throw new DatabaseNotExistException(CATALOG_NAME, databaseName);
        }
        List<String> tables = listTables(databaseName);
        List<String> views = listViews(databaseName);
        List<String> contents = new ArrayList<>(tables.size() + views.size());
        contents.addAll(tables);
        contents.addAll(views);
        if (!contents.isEmpty() && !cascade) {
            throw new DatabaseNotEmptyException(CATALOG_NAME, databaseName);
        }
        for (String name : contents) {
            try {
                // ignoreIfNotExists=true: a name listed twice or dropped concurrently is not an error
                dropTable(new ObjectPath(databaseName, name), true);
            } catch (TableNotExistException e) {
                // keep the original exception as the cause instead of skipping over it
                throw new CatalogException(e.getMessage(), e);
            }
        }
        dbManager.deleteNamespace(databaseName);
    }

    /**
     * Replaces the stored properties of an existing database; the comment is not touched.
     *
     * @param databaseName      name of the database to update
     * @param catalogDatabase   source of the new properties
     * @param ignoreIfNotExists when {@code true}, a missing database is silently skipped
     * @throws DatabaseNotExistException if the database is missing and {@code ignoreIfNotExists} is {@code false}
     */
    @Override
    public void alterDatabase(String databaseName, CatalogDatabase catalogDatabase, boolean ignoreIfNotExists)
            throws DatabaseNotExistException, CatalogException {
        if (databaseExists(databaseName)) {
            String propertiesJson = DBUtil.stringMapToJson(catalogDatabase.getProperties()).toJSONString();
            dbManager.updateNamespaceProperties(databaseName, propertiesJson);
            return;
        }
        if (!ignoreIfNotExists) {
            throw new DatabaseNotExistException(CATALOG_NAME, databaseName);
        }
    }

    /**
     * Lists the names of the entries in a database that {@code FlinkUtil.isTable} accepts.
     *
     * @param databaseName name of the database (LakeSoul namespace) to scan
     * @return the matching table names, in the order the metadata store returned them
     */
    @Override
    public List<String> listTables(String databaseName) throws CatalogException {
        List<TableInfo> tifs = dbManager.getTableInfosByNamespace(databaseName);
        List<String> tableNames = new ArrayList<>(tifs.size());
        for (TableInfo item : tifs) {
            if (FlinkUtil.isTable(item)) {
                tableNames.add(item.getTableName());
            }
        }
        return tableNames;
    }

    /**
     * Lists the names of the entries in a database that {@code FlinkUtil.isView} accepts.
     *
     * @param databaseName name of the database (LakeSoul namespace) to scan
     * @return the matching view names, in the order the metadata store returned them
     */
    @Override
    public List<String> listViews(String databaseName) throws CatalogException {
        List<TableInfo> tifs = dbManager.getTableInfosByNamespace(databaseName);
        List<String> viewNames = new ArrayList<>(tifs.size());
        for (TableInfo item : tifs) {
            if (FlinkUtil.isView(item)) {
                viewNames.add(item.getTableName());
            }
        }
        return viewNames;
    }

    /**
     * Loads a table or view definition and converts it to its Flink representation.
     *
     * @param tablePath database and object name of the table
     * @return the result of {@code FlinkUtil.toFlinkCatalog} for the stored table info
     * @throws TableNotExistException if the metadata store has no entry for {@code tablePath}
     */
    @Override
    public CatalogBaseTable getTable(ObjectPath tablePath) throws TableNotExistException, CatalogException {
        checkNotNull(tablePath);
        // One lookup serves both the existence check and the conversion, so a table dropped
        // concurrently cannot slip a null into the converter.
        TableInfo tableInfo =
                dbManager.getTableInfoByNameAndNamespace(tablePath.getObjectName(), tablePath.getDatabaseName());
        if (tableInfo == null) {
            throw new TableNotExistException(CATALOG_NAME, tablePath);
        }
        return FlinkUtil.toFlinkCatalog(tableInfo);
    }

    /**
     * Tells whether the metadata store has an entry for the given path.
     *
     * @param tablePath database and object name to look up; must not be {@code null}
     * @return {@code true} when the lookup yields a table info, {@code false} when it yields {@code null}
     */
    @Override
    public boolean tableExists(ObjectPath tablePath) throws CatalogException {
        checkNotNull(tablePath);
        final String objectName = tablePath.getObjectName();
        final String databaseName = tablePath.getDatabaseName();
        return dbManager.getTableInfoByNameAndNamespace(objectName, databaseName) != null;
    }

    /**
     * Drops a table or view: removes its metadata and, for entries accepted by
     * {@code FlinkUtil.isTable}, recursively deletes the table directory.
     *
     * @param tablePath         database and object name of the table; must not be {@code null}
     * @param ignoreIfNotExists when {@code true}, a missing table is not an error
     * @throws TableNotExistException if the table is missing and {@code ignoreIfNotExists} is {@code false}
     * @throws CatalogException       if deleting the table directory fails; the metadata is already
     *                                removed at that point
     */
    @Override
    public void dropTable(ObjectPath tablePath, boolean ignoreIfNotExists)
            throws TableNotExistException, CatalogException {
        checkNotNull(tablePath);
        String tableName = tablePath.getObjectName();
        String databaseName = tablePath.getDatabaseName();
        TableInfo tableInfo = dbManager.getTableInfoByNameAndNamespace(tableName, databaseName);
        if (tableInfo == null) {
            if (ignoreIfNotExists) {
                return;
            }
            throw new TableNotExistException(CATALOG_NAME, tablePath);
        }

        String tableId = tableInfo.getTableId();
        dbManager.deleteTableInfo(tableInfo.getTablePath(), tableId, databaseName);
        dbManager.deleteShortTableName(tableInfo.getTableName(), tableName, databaseName);
        dbManager.deleteDataCommitInfo(tableId);
        dbManager.deletePartitionInfoByTableId(tableId);

        if (FlinkUtil.isTable(tableInfo)) {
            Path path = new Path(tableInfo.getTablePath());
            try {
                path.getFileSystem().delete(path, true);
            } catch (IOException e) {
                // CatalogException is a RuntimeException, so existing handlers keep working,
                // but the failure now says which directory and table were involved.
                throw new CatalogException(
                        String.format("Failed to delete data directory %s of table %s", path, tablePath), e);
            }
        }
    }

    /**
     * Renaming tables is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void renameTable(ObjectPath tablePath, String s, boolean b) throws CatalogException {
        final String reason = "Rename LakeSoul table is not supported for now";
        throw new CatalogException(reason);
    }

    /**
     * Registers a new table or view in the metadata store.
     *
     * <p>Primary key columns are stored as hash partitions. A CDC table must have a primary key,
     * and a table with a primary key needs a positive hash bucket number. For a
     * {@link ResolvedCatalogTable} the location is taken from the {@code path} option or derived
     * from the Flink warehouse directory, and the directory is prepared before the metadata is
     * written. For a {@link ResolvedCatalogView} the view queries are stored as table properties.
     *
     * <p>The options of {@code table} are copied before anything is added to them, so the caller's
     * object is never modified.
     *
     * @param tablePath      database and object name of the new table; must not be {@code null}
     * @param table          definition to store; must not be {@code null}
     * @param ignoreIfExists when {@code true}, an already existing table is silently kept
     * @throws DatabaseNotExistException  if the target database does not exist
     * @throws TableAlreadyExistException if the table exists and {@code ignoreIfExists} is {@code false}
     * @throws CatalogException           if the options are inconsistent, no location can be
     *                                    determined, or the table directory cannot be prepared
     */
    @Override
    public void createTable(ObjectPath tablePath, CatalogBaseTable table, boolean ignoreIfExists)
            throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {

        checkNotNull(tablePath);
        checkNotNull(table);
        TableSchema schema = table.getSchema();
        Optional<UniqueConstraint> primaryKeyColumns = schema.getPrimaryKey();
        if (!databaseExists(tablePath.getDatabaseName())) {
            throw new DatabaseNotExistException(CATALOG_NAME, tablePath.getDatabaseName());
        }
        if (tableExists(tablePath)) {
            if (ignoreIfExists) {
                return;
            }
            throw new TableAlreadyExistException(CATALOG_NAME, tablePath);
        }
        String primaryKeys = primaryKeyColumns
                .map(uniqueConstraint -> String.join(LAKESOUL_HASH_PARTITION_SPLITTER,
                        uniqueConstraint.getColumns()))
                .orElse("");
        // Private copy: the map handed out by the table may be shared or unmodifiable.
        Map<String, String> tableOptions = new HashMap<>(table.getOptions());

        // adding cdc options
        if (!primaryKeys.isEmpty()) {
            tableOptions.put(HASH_PARTITIONS, primaryKeys);
        }
        Optional<String> cdcColumn;
        if ("true".equals(tableOptions.get(USE_CDC.key()))) {
            if (primaryKeys.isEmpty()) {
                throw new CatalogException("CDC table must have primary key(s)");
            }
            cdcColumn = Optional.of(tableOptions.getOrDefault(CDC_CHANGE_COLUMN, CDC_CHANGE_COLUMN_DEFAULT));
            tableOptions.put(CDC_CHANGE_COLUMN, cdcColumn.get());
        } else {
            cdcColumn = Optional.empty();
        }
        // adding hash bucket options
        if (!primaryKeys.isEmpty()) {
            String invalidBucketNum =
                    "Valid integer value for hashBucketNum property must be set for table with primary key";
            int hashBucketNum;
            try {
                hashBucketNum = Integer.parseInt(tableOptions.getOrDefault(HASH_BUCKET_NUM.key(), "-1"));
            } catch (NumberFormatException e) {
                // a non-numeric value is reported the same way as a missing or non-positive one
                throw new CatalogException(invalidBucketNum, e);
            }
            if (hashBucketNum <= 0) {
                throw new CatalogException(invalidBucketNum);
            }
        }
        String tableId = TABLE_ID_PREFIX + UUID.randomUUID();
        String qualifiedPath = "";
        String sparkSchema = FlinkUtil.toArrowSchema(schema, cdcColumn).toJson();
        List<String> partitionKeys = Collections.emptyList();
        if (table instanceof ResolvedCatalogTable) {
            partitionKeys = ((ResolvedCatalogTable) table).getPartitionKeys();
            qualifiedPath = prepareTableLocation(tablePath, tableOptions);
        }
        if (table instanceof ResolvedCatalogView) {
            ResolvedCatalogView view = (ResolvedCatalogView) table;
            tableOptions.put(LAKESOUL_VIEW.key(), "true");
            tableOptions.put(LAKESOUL_VIEW_TYPE.key(), LAKESOUL_VIEW_TYPE.defaultValue());
            tableOptions.put(VIEW_ORIGINAL_QUERY, view.getOriginalQuery());
            tableOptions.put(VIEW_EXPANDED_QUERY, view.getExpandedQuery());
        }
        String json = JSON.toJSONString(tableOptions);
        JSONObject properties = JSON.parseObject(json);
        String tableName = tablePath.getObjectName();
        dbManager.createNewTable(tableId, tablePath.getDatabaseName(), tableName, qualifiedPath, sparkSchema,
                properties, DBUtil.formatTableInfoPartitionsField(primaryKeys, partitionKeys));
    }

    /**
     * Determines the location of a new table, prepares its directory and returns the fully
     * qualified path. The {@code path} option wins; otherwise the location is
     * {@code <warehouse dir>/<database>/<table>}.
     */
    private String prepareTableLocation(ObjectPath tablePath, Map<String, String> tableOptions) {
        String path;
        if (tableOptions.containsKey(TABLE_PATH)) {
            path = tableOptions.get(TABLE_PATH);
        } else {
            String flinkWarehouseDir = GlobalConfiguration.loadConfiguration().get(FLINK_WAREHOUSE_DIR);
            path = flinkWarehouseDir == null
                    ? null
                    : String.join("/", flinkWarehouseDir, tablePath.getDatabaseName(), tablePath.getObjectName());
        }
        if (path == null || path.isEmpty()) {
            // fail with a catalog-level message instead of letting the Path constructor reject the value
            throw new CatalogException("Cannot determine the location of table " + tablePath
                    + ": neither the '" + TABLE_PATH + "' table option nor a Flink warehouse directory is set");
        }
        try {
            Path location = new Path(path);
            Path qualified = location.makeQualified(location.getFileSystem());
            FlinkUtil.createAndSetTableDirPermission(qualified);
            return qualified.toString();
        } catch (IOException e) {
            // Registering the table with an empty path would leave an entry that can neither be
            // read nor dropped, so the failure is surfaced instead of being printed and ignored.
            throw new CatalogException(
                    "Failed to prepare directory " + path + " for table " + tablePath, e);
        }
    }

    /**
     * Altering tables is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterTable(ObjectPath tablePath, CatalogBaseTable catalogBaseTable, boolean b) throws CatalogException {
        final String reason = "Alter lakesoul table not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Lists every partition of a table.
     *
     * @param tablePath database and object name of the table; must not be {@code null}
     * @return the result of the two-argument overload called with a {@code null} spec
     * @throws CatalogException if the table does not exist (raised by the overload)
     */
    @Override
    public List<CatalogPartitionSpec> listPartitions(ObjectPath tablePath) throws CatalogException {
        checkNotNull(tablePath);
        // The overload performs the existence check itself, so it is not repeated here.
        return listPartitions(tablePath, null);
    }

    /**
     * Lists the partitions of a table, optionally restricted to those under a partial spec.
     *
     * @param tablePath            database and object name of the table; must not be {@code null}
     * @param catalogPartitionSpec partial spec a partition must contain to be returned; {@code null}
     *                             (or an empty spec) returns every partition
     * @return specs of the matching partitions, in the order the metadata store returned them
     * @throws CatalogException if the table does not exist or a stored partition description is
     *                          {@code null} or empty
     */
    @Override
    public List<CatalogPartitionSpec> listPartitions(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec)
            throws CatalogException {
        checkNotNull(tablePath);
        // Single lookup: existence check and table id come from the same read.
        TableInfo tableInfo =
                dbManager.getTableInfoByNameAndNamespace(tablePath.getObjectName(), tablePath.getDatabaseName());
        if (tableInfo == null) {
            throw new CatalogException("table path not exist");
        }
        Map<String, String> requested = catalogPartitionSpec == null
                ? Collections.<String, String>emptyMap()
                : catalogPartitionSpec.getPartitionSpec();
        List<String> tableAllPartitionDesc = dbManager.getTableAllPartitionDesc(tableInfo.getTableId());
        List<CatalogPartitionSpec> matching = new ArrayList<>(tableAllPartitionDesc.size());
        for (String item : tableAllPartitionDesc) {
            if (null == item || "".equals(item)) {
                throw new CatalogException("partition not exist");
            }
            Map<String, String> partitionSpec = DBUtil.parsePartitionDesc(item);
            // keep the partition only if it carries every key/value pair of the requested spec
            if (partitionSpec.entrySet().containsAll(requested.entrySet())) {
                matching.add(new CatalogPartitionSpec(partitionSpec));
            }
        }
        return matching;
    }

    /**
     * Lists the partitions that satisfy every given equality filter.
     *
     * <p>For each expression the first child is used as the partition key and the second child,
     * with surrounding single quotes removed, as the expected value.
     *
     * @param tablePath database and object name of the table
     * @param list      filter expressions; call expressions must be {@code equals} calls
     * @return specs of the partitions for which all filters match
     * @throws CatalogException if the table does not exist or a call expression is not {@code equals}
     */
    @Override
    public List<CatalogPartitionSpec> listPartitionsByFilter(ObjectPath tablePath, List<Expression> list)
            throws CatalogException {
        // TODO: optimize this when filter is an exact match of one partition
        List<CatalogPartitionSpec> partitions = listPartitions(tablePath);
        for (Expression exp : list) {
            if (exp instanceof CallExpression) {
                // A call without identifier or simple name cannot be "equals"; reject it with the
                // same message instead of failing on an empty Optional.
                String functionName = ((CallExpression) exp).getFunctionIdentifier()
                        .flatMap(identifier -> identifier.getSimpleName())
                        .orElse(null);
                if (!"equals".equalsIgnoreCase(functionName)) {
                    throw new CatalogException("just support equal;such as range=val and range=val2");
                }
            }
        }
        List<CatalogPartitionSpec> catalogPartitionSpecs = new ArrayList<>();
        for (CatalogPartitionSpec cps : partitions) {
            Map<String, String> spec = cps.getPartitionSpec();
            boolean allAnd = true;
            for (Expression exp : list) {
                List<Expression> operands = exp.getChildren();
                String key = operands.get(0).toString();
                String value = convertFieldType(operands.get(1).toString());
                // value is never null; a missing key or a null stored value simply does not match
                if (!value.equals(spec.get(key))) {
                    allAnd = false;
                    break;
                }
            }
            if (allAnd) {
                catalogPartitionSpecs.add(cps);
            }
        }
        return catalogPartitionSpecs;
    }

    /**
     * Removes the single quotes that enclose a string literal.
     *
     * @param field textual form of a filter operand
     * @return {@code field} without its first and last character when it both starts and ends with
     *         a single quote and is at least two characters long; otherwise {@code field} unchanged
     */
    private String convertFieldType(String field) {
        // Requiring both quotes avoids cutting off a non-quote last character, and the length
        // check keeps a lone "'" from producing an invalid substring range.
        boolean quoted = field.length() >= 2 && field.startsWith("'") && field.endsWith("'");
        return quoted ? field.substring(1, field.length() - 1) : field;
    }

    /**
     * Fetching a single partition is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public CatalogPartition getPartition(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec)
            throws PartitionNotExistException, CatalogException {
        final String reason = "not supported";
        throw new CatalogException(reason);
    }

    /**
     * Tells whether a partition of a partitioned table exists.
     *
     * @param tablePath            database and object name of the table; must not be {@code null}
     * @param catalogPartitionSpec spec of the partition to look up; must not be {@code null}
     * @return {@code true} when the metadata store returns at least one partition info for the spec
     * @throws CatalogException if the table does not exist or its partition field equals
     *                          {@code LAKESOUL_PARTITION_SPLITTER_OF_RANGE_AND_HASH}, which this
     *                          method reports as "not partitioned"
     */
    @Override
    public boolean partitionExists(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec)
            throws CatalogException {
        checkNotNull(tablePath);
        checkNotNull(catalogPartitionSpec);
        TableInfo tableInfo =
                dbManager.getTableInfoByNameAndNamespace(tablePath.getObjectName(), tablePath.getDatabaseName());
        if (tableInfo == null) {
            throw new CatalogException(tablePath + " does not exist");
        }
        if (tableInfo.getPartitions().equals(LAKESOUL_PARTITION_SPLITTER_OF_RANGE_AND_HASH)) {
            throw new CatalogException(tablePath + " is not partitioned");
        }
        String partitionDesc = DBUtil.formatPartitionDesc(catalogPartitionSpec.getPartitionSpec());
        List<PartitionInfo> partitionInfos = dbManager.getOnePartition(tableInfo.getTableId(), partitionDesc);
        return !partitionInfos.isEmpty();
    }

    /**
     * Creating partitions explicitly is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void createPartition(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec,
                                CatalogPartition catalogPartition, boolean ignoreIfExists) throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Drops a partition: removes its metadata and recursively deletes every file path the
     * metadata store reports for it.
     *
     * @param tablePath            database and object name of the table
     * @param catalogPartitionSpec spec of the partition to drop
     * @param ignoreIfExists       not consulted by this implementation
     *                             (NOTE(review): a missing table always fails — confirm this is intended)
     * @throws CatalogException if the table does not exist or a file path cannot be deleted; the
     *                          partition metadata is already removed in the latter case
     */
    @Override
    public void dropPartition(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec, boolean ignoreIfExists)
            throws CatalogException {

        TableInfo tableInfo =
                dbManager.getTableInfoByNameAndNamespace(tablePath.getObjectName(), tablePath.getDatabaseName());
        if (tableInfo == null) {
            throw new CatalogException(tablePath + " does not exist");
        }
        String partitionDesc = DBUtil.formatPartitionDesc(catalogPartitionSpec.getPartitionSpec());
        List<String> deleteFilePath = dbManager.deleteMetaPartitionInfo(tableInfo.getTableId(), partitionDesc);
        for (String filePath : deleteFilePath) {
            Path path = new Path(filePath);
            try {
                path.getFileSystem().delete(path, true);
            } catch (IOException e) {
                // CatalogException is a RuntimeException, so existing handlers keep working,
                // but the failure now names the path and partition involved.
                throw new CatalogException(String.format("Failed to delete %s of partition %s in table %s",
                        path, partitionDesc, tablePath), e);
            }
        }
    }

    /**
     * Altering partitions is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterPartition(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec,
                               CatalogPartition catalogPartition, boolean ignoreIfExists) throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Listing functions is not implemented; every call fails.
     *
     * @param s database name; not used
     * @return never returns normally
     * @throws CatalogException always
     */
    @Override
    public List<String> listFunctions(String s) throws CatalogException {
        throw new CatalogException("not supported now");
    }

    /**
     * Looks up a catalog function; this implementation reports every function as missing.
     *
     * @param tablePath database and object name of the function
     * @return never returns normally
     * @throws FunctionNotExistException always
     */
    @Override
    public CatalogFunction getFunction(ObjectPath tablePath) throws CatalogException, FunctionNotExistException {
        // use the shared constant so the reported catalog name cannot drift from the other exceptions
        throw new FunctionNotExistException(CATALOG_NAME, tablePath);
    }

    /**
     * Checking for functions is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public boolean functionExists(ObjectPath tablePath) throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Accepts the request but stores nothing: the body is empty.
     *
     * <p>NOTE(review): {@code alterFunction} and {@code dropFunction} throw "not supported now" while
     * this method returns normally — confirm that succeeding silently is intended.
     */
    @Override
    public void createFunction(ObjectPath tablePath, CatalogFunction catalogFunction, boolean b)
            throws CatalogException {
        // no-op
    }

    /**
     * Altering functions is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterFunction(ObjectPath tablePath, CatalogFunction catalogFunction, boolean b)
            throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Dropping functions is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void dropFunction(ObjectPath tablePath, boolean b) throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Reports table statistics; none are tracked by this catalog.
     *
     * @param tablePath database and object name of the table; not used
     * @return {@link CatalogTableStatistics#UNKNOWN}, so callers never receive {@code null}
     */
    @Override
    public CatalogTableStatistics getTableStatistics(ObjectPath tablePath) {
        return CatalogTableStatistics.UNKNOWN;
    }

    /**
     * Reports column statistics of a table; none are tracked by this catalog.
     *
     * @param tablePath database and object name of the table; not used
     * @return {@link CatalogColumnStatistics#UNKNOWN}, so callers never receive {@code null}
     */
    @Override
    public CatalogColumnStatistics getTableColumnStatistics(ObjectPath tablePath) throws CatalogException {
        return CatalogColumnStatistics.UNKNOWN;
    }

    /**
     * Reports statistics of a partition; none are tracked by this catalog.
     *
     * @param tablePath            database and object name of the table; not used
     * @param catalogPartitionSpec spec of the partition; not used
     * @return {@link CatalogTableStatistics#UNKNOWN}, so callers never receive {@code null}
     */
    @Override
    public CatalogTableStatistics getPartitionStatistics(ObjectPath tablePath,
                                                         CatalogPartitionSpec catalogPartitionSpec)
            throws CatalogException {
        return CatalogTableStatistics.UNKNOWN;
    }

    /**
     * Reports column statistics of a partition; none are tracked by this catalog.
     *
     * @param tablePath            database and object name of the table; not used
     * @param catalogPartitionSpec spec of the partition; not used
     * @return {@link CatalogColumnStatistics#UNKNOWN}, so callers never receive {@code null}
     */
    @Override
    public CatalogColumnStatistics getPartitionColumnStatistics(ObjectPath tablePath,
                                                                CatalogPartitionSpec catalogPartitionSpec)
            throws CatalogException {
        return CatalogColumnStatistics.UNKNOWN;
    }

    /**
     * Updating table statistics is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterTableStatistics(ObjectPath tablePath, CatalogTableStatistics catalogTableStatistics, boolean b)
            throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Updating column statistics of a table is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterTableColumnStatistics(ObjectPath tablePath, CatalogColumnStatistics catalogColumnStatistics,
                                           boolean b) throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Updating statistics of a partition is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterPartitionStatistics(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec,
                                         CatalogTableStatistics catalogTableStatistics, boolean b)
            throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Updating column statistics of a partition is not implemented; every call fails.
     *
     * @throws CatalogException always
     */
    @Override
    public void alterPartitionColumnStatistics(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec,
                                               CatalogColumnStatistics catalogColumnStatistics, boolean b)
            throws CatalogException {
        final String reason = "not supported now";
        throw new CatalogException(reason);
    }

    /**
     * Names this catalog.
     *
     * @return the value of {@link #CATALOG_NAME}
     */
    public String getName() {
        final String catalogName = CATALOG_NAME;
        return catalogName;
    }

    /** Test helper: delegates to {@code dbManager.cleanMeta()}. */
    public void cleanForTest() {
        this.dbManager.cleanMeta();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy