/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.rest.service;

import static org.apache.kylin.common.exception.ServerErrorCode.EMPTY_PARAMETER;
import static org.apache.kylin.common.exception.ServerErrorCode.INTERNAL_TABLE_ERROR;
import static org.apache.kylin.common.exception.ServerErrorCode.INTERNAL_TABLE_NOT_EXIST;
import static org.apache.kylin.common.exception.ServerErrorCode.INTERNAL_TABLE_RELOAD_ERROR;
import static org.apache.kylin.common.exception.ServerErrorCode.INVALID_INTERNAL_TABLE_PARAMETER;
import static org.apache.kylin.common.exception.ServerErrorCode.TABLE_NOT_EXIST;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.exception.KylinException;
import org.apache.kylin.common.msg.MsgPicker;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.engine.spark.builder.InternalTableLoader;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.guava30.shaded.common.collect.Maps;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.job.execution.JobTypeEnum;
import org.apache.kylin.job.service.InternalTableLoadingService;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.NTableMetadataManager;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.project.EnhancedUnitOfWork;
import org.apache.kylin.metadata.table.InternalTableDesc;
import org.apache.kylin.metadata.table.InternalTableManager;
import org.apache.kylin.metadata.table.InternalTablePartition;
import org.apache.kylin.metadata.table.InternalTablePartitionDetail;
import org.apache.kylin.rest.response.InternalTableDescResponse;
import org.apache.kylin.rest.response.InternalTableLoadingJobResponse;
import org.apache.kylin.rest.util.AclEvaluate;
import org.apache.kylin.util.DataRangeUtils;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import lombok.val;
import scala.Option;

@Service("internalTableService")
public class InternalTableService extends BasicService {

    private static final Logger logger = LoggerFactory.getLogger(InternalTableService.class);

    @Autowired
    private AclEvaluate aclEvaluate;

    @Autowired
    private InternalTableLoadingService internalTableLoadingService;

    @Autowired
    private TableService tableService;

    /**
     * Create an internal table from an existing source table.
     *
     * @param projectName         name of the project that owns the table
     * @param table               name of the source table
     * @param database            database of the source table
     * @param partitionCols       partition columns for the internal table, or null for none
     * @param datePartitionFormat date format of the date partition column, e.g. "yyyy-MM-dd"
     * @param tblProperties       extra table properties to apply
     * @param storageType         storage type of the internal table
     * @throws Exception if validation fails or the internal table schema cannot be created
     */
    public void createInternalTable(String projectName, String table, String database, String[] partitionCols,
            String datePartitionFormat, Map<String, String> tblProperties, String storageType) throws Exception {
        String tableIdentity = database + "." + table;
        createInternalTable(projectName, tableIdentity, partitionCols, datePartitionFormat, tblProperties, storageType);
    }

    public void createInternalTable(String projectName, String tableIdentity, String[] partitionCols,
            String datePartitionFormat, Map<String, String> tblProperties, String storageType) throws Exception {
        aclEvaluate.checkProjectWritePermission(projectName);

        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            NTableMetadataManager tableMetadataManager = getManager(NTableMetadataManager.class, projectName);
            InternalTableManager internalTableManager = getManager(InternalTableManager.class, projectName);
            Map<String, String> properties = Maps.newHashMap();
            properties.putAll(tblProperties);
            TableDesc originTable = tableMetadataManager.getTableDesc(tableIdentity);
            if (Objects.isNull(originTable)) {
                String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getTableNotFound(), tableIdentity);
                throw new KylinException(TABLE_NOT_EXIST, errorMsg);
            }
            if (originTable.getHasInternal()) {
                throw new KylinException(INTERNAL_TABLE_ERROR, "Table is already an internal table");
            }
            checkParameters(partitionCols, originTable, datePartitionFormat);
            InternalTableDesc internalTable = new InternalTableDesc(originTable);
            createInternalTablePath(internalTable.generateInternalTableLocation());
            if (partitionCols != null && partitionCols.length != 0) {
                InternalTablePartition tablePartition = new InternalTablePartition(partitionCols, datePartitionFormat);
                internalTable.setTablePartition(tablePartition);
            }
            internalTable.setTblProperties(properties);
            internalTable.optimizeTblProperties();
            internalTable.setStorageType(storageType);
            internalTable.setLocation(internalTable.generateInternalTableLocation());
            createDeltaSchema(internalTable);
            tableMetadataManager.updateTableDesc(originTable.getIdentity(),
                    copyForWrite -> copyForWrite.setHasInternal(true));
            internalTableManager.saveOrUpdateInternalTable(internalTable);
            return true;
        }, projectName);
    }
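
    // Hedged usage sketch (illustrative only; the project, table, and column
    // names are hypothetical, and "GLUTEN" assumes that storage type is enabled):
    //
    //   internalTableService.createInternalTable("demo_project", "DEFAULT.ORDERS",
    //           new String[] { "ORDER_DATE" }, "yyyy-MM-dd",
    //           new HashMap<String, String>(), "GLUTEN");
    //
    // The call runs in a metadata transaction: it validates the partition columns,
    // creates the table location and Delta schema, and marks the source table
    // with hasInternal = true.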

    public void checkParameters(String[] partitionCols, TableDesc originTable, String datePartitionFormat)
            throws Exception {
        if (Objects.nonNull(partitionCols)) {
            List<ColumnDesc> partitionColList = Arrays.stream(partitionCols)
                    .map(originTable::findColumnByName).filter(Objects::nonNull)
                    .collect(Collectors.toList());
            // some requested partition columns do not exist in the source table
            if (partitionCols.length != partitionColList.size()) {
                String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getPartitionColumnNotExist(),
                        originTable.getIdentity());
                throw new KylinException(INVALID_INTERNAL_TABLE_PARAMETER, errorMsg);
            }
            Optional<ColumnDesc> dateCol = partitionColList.stream().filter(col -> col.getTypeName().equals("date"))
                    .findFirst();
            if (StringUtils.isEmpty(datePartitionFormat) && dateCol.isPresent()) {
                throw new KylinException(EMPTY_PARAMETER, "date_partition_format cannot be null, please check again");
            }

            if (dateCol.isPresent() && !StringUtils.isEmpty(datePartitionFormat)) {
                boolean isFormatMatchRealDataFormat = true;
                try {
                    // If the source table is empty, the true format cannot be obtained
                    isFormatMatchRealDataFormat = tableService.getPartitionColumnFormat(originTable.getProject(),
                            originTable.getIdentity(), dateCol.get().getName(), null).equals(datePartitionFormat);
                } catch (KylinException kylinException) {
                    logger.warn("Cannot get the real data format, skip the date format check", kylinException);
                    // other non kylin-exception will throw out
                }
                if (!isFormatMatchRealDataFormat) {
                    String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getIncorrectDateformat(),
                            datePartitionFormat);
                    throw new KylinException(INVALID_INTERNAL_TABLE_PARAMETER, errorMsg);
                }
            }
        }
    }
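
    // Validation sketch (hypothetical column names): for a source table with
    // columns ORDER_DATE (date) and COUNTRY (varchar),
    //   checkParameters(new String[] { "COUNTRY" }, originTable, null)      -> passes
    //   checkParameters(new String[] { "ORDER_DATE" }, originTable, null)   -> EMPTY_PARAMETER
    //   checkParameters(new String[] { "NO_SUCH_COL" }, originTable, null)  -> INVALID_INTERNAL_TABLE_PARAMETER
    // When a date column is partitioned and the source table has data, the given
    // format is also checked against the format detected from the real data.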

    public void createDeltaSchema(InternalTableDesc internalTable) throws Exception {
        try {
            if (internalTable.getStorageType() == InternalTableDesc.StorageType.GLUTEN
                    || internalTable.getStorageType() == InternalTableDesc.StorageType.DELTALAKE) {
                Option<SparkSession> defaultSession = SparkSession.getDefaultSession();
                InternalTableLoader internalTableLoader = new InternalTableLoader();
                internalTableLoader.onlyLoadSchema(true);
                internalTableLoader.loadInternalTable(defaultSession.get(), internalTable, "true", "", "",
                        KylinConfig.getInstanceFromEnv().getGlutenStoragePolicy(), false);
            }
        } catch (Exception e) {
            // delete delta log on hdfs
            HadoopUtil.deletePath(HadoopUtil.getCurrentConfiguration(), new Path(internalTable.getLocation()));
            throw e;
        }
    }
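
    // Note: schema creation relies on SparkSession.getDefaultSession(). As a
    // hedged sketch, a caller without a guaranteed active session might guard
    // the call before invoking createDeltaSchema:
    //
    //   if (SparkSession.getDefaultSession().isEmpty()) {
    //       throw new IllegalStateException("No default SparkSession available");
    //   }
    //
    // On failure, the freshly written Delta log is removed so that a retry
    // starts from a clean location.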

    /**
     * Create an internal table from a source table, with no partition columns
     * or table properties specified.
     *
     * @param project     name of the project that owns the table
     * @param originTable descriptor of the source table
     * @param storageType storage type of the internal table
     */
    public void createInternalTable(String project, TableDesc originTable, String storageType) throws Exception {
        createInternalTable(project, originTable.getName(), originTable.getDatabase(), null, null, new HashMap<>(),
                storageType);
    }

    public void updateInternalTable(String project, String table, String database, String[] partitionCols,
            String datePartitionFormat, Map<String, String> tblProperties, String storageType) {
        aclEvaluate.checkProjectWritePermission(project);
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            String dbTblName = database + "." + table;
            NTableMetadataManager tableMetadataManager = getManager(NTableMetadataManager.class, project);
            InternalTableManager internalTableManager = getManager(InternalTableManager.class, project);
            TableDesc originTable = tableMetadataManager.getTableDesc(dbTblName);
            InternalTableDesc internalTable = internalTableManager.getInternalTableDesc(dbTblName);
            if (Objects.isNull(internalTable)) {
                String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getInternalTableNotFound(), dbTblName);
                throw new KylinException(INTERNAL_TABLE_NOT_EXIST, errorMsg);
            }
            if (internalTable.getRowCount() > 0L) {
                throw new KylinException(INTERNAL_TABLE_ERROR, "Non-empty internal table cannot be updated");
            }
            checkParameters(partitionCols, originTable, datePartitionFormat);
            if (partitionCols != null && partitionCols.length != 0) {
                InternalTablePartition tablePartition = new InternalTablePartition(partitionCols, datePartitionFormat);
                internalTable.setTablePartition(tablePartition);
            } else {
                internalTable.setTablePartition(null);
            }
            internalTable.setTblProperties(tblProperties);
            internalTable.optimizeTblProperties();
            internalTable.setStorageType(storageType);
            suicideRunningInternalTableJob(project, dbTblName);
            deleteMetaAndDataInFileSystem(internalTable);
            createDeltaSchema(internalTable);
            internalTableManager.saveOrUpdateInternalTable(internalTable);
            return true;
        }, project);
    }
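
    // Hedged usage sketch (names are hypothetical): re-partitioning an internal
    // table before its first load. Updating recreates the schema from scratch,
    // which is why non-empty tables are rejected above.
    //
    //   internalTableService.updateInternalTable("demo_project", "ORDERS", "DEFAULT",
    //           new String[] { "COUNTRY" }, null, new HashMap<String, String>(), "DELTALAKE");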

    protected void createInternalTablePath(String path) {
        try {
            FileSystem fs = HadoopUtil.getWorkingFileSystem();
            Path location = new Path(path);
            fs.mkdirs(location);
        } catch (IOException e) {
            throw new KylinException(INTERNAL_TABLE_ERROR, "Failed to create internal table location", e);
        }
    }

    protected void deleteMetaAndDataInFileSystem(InternalTableDesc internalTable) {
        try {
            FileSystem fs = HadoopUtil.getWorkingFileSystem();
            Path location = new Path(internalTable.getLocation());
            if (fs.exists(location)) {
                HadoopUtil.deletePath(HadoopUtil.getCurrentConfiguration(), location);
                logger.info("Successfully deleted internal table on {}", internalTable.getLocation());
            } else {
                logger.warn("Internal table {}'s root path {} is not exists, skip delete", internalTable.getIdentity(),
                        internalTable.getLocation());
            }
        } catch (IOException e) {
            logger.error("Failed to delete internal table on {}", internalTable.getLocation(), e);
        }
    }

    public void suicideRunningInternalTableJob(String project, String table) {
        try {
            ExecutableManager.getInstance(KylinConfig.getInstanceFromEnv(), project).suicideRunningJobByJobType(project,
                    table,
                    Lists.newArrayList(JobTypeEnum.INTERNAL_TABLE_BUILD.name(),
                            JobTypeEnum.INTERNAL_TABLE_REFRESH.name(),
                            JobTypeEnum.INTERNAL_TABLE_DELETE_PARTITION.name()));
        } catch (Exception e) {
            logger.warn("Failed to suicide running internal table job for table {}", table, e);
        }
    }

    // 1. stop any running internal table jobs
    // 2. clear the hasInternal mark on the source table and remove the internal table metadata
    // 3. delete data in the file system
    public void dropInternalTable(String project, String tableIdentity) {
        aclEvaluate.checkProjectWritePermission(project);
        suicideRunningInternalTableJob(project, tableIdentity);
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            NTableMetadataManager tableMetadataManager = getManager(NTableMetadataManager.class, project);
            InternalTableManager internalTableManager = getManager(InternalTableManager.class, project);
            InternalTableDesc internalTable = internalTableManager.getInternalTableDesc(tableIdentity);
            if (Objects.isNull(internalTable)) {
                String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getInternalTableNotFound(),
                        tableIdentity);
                throw new KylinException(TABLE_NOT_EXIST, errorMsg);
            }
            tableMetadataManager.updateTableDesc(tableIdentity, copyForWrite -> copyForWrite.setHasInternal(false));
            internalTableManager.removeInternalTable(tableIdentity);
            deleteMetaAndDataInFileSystem(internalTable);
            return true;
        }, project);

    }
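
    // Hedged usage sketch (hypothetical identity): dropping an internal table
    // first kills its running load jobs, then clears metadata and storage.
    //
    //   internalTableService.dropInternalTable("demo_project", "DEFAULT.ORDERS");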

    // 1. delete data in file system
    // 2. clear partition values in internal table meta
    public InternalTableLoadingJobResponse truncateInternalTable(String project, String tableIdentity)
            throws Exception {
        aclEvaluate.checkProjectWritePermission(project);
        InternalTableManager internalTableManager = getManager(InternalTableManager.class, project);
        InternalTableDesc internalTable = internalTableManager.getInternalTableDesc(tableIdentity);
        if (Objects.isNull(internalTable)) {
            String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getInternalTableNotFound(), tableIdentity);
            throw new KylinException(INTERNAL_TABLE_NOT_EXIST, errorMsg);
        }
        suicideRunningInternalTableJob(project, tableIdentity);
        long start = System.currentTimeMillis();
        deleteMetaAndDataInFileSystem(internalTable);
        createDeltaSchema(internalTable);
        val fs = HadoopUtil.getWorkingFileSystem();
        // -1 indicates that an error occurred while obtaining file statistics
        long storageSize = -1;
        try {
            storageSize = HadoopUtil.getContentSummary(fs, new Path(internalTable.getLocation())).getLength();
        } catch (IOException e) {
            logger.warn("Fetch storage size for internal table {} from {} failed caused by:",
                    internalTable.getIdentity(), internalTable.getLocation(), e);
        }
        long finalStorageSize = storageSize;
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            InternalTableManager img = getManager(InternalTableManager.class, project);
            InternalTableDesc oldTable = img.getInternalTableDesc(tableIdentity);
            InternalTablePartition tablePartition = oldTable.getTablePartition();
            if (tablePartition != null) {
                tablePartition.setPartitionValues(new ArrayList<>());
                tablePartition.setPartitionDetails(new ArrayList<>());
            }
            oldTable.setStorageSize(finalStorageSize);
            oldTable.setRowCount(0);
            img.saveOrUpdateInternalTable(oldTable);
            return true;
        }, project);
        logger.info("Successfully truncate internal table {} in {} ms", tableIdentity,
                System.currentTimeMillis() - start);
        return InternalTableLoadingJobResponse.of(new ArrayList<>(), "");
    }
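
    // Hedged usage sketch (hypothetical identity): truncation keeps the table
    // definition but clears data, partition values, and row count.
    //
    //   InternalTableLoadingJobResponse resp =
    //           internalTableService.truncateInternalTable("demo_project", "DEFAULT.ORDERS");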

    // 1. delete partition data in the file system
    // 2. update partition values in the internal table metadata
    // The deletion runs as a Spark job that calls the Delta delete API,
    // so the Delta metadata is updated along with the data.
    public void dropPartitionsOnDeltaTable(String project, String tableIdentity,
            String[] partitionValues, String yarnQueue) throws IOException {
        aclEvaluate.checkProjectWritePermission(project);
        internalTableLoadingService.dropPartitions(project, partitionValues, tableIdentity, yarnQueue);
    }
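
    // Hedged usage sketch (hypothetical values; the expected partition-value
    // format depends on the table's partition column): dropping two partitions
    // with yarnQueue == null, i.e. without a dedicated YARN queue.
    //
    //   internalTableService.dropPartitionsOnDeltaTable("demo_project", "DEFAULT.ORDERS",
    //           new String[] { "2024-01-01", "2024-01-02" }, null);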

    public void reloadInternalTableSchema(String project, String tableIdentity) throws Exception {
        aclEvaluate.checkProjectWritePermission(project);
        InternalTableManager internalTableManager = getManager(InternalTableManager.class, project);
        InternalTableDesc internalTable = internalTableManager.getInternalTableDesc(tableIdentity);
        if (internalTable != null) {
            if (internalTable.getRowCount() != 0) {
                throw new KylinException(INTERNAL_TABLE_RELOAD_ERROR,
                        String.format(Locale.ROOT,
                                MsgPicker.getMsg().getFailedReloadNoneEmptyInternalTable(), tableIdentity));
            }
            dropInternalTable(project, tableIdentity);
            createInternalTable(project, tableIdentity, internalTable.getPartitionColumns(),
                    internalTable.getDatePartitionFormat(), internalTable.getTblProperties(),
                    internalTable.getStorageType().toString());
        }
    }

    /**
     * Submit a job that loads source data into an internal table.
     *
     * @param project       name of the project that owns the table
     * @param table         name of the internal table
     * @param database      database of the internal table
     * @param isIncremental whether to load only the given date range
     * @param isRefresh     whether this load refreshes already-loaded data
     * @param startDate     start of the data range for incremental loads
     * @param endDate       end of the data range for incremental loads
     * @param yarnQueue     if not null, use Hadoop YARN resources to build; otherwise use Spark standalone
     * @return response describing the submitted loading job
     */
    public InternalTableLoadingJobResponse loadIntoInternalTable(String project, String table, String database,
            boolean isIncremental, boolean isRefresh, String startDate, String endDate, String yarnQueue) {
        aclEvaluate.checkProjectWritePermission(project);
        if (isIncremental) {
            DataRangeUtils.validateRange(startDate, endDate);
        }
        return internalTableLoadingService.loadIntoInternalTable(project, table, database, isIncremental, isRefresh,
                startDate, endDate, yarnQueue);
    }
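
    // Hedged usage sketch (hypothetical values; the accepted date-string format
    // is whatever DataRangeUtils.validateRange expects): incremental load of a
    // single day using Spark standalone resources (yarnQueue == null).
    //
    //   internalTableService.loadIntoInternalTable("demo_project", "ORDERS", "DEFAULT",
    //           true, false, "2024-01-01", "2024-01-02", null);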

    public List<InternalTableDescResponse> getTableList(String project) {
        InternalTableManager internalTableManager = getManager(InternalTableManager.class, project);
        List<InternalTableDesc> tableList = internalTableManager.listAllTables();
        List<InternalTableDescResponse> descList = Lists.newArrayList();
        tableList.forEach(internalTableDesc -> {
            InternalTableDescResponse internalTableDescResponse = new InternalTableDescResponse();
            internalTableDescResponse.setTableName(internalTableDesc.getName());
            internalTableDescResponse.setUuid(internalTableDesc.getUuid());
            internalTableDescResponse.setDatabaseName(internalTableDesc.getDatabase());
            internalTableDescResponse.setRowCount(internalTableDesc.getRowCount());
            internalTableDescResponse.setStorageSize(internalTableDesc.getStorageSize());
            internalTableDescResponse.setHitCount(internalTableDesc.getHitCount());
            String[] partitionColumns = internalTableDesc.getPartitionColumns();
            String partitionColumn = (partitionColumns == null || partitionColumns.length == 0) ? null
                    : internalTableDesc.getPartitionColumns()[0];
            internalTableDescResponse.setTimePartitionCol(partitionColumn);
            internalTableDescResponse.setUpdateTime(internalTableDesc.getLastModified());
            internalTableDescResponse.setDatePartitionFormat(internalTableDesc.getDatePartitionFormat());
            internalTableDescResponse.setTblProperties(internalTableDesc.getTblProperties());
            descList.add(internalTableDescResponse);
        });
        return descList;
    }

    public List<InternalTablePartitionDetail> getTableDetail(String project, String databaseName, String tableName) {
        InternalTableManager internalTableManager = getManager(InternalTableManager.class, project);
        String tableIdentity = databaseName + "." + tableName;
        InternalTableDesc internalTableDesc = internalTableManager.getInternalTableDesc(tableIdentity);
        if (internalTableDesc == null) {
            throw new KylinException(INTERNAL_TABLE_NOT_EXIST,
                    String.format(Locale.ROOT, MsgPicker.getMsg().getInternalTableNotFound(), tableIdentity));
        }
        if (internalTableDesc.getTablePartition() == null) {
            return null;
        }
        return internalTableDesc.getTablePartition().getPartitionDetails();
    }

}