All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.kylin.job.service.InternalTableLoadingService Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.job.service;

import static org.apache.kylin.common.exception.ServerErrorCode.INTERNAL_TABLE_ERROR;
import static org.apache.kylin.common.exception.ServerErrorCode.INTERNAL_TABLE_NOT_EXIST;
import static org.apache.kylin.common.exception.ServerErrorCode.INTERNAL_TABLE_PARTITION_NOT_EXIST;
import static org.apache.kylin.common.exception.ServerErrorCode.INVALID_INTERNAL_TABLE_PARAMETER;
import static org.apache.kylin.job.execution.JobTypeEnum.INTERNAL_TABLE_BUILD;
import static org.apache.kylin.job.execution.JobTypeEnum.INTERNAL_TABLE_REFRESH;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.exception.KylinException;
import org.apache.kylin.common.msg.MsgPicker;
import org.apache.kylin.common.util.TimeUtil;
import org.apache.kylin.engine.spark.builder.InternalTableLoader;
import org.apache.kylin.engine.spark.job.InternalTableLoadJob;
import org.apache.kylin.guava30.shaded.common.collect.Sets;
import org.apache.kylin.job.dao.JobStatisticsManager;
import org.apache.kylin.job.execution.JobTypeEnum;
import org.apache.kylin.job.manager.JobManager;
import org.apache.kylin.job.model.JobParam;
import org.apache.kylin.metadata.cube.model.NBatchConstants;
import org.apache.kylin.metadata.project.EnhancedUnitOfWork;
import org.apache.kylin.metadata.sourceusage.SourceUsageManager;
import org.apache.kylin.metadata.table.InternalTableDesc;
import org.apache.kylin.metadata.table.InternalTableManager;
import org.apache.kylin.metadata.table.InternalTablePartition;
import org.apache.kylin.metadata.table.InternalTablePartitionDetail;
import org.apache.kylin.rest.response.InternalTableLoadingJobResponse;
import org.apache.kylin.rest.service.BasicService;
import org.apache.spark.sql.SparderEnv;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

@Service("internalTableLoadingService")
public class InternalTableLoadingService extends BasicService {
    private static final Logger logger = LoggerFactory.getLogger(InternalTableLoadingService.class);

    public InternalTableLoadingJobResponse loadIntoInternalTable(String project, String table, String database,
            boolean isIncremental, boolean isRefresh, String startDate, String endDate, String yarnQueue) {
        JobManager.checkStorageQuota(project);
        List jobIds = new ArrayList<>();
        JobTypeEnum jobType = isRefresh ? INTERNAL_TABLE_REFRESH : INTERNAL_TABLE_BUILD;
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            JobManager.checkStorageQuota(project);
            JobStatisticsManager jobStatisticsManager = JobStatisticsManager.getInstance(getConfig(), project);
            jobStatisticsManager.updateStatistics(TimeUtil.getDayStart(System.currentTimeMillis()), 0, 0, 1);
            InternalTableDesc internalTable = checkAndGetInternalTables(project, table, database);
            if (isIncremental && (Objects.isNull(internalTable.getTablePartition())
                    || Objects.isNull(internalTable.getTablePartition().getPartitionColumns())
                    || internalTable.getTablePartition().getPartitionColumns().length == 0)) {
                throw new KylinException(INTERNAL_TABLE_ERROR,
                        "Incremental build is not supported for unPartitioned table");
            }
            // check refresh time exceed loaded range
            InternalTablePartition tablePartition = internalTable.getTablePartition();
            if (jobType == INTERNAL_TABLE_REFRESH && !Objects.isNull(tablePartition)
                    && StringUtils.isNotEmpty(tablePartition.getDatePartitionFormat())
                    && !CollectionUtils.isEmpty(tablePartition.getPartitionValues())) {
                // List is sorted in ascending order
                List partitionValues = tablePartition.getPartitionValues();
                SimpleDateFormat formatter = new SimpleDateFormat(tablePartition.getDatePartitionFormat(), Locale.ROOT);
                long rangeStart = formatter.parse(partitionValues.get(0)).getTime();
                long rangeEnd = formatter.parse(partitionValues.get(partitionValues.size() - 1)).getTime();
                if ((StringUtils.isNotEmpty(startDate)
                        && (Long.parseLong(startDate) < rangeStart || Long.parseLong(startDate) > rangeEnd))
                        || (StringUtils.isNotEmpty(endDate)
                                && (Long.parseLong(endDate) > rangeEnd || Long.parseLong(endDate) < rangeStart))) {
                    String errorMsg = String.format(Locale.ROOT, MsgPicker.getMsg().getTimeExceedPartitionRange(),
                            partitionValues.get(0), partitionValues.get(partitionValues.size() - 1));
                    throw new KylinException(INVALID_INTERNAL_TABLE_PARAMETER, errorMsg);
                }
            }

            logger.info(
                    "create internal table loading job for table: {}, isIncrementBuild: {}, startTime: {}, endTime: {}",
                    internalTable.getIdentity(), isIncremental, startDate, endDate);
            JobParam jobParam = new JobParam().withProject(project).withTable(internalTable.getIdentity())
                    .withYarnQueue(yarnQueue).withJobTypeEnum(jobType).withOwner(BasicService.getUsername())
                    .addExtParams(NBatchConstants.P_INCREMENTAL_BUILD, String.valueOf(isIncremental))
                    .addExtParams(NBatchConstants.P_OUTPUT_MODE, String.valueOf(isRefresh))
                    .addExtParams(NBatchConstants.P_START_DATE, startDate)
                    .addExtParams(NBatchConstants.P_END_DATE, endDate);
            String jobId = getManager(SourceUsageManager.class).licenseCheckWrap(project,
                    () -> getManager(JobManager.class, project).addJob(jobParam));
            jobIds.add(jobId);
            return true;
        }, project);
        String jobName = isRefresh ? INTERNAL_TABLE_REFRESH.toString() : INTERNAL_TABLE_BUILD.toString();
        return InternalTableLoadingJobResponse.of(jobIds, jobName);
    }

    public void dropPartitions(String project, String[] partitionValues, String tableIdentity, String yarnQueue)
            throws IOException {
        // If internal table can not be obtained, will throw a kylin exception
        InternalTableDesc internalTable = checkAndGetInternalTables(project, tableIdentity);
        checkInternalTablePartitions(internalTable, partitionValues);
        InternalTableLoader internalTableLoader = new InternalTableLoader();
        String toBeDelete = String.join(",", partitionValues);
        SparkSession ss = SparderEnv.getSparkSession();
        long start = System.currentTimeMillis();
        logger.info("Start to drop partition for table {}", tableIdentity);
        internalTableLoader.dropPartitions(ss, internalTable, toBeDelete);
        InternalTableLoadJob internalTableLoadJob = new InternalTableLoadJob();
        InternalTableLoadJob.InternalTableMetaUpdateInfo info = internalTableLoadJob.extractUpdateInfo(project,
                internalTable.getIdentity(), getConfig(), ss);
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            InternalTableManager internalTableManager = InternalTableManager.getInstance(getConfig(), project);
            InternalTableDesc oldTable = checkAndGetInternalTables(project, tableIdentity);
            InternalTablePartition tablePartition = oldTable.getTablePartition();
            tablePartition.setPartitionValues(info.getPartitionValues());
            tablePartition.setPartitionDetails(info.getPartitionDetails());
            oldTable.setRowCount(info.getFinalCount());
            internalTableManager.saveOrUpdateInternalTable(oldTable);
            return true;
        }, project);

        logger.info("Successfully drop partition[{}] for table {} in {} ms", toBeDelete, tableIdentity,
                System.currentTimeMillis() - start);
    }

    private InternalTableDesc checkAndGetInternalTables(String project, String tableIdentity) {
        InternalTableManager manager = getManager(InternalTableManager.class, project);
        InternalTableDesc internalTable = manager.getInternalTableDesc(tableIdentity);
        if (internalTable == null) {
            throw new KylinException(INTERNAL_TABLE_NOT_EXIST,
                    String.format(Locale.ROOT, MsgPicker.getMsg().getInternalTableNotFound(), tableIdentity));
        }
        return internalTable;
    }

    private InternalTableDesc checkAndGetInternalTables(String project, String table, String database) {
        String dbTableName = database + "." + table;
        return checkAndGetInternalTables(project, dbTableName);
    }

    private void checkInternalTablePartitions(InternalTableDesc internalTable, String[] partitionValues) {
        InternalTablePartition internalTablePartition = internalTable.getTablePartition();
        if (null == internalTablePartition || null == internalTablePartition.getPartitionDetails()) {
            throw new KylinException(INTERNAL_TABLE_PARTITION_NOT_EXIST, String.format(Locale.ROOT,
                    MsgPicker.getMsg().getInternalTablePartitionNotFound(), StringUtils.join(partitionValues, ',')));
        }
        Set partitionValueSet = Sets.newHashSet(partitionValues);
        List partitionList = internalTablePartition.getPartitionDetails();
        for (InternalTablePartitionDetail partitionDetail : partitionList) {
            partitionValueSet.remove(partitionDetail.getPartitionValue());
        }
        if (!partitionValueSet.isEmpty()) {
            throw new KylinException(INTERNAL_TABLE_PARTITION_NOT_EXIST, String.format(Locale.ROOT,
                    MsgPicker.getMsg().getInternalTablePartitionNotFound(), StringUtils.join(partitionValueSet, ',')));
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy