All downloads are free. The search and download features use the official Maven repository.

org.apache.kylin.rest.service.ScheduleService Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kylin.rest.service;

import static org.apache.kylin.common.constant.Constants.BACKSLASH;
import static org.apache.kylin.common.constant.Constants.METADATA_FILE;
import static org.apache.kylin.common.constant.HttpConstant.HTTP_VND_APACHE_KYLIN_V4_PUBLIC_JSON;
import static org.apache.kylin.common.exception.KylinException.CODE_SUCCESS;
import static org.apache.kylin.common.exception.KylinException.CODE_UNDEFINED;
import static org.apache.kylin.common.persistence.ResourceStore.GLOBAL_PROJECT;
import static org.apache.kylin.job.factory.JobFactoryConstant.ROUTINE_JOB_FACTORY;

import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.exception.KylinException;
import org.apache.kylin.common.exception.KylinRuntimeException;
import org.apache.kylin.common.metrics.MetricsCategory;
import org.apache.kylin.common.metrics.MetricsGroup;
import org.apache.kylin.common.metrics.MetricsName;
import org.apache.kylin.common.response.RestResponse;
import org.apache.kylin.common.util.AddressUtil;
import org.apache.kylin.common.util.JsonUtil;
import org.apache.kylin.common.util.NamedThreadFactory;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.common.util.RandomUtil;
import org.apache.kylin.common.util.SetThreadName;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.guava30.shaded.common.collect.Maps;
import org.apache.kylin.helper.RoutineToolHelper;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.job.execution.JobTypeEnum;
import org.apache.kylin.job.factory.JobFactory;
import org.apache.kylin.job.util.JobContextUtil;
import org.apache.kylin.metadata.project.NProjectManager;
import org.apache.kylin.metadata.project.ProjectInstance;
import org.apache.kylin.metadata.resourcegroup.KylinInstance;
import org.apache.kylin.metadata.resourcegroup.RequestTypeEnum;
import org.apache.kylin.metadata.resourcegroup.ResourceGroupManager;
import org.apache.kylin.metadata.resourcegroup.ResourceGroupMappingInfo;
import org.apache.kylin.rest.response.EnvelopeResponse;
import org.apache.kylin.rest.response.ServerInfoResponse;
import org.apache.kylin.tool.garbage.LogCleaner;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import com.fasterxml.jackson.core.type.TypeReference;

import lombok.val;
import lombok.extern.slf4j.Slf4j;

@Slf4j
@Service
public class ScheduleService extends BasicService {

    /** Value passed alongside {@code MetricsCategory.GLOBAL} when incrementing the routine-task counters. */
    private static final String GLOBAL = "global";

    /** URL template, formatted with a node's host, of the sparder event log cleanup endpoint. */
    private static final String CLEAN_SPARDER_EVENT_LOG = "http://%s/kylin/api/system/clean_sparder_event_log";

    static {
        // Register the routine job factory once, when this class is initialized.
        JobFactory.register(ROUTINE_JOB_FACTORY, new RoutineJob.RoutineJobFactory());
    }

    @Autowired
    @Qualifier("normalRestTemplate")
    RestTemplate restTemplate;

    @Autowired
    FileService fileService;

    @Autowired
    MetadataBackupService backupService;

    @Autowired
    ProjectService projectService;

    /** Single worker thread on which {@code executeTask} runs the routine steps one at a time. */
    private final ExecutorService executors = Executors
            .newSingleThreadExecutor(new NamedThreadFactory("RoutineTaskScheduler"));
    /** Pool of 20 threads used by {@code executeAsyncTask} for the broadcast requests. */
    private final ExecutorService asyncExecutors = new ThreadPoolExecutor(20, 20, 30, TimeUnit.MINUTES,
            new LinkedBlockingQueue<>(), new NamedThreadFactory("RoutineBroadcastScheduler"));

    /** Time budget in milliseconds for one routine run; reloaded from the config at the start of {@code doTask}. */
    private long opsCronTimeout;
    /** Path of the temporary metadata backup copy created for broadcasting; removed once the broadcast ends. */
    private String tmpMetadataBackupFilePath;

    /** Future most recently submitted by {@code executeTask} on the calling thread; cancelled on timeout. */
    private static final ThreadLocal<Future<?>> CURRENT_FUTURE = new ThreadLocal<>();

    /** Outstanding async broadcast futures mapped to their submission time in epoch milliseconds. */
    private static final Map<Future<?>, Long> ASYNC_FUTURES = Maps.newConcurrentMap();

    /**
     * Scheduled entry point of the routine operations.
     *
     * <p>The cron expression is read from the {@code kylin.metadata.ops-cron} property and falls back to
     * {@code 0 0 0 * * *} when the property is not set. The method only delegates to {@code submitJob()}.
     */
    @Scheduled(cron = "${kylin.metadata.ops-cron:0 0 0 * * *}")
    public void routineTask() {
        submitJob();
    }

    /**
     * Submits one routine cron job for every known project plus one for the global project.
     *
     * <p>Nothing is submitted when this node is not the job scheduler master; in that case the method only
     * logs and returns.
     */
    private void submitJob() {
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        if (!JobContextUtil.getJobContext(config).getJobScheduler().isMaster()) {
            log.info("Not master node, skip submitting routine job");
            return;
        }
        // Build an explicitly mutable list: Collectors.toList() gives no mutability guarantee, and the
        // global project still has to be appended after the real projects.
        List<String> projects = Lists.newArrayList();
        for (ProjectInstance instance : NProjectManager.getInstance(config).listAllProjects()) {
            projects.add(instance.getName());
        }
        projects.add(GLOBAL_PROJECT);
        for (String project : projects) {
            ExecutableManager manager = ExecutableManager.getInstance(config, project);
            manager.checkAndSubmitCronJob(ROUTINE_JOB_FACTORY, JobTypeEnum.ROUTINE);
        }
        log.info("Successfully create garbage cleanup jobs.");
    }

    /**
     * Runs the global routine operations through {@code doTask}.
     *
     * <p>The steps run one after another in the order written below and share a single time budget that is
     * measured from the moment the run starts: broadcast of the sparder event log cleanup, metadata backup,
     * broadcast of that backup in tenant mode, and then the individual cleanup steps.
     */
    public void doRoutineTaskForGlobal() {
        doTask(() -> {
            log.info("Start to work");
            val kylinConfig = KylinConfig.getInstanceFromEnv();
            long startTime = System.currentTimeMillis();
            MetricsGroup.hostTagCounterInc(MetricsName.METADATA_OPS_CRON, MetricsCategory.GLOBAL, GLOBAL);
            try (SetThreadName ignored = new SetThreadName("RoutineOpsWorker")) {
                // Filled by the backup step and read by the tenant-mode broadcast; stays null if the backup fails.
                AtomicReference<Pair<String, String>> backupFolder = new AtomicReference<>(null);
                broadcastCleanSparderEventLogToAllNodes();
                executeTask(() -> backupFolder.set(backupService.backupAll()), "MetadataBackup", startTime);
                executeMetadataBackupInTenantMode(kylinConfig, startTime, backupFolder);
                // The remaining time is computed when the task actually runs, not when it is declared.
                executeTask(() -> RoutineToolHelper.cleanQueryHistoriesAsync(getRemainingTime(startTime),
                        TimeUnit.MILLISECONDS), "QueryHistoriesCleanup", startTime);
                executeTask(RoutineToolHelper::cleanStreamingStats, "StreamingStatsCleanup", startTime);
                executeTask(RoutineToolHelper::deleteRawRecItems, "RawRecItemsDeletion", startTime);
                executeTask(RoutineToolHelper::cleanGlobalSourceUsage, "SourceUsageCleanup", startTime);
                executeTask(() -> projectService.cleanupAcl(), "AclCleanup", startTime);
                executeTask(() -> projectService.cleanRawRecForDeletedProject(), "RawRecCleanup", startTime);

                // clean storage
                executeTask(RoutineToolHelper::cleanStorageForRoutine, "HdfsCleanup", startTime);
                // clear logs for stopped instance
                executeTask(() -> new LogCleaner().cleanUp(), "RemoteLogCleanup", startTime);
                log.info("Finish to work for global, cost {}ms", System.currentTimeMillis() - startTime);
            }
            return true;
        });
    }

    /**
     * Runs the routine operations of a single project through {@code doTask}: first the project garbage
     * cleanup, then the Spark event log cleanup. Both steps share one time budget measured from the start
     * of the run.
     *
     * @param project name of the project to clean up
     */
    public void doRoutineTaskForProject(String project) {
        doTask(() -> {
            log.info("Start to work");
            final long startTime = System.currentTimeMillis();
            MetricsGroup.hostTagCounterInc(MetricsName.METADATA_OPS_CRON, MetricsCategory.GLOBAL, GLOBAL);
            try (SetThreadName ignored = new SetThreadName("RoutineOpsWorker")) {
                // The remaining time is evaluated when the step runs, so it stays inside the lambda.
                Runnable garbageCleanup = () -> projectService.garbageCleanup(project,
                        getRemainingTime(startTime));
                executeTask(garbageCleanup, "ProjectGarbageCleanup", startTime);

                // clean storage
                Runnable eventLogCleanup = () -> RoutineToolHelper
                        .cleanEventLog(RoutineToolHelper.CleanType.SPARK, project);
                executeTask(eventLogCleanup, "EventLogCleanup", startTime);

                long cost = System.currentTimeMillis() - startTime;
                log.info("Finish to work for project {}, cost {}ms", project, cost);
            }
            return true;
        });
    }

    /**
     * Runs one routine callable on the calling thread with fresh timeout bookkeeping.
     *
     * <p>The time budget is reloaded from the config, and the tracked futures are reset before and after the
     * run. On timeout, the task that is still running and every outstanding async task are cancelled. The
     * success counter is only incremented when the callable returned normally.
     *
     * @param callable the routine work to run
     * @throws KylinRuntimeException when the callable fails with anything other than an interrupt or a timeout
     */
    private void doTask(Callable<Boolean> callable) {
        val kylinConfig = KylinConfig.getInstanceFromEnv();
        opsCronTimeout = kylinConfig.getRoutineOpsTaskTimeOut();
        CURRENT_FUTURE.remove();
        ASYNC_FUTURES.clear();
        boolean completed = false;
        try {
            callable.call();
            completed = true;
        } catch (InterruptedException e) {
            log.warn("Routine task execution interrupted", e);
            Thread.currentThread().interrupt();
        } catch (TimeoutException e) {
            log.warn("Routine task execution timeout", e);
            val running = CURRENT_FUTURE.get();
            if (running != null) {
                running.cancel(true);
            }
            ASYNC_FUTURES.keySet().forEach(asyncTask -> asyncTask.cancel(true));
        } catch (Exception e) {
            throw new KylinRuntimeException("Unexpected exception.", e);
        } finally {
            // Do not leave a stale future attached to a thread that may be reused.
            CURRENT_FUTURE.remove();
            ASYNC_FUTURES.clear();
        }
        if (completed) {
            // An interrupted or timed-out run must not be reported as a success.
            MetricsGroup.hostTagCounterInc(MetricsName.METADATA_OPS_CRON_SUCCESS, MetricsCategory.GLOBAL, GLOBAL);
        }
    }

    public void executeMetadataBackupInTenantMode(KylinConfig kylinConfig, long startTime,
            AtomicReference> backupFolder) throws InterruptedException, TimeoutException {
        val rgManager = ResourceGroupManager.getInstance(kylinConfig);
        if (kylinConfig.isKylinMultiTenantEnabled() && rgManager.isResourceGroupEnabled()) {
            val servers = getResourceGroupServerNode(rgManager);
            log.info("ResourceGroupServerNode : {}", servers);
            if (servers.size() > 0) {
                try {
                    tmpMetadataBackupFilePath = "";
                    executeBroadcastMetadataBackup(() -> broadcastToServer(servers, backupFolder, startTime),
                            "broadcastMetadataBackup", startTime);
                } finally {
                    if (StringUtils.isNotBlank(tmpMetadataBackupFilePath)) {
                        fileService.deleteTmpDir(tmpMetadataBackupFilePath);
                    }
                }
                log.info("backup file path [{}] broadcast to server success", backupFolder.get().getFirst());
            }
        }
    }

    /**
     * get resource group server node without global server's resource group
     */
    public Map> getResourceGroupServerNode(ResourceGroupManager rgManager) {
        val servers = Maps.> newHashMap();
        val allResourceGroups = rgManager.getResourceGroup();
        val concurrentServer = AddressUtil.getLocalInstance();
        String concurrentServerResourceGroupId = allResourceGroups.getKylinInstances().stream()
                .filter(instance -> instance.getInstance().equals(concurrentServer))
                .map(KylinInstance::getResourceGroupId).findFirst().orElse(null);
        val buildResourceGroups = allResourceGroups.getResourceGroupMappingInfoList().stream()
                .filter(resourceGroupMappingInfo -> resourceGroupMappingInfo.getRequestType() == RequestTypeEnum.BUILD)
                .map(ResourceGroupMappingInfo::getResourceGroupId)
                .filter(groupId -> !StringUtils.equals(groupId, concurrentServerResourceGroupId))
                .collect(Collectors.toList());
        allResourceGroups.getKylinInstances().stream()
                .filter(kylinInstance -> buildResourceGroups.contains(kylinInstance.getResourceGroupId()))
                .forEach(instance -> {
                    val instances = servers.getOrDefault(instance.getResourceGroupId(), Lists.newArrayList());
                    instances.add(instance);
                    servers.put(instance.getResourceGroupId(), instances);
                });
        return servers;
    }

    public void broadcastToServer(Map> servers,
            AtomicReference> backupFolder, long startTime) {
        val backupFilePath = backupFolder.get().getFirst() + BACKSLASH + METADATA_FILE;
        val backupDir = backupFolder.get().getSecond();
        try {
            val tmpFileMessage = fileService.saveMetadataBackupInTmpPath(backupFilePath);
            tmpMetadataBackupFilePath = tmpFileMessage.getFirst();
            val tmpFileLength = tmpFileMessage.getSecond();
            for (Map.Entry> entry : servers.entrySet()) {
                val kylinInstances = entry.getValue();
                if (CollectionUtils.isNotEmpty(kylinInstances)) {
                    val server = kylinInstances.get(RandomUtil.nextInt(kylinInstances.size()));
                    log.info("routineTask[broadcastMetadataBackup] execute to groupId [{}] server [{}]", entry.getKey(),
                            server.getInstance());
                    executeAsyncTask(
                            () -> broadcastToTenantNode(entry.getKey(), backupDir, tmpMetadataBackupFilePath,
                                    tmpFileLength, server.getInstance()),
                            "broadcastToTenantNode-GroupIs[" + entry.getKey() + "]", startTime);
                }
            }
        } catch (IOException e) {
            log.error("backup file path [{}] broadcast to server has error. reason:", backupFilePath, e);
        }
    }

    /**
     * Sends a DELETE request to the sparder event log cleanup endpoint of every server known to the cluster
     * manager. A failure on one node is logged and does not prevent the remaining nodes from being notified.
     */
    private void broadcastCleanSparderEventLogToAllNodes() {
        List<ServerInfoResponse> allNodes = clusterManager.getServers();

        for (ServerInfoResponse node : allNodes) {
            val url = String.format(Locale.ROOT, CLEAN_SPARDER_EVENT_LOG, node.getHost());
            try {
                log.info("Start broadcasting to clean the sparder event log of {}", url);

                val httpHeaders = new HttpHeaders();
                httpHeaders.add(HttpHeaders.CONTENT_TYPE, HTTP_VND_APACHE_KYLIN_V4_PUBLIC_JSON);
                val response = restTemplate.exchange(url, HttpMethod.DELETE, new HttpEntity<>(httpHeaders),
                        String.class);
                receive(response, "noticeToQueryNode");
            } catch (Exception e) {
                // Best effort per node: keep going so that one unreachable node does not block the others.
                log.error("Broadcast cleaning sparder event log to {} failed!", url, e);
            }
        }
    }

    private void receive(ResponseEntity response, String msg) throws IOException {
        val responseStatus = response.getStatusCodeValue();
        if (responseStatus != HttpStatus.SC_OK) {
            log.error("{} failed, HttpStatus is {}", msg, responseStatus);
        }

        val responseBody = Optional.ofNullable(response.getBody()).orElse("");
        val responseJson = JsonUtil.readValue(responseBody, new TypeReference>() {
        });
        if (!StringUtils.equals(responseJson.getCode(), KylinException.CODE_SUCCESS)) {
            log.error("{} failed, response code is {}", msg, responseJson.getCode());
        }
    }

    /**
     * Asks one tenant node to fetch the metadata backup by POSTing the backup description to its
     * {@code broadcast_metadata_backup} endpoint. Failures are logged and never propagated, because this
     * method runs as an async task whose result is not read by the submitter.
     *
     * @param resourceGroupId id of the resource group the target node belongs to
     * @param backupDir       backup directory to report to the node
     * @param tmpFilePath     path of the temporary backup copy on this host
     * @param tmpFileLength   size of the temporary backup copy
     * @param host            address of the target node, as used in the request URL
     */
    public void broadcastToTenantNode(String resourceGroupId, String backupDir, String tmpFilePath, long tmpFileLength,
            String host) {
        try {
            val url = String.format(Locale.ROOT, "http://%s/kylin/api/system/broadcast_metadata_backup", host);
            Map<String, Object> req = Maps.newHashMap();
            req.put("resource_group_id", resourceGroupId);
            req.put("tmp_file_path", tmpFilePath);
            req.put("tmp_file_size", tmpFileLength);
            req.put("backup_dir", backupDir);
            req.put("from_host", AddressUtil.getLocalInstance());
            val httpHeaders = new HttpHeaders();
            httpHeaders.add(HttpHeaders.CONTENT_TYPE, HTTP_VND_APACHE_KYLIN_V4_PUBLIC_JSON);
            val exchange = restTemplate.exchange(url, HttpMethod.POST,
                    new HttpEntity<>(JsonUtil.writeValueAsBytes(req), httpHeaders), String.class);
            receive(exchange, "noticeToTenantNode");
        } catch (Exception e) {
            // Also covers the unchecked exceptions of the REST call, which would otherwise vanish inside
            // the future of the async task.
            log.error("noticeToTenantNode failed, groupId [{}] server [{}]", resourceGroupId, host, e);
        }
    }

    /**
     * Submits a routine step to the single-threaded executor and waits for it for at most the time that is
     * left of the routine run. The submitted future is remembered for the calling thread.
     *
     * <p>A failure of the step itself is logged and swallowed, so that later steps still run.
     *
     * @param task      the step to run
     * @param taskName  name used in the log messages
     * @param startTime start of the routine run in epoch milliseconds
     * @throws InterruptedException when the calling thread is interrupted while waiting
     * @throws TimeoutException     when the step does not finish within the remaining time
     */
    public void executeTask(Runnable task, String taskName, long startTime)
            throws InterruptedException, TimeoutException {
        final Future<?> pending = executors.submit(task);
        final long timeLeft = getRemainingTime(startTime);
        log.info("execute task {} with remaining time: {} ms", taskName, timeLeft);
        CURRENT_FUTURE.set(pending);
        try {
            pending.get(timeLeft, TimeUnit.MILLISECONDS);
        } catch (ExecutionException failure) {
            log.warn("Routine task {} execution failed, reason:", taskName, failure);
        }
    }

    /**
     * Runs the broadcast task via {@code executeTask} and then waits for, or cancels, the async tasks it
     * submitted via {@code cancelTimeoutAsyncTask}.
     *
     * @param task      the broadcast task to run
     * @param taskName  name used in the log messages
     * @param startTime start of the routine run in epoch milliseconds
     * @throws InterruptedException when the calling thread is interrupted while waiting
     * @throws TimeoutException     when the broadcast task does not finish within the remaining time
     */
    public void executeBroadcastMetadataBackup(Runnable task, String taskName, long startTime)
            throws InterruptedException, TimeoutException {
        executeTask(task, taskName, startTime);
        cancelTimeoutAsyncTask(startTime);
    }

    /**
     * Polls the tracked async tasks until all of them are done or the routine run is out of time.
     *
     * <p>Each round cancels every task whose own budget, measured from its recorded submission time, is used
     * up. The loop ends when every tracked task is done, or when the budget measured from {@code startTime}
     * is used up, in which case all unfinished tasks are cancelled. Entries are not removed from the map here.
     *
     * @param startTime start of the routine run in epoch milliseconds
     * @throws InterruptedException when the calling thread is interrupted while sleeping between rounds
     */
    public void cancelTimeoutAsyncTask(long startTime) throws InterruptedException {
        while (ASYNC_FUTURES.size() > 0) {
            // Per-task deadline: "start" is the time at which the task was submitted.
            ASYNC_FUTURES.forEach((asyncTask, start) -> {
                if (getRemainingTime(start) <= 0) {
                    asyncTask.cancel(true);
                }
            });
            // Cancelled tasks count as done as well.
            val doneTaskCount = ASYNC_FUTURES.keySet().stream().filter(Future::isDone).count();
            if (doneTaskCount == ASYNC_FUTURES.size()) {
                log.info("all running asyncTask[broadcastToServer] is done");
                break;
            }
            // Overall deadline of the routine run: give up on everything that is still running.
            if (getRemainingTime(startTime) <= 0) {
                log.warn("cancel all running asyncTask, DoneAsyncTask count: [{}], AllAsyncTask count : [{}]",
                        doneTaskCount, ASYNC_FUTURES.size());
                ASYNC_FUTURES.keySet().stream().filter(asyncTask -> !asyncTask.isDone())
                        .forEach(asyncTask -> asyncTask.cancel(true));
                break;
            }
            // Wait before the next polling round.
            TimeUnit.SECONDS.sleep(10);
        }
    }

    /**
     * Submits a task to the async executor without waiting for it and records the submission time, so that
     * the task can later be watched and cancelled.
     *
     * @param task      the task to run asynchronously
     * @param taskName  name used in the log message
     * @param startTime start of the routine run in epoch milliseconds; only used to log the remaining time
     */
    public void executeAsyncTask(Runnable task, String taskName, long startTime) {
        final Future<?> submitted = asyncExecutors.submit(task);
        final long timeLeft = getRemainingTime(startTime);
        log.info("execute async task {} with remaining time: {} ms", taskName, timeLeft);
        final long submittedAt = System.currentTimeMillis();
        ASYNC_FUTURES.put(submitted, submittedAt);
    }

    /**
     * Computes how much of the routine time budget is left.
     *
     * @param startTime reference point in epoch milliseconds
     * @return remaining milliseconds; zero or negative once the budget is used up
     */
    private long getRemainingTime(long startTime) {
        final long elapsed = System.currentTimeMillis() - startTime;
        return opsCronTimeout - elapsed;
    }

    /**
     * Forwards a garbage cleanup request to the current job master node.
     *
     * @param request the incoming HTTP request that is forwarded to the job master
     * @return a pair of result code and message; the code is {@code CODE_SUCCESS} only when the job master
     *         answered with the success code, and {@code CODE_UNDEFINED} for any failure
     */
    public Pair<String, String> triggerAllCleanupGarbage(HttpServletRequest request) {
        String jobMaster = JobContextUtil.getJobContext(KylinConfig.getInstanceFromEnv()).getJobScheduler()
                .getJobMaster();

        StringBuilder msg = new StringBuilder();
        Pair<String, String> result = new Pair<>();
        result.setFirst(CODE_SUCCESS);
        String url = "http://" + jobMaster + "/kylin/api/system/do_cleanup_garbage";
        try {
            EnvelopeResponse<?> response = generateTaskForRemoteHost(request, url);
            if (CODE_SUCCESS.equals(response.getCode())) {
                msg.append(jobMaster).append(":").append("triggered successfully").append(";");
            } else {
                // Any code other than success is a failure and must not be reported as a success.
                result.setFirst(CODE_UNDEFINED);
                msg.append(jobMaster).append(":").append("triggered failed: ").append(response.getMsg()).append(";");
            }
        } catch (Exception e) {
            // Keep the stack trace in the log; the caller only gets the message text.
            log.warn("Failed to trigger garbage cleanup on job master {}", jobMaster, e);
            result.setFirst(CODE_UNDEFINED);
            msg.append(jobMaster).append(":").append("triggered failed: ").append(e.getMessage()).append(";");
        }
        result.setSecond(msg.toString());
        return result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy