All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.kylin.tool.daemon.checker.KEStatusChecker Maven / Gradle / Ivy

There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kylin.tool.daemon.checker;

import java.util.List;
import java.util.Locale;

import org.apache.commons.collections.CollectionUtils;
import org.apache.kylin.common.exception.KylinException;
import org.apache.kylin.common.util.SecretKeyUtil;
import org.apache.kylin.tool.daemon.CheckResult;
import org.apache.kylin.tool.daemon.CheckStateEnum;
import org.apache.kylin.tool.util.ToolUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;

public class KEStatusChecker extends AbstractHealthChecker {
    public static final String PERMISSION_DENIED = "Check permission failed!";
    private static final Logger logger = LoggerFactory.getLogger(KEStatusChecker.class);
    private int failCount = 0;

    public KEStatusChecker() {
        setPriority(100000);
    }

    /**
     * KE restart, KG have to restart.
     * @return
     */
    private byte[] getEncryptedTokenForKAPHealth() throws Exception {
        try {
            if (null == getKgSecretKey()) {
                setKgSecretKey(SecretKeyUtil.readKGSecretKeyFromFile());
            }

            if (null == getKePid()) {
                setKEPid(ToolUtil.getKylinPid());
            }
            return SecretKeyUtil.generateEncryptedTokenWithPid(getKgSecretKey(), getKePid());
        } catch (Exception e) {
            logger.error("Read KG secret key from file failed.");
            throw e;
        }
    }

    @VisibleForTesting
    public EnvelopeResponse getHealthStatus() throws Exception {
        TypeReference> typeRef = new TypeReference>() {
        };
        byte[] encryptedToken = getEncryptedTokenForKAPHealth();
        return getRestClient().getKapHealthStatus(typeRef, encryptedToken);
    }

    @Override
    CheckResult doCheck() {
        try {
            EnvelopeResponse response = getHealthStatus();
            if (!KylinException.CODE_SUCCESS.equals(response.code)) {
                if (PERMISSION_DENIED.equals(response.getMsg())) {
                    setKgSecretKey(null);
                }

                throw new IllegalStateException("Get KE health status failed: " + response.msg);
            }

            Status status = response.getData();

            StringBuilder sb = new StringBuilder();

            boolean sparkRestart = false;
            boolean slowQueryRestart = false;

            SparkStatus sparkStatus = status.getSparkStatus();
            if (getKylinConfig().isSparkFailRestartKeEnabled()
                    && sparkStatus.getFailureTimes() >= getKylinConfig().getGuardianSparkFailThreshold()) {
                sparkRestart = true;
                sb.append(String.format(Locale.ROOT,
                        "Spark restart failure reach %s times, last restart failure time %s. ",
                        getKylinConfig().getGuardianSparkFailThreshold(), sparkStatus.getLastFailureTime()));
            }

            List slowQueryStatusList = status.getCanceledSlowQueryStatus();
            if (CollectionUtils.isNotEmpty(slowQueryStatusList)) {
                long failedKillQueries = slowQueryStatusList.stream().filter(slowQueryStatus -> slowQueryStatus
                        .getCanceledTimes() >= getKylinConfig().getGuardianSlowQueryKillFailedThreshold()).count();

                if (getKylinConfig().isSlowQueryKillFailedRestartKeEnabled() && failedKillQueries > 0) {
                    slowQueryRestart = true;
                    sb.append(String.format(Locale.ROOT, "Have slowQuery be canceled reach %s times. ",
                            getKylinConfig().getGuardianSparkFailThreshold()));
                }
            }

            if (sparkRestart || slowQueryRestart) {
                return new CheckResult(CheckStateEnum.RESTART, sb.toString());
            }

            failCount = 0;
            return new CheckResult(CheckStateEnum.NORMAL);
        } catch (Exception e) {
            logger.info("Check KE status failed! ", e);

            if (++failCount >= getKylinConfig().getGuardianApiFailThreshold()) {
                return new CheckResult(CheckStateEnum.RESTART, String.format(Locale.ROOT,
                        "Instance is in inaccessible status, API failed count reach %d", failCount));
            } else {
                return new CheckResult(CheckStateEnum.WARN, e.getMessage());
            }
        }
    }

    @Getter
    @Setter
    @AllArgsConstructor
    @NoArgsConstructor
    public static class EnvelopeResponse {
        protected String code;
        protected T data;
        protected String msg;
    }

    @Getter
    @Setter
    @AllArgsConstructor
    @NoArgsConstructor
    public static class Status {
        @JsonProperty("spark_status")
        private SparkStatus sparkStatus;

        @JsonProperty("slow_queries_status")
        private List canceledSlowQueryStatus;
    }

    @Getter
    @Setter
    @AllArgsConstructor
    @NoArgsConstructor
    public static class SparkStatus {
        @JsonProperty("restart_failure_times")
        private int failureTimes;
        @JsonProperty("last_restart_failure_time")
        private long lastFailureTime;
    }

    @Getter
    @Setter
    @AllArgsConstructor
    @NoArgsConstructor
    public static class CanceledSlowQueryStatus {
        @JsonProperty("query_id")
        private String queryId;
        @JsonProperty("canceled_times")
        private int canceledTimes;
        @JsonProperty("last_canceled_time")
        private long lastCanceledTime;
        @JsonProperty("duration_time")
        private float queryDurationTime;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy