org.apache.kylin.rest.service.AsyncQueryJob

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.rest.service;

import static org.apache.kylin.common.persistence.MetadataType.ACL;
import static org.apache.kylin.common.persistence.MetadataType.COMPUTE_COLUMN;
import static org.apache.kylin.common.persistence.MetadataType.DATAFLOW;
import static org.apache.kylin.common.persistence.MetadataType.FUSION_MODEL;
import static org.apache.kylin.common.persistence.MetadataType.INDEX_PLAN;
import static org.apache.kylin.common.persistence.MetadataType.LAYOUT;
import static org.apache.kylin.common.persistence.MetadataType.MODEL;
import static org.apache.kylin.common.persistence.MetadataType.NON_GLOBAL_METADATA_TYPE;
import static org.apache.kylin.common.persistence.MetadataType.OBJECT_ACL;
import static org.apache.kylin.common.persistence.MetadataType.PROJECT;
import static org.apache.kylin.common.persistence.MetadataType.RESOURCE_GROUP;
import static org.apache.kylin.common.persistence.MetadataType.SEGMENT;
import static org.apache.kylin.common.persistence.MetadataType.SQL_BLACKLIST;
import static org.apache.kylin.common.persistence.MetadataType.TABLE_EXD;
import static org.apache.kylin.common.persistence.MetadataType.TABLE_INFO;
import static org.apache.kylin.common.persistence.MetadataType.USER_GLOBAL_ACL;
import static org.apache.kylin.common.persistence.MetadataType.USER_GROUP;
import static org.apache.kylin.query.util.AsyncQueryUtil.ASYNC_QUERY_JOB_ID_PRE;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.KylinConfigExt;
import org.apache.kylin.common.QueryContext;
import org.apache.kylin.common.exception.KylinRuntimeException;
import org.apache.kylin.common.extension.KylinInfoExtension;
import org.apache.kylin.common.persistence.MetadataType;
import org.apache.kylin.common.persistence.RawResourceFilter;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.util.BufferedLogger;
import org.apache.kylin.common.util.CliCommandExecutor;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.JsonUtil;
import org.apache.kylin.engine.spark.job.DefaultSparkBuildJobHandler;
import org.apache.kylin.guava30.shaded.common.annotations.VisibleForTesting;
import org.apache.kylin.guava30.shaded.common.collect.Maps;
import org.apache.kylin.job.exception.ExecuteException;
import org.apache.kylin.job.execution.ExecuteResult;
import org.apache.kylin.job.execution.JobTypeEnum;
import org.apache.kylin.job.execution.NSparkExecutable;
import org.apache.kylin.metadata.cube.model.NBatchConstants;
import org.apache.kylin.query.util.QueryParams;
import org.apache.kylin.util.DumpInfo;
import org.apache.kylin.util.MetadataDumpUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;

import lombok.val;

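/**
 * Spark executable that runs an asynchronous query in its own Spark application
 * ({@code org.apache.kylin.query.engine.AsyncQueryApplication}). Before spark-submit it
 * serializes the query parameters and query context, dumps kylin.properties plus the
 * required metadata to the job's temporary metastore location, and applies the
 * async-query-specific Spark configuration overrides.
 */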
public class AsyncQueryJob extends NSparkExecutable {

    private static final Logger logger = LoggerFactory.getLogger(AsyncQueryJob.class);
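    // Metadata types included in the dump for the async query application; project-scoped entries
    // are filtered to the current project in getMetadataDumpList().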
    private static final MetadataType[] META_DUMP_LIST = new MetadataType[] { DATAFLOW, LAYOUT, INDEX_PLAN, MODEL,
            TABLE_INFO, TABLE_EXD, USER_GLOBAL_ACL, ACL, OBJECT_ACL, PROJECT, COMPUTE_COLUMN, SEGMENT, USER_GROUP,
            SQL_BLACKLIST, FUSION_MODEL, RESOURCE_GROUP };

    public AsyncQueryJob() {
        super();
    }

    public AsyncQueryJob(Object notSetId) {
        super(notSetId);
    }

    @Override
    protected void initHandler() {
        sparkJobHandler = new DefaultSparkBuildJobHandler();
    }

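    // Builds the spark-submit command from the Spark application descriptor and executes it via a
    // CliCommandExecutor; any orphan application left from a previous attempt with the same job id
    // is killed first, and failures are wrapped into an error ExecuteResult.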
    @Override
    protected ExecuteResult runSparkSubmit(String hadoopConf, String kylinJobJar, String appArgs) {
        val patternedLogger = new BufferedLogger(logger);
        try {
            killOrphanApplicationIfExists(getId());
            val desc = getSparkAppDesc();
            desc.setHadoopConfDir(hadoopConf);
            desc.setKylinJobJar(kylinJobJar);
            desc.setAppArgs(appArgs);
            String cmd = (String) sparkJobHandler.generateSparkCmd(KylinConfig.getInstanceFromEnv(), desc);
            CliCommandExecutor exec = getCliCommandExecutor();
            CliCommandExecutor.CliCmdExecResult r = exec.execute(cmd, patternedLogger, getId());
            return ExecuteResult.createSucceed(r.getCmd());
        } catch (Exception e) {
            return ExecuteResult.createError(e);
        }
    }

    @VisibleForTesting
    public CliCommandExecutor getCliCommandExecutor() {
        return new CliCommandExecutor();
    }

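    // Spark conf for async queries: kylin.query.async-query.spark-conf.* overrides, then the queue
    // passed with the request (if any), a 1024m driver-memory default, and SASL for the Hive
    // metastore when Hadoop security is enabled.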
    @Override
    protected Map<String, String> getSparkConfigOverride(KylinConfig config) {
        Map<String, String> overrides = config.getAsyncQuerySparkConfigOverride();

        if (StringUtils.isNotEmpty(getParam(NBatchConstants.P_QUERY_QUEUE))) {
            // async query spark queue priority: request param > project config > system config
            overrides.put("spark.yarn.queue", getParam(NBatchConstants.P_QUERY_QUEUE));
        }

        if (!overrides.containsKey("spark.driver.memory")) {
            overrides.put("spark.driver.memory", "1024m");
        }

        if (UserGroupInformation.isSecurityEnabled()) {
            overrides.put("spark.hadoop.hive.metastore.sasl.enabled", "true");
        }
        return overrides;
    }

    @Override
    protected String getJobNamePrefix() {
        return "";
    }

    @Override
    protected String getExtJar() {
        return getConfig().getKylinExtJarsPath();
    }

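    // Async query job ids are prefixed with ASYNC_QUERY_JOB_ID_PRE so they can be told apart from
    // ordinary job ids.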
    @Override
    public String getId() {
        return ASYNC_QUERY_JOB_ID_PRE + super.getId();
    }

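    /**
     * Submits the async query as a Spark job: serializes the query parameters and the current
     * QueryContext into job params, dumps kylin.properties and the metadata snapshot to the job's
     * distributed metastore location, then spark-submits AsyncQueryApplication with an args file
     * written to HDFS.
     */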
    public ExecuteResult submit(QueryParams queryParams) throws ExecuteException, JsonProcessingException {
        this.setLogPath(getSparkDriverLogHdfsPath(getConfig()));
        KylinConfig originConfig = getConfig();
        HashMap<String, String> overrideCopy = Maps.newHashMap(((KylinConfigExt) originConfig).getExtendedOverrides());
        if (StringUtils.isNotEmpty(queryParams.getSparkQueue())) {
            overrideCopy.put("kylin.query.async-query.spark-conf.spark.yarn.queue", queryParams.getSparkQueue());
        }
        KylinConfig config = KylinConfigExt.createInstance(originConfig, overrideCopy);
        String kylinJobJar = config.getKylinJobJarPath();
        if (StringUtils.isEmpty(kylinJobJar) && !config.isUTEnv()) {
            throw new KylinRuntimeException("Missing kylin job jar");
        }

        ObjectMapper fieldOnlyMapper = new ObjectMapper();
        fieldOnlyMapper.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false);
        setParam(NBatchConstants.P_QUERY_PARAMS, fieldOnlyMapper.writeValueAsString(queryParams));
        setParam(NBatchConstants.P_QUERY_CONTEXT, JsonUtil.writeValueAsString(QueryContext.current()));
        setParam(NBatchConstants.P_PROJECT_NAME, getProject());
        setParam(NBatchConstants.P_QUERY_ID, QueryContext.current().getQueryId());
        setParam(NBatchConstants.P_JOB_ID, getId());
        setParam(NBatchConstants.P_JOB_TYPE, JobTypeEnum.ASYNC_QUERY.toString());
        setParam(NBatchConstants.P_QUERY_QUEUE, queryParams.getSparkQueue());
        setDistMetaUrl(config.getJobTmpMetaStoreUrl(getProject(), getId()));

        try {
            // dump kylin.properties to HDFS
            config.setQueryHistoryUrl(config.getQueryHistoryUrl().toString());
            dumpKylinProps(config);
            // dump metadata to HDFS
            DumpInfo dumpInfo = generateDumpInfo(config, DumpInfo.DumpType.ASYNC_QUERY);
            MetadataDumpUtil.dumpMetadata(dumpInfo);
        } catch (Exception e) {
            throw new ExecuteException("kylin properties or meta dump failed", e);
        }

        return runSparkSubmit(getHadoopConfDir(), kylinJobJar,
                "-className org.apache.kylin.query.engine.AsyncQueryApplication "
                        + createArgsFileOnHDFS(config, getId()));
    }

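    // Collects the resource paths to dump: project-scoped metadata types are filtered by the
    // current project, while global types are listed in full.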
    @Override
    protected Set<String> getMetadataDumpList(KylinConfig config) {
        ResourceStore resourceStore = ResourceStore.getKylinMetaStore(config);
        Set<String> metadataDumpSet = new HashSet<>();
        for (MetadataType metaType : META_DUMP_LIST) {
            NavigableSet<String> metadata;
            if (NON_GLOBAL_METADATA_TYPE.contains(metaType)) {
                metadata = resourceStore.listResourcesRecursively(metaType.name(),
                        RawResourceFilter.equalFilter("project", getProject()));
            } else {
                metadata = resourceStore.listResourcesRecursively(metaType.name());
            }

            if (metadata != null) {
                metadataDumpSet.addAll(metadata);
            }
        }
        return metadataDumpSet;
    }

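    // Prefer the async-query-specific Hadoop conf dir when configured; otherwise fall back to the
    // default Hadoop conf dir.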
    private String getHadoopConfDir() {
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        if (StringUtils.isNotEmpty(kylinConfig.getAsyncQueryHadoopConfDir())) {
            return kylinConfig.getAsyncQueryHadoopConfDir();
        }
        return HadoopUtil.getHadoopConfDir();
    }

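    // Force-disable streaming in the dumped kylin.properties when the Kylin info extension check
    // does not pass.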
    @Override
    public void modifyDump(Properties props) {
        super.modifyDump(props);
        if (!KylinInfoExtension.getFactory().checkKylinInfo()) {
            props.setProperty("kylin.streaming.enabled", KylinConfig.FALSE);
        }
    }
}
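
// Usage sketch (illustrative, not part of this file): a caller would create the job, bind it to a
// project, and hand it the parsed query parameters. setProject(...) is assumed to come from the
// executable base class, and getProject() on QueryParams is assumed to return the target project.
//
//     AsyncQueryJob asyncQueryJob = new AsyncQueryJob();
//     asyncQueryJob.setProject(queryParams.getProject());
//     ExecuteResult result = asyncQueryJob.submit(queryParams);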