All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.kylin.engine.mr.CubingJob Maven / Gradle / Ivy

There is a newer version: 3.1.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.engine.mr;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.regex.Matcher;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.common.MapReduceExecutable;
import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.job.engine.JobEngineConfig;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.job.execution.DefaultChainedExecutable;
import org.apache.kylin.job.execution.ExecutableContext;
import org.apache.kylin.job.execution.ExecutableState;
import org.apache.kylin.job.execution.ExecuteResult;
import org.apache.kylin.job.execution.Output;
import org.apache.kylin.job.metrics.JobMetricsFacade;
import org.apache.kylin.job.util.MailNotificationUtil;
import org.apache.kylin.metadata.project.ProjectInstance;
import org.apache.kylin.metadata.project.ProjectManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Maps;

/**
 * Chained executable that represents one cubing job (build, optimize or merge) of a cube segment.
 *
 * <p>Besides the factory methods that create and label a job, this class builds the title and
 * content of the job notification mail, sums up the MapReduce wait time of its sub tasks when
 * execution finishes, and reports job statistics through {@link JobMetricsFacade}.
 */
public class CubingJob extends DefaultChainedExecutable {

    private static final Logger logger = LoggerFactory.getLogger(CubingJob.class);

    /** Cubing algorithm recorded in the extra info of a job, see {@link #setAlgorithm(AlgorithmEnum)}. */
    public enum AlgorithmEnum {
        LAYER, INMEM
    }

    /** Type of a cubing job together with the default priority used for that type. */
    public enum CubingJobTypeEnum {
        BUILD("BUILD", 20), OPTIMIZE("OPTIMIZE", 5), MERGE("MERGE", 25);

        private final String name;
        private final int defaultPriority;

        CubingJobTypeEnum(String name, int priority) {
            this.name = name;
            this.defaultPriority = priority;
        }

        /** @return the default priority configured for this job type */
        public int getDefaultPriority() {
            return defaultPriority;
        }

        /** @return the display name of this job type, as passed to the constructor */
        @Override
        public String toString() {
            return name;
        }

        /**
         * Looks up a job type by its name, ignoring case.
         *
         * @param name the job type name; may be {@code null} or empty
         * @return the matching job type, or {@code null} if {@code name} is null, empty or unknown
         */
        public static CubingJobTypeEnum getByName(String name) {
            if (Strings.isNullOrEmpty(name)) {
                return null;
            }
            // Normalize once, outside the loop; Locale.ROOT keeps the result locale-independent.
            final String upperName = name.toUpperCase(Locale.ROOT);
            for (CubingJobTypeEnum jobTypeEnum : values()) {
                if (jobTypeEnum.name.equals(upperName)) {
                    return jobTypeEnum;
                }
            }
            return null;
        }
    }

    //32MB per block created by the first step
    private static final long MIN_SOURCE_SIZE = 32L * 1024 * 1024;

    private static final long MILLIS_PER_MINUTE = 60000L;

    // KEYS of Output.extraInfo map, info passed across job steps
    public static final String SOURCE_RECORD_COUNT = "sourceRecordCount";
    public static final String SOURCE_SIZE_BYTES = "sourceSizeBytes";
    // NOTE(review): the value "byteSizeBytes" does not match the constant name; it is a lookup key,
    // so presumably it must stay as is for already stored job outputs -- confirm before changing.
    public static final String CUBE_SIZE_BYTES = "byteSizeBytes";
    public static final String MAP_REDUCE_WAIT_TIME = "mapReduceWaitTime";
    private static final String ALGORITHM = "algorithm";

    // KEYS of the job parameters
    private static final String DEPLOY_ENV_NAME = "envName";
    private static final String PROJECT_INSTANCE_NAME = "projectName";
    private static final String JOB_TYPE = "jobType";

    /**
     * Creates a job of type {@link CubingJobTypeEnum#BUILD} for the given segment.
     *
     * @param seg the segment the job works on
     * @param submitter the user recorded as submitter of the job
     * @param config the job engine configuration
     * @return the initialized job
     * @throws RuntimeException if the project of the segment's cube cannot be determined
     */
    public static CubingJob createBuildJob(CubeSegment seg, String submitter, JobEngineConfig config) {
        return initCubingJob(seg, CubingJobTypeEnum.BUILD.toString(), submitter, config);
    }

    /**
     * Creates a job of type {@link CubingJobTypeEnum#OPTIMIZE} for the given segment.
     *
     * @param seg the segment the job works on
     * @param submitter the user recorded as submitter of the job
     * @param config the job engine configuration
     * @return the initialized job
     * @throws RuntimeException if the project of the segment's cube cannot be determined
     */
    public static CubingJob createOptimizeJob(CubeSegment seg, String submitter, JobEngineConfig config) {
        return initCubingJob(seg, CubingJobTypeEnum.OPTIMIZE.toString(), submitter, config);
    }

    /**
     * Creates a job of type {@link CubingJobTypeEnum#MERGE} for the given segment.
     *
     * @param seg the segment the job works on
     * @param submitter the user recorded as submitter of the job
     * @param config the job engine configuration
     * @return the initialized job
     * @throws RuntimeException if the project of the segment's cube cannot be determined
     */
    public static CubingJob createMergeJob(CubeSegment seg, String submitter, JobEngineConfig config) {
        return initCubingJob(seg, CubingJobTypeEnum.MERGE.toString(), submitter, config);
    }

    /**
     * Creates a job and fills in its parameters: deploy environment, project, job type, cube name,
     * segment id and name, display name, submitter and notify list.
     *
     * @throws RuntimeException if no project contains the cube, or if several projects contain it
     *         and the configuration does not allow a cube to appear in multiple projects
     */
    private static CubingJob initCubingJob(CubeSegment seg, String jobType, String submitter, JobEngineConfig config) {
        KylinConfig kylinConfig = config.getConfig();
        CubeInstance cube = seg.getCubeInstance();
        List<ProjectInstance> projList = ProjectManager.getInstance(kylinConfig).findProjects(cube.getType(),
                cube.getName());
        if (projList == null || projList.isEmpty()) {
            throw new RuntimeException("Cannot find the project containing the cube " + cube.getName() + "!!!");
        } else if (projList.size() >= 2) {
            String msg = "Find more than one project containing the cube " + cube.getName()
                    + ". It does't meet the uniqueness requirement!!! ";
            if (!kylinConfig.allowCubeAppearInMultipleProjects()) {
                throw new RuntimeException(msg);
            } else {
                logger.warn(msg);
            }
        }

        CubingJob result = new CubingJob();
        // SimpleDateFormat is not thread-safe, hence a fresh instance per call.
        SimpleDateFormat format = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
        format.setTimeZone(TimeZone.getTimeZone(config.getTimeZone()));
        result.setDeployEnvName(kylinConfig.getDeployEnv());
        // When several projects contain the cube (and that is allowed), the first one is used.
        result.setProjectName(projList.get(0).getName());
        result.setJobType(jobType);
        CubingExecutableUtil.setCubeName(cube.getName(), result.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), result.getParams());
        CubingExecutableUtil.setSegmentName(seg.getName(), result.getParams());
        result.setName(jobType + " CUBE - " + cube.getDisplayName() + " - " + seg.getName() + " - "
                + format.format(new Date()));
        result.setSubmitter(submitter);
        result.setNotifyList(cube.getDescriptor().getNotifyList());
        return result;
    }

    public CubingJob() {
        super();
    }

    /**
     * @return the default priority of this job's type, or the inherited default priority when the
     *         job type parameter is missing or unknown
     */
    @Override
    public int getDefaultPriority() {
        CubingJobTypeEnum jobType = CubingJobTypeEnum.getByName(getJobType());
        if (jobType == null) {
            return super.getDefaultPriority();
        }
        return jobType.getDefaultPriority();
    }

    protected void setDeployEnvName(String name) {
        setParam(DEPLOY_ENV_NAME, name);
    }

    /** @return the deploy environment name stored in the job parameters */
    public String getDeployEnvName() {
        return getParam(DEPLOY_ENV_NAME);
    }

    protected void setProjectName(String name) {
        setParam(PROJECT_INSTANCE_NAME, name);
    }

    /** @return the project name stored in the job parameters */
    public String getProjectName() {
        return getParam(PROJECT_INSTANCE_NAME);
    }

    /** @return the job type stored in the job parameters, see {@link CubingJobTypeEnum} */
    public String getJobType() {
        return getParam(JOB_TYPE);
    }

    void setJobType(String jobType) {
        setParam(JOB_TYPE, jobType);
    }

    /**
     * Builds the title and content of the notification mail for this job.
     *
     * @param context the executable context, used to look up the cube
     * @param state the state passed by the caller; it is replaced by the state read from this
     *        job's stored output before it is used
     * @return a pair of (title, content), or {@code null} if the state does not need a notification
     *         or no mail template exists for it
     */
    @Override
    protected Pair<String, String> formatNotifications(ExecutableContext context, ExecutableState state) {
        CubeInstance cubeInstance = CubeManager.getInstance(context.getConfig())
                .getCube(CubingExecutableUtil.getCubeName(this.getParams()));
        final Output output = getManager().getOutput(getId());
        // The state read from the job output takes the place of the state passed in.
        state = output.getState();
        // ERROR is always notified; any other state only if the cube descriptor lists it.
        if (state != ExecutableState.ERROR
                && !cubeInstance.getDescriptor().getStatusNeedNotify().contains(state.toString())) {
            logger.info("state:{} no need to notify users", state);
            return null;
        }

        if (!MailNotificationUtil.hasMailNotification(state)) {
            logger.info("Cannot find email template for job state: {}", state);
            return null;
        }

        Map<String, Object> dataMap = Maps.newHashMap();
        dataMap.put("job_name", getName());
        dataMap.put("env_name", getDeployEnvName());
        dataMap.put("submitter", StringUtil.noBlank(getSubmitter(), "missing submitter"));
        dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
        dataMap.put("project_name", getProjectName());
        dataMap.put("cube_name", cubeInstance.getName());
        dataMap.put("source_records_count", String.valueOf(findSourceRecordCount()));
        dataMap.put("start_time", new Date(getStartTime()).toString());
        dataMap.put("duration", getDuration() / MILLIS_PER_MINUTE + "mins");
        dataMap.put("mr_waiting", getMapReduceWaitTime() / MILLIS_PER_MINUTE + "mins");
        dataMap.put("last_update_time", new Date(getLastModified()).toString());

        if (state == ExecutableState.ERROR) {
            putErrorDetails(dataMap, output);
        }

        String content = MailNotificationUtil.getMailContent(state, dataMap);
        String title = MailNotificationUtil.getMailTitle("JOB", state.toString(), getDeployEnvName(), getProjectName(),
                cubeInstance.getName());
        return Pair.newPair(title, content);
    }

    /**
     * Adds the failed step name, its MapReduce job id and the error log of the job to the mail data.
     *
     * @param dataMap the mail template data to extend
     * @param jobOutput the output of this job, source of the error log
     * @throws NullPointerException if none of the sub tasks is in state ERROR
     */
    private void putErrorDetails(Map<String, Object> dataMap, Output jobOutput) {
        // Find the first sub task whose stored output is in state ERROR.
        AbstractExecutable errorTask = null;
        Output errorOutput = null;
        for (AbstractExecutable task : getTasks()) {
            errorOutput = getManager().getOutput(task.getId());
            if (errorOutput.getState() == ExecutableState.ERROR) {
                errorTask = task;
                break;
            }
        }
        Preconditions.checkNotNull(errorTask,
                "None of the sub tasks of cubing job " + getId() + " is error and this job should become success.");

        dataMap.put("error_step", errorTask.getName());
        if (errorTask instanceof MapReduceExecutable) {
            final String mrJobId = errorOutput.getExtra().get(ExecutableConstants.MR_JOB_ID);
            dataMap.put("mr_job_id", StringUtil.noBlank(mrJobId, "Not initialized"));
        } else {
            dataMap.put("mr_job_id", MailNotificationUtil.NA);
        }
        // quoteReplacement escapes '\' and '$' in the message.
        // NOTE(review): presumably needed because the mail content is produced via regex replacement -- confirm.
        dataMap.put("error_log",
                Matcher.quoteReplacement(StringUtil.noBlank(jobOutput.getVerboseMsg(), "no error message")));
    }

    /**
     * Sums up the MapReduce wait time of the leading successful sub tasks and stores it in the
     * extra info of this job before delegating to the parent implementation.
     */
    @Override
    protected void onExecuteFinished(ExecuteResult result, ExecutableContext executableContext) {
        long time = 0L;
        for (AbstractExecutable task : getTasks()) {
            final ExecutableState status = task.getStatus();
            // Stop at the first sub task that did not succeed; later tasks are not counted.
            if (status != ExecutableState.SUCCEED) {
                break;
            }
            if (task instanceof MapReduceExecutable) {
                time += ((MapReduceExecutable) task).getMapReduceWaitTime();
            }
        }
        setMapReduceWaitTime(time);
        super.onExecuteFinished(result, executableContext);
    }

    /** Delegates to the parent implementation and then reports the job metrics. */
    @Override
    protected void onStatusChange(ExecutableContext context, ExecuteResult result, ExecutableState state) {
        super.onStatusChange(context, result, state);

        updateMetrics(context, result, state);
    }

    /**
     * Collects the statistics of this job and hands them to {@link JobMetricsFacade}.
     *
     * <p>Size, duration and per-step figures are only filled in for state SUCCEED (per-step figures
     * only for BUILD jobs); for state ERROR the throwable of the result is recorded instead.
     *
     * @param context the executable context (not used by this implementation)
     * @param result the execution result, source of the throwable in the ERROR case
     * @param state the state the job changed to
     */
    protected void updateMetrics(ExecutableContext context, ExecuteResult result, ExecutableState state) {
        final AlgorithmEnum algorithm = getAlgorithm();
        JobMetricsFacade.JobStatisticsResult jobStats = new JobMetricsFacade.JobStatisticsResult();
        jobStats.setWrapper(getSubmitter(), getProjectName(), CubingExecutableUtil.getCubeName(getParams()), getId(),
                getJobType(), algorithm == null ? "NULL" : algorithm.toString());

        if (state == ExecutableState.SUCCEED) {
            // Look these up once; both are needed for the stats and for the per-byte cost.
            final long sourceSizeBytes = findSourceSizeBytes();
            final long duration = getDuration();
            jobStats.setJobStats(sourceSizeBytes, findCubeSizeBytes(), duration, getMapReduceWaitTime(),
                    getPerBytesTimeCost(sourceSizeBytes, duration));
            if (CubingJobTypeEnum.getByName(getJobType()) == CubingJobTypeEnum.BUILD) {
                jobStats.setJobStepStats(getTaskDurationByName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS),
                        getTaskDurationByName(ExecutableConstants.STEP_NAME_BUILD_DICTIONARY),
                        getTaskDurationByName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE),
                        getTaskDurationByName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE));
            }
        } else if (state == ExecutableState.ERROR) {
            // Fall back to a placeholder exception when the result carries no throwable.
            jobStats.setJobException(result.getThrowable() != null ? result.getThrowable() : new Exception());
        }
        JobMetricsFacade.updateMetrics(jobStats);
    }

    /** @return the duration of the sub task with the given name, or 0 if there is no such task */
    private long getTaskDurationByName(String name) {
        AbstractExecutable task = getTaskByName(name);
        return task == null ? 0 : task.getDuration();
    }

    /**
     * Computes the time cost per source byte.
     *
     * @param size the source size in bytes; sizes below {@code MIN_SOURCE_SIZE} are raised to it
     * @param timeCost the total time cost
     * @return {@code timeCost / size}, or 0 if {@code size} is not positive
     */
    private static double getPerBytesTimeCost(long size, long timeCost) {
        if (size <= 0) {
            return 0;
        }
        return timeCost * 1.0 / Math.max(size, MIN_SOURCE_SIZE);
    }

    /** @return the MapReduce wait time stored in the extra info, or 0 if none is stored */
    public long getMapReduceWaitTime() {
        return getExtraInfoAsLong(MAP_REDUCE_WAIT_TIME, 0L);
    }

    /** Stores the given MapReduce wait time in the extra info of this job. */
    public void setMapReduceWaitTime(long t) {
        addExtraInfo(MAP_REDUCE_WAIT_TIME, String.valueOf(t));
    }

    /**
     * Stores the cubing algorithm in the extra info of this job.
     *
     * @param alg the algorithm; must not be {@code null}
     */
    public void setAlgorithm(AlgorithmEnum alg) {
        addExtraInfo(ALGORITHM, alg.name());
    }

    /**
     * @return the cubing algorithm stored in the extra info, or {@code null} if none is stored
     * @throws IllegalArgumentException if the stored value is not a constant of {@link AlgorithmEnum}
     */
    public AlgorithmEnum getAlgorithm() {
        String alg = getExtraInfo().get(ALGORITHM);
        return alg == null ? null : AlgorithmEnum.valueOf(alg);
    }

    /** @return {@code true} if the stored algorithm is {@link AlgorithmEnum#LAYER} */
    public boolean isLayerCubing() {
        return AlgorithmEnum.LAYER == getAlgorithm();
    }

    /** @return {@code true} if the stored algorithm is {@link AlgorithmEnum#INMEM} */
    public boolean isInMemCubing() {
        return AlgorithmEnum.INMEM == getAlgorithm();
    }

    /** @return the source record count found in the extra info, or 0 if none is found */
    public long findSourceRecordCount() {
        return Long.parseLong(findExtraInfo(SOURCE_RECORD_COUNT, "0"));
    }

    /** @return the source size in bytes found in the extra info, or 0 if none is found */
    public long findSourceSizeBytes() {
        return Long.parseLong(findExtraInfo(SOURCE_SIZE_BYTES, "0"));
    }

    /** @return the cube size in bytes found in the extra info, or 0 if none is found */
    public long findCubeSizeBytes() {
        // look for the info BACKWARD, let the last step that claims the cube size win
        return Long.parseLong(findExtraInfoBackward(CUBE_SIZE_BYTES, "0"));
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy