All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.kylin.rest.service.merger.AfterBuildResourceMerger Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.rest.service.merger;

import java.util.List;
import java.util.Objects;
import java.util.Set;

import org.apache.commons.collections.CollectionUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.engine.spark.ExecutableUtils;
import org.apache.kylin.guava30.shaded.common.base.Preconditions;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.job.execution.JobTypeEnum;
import org.apache.kylin.job.execution.MergerInfo;
import org.apache.kylin.metadata.cube.model.NDataLayout;
import org.apache.kylin.metadata.cube.model.NDataSegment;
import org.apache.kylin.metadata.cube.model.NDataflow;
import org.apache.kylin.metadata.cube.model.NDataflowManager;
import org.apache.kylin.metadata.cube.model.NDataflowUpdate;
import org.apache.kylin.metadata.cube.model.PartitionStatusEnum;
import org.apache.kylin.metadata.model.SegmentStatusEnum;

import lombok.val;
import lombok.extern.slf4j.Slf4j;

/**
 * Merges the metadata written by a finished build job into the local dataflow
 * of a project.
 *
 * <p>The job's output is read from a separate {@link ResourceStore} (called the
 * "remote" store below). Depending on the job type, the segments and layouts
 * found there are copied into an {@link NDataflowUpdate} that is then applied
 * through the local {@link NDataflowManager}.
 */
@Slf4j
public class AfterBuildResourceMerger extends MetadataMerger {

    public AfterBuildResourceMerger(KylinConfig config, String project) {
        super(config, project);
    }

    /**
     * Dispatches to the merge routine that matches the job type.
     *
     * <ul>
     * <li>{@code INDEX_BUILD} and {@code SUB_PARTITION_BUILD} go to
     * {@link #mergeAfterCatchup}.</li>
     * <li>{@code INC_BUILD} goes to {@link #mergeAfterIncrement} and requires
     * exactly one segment id.</li>
     * </ul>
     *
     * @param dataflowId          id of the dataflow to update
     * @param segmentId           ids of the segments touched by the job
     * @param layoutIds           ids of the layouts touched by the job
     * @param remoteResourceStore store holding the job's output metadata
     * @param jobType             type of the finished job
     * @param partitions          partition ids touched by the job; may be empty
     * @return the layouts that were added or updated in the local dataflow
     * @throws IllegalArgumentException      if {@code jobType} is {@code INC_BUILD}
     *                                       and {@code segmentId} does not hold exactly one id
     * @throws UnsupportedOperationException for any other job type
     */
    public NDataLayout[] merge(String dataflowId, Set<String> segmentId, Set<Long> layoutIds,
            ResourceStore remoteResourceStore, JobTypeEnum jobType, Set<Long> partitions) {
        switch (jobType) {
        case INDEX_BUILD:
        case SUB_PARTITION_BUILD:
            return mergeAfterCatchup(dataflowId, segmentId, layoutIds, remoteResourceStore, partitions);
        case INC_BUILD:
            Preconditions.checkArgument(segmentId.size() == 1,
                    "INC_BUILD expects exactly one segment id, but got %s", segmentId.size());
            return mergeAfterIncrement(dataflowId, segmentId.iterator().next(), layoutIds, remoteResourceStore);
        default:
            throw new UnsupportedOperationException("Error job type: " + jobType);
        }
    }

    /**
     * Merges the output of one task. The remote store is opened from the task's
     * output meta URL and closed again when the merge is done.
     *
     * <p>After the dataflow merge, table-ext metadata is always merged; snapshot
     * metadata is merged only when the task info asks for it.
     *
     * @param info description of the finished task
     * @return the layouts that were added or updated in the local dataflow
     */
    @Override
    public NDataLayout[] merge(MergerInfo.TaskMergeInfo info) {
        try (val buildResourceStore = ExecutableUtils.getRemoteStore(this.getConfig(), info.getOutputMetaUrl())) {
            val dataFlowId = info.getDataFlowId();
            val segmentIds = info.getSegmentIds();
            val layoutIds = info.getLayoutIds();
            val partitionIds = info.getPartitionIds();
            val jobType = info.getJobType();
            val needBuildSnapshots = info.isNeedBuildSnapshots();
            NDataLayout[] nDataLayouts = merge(dataFlowId, segmentIds, layoutIds, buildResourceStore, jobType,
                    partitionIds);
            // Looked up after the merge so that the freshly updated dataflow is used.
            NDataflow dataflow = NDataflowManager.getInstance(getConfig(), getProject()).getDataflow(dataFlowId);
            if (needBuildSnapshots) {
                mergeSnapshotMeta(dataflow, buildResourceStore);
            }
            mergeTableExtMeta(dataflow, buildResourceStore);
            return nDataLayouts;
        }
    }

    /**
     * Merges a single newly built segment from the remote store into the local
     * dataflow and marks it {@code READY}.
     *
     * <p>The segment's effective layouts are added or updated, and the segments
     * reported by {@code getToRemoveSegs} on the remote dataflow manager are
     * removed in the same update.
     *
     * @param dataflowId  id of the dataflow to update
     * @param segmentId   id of the segment that was built
     * @param layoutIds   layout ids forwarded to the layout-details merge (V3 storage only)
     * @param remoteStore store holding the job's output metadata
     * @return the layouts that were added or updated in the local dataflow
     * @throws NullPointerException if the segment is missing from the remote or the local dataflow
     */
    public NDataLayout[] mergeAfterIncrement(String dataflowId, String segmentId, Set<Long> layoutIds,
            ResourceStore remoteStore) {
        val localDataflowManager = NDataflowManager.getInstance(getConfig(), getProject());
        val remoteDataflowManager = NDataflowManager.getInstance(remoteStore.getConfig(), getProject());
        val remoteDataflow = remoteDataflowManager.getDataflow(dataflowId).copy();

        val dfUpdate = new NDataflowUpdate(dataflowId);
        val theSeg = remoteDataflow.getSegment(segmentId);
        Preconditions.checkNotNull(theSeg, "Segment %s not found in remote dataflow %s", segmentId, dataflowId);
        val toRemoveSegments = remoteDataflowManager.getToRemoveSegs(remoteDataflow, theSeg);

        if (theSeg.getModel().isMultiPartitionModel()) {
            // Multi-partition segments: stamp the segment and all of its partitions with "now".
            final long lastBuildTime = System.currentTimeMillis();
            theSeg.getMultiPartitions().forEach(partition -> {
                partition.setStatus(PartitionStatusEnum.READY);
                partition.setLastBuildTime(lastBuildTime);
            });
            theSeg.setLastBuildTime(lastBuildTime);
        } else {
            theSeg.setLastBuildTime(theSeg.getSegDetails().getLastModified());
        }

        // Carry over the local segment's mvcc so the remote copy can replace it.
        val localSeg = localDataflowManager.getDataflow(dataflowId).getSegment(segmentId);
        Preconditions.checkNotNull(localSeg, "Segment %s not found in local dataflow %s", segmentId, dataflowId);
        theSeg.setMvcc(localSeg.getMvcc());

        resetBreakpoints(theSeg);

        theSeg.setStatus(SegmentStatusEnum.READY);
        dfUpdate.setToUpdateSegs(theSeg);
        dfUpdate.setToRemoveSegs(toRemoveSegments.toArray(new NDataSegment[0]));
        dfUpdate.setToAddOrUpdateLayouts(theSeg.getSegDetails().getEffectiveLayouts().toArray(new NDataLayout[0]));
        if (remoteDataflow.getModel().getStorageType().isV3Storage()) {
            mergeLayoutDetails(getProject(), remoteDataflow.getModel().getId(), layoutIds, theSeg, toRemoveSegments,
                    remoteStore.getConfig());
        }

        localDataflowManager.updateDataflow(dfUpdate);
        updateIndexPlan(dataflowId, remoteStore);
        return dfUpdate.getToAddOrUpdateLayouts();
    }

    /**
     * Merges the result of an index or sub-partition build. When partition ids
     * are given the multi-partition routine is used, otherwise the normal one.
     *
     * @param flowName     id of the dataflow to update
     * @param segmentIds   ids of the segments touched by the job
     * @param layoutIds    ids of the layouts touched by the job
     * @param remoteStore  store holding the job's output metadata
     * @param partitionIds partition ids touched by the job; may be null or empty
     * @return the layouts that were added or updated in the local dataflow
     */
    public NDataLayout[] mergeAfterCatchup(String flowName, Set<String> segmentIds, Set<Long> layoutIds,
            ResourceStore remoteStore, Set<Long> partitionIds) {
        if (CollectionUtils.isNotEmpty(partitionIds)) {
            return mergeMultiPartitionModelAfterCatchUp(flowName, segmentIds, layoutIds, remoteStore, partitionIds);
        } else {
            return mergeNormalModelAfterCatchUp(flowName, segmentIds, layoutIds, remoteStore);
        }
    }

    /**
     * Merges newly built layouts of a model without partition ids.
     *
     * <p>For every requested segment that is available locally (see
     * {@link #isUnavailableSegment}), the remote segment replaces the local one
     * and each available layout is taken from the remote segment.
     *
     * @param flowName    id of the dataflow to update
     * @param segmentIds  ids of the segments touched by the job
     * @param layoutIds   ids of the layouts touched by the job
     * @param remoteStore store holding the job's output metadata
     * @return the layouts that were added or updated in the local dataflow
     * @throws NullPointerException if an available segment, or one of its available
     *                              layouts, is missing from the remote dataflow
     */
    public NDataLayout[] mergeNormalModelAfterCatchUp(String flowName, Set<String> segmentIds, Set<Long> layoutIds,
            ResourceStore remoteStore) {
        val localDataflowManager = NDataflowManager.getInstance(getConfig(), getProject());
        val dataflow = localDataflowManager.getDataflow(flowName);
        val remoteDataflowManager = NDataflowManager.getInstance(remoteStore.getConfig(), getProject());
        val remoteDataflow = remoteDataflowManager.getDataflow(flowName).copy();

        val dfUpdate = new NDataflowUpdate(flowName);
        List<NDataLayout> addCuboids = Lists.newArrayList();
        val availableLayoutIds = getAvailableLayoutIds(dataflow, layoutIds);

        List<NDataSegment> segsToUpdate = Lists.newArrayList();
        for (String segId : segmentIds) {
            val localSeg = dataflow.getSegment(segId);
            val remoteSeg = remoteDataflow.getSegment(segId);
            // ignore if local segment is not ready
            if (isUnavailableSegment(localSeg)) {
                continue;
            }
            Preconditions.checkNotNull(remoteSeg, "Segment %s not found in remote dataflow %s", segId, flowName);
            remoteSeg.setLastBuildTime(remoteSeg.getSegDetails().getLastModified());
            for (long layoutId : availableLayoutIds) {
                NDataLayout dataCuboid = remoteSeg.getLayout(layoutId, true);
                Preconditions.checkNotNull(dataCuboid, "Layout %s not found in remote segment %s", layoutId, segId);
                addCuboids.add(dataCuboid);
            }
            // Carry over the local segment's mvcc so the remote copy can replace it.
            remoteSeg.setMvcc(localSeg.getMvcc());

            if (remoteDataflow.getModel().getStorageType().isV3Storage()) {
                // NOTE(review): the fifth argument is the list of segments collected so far,
                // whereas mergeAfterIncrement passes its to-remove segments here — confirm intended.
                mergeLayoutDetails(getProject(), remoteDataflow.getModel().getId(), layoutIds, remoteSeg, segsToUpdate,
                        remoteStore.getConfig());
            }

            resetBreakpoints(remoteSeg);

            segsToUpdate.add(remoteSeg);
        }

        dfUpdate.setToUpdateSegs(segsToUpdate.toArray(new NDataSegment[0]));
        dfUpdate.setToAddOrUpdateLayouts(addCuboids.toArray(new NDataLayout[0]));

        localDataflowManager.updateDataflow(dfUpdate);
        updateIndexPlan(flowName, remoteStore);
        return dfUpdate.getToAddOrUpdateLayouts();
    }

    /**
     * Tells whether a local segment must be skipped by the catch-up merges.
     *
     * @param localSeg the local segment, possibly {@code null}
     * @return {@code true} if the segment is {@code null} or its status is neither
     *         {@code READY} nor {@code WARNING}
     */
    private boolean isUnavailableSegment(NDataSegment localSeg) {
        if (localSeg == null) {
            return true;
        }
        return localSeg.getStatus() != SegmentStatusEnum.READY && localSeg.getStatus() != SegmentStatusEnum.WARNING;
    }

    /**
     * MultiPartition model job:
     * New layoutIds mean index build job which should add new layouts.
     * Old layoutIds mean partition build job which should update partitions in layouts.
     *
     * <p>Layouts for which {@code upsertLayoutPartition} yields {@code null} are
     * logged and left out of the update, so the returned array never contains
     * {@code null} elements.
     *
     * @param flowName     id of the dataflow to update
     * @param segmentIds   ids of the segments touched by the job
     * @param layoutIds    ids of the layouts touched by the job
     * @param remoteStore  store holding the job's output metadata
     * @param partitionIds partition ids touched by the job
     * @return the layouts that were added or updated in the local dataflow
     */
    public NDataLayout[] mergeMultiPartitionModelAfterCatchUp(String flowName, Set<String> segmentIds,
            Set<Long> layoutIds, ResourceStore remoteStore, Set<Long> partitionIds) {

        val localDataflowManager = NDataflowManager.getInstance(getConfig(), getProject());
        val localDataflow = localDataflowManager.getDataflow(flowName).copy();
        val remoteDataflowManager = NDataflowManager.getInstance(remoteStore.getConfig(), getProject());
        val remoteDataflow = remoteDataflowManager.getDataflow(flowName).copy();
        val dataflow = localDataflowManager.getDataflow(flowName);
        val dfUpdate = new NDataflowUpdate(flowName);
        List<NDataLayout> upsertCuboids = Lists.newArrayList();
        val availableLayoutIds = getAvailableLayoutIds(dataflow, layoutIds);
        List<NDataSegment> segsToUpdate = Lists.newArrayList();

        for (String segId : segmentIds) {
            val localSeg = localDataflow.getSegment(segId);
            val remoteSeg = remoteDataflow.getSegment(segId);
            // ignore if local segment is not ready
            if (isUnavailableSegment(localSeg)) {
                continue;
            }
            NDataSegment updateSegment = upsertSegmentPartition(localSeg, remoteSeg, partitionIds);

            for (long layoutId : availableLayoutIds) {
                NDataLayout remoteLayout = remoteSeg.getLayout(layoutId);
                NDataLayout localLayout = localSeg.getLayout(layoutId);
                NDataLayout upsertLayout = upsertLayoutPartition(localLayout, remoteLayout, partitionIds);
                if (upsertLayout == null) {
                    log.warn("Layout {} is null in segment {}. Segment have layouts {} ", layoutId, segId,
                            remoteSeg.getLayoutIds());
                    // Do not hand a null layout to the dataflow update or to callers.
                    continue;
                }
                upsertCuboids.add(upsertLayout);
            }
            segsToUpdate.add(updateSegment);
        }
        dfUpdate.setToUpdateSegs(segsToUpdate.toArray(new NDataSegment[0]));
        dfUpdate.setToAddOrUpdateLayouts(upsertCuboids.toArray(new NDataLayout[0]));

        localDataflowManager.updateDataflow(dfUpdate);
        updateIndexPlan(flowName, remoteStore);
        return dfUpdate.getToAddOrUpdateLayouts();
    }

    /**
     * Clears the build "ready" flags of a segment before it is written back.
     *
     * <p>The fact-view and dictionary flags are always cleared. The flat-table
     * flag is cleared when flat-table persistence is disabled or the model has a
     * multi-partition description. The snapshot flag is cleared only when the
     * model has no multi-partition description (or no model is attached).
     *
     * @param dataSegment the segment whose flags are reset
     */
    private void resetBreakpoints(NDataSegment dataSegment) {
        // Reset breakpoints.
        dataSegment.setFactViewReady(false);
        dataSegment.setDictReady(false);
        if (!getConfig().isPersistFlatTableEnabled()) {
            dataSegment.setFlatTableReady(false);
        }

        // Multi level partition FLAT-TABLE is not reusable.
        if (Objects.nonNull(dataSegment.getModel()) //
                && Objects.nonNull(dataSegment.getModel().getMultiPartitionDesc())) {
            dataSegment.setFlatTableReady(false);
            // By design, multi level partition shouldn't refresh snapshots frequently.
        } else {
            dataSegment.setSnapshotReady(false);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy