
io.kyligence.kap.clickhouse.job.ClickHouseRefresh

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.kyligence.kap.clickhouse.job;

import static io.kyligence.kap.secondstorage.SecondStorageConstants.P_OLD_SEGMENT_IDS;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import javax.annotation.concurrent.NotThreadSafe;

import org.apache.kylin.common.SegmentOnlineMode;
import org.apache.kylin.job.SecondStorageJobParamUtil;
import org.apache.kylin.job.handler.SecondStorageIndexCleanJobHandler;
import org.apache.kylin.job.manager.JobManager;
import org.apache.kylin.job.model.JobParam;
import org.apache.kylin.metadata.cube.model.NBatchConstants;
import org.apache.kylin.metadata.cube.model.NDataSegment;
import org.apache.kylin.metadata.cube.model.NDataflowManager;
import org.apache.kylin.metadata.cube.model.NDataflowUpdate;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.project.EnhancedUnitOfWork;

import org.apache.kylin.guava30.shaded.common.base.Preconditions;

import io.kyligence.kap.secondstorage.SecondStorage;
import io.kyligence.kap.secondstorage.SecondStorageConstants;
import io.kyligence.kap.secondstorage.SecondStorageUtil;
import io.kyligence.kap.secondstorage.metadata.TableData;
import io.kyligence.kap.secondstorage.metadata.TableFlow;
import io.kyligence.kap.secondstorage.metadata.TablePlan;
import lombok.val;

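/**
 * Refreshes second storage (ClickHouse) data for rebuilt segments: newly built segments
 * replace their old counterparts, the partitions of the replaced segments are dropped,
 * table data and layouts left empty are cleaned up, and, when the project configuration
 * allows it, the refreshed segments are brought online on the DFS side.
 */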
@NotThreadSafe
public class ClickHouseRefresh extends ClickHouseLoad {
    private Map<String, String> newSegToOld = null;
    private Set<String> oldSegmentIds;

    public ClickHouseRefresh() {
        this.setName(SecondStorageConstants.STEP_REFRESH_SECOND_STORAGE);
    }

    public ClickHouseRefresh(Object notSetId) {
        super(notSetId);
    }

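    /**
     * Builds the mapping from each new segment id to the id of the segment it replaces.
     * P_SEGMENT_IDS and P_OLD_SEGMENT_IDS are expected to be comma-separated lists of
     * equal length, matched by position.
     */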
    private void initSegMap() {
        newSegToOld = new HashMap<>();

        String[] segmentIds = getParam(NBatchConstants.P_SEGMENT_IDS).split(",");
        String[] oldSegmentIdsArray = getParam(P_OLD_SEGMENT_IDS).split(",");

        oldSegmentIds = new HashSet<>(Arrays.asList(oldSegmentIdsArray));

        for (int i = 0; i < segmentIds.length; i++) {
            newSegToOld.put(segmentIds[i], oldSegmentIdsArray[i]);
        }
    }

    @Override
    protected List<LoadInfo> preprocessLoadInfo(List<LoadInfo> infoList) {
        if (newSegToOld == null) {
            initSegMap();
        }
        infoList.forEach(info -> info.setOldSegmentId(newSegToOld.get(info.getSegmentId())));
        return infoList;
    }

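    /**
     * Before the newly loaded data is committed, drop the ClickHouse partitions that
     * belong to the segments being replaced.
     */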
    @Override
    protected void beforeDataCommit() {
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            getTableFlow().update(copied -> copied.all().forEach(tableData ->
                    tableData.removePartitions(p -> oldSegmentIds.contains(p.getSegmentId()))
            ));
            return null;
        }, project, 1, getEpochId());
    }

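    /**
     * After the load finishes: removes the replaced segments' partitions, cleans up table
     * data entries and table plan layouts that are left empty, and submits an index-clean
     * job for the removed layouts.
     */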
    @Override
    protected void updateMeta() {
        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            super.updateMeta();

            getTableFlow().update(copied -> copied.all().forEach(tableData ->
                    tableData.removePartitions(p -> oldSegmentIds.contains(p.getSegmentId()))
            ));

            Set<Long> needDeleteLayoutIds = getTableFlow().getTableDataList().stream()
                    .filter(tableData -> tableData.getPartitions().isEmpty())
                    .map(TableData::getLayoutID).collect(Collectors.toSet());

            getTableFlow().update(copied -> copied.cleanTableData(tableData -> tableData.getPartitions().isEmpty()));
            getTablePlan().update(t -> t.cleanTable(needDeleteLayoutIds));

            if (!needDeleteLayoutIds.isEmpty()) {
                val jobHandler = new SecondStorageIndexCleanJobHandler();
                final JobParam param = SecondStorageJobParamUtil.layoutCleanParam(project, getTargetSubject(), getSubmitter(), needDeleteLayoutIds, Collections.emptySet());
                JobManager.getInstance(getConfig(), project).addJob(param, jobHandler);
            }

            return null;
        }, project, 1, getEpochId());
    }

    private TableFlow getTableFlow() {
        val tableFlowManager = SecondStorage.tableFlowManager(getConfig(), getProject());
        return tableFlowManager.get(getTargetSubject()).orElse(null);
    }

    private TablePlan getTablePlan() {
        val tablePlanManager = SecondStorageUtil.tablePlanManager(getConfig(), project);
        Preconditions.checkState(tablePlanManager.isPresent());
        return tablePlanManager.get().get(getTargetSubject()).orElse(null);
    }

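    /**
     * When the job runs under the DAG scheduler and the segment online mode is ANY,
     * promote the refreshed NEW segments to READY, remove the segments they replace from
     * the dataflow, and refresh the dataflow status.
     */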
    @Override
    protected void updateDFSSegmentIfNeeded(MethodContext mc) {
        if (!isDAGJobScheduler()) {
            return;
        }

        if (!SegmentOnlineMode.ANY.toString().equalsIgnoreCase(getProjectConfig().getKylinEngineSegmentOnlineMode())) {
            return;
        }

        EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> {
            final NDataflowManager dfMgr = NDataflowManager.getInstance(getConfig(), getProject());
            val dataFlow = dfMgr.getDataflow(mc.getDataflowId()).copy();
            NDataflowUpdate update = new NDataflowUpdate(mc.getDf().getId());
            List<NDataSegment> toUpdateSegments = new ArrayList<>(oldSegmentIds.size());
            List<NDataSegment> toRemoveSegments = new ArrayList<>(oldSegmentIds.size());

            for (Map.Entry<String, String> entry : newSegToOld.entrySet()) {
                String newSegmentId = entry.getKey();
                String oldSegmentId = entry.getValue();

                NDataSegment newSegment = dataFlow.getSegment(newSegmentId);
                NDataSegment oldSegment = dataFlow.getSegment(oldSegmentId);

                if (newSegment == null || newSegment.getStatus() != SegmentStatusEnum.NEW) {
                    continue;
                }

                newSegment.setStatus(SegmentStatusEnum.READY);
                toUpdateSegments.add(newSegment);

                if (oldSegment != null) {
                    toRemoveSegments.add(oldSegment);
                }
            }

            update.setToRemoveSegs(toRemoveSegments.toArray(new NDataSegment[0]));
            update.setToUpdateSegs(toUpdateSegments.toArray(new NDataSegment[0]));
            dfMgr.updateDataflowWithoutIndex(update);

            markDFStatus(mc.getDataflowId());
            return null;
        }, project, 1, getEpochId());
    }
}