All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.kylin.rest.source.DataSourceState Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.rest.source;

import static org.apache.kylin.common.exception.ServerErrorCode.FAILED_CHECK_KERBEROS;
import static org.apache.kylin.common.exception.ServerErrorCode.PERMISSION_DENIED;
import static org.apache.kylin.common.exception.code.ErrorCodeSystem.QUERY_NODE_API_INVALID;

import java.io.File;
import java.io.IOException;
import java.security.PrivilegedAction;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.kylin.common.KapConfig;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.Singletons;
import org.apache.kylin.common.exception.KylinException;
import org.apache.kylin.common.msg.MsgPicker;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.guava30.shaded.common.collect.Maps;
import org.apache.kylin.guava30.shaded.common.collect.Sets;
import org.apache.kylin.metadata.project.NProjectManager;
import org.apache.kylin.metadata.project.ProjectInstance;
import org.apache.kylin.rest.response.NHiveTableNameResponse;
import org.apache.kylin.rest.security.KerberosLoginManager;
import org.apache.kylin.source.ISourceMetadataExplorer;
import org.apache.kylin.source.SourceFactory;
import org.apache.spark.sql.SparderEnv;

import lombok.extern.slf4j.Slf4j;

@Slf4j
public class DataSourceState implements Runnable {

    private static final String JDBC_SOURCE_KEY_PREFIX = "project#";
    private static final String SOURCE_KEY_PREFIX = "ugi#";

    private final StopWatch sw;
    private final Map cache;
    private final Map runningStateMap;
    private final Map lastLoadTimeMap;
    private ISourceMetadataExplorer explore;

    //only jdbc source use project name as key
    private static final Set USE_PROJECT_AS_KEY_SOURCE_TYPE = Sets.newHashSet(8);

    public static DataSourceState getInstance() {
        return Singletons.getInstance(DataSourceState.class);
    }

    private DataSourceState() {
        setExplore(SourceFactory.getSparkSource().getSourceMetadataExplorer());
        this.cache = Maps.newConcurrentMap();
        this.runningStateMap = Maps.newConcurrentMap();
        this.lastLoadTimeMap = Maps.newConcurrentMap();
        this.sw = StopWatch.create();
    }

    @Override
    public void run() {
        try {
            int waitSeconds = 0;
            while (!SparderEnv.isSparkAvailable() && KylinConfig.getInstanceFromEnv().getKerberosProjectLevelEnable()) {
                if (waitSeconds >= KylinConfig.getInstanceFromEnv().getLoadHiveTableWaitSparderSeconds()) {
                    log.warn("Skip wait sparder start, wait seconds :{}", waitSeconds);
                    return;
                }
                startSparder();
                log.info("Wait sparder start");
                Integer intervals = KylinConfig.getInstanceFromEnv().getLoadHiveTableWaitSparderIntervals();
                TimeUnit.SECONDS.sleep(intervals);
                waitSeconds = waitSeconds + intervals;
            }
            loadAllSourceInfoToCache();
        } catch (InterruptedException ex) {
            log.info("thread interrupted while wait sparder start");
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            log.error("Scheduling refresh of hive table name cache failed", e);
        } finally {
            sw.reset();
        }
    }

    private void startSparder() {
        if (!KylinConfig.getInstanceFromEnv().isUTEnv()) {
            SparderEnv.init();
        }
    }

    /**
     * load all source info to cache
     */
    public void loadAllSourceInfoToCache() throws IOException {
        sw.start();
        checkIsAllNode();
        log.info("start load all table name to cache");

        NProjectManager projectManager = NProjectManager.getInstance(KylinConfig.getInstanceFromEnv());
        KerberosLoginManager kerberosManager = KerberosLoginManager.getInstance();

        Map> ugiMap = Maps.newLinkedHashMap();
        ugiMap.put(SOURCE_KEY_PREFIX + UserGroupInformation.getLoginUser().getUserName(),
                Pair.newPair(null, UserGroupInformation.getLoginUser()));

        projectManager.listAllProjects().stream().filter(p -> StringUtils.isNotBlank(p.getPrincipal()))
                .forEach(projectInstance -> {
                    try {
                        UserGroupInformation projectUgi = kerberosManager.getProjectUGI(projectInstance.getName());
                        ugiMap.put(getCacheKeyByProject(projectInstance), Pair.newPair(projectInstance, projectUgi));
                    } catch (Exception e) {
                        log.error("The kerberos information of the project {} is incorrect.",
                                projectInstance.getName());
                    }
                });

        ugiMap.forEach((cacheKey, pair) -> {
            ProjectInstance projectInstance = pair.getFirst();
            UserGroupInformation projectUgi = pair.getSecond();
            runningStateMap.put(cacheKey, true);
            List tableFilterList = getHiveFilterList(projectInstance);
            NHiveSourceInfo sourceInfo = fetchUgiSourceInfo(projectUgi, tableFilterList);
            putCache(cacheKey, sourceInfo, tableFilterList);
            runningStateMap.put(cacheKey, false);
            lastLoadTimeMap.put(cacheKey, System.currentTimeMillis());
        });

        sw.stop();
        log.info("Load hive table name successful within {} second", sw.getTime(TimeUnit.SECONDS));
    }

    public NHiveTableNameResponse loadAllSourceInfoToCacheForced(String project, boolean force) {
        log.info("Load hive tables immediately {}, force: {}", project, force);
        checkIsAllNode();
        checkKerberosInfo(project);

        NProjectManager projectManager = NProjectManager.getInstance(KylinConfig.getInstanceFromEnv());
        ProjectInstance projectInstance = projectManager.getProject(project);
        NHiveTableNameResponse response = new NHiveTableNameResponse();
        String cacheKey = getCacheKeyByProject(projectInstance);
        runningStateMap.putIfAbsent(cacheKey, false);
        lastLoadTimeMap.putIfAbsent(cacheKey, 0L);

        if (!force) {
            response.setIsRunning(runningStateMap.get(cacheKey));
            response.setTime(System.currentTimeMillis() - lastLoadTimeMap.get(cacheKey));
            return response;
        }
        setExplore(SourceFactory.getSource(projectInstance).getSourceMetadataExplorer());

        KerberosLoginManager kerberosManager = KerberosLoginManager.getInstance();
        UserGroupInformation projectUGI = kerberosManager.getProjectUGI(project);
        runningStateMap.put(cacheKey, true);
        List tableFilterList = getHiveFilterList(projectInstance);
        NHiveSourceInfo sourceInfo = fetchUgiSourceInfo(projectUGI, tableFilterList);
        putCache(cacheKey, sourceInfo, tableFilterList);
        runningStateMap.put(cacheKey, false);
        response.setIsRunning(runningStateMap.get(cacheKey));
        response.setTime(0L);
        return response;
    }

    public synchronized List getTables(String project, String db) {
        NProjectManager projectManager = NProjectManager.getInstance(KylinConfig.getInstanceFromEnv());
        ProjectInstance projectInstance = projectManager.getProject(project);

        List result = Lists.newArrayList();
        NHiveSourceInfo sourceInfo = cache.get(getCacheKeyByProject(projectInstance));
        if (Objects.nonNull(sourceInfo) && Objects.nonNull(sourceInfo.getDatabaseInfo(db))) {
            result.addAll(sourceInfo.getDatabaseInfo(db));
        }
        return result;
    }

    public synchronized void putCache(String cacheKey, NHiveSourceInfo sourceInfo, List tableFilterList) {
        if (CollectionUtils.isNotEmpty(tableFilterList)) {
            NHiveSourceInfo info = cache.get(cacheKey);
            if (!checkSourceInfoEmpty(sourceInfo) && !checkSourceInfoEmpty(info)) {
                info.getTables().keySet().forEach(db -> {
                    if (CollectionUtils.isEmpty(sourceInfo.getDatabaseInfo(db))) {
                        if (tableFilterList.contains(db)) {
                            return;
                        }
                        sourceInfo.putDatabaseInfo(db, info.getDatabaseInfo(db));
                    }
                });
            }
        }
        cache.put(cacheKey, sourceInfo);
    }

    private boolean checkSourceInfoEmpty(NHiveSourceInfo sourceInfo) {
        return sourceInfo == null || sourceInfo.getTables().isEmpty();
    }

    private String getCacheKeyByProject(ProjectInstance projectInstance) {
        String projectName = projectInstance.getName();
        if (USE_PROJECT_AS_KEY_SOURCE_TYPE.contains(projectInstance.getSourceType())) {
            return JDBC_SOURCE_KEY_PREFIX + projectName;
        } else {
            return SOURCE_KEY_PREFIX + KerberosLoginManager.getInstance().getProjectUGI(projectName).getUserName();
        }
    }

    public NHiveSourceInfo fetchUgiSourceInfo(UserGroupInformation ugi, List filterList) {
        log.info("Load hive tables from ugi {}", ugi.getUserName());
        NHiveSourceInfo sourceInfo;
        if (UserGroupInformation.isSecurityEnabled()) {
            sourceInfo = ugi.doAs((PrivilegedAction) () -> fetchSourceInfo(filterList));
        } else {
            sourceInfo = fetchSourceInfo(filterList);
        }
        return sourceInfo;
    }

    private NHiveSourceInfo fetchSourceInfo(List filterList) {
        NHiveSourceInfo sourceInfo = new NHiveSourceInfo();
        try {
            List databaseList = explore.listDatabases().stream().map(StringUtils::toRootUpperCase)
                    .filter(database -> CollectionUtils.isEmpty(filterList) || filterList.contains(database))
                    .collect(Collectors.toList());
            Map> dbTableList = listTables(databaseList);
            sourceInfo.setTables(dbTableList);
        } catch (Exception e) {
            log.error("Load hive tables error.", e);
        }
        return sourceInfo;
    }

    private Map> listTables(List databaseList) throws Exception {
        HashMap> dbTableList = Maps.newHashMap();
        int databaseTotalSize = databaseList.size();

        for (String database : databaseList) {
            if (!explore.checkDatabaseAccess(database)) {
                continue;
            }
            List tableList = explore.listTables(database).stream().map(StringUtils::toRootUpperCase)
                    .collect(Collectors.toList());
            if (CollectionUtils.isNotEmpty(tableList)) {
                dbTableList.put(database, tableList);
            }
            int currDatabaseSize = dbTableList.keySet().size();
            if (currDatabaseSize % 20 == 0) {
                log.info("Foreach database curr pos {}, total num {}", currDatabaseSize, databaseTotalSize);
            }
        }
        return dbTableList;
    }

    private void checkKerberosInfo(String project) {
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        NProjectManager projectManager = NProjectManager.getInstance(kylinConfig);
        ProjectInstance projectInstance = projectManager.getProject(project);

        String principal = projectInstance.getPrincipal();
        String keytab = projectInstance.getKeytab();
        try {
            if (kylinConfig.getKerberosProjectLevelEnable() && !StringUtils.isAllBlank(principal, keytab)) {
                String kylinConfHome = KapConfig.getKylinConfDirAtBestEffort();

                String keyTabPath = new Path(kylinConfHome, principal.concat(KerberosLoginManager.KEYTAB_SUFFIX))
                        .toString();
                File keyTabFile = new File(keyTabPath);
                if (!keyTabFile.exists()) {
                    FileUtils.writeStringToFile(keyTabFile, keytab);
                }
                UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTabPath);
            }
        } catch (Exception e) {
            throw new KylinException(FAILED_CHECK_KERBEROS,
                    "The project " + project + " kerberos information has expired.");
        }
    }

    /**
     * get filter list from kylin.source.hive.databases
     */
    public List getHiveFilterList(ProjectInstance projectInstance) {
        if (Objects.isNull(projectInstance)) {
            return Collections.emptyList();
        }
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        String[] databases = projectInstance.getConfig().getHiveDatabases();
        if (databases.length == 0) {
            databases = config.getHiveDatabases();
        }
        return Arrays.stream(databases).map(str -> str.toUpperCase(Locale.ROOT)).collect(Collectors.toList());
    }

    private void checkIsAllNode() {
        if (!KylinConfig.getInstanceFromEnv().isJobNode() && !KylinConfig.getInstanceFromEnv().isMetadataNode()) {
            throw new KylinException(QUERY_NODE_API_INVALID);
        }
        if (!KylinConfig.getInstanceFromEnv().getLoadHiveTablenameEnabled()) {
            throw new KylinException(PERMISSION_DENIED, MsgPicker.getMsg().getInvalidLoadHiveTableName());
        }
    }

    private synchronized void setExplore(ISourceMetadataExplorer explore) {
        this.explore = explore;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy