org.apache.kylin.source.hive.cardinality.HiveColumnCardinalityUpdateJob
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.source.hive.cardinality;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.metadata.TableMetadataManager;
import org.apache.kylin.metadata.model.TableExtDesc;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This job saves the column cardinality result into the Kylin table metadata store
 * (an invocation sketch is shown after the class body).
 *
 * @author shaoshi
 */
public class HiveColumnCardinalityUpdateJob extends AbstractHadoopJob {
    private static final Logger logger = LoggerFactory.getLogger(HiveColumnCardinalityUpdateJob.class);

    public static final String JOB_TITLE = "Kylin Hive Column Cardinality Update Job";

    @SuppressWarnings("static-access")
    protected static final Option OPTION_TABLE = OptionBuilder.withArgName("table name").hasArg().isRequired(true)
            .withDescription("The hive table name").create("table");

    public HiveColumnCardinalityUpdateJob() {

    }

    @Override
    public int run(String[] args) throws Exception {

        Options options = new Options();

        try {
            options.addOption(OPTION_PROJECT);
            options.addOption(OPTION_TABLE);
            options.addOption(OPTION_OUTPUT_PATH);

            parseOptions(options, args);

            String project = getOptionValue(OPTION_PROJECT);
            String table = getOptionValue(OPTION_TABLE).toUpperCase(Locale.ROOT);

            // start job
            String jobName = JOB_TITLE + getOptionsAsString();
            logger.info("Starting: " + jobName);
            Configuration conf = getConf();
            Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
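            // this path is expected to hold the cardinality calculation output, one
            // "<columnIndex>\t<cardinality>" line per column (see updateKylinTableExd)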

            updateKylinTableExd(table, output.toString(), conf, project);
            return 0;
        } catch (Exception e) {
            printUsage(options);
            throw e;
        }

    }

    /**
     * Parses the cardinality output found under <code>outPath</code> (one
     * "columnIndex TAB cardinality" line per column), joins the cardinality values into a
     * comma-separated string and saves it on the table's {@link TableExtDesc} for the given
     * project. If no valid record is found, the table's cardinality is reset instead.
     */
    public void updateKylinTableExd(String tableName, String outPath, Configuration config, String prj)
            throws IOException {
        List<String> columns;
        try {
            columns = readLines(new Path(outPath), config);
        } catch (Exception e) {
            logger.error("Failed to resolve cardinality for " + tableName + " from " + outPath, e);
            return;
        }

        StringBuilder cardi = new StringBuilder();
        for (String line : columns) {
            // each line is expected to be "<columnIndex>\t<cardinality>"
            String[] ss = StringUtils.split(line, "\t");

            if (ss.length != 2) {
                logger.info("The hadoop cardinality value is not valid " + line);
                continue;
            }
            cardi.append(ss[1]);
            cardi.append(",");
        }
        String scardi = cardi.toString();
        if (scardi.length() > 0) {
            scardi = scardi.substring(0, scardi.length() - 1);
            TableMetadataManager metaMgr = TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
            TableExtDesc tableExt = metaMgr.getTableExt(tableName, prj);
            tableExt.setCardinality(scardi);
            metaMgr.saveTableExt(tableExt, prj);
        } else {
            // we get here when ColumnCardinalityReducer produced no output, which means the table is empty
            TableMetadataManager metaMgr = TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
            TableExtDesc tableExt = metaMgr.getTableExt(tableName, prj);
            tableExt.resetCardinality();
            metaMgr.saveTableExt(tableExt, prj);
        }
    }

    /**
     * Reads every line from all files under <code>location</code>, transparently
     * decompressing files for which a {@link CompressionCodec} is registered and
     * skipping marker files such as _SUCCESS.
     */
    private static List<String> readLines(Path location, Configuration conf) throws Exception {
        FileSystem fileSystem = HadoopUtil.getWorkingFileSystem();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        FileStatus[] items = fileSystem.listStatus(location);
        if (items == null)
            return new ArrayList<>();
        List<String> results = new ArrayList<>();
        for (FileStatus item : items) {

            // ignoring files like _SUCCESS
            if (item.getPath().getName().startsWith("_")) {
                continue;
            }

            CompressionCodec codec = factory.getCodec(item.getPath());
            InputStream stream = null;
            StringWriter writer = new StringWriter();
            try {
                // check if we have a compression codec we need to use
                if (codec != null) {
                    stream = codec.createInputStream(fileSystem.open(item.getPath()));
                } else {
                    stream = fileSystem.open(item.getPath());
                }

                IOUtils.copy(stream, writer, "UTF-8");
            } finally {
                if (stream != null) {
                    stream.close();
                }
            }
            String raw = writer.toString();
            for (String str : StringUtil.split(raw, "\n")) {
                results.add(str);
            }
        }
        return results;
    }

}
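
Below is a minimal sketch of launching this job programmatically. It assumes that AbstractHadoopJob implements Hadoop's Tool interface (its run(String[]) / getConf() usage suggests so) and that the "-project" and "-output" flag names match the OPTION_PROJECT and OPTION_OUTPUT_PATH constants inherited from AbstractHadoopJob; the "-table" flag is defined in this class. All argument values are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.kylin.source.hive.cardinality.HiveColumnCardinalityUpdateJob;

public class CardinalityUpdateRunner {
    public static void main(String[] args) throws Exception {
        // hypothetical project, table and output-path values; the output directory is
        // expected to already contain the cardinality result to be saved to metadata
        String[] jobArgs = new String[] {
                "-project", "learn_kylin",
                "-table", "DEFAULT.KYLIN_SALES",
                "-output", "/tmp/cardinality/DEFAULT.KYLIN_SALES"
        };
        int exitCode = ToolRunner.run(new Configuration(), new HiveColumnCardinalityUpdateJob(), jobArgs);
        System.exit(exitCode);
    }
}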