All downloads are FREE. The search and download features use the official Maven repository.

water.hive.JdbcHiveMetadata Maven / Gradle / Ivy

The newest version!
package water.hive;

import org.apache.log4j.Logger;
import water.jdbc.SQLManager;
import water.util.JSONUtils;
import water.util.Log;

import java.sql.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static java.util.Collections.emptyList;

@SuppressWarnings({"rawtypes", "unchecked"})
public class JdbcHiveMetadata implements HiveMetaData {

    // Logger used to trace the metadata queries sent to Hive.
    private static final Logger LOG = Logger.getLogger(JdbcHiveMetadata.class);

    // Executed once on every new connection, before any metadata query; the query results are then parsed as JSON.
    private static final String SQL_SET_JSON_OUTPUT = "set hive.ddl.output.format=json";
    // Used by getHiveVersionMajor to detect the Hive version.
    private static final String SQL_GET_VERSION = "select version()";
    // Query patterns: %s is replaced with the table name via String.format.
    private static final String SQL_DESCRIBE_TABLE = "DESCRIBE EXTENDED %s";
    // Prefix only - getDescribePartitionQuery appends the "(key="value", ...)" partition specification.
    private static final String SQL_DESCRIBE_PARTITION = "DESCRIBE EXTENDED %s PARTITION ";
    private static final String SQL_SHOW_PARTS = "SHOW PARTITIONS %s";

    // JDBC URL handed to SQLManager whenever a connection is opened.
    private final String url;

    /**
     * Creates a metadata reader for the given JDBC URL.
     * No connection is opened here; {@link #getTable(String)} opens one per call.
     *
     * @param url JDBC URL passed to {@code SQLManager.getConnectionSafe}
     */
    public JdbcHiveMetadata(String url) {
        this.url = url;
    }

    /**
     * Mutable holder for the storage attributes that tables and partitions have in common.
     * Filled in by {@code readStorableMetadata} and copied field by field into {@link JdbcStorable}.
     */
    static class StorableMetadata {
        String location;
        String serializationLib;
        String inputFormat;
        // Starts out as an empty map.
        // NOTE(review): the raw Map looks like generics lost in extraction - confirm the intended type arguments.
        Map serDeParams = Collections.emptyMap();
    }

    /**
     * Immutable {@link Storable} backed by values read over JDBC.
     * All fields are copied from a {@link StorableMetadata} instance at construction time.
     */
    static class JdbcStorable implements Storable {

        private final String location;
        private final String serializationLib;
        private final String inputFormat;
        // NOTE(review): raw Map - the type arguments were probably lost; confirm against the Storable interface.
        private final Map serDeParams;

        /**
         * @param data source of the field values; the serDeParams map is stored by reference, not copied
         */
        JdbcStorable(StorableMetadata data) {
            this.location = data.location;
            this.serializationLib = data.serializationLib;
            this.inputFormat = data.inputFormat;
            this.serDeParams = data.serDeParams;
        }

        /** @return the SerDe parameters taken from the metadata passed to the constructor */
        @Override
        public Map getSerDeParams() {
            return serDeParams;
        }

        /** @return the storage location taken from the metadata passed to the constructor */
        @Override
        public String getLocation() {
            return location;
        }

        /** @return the serialization library name taken from the metadata passed to the constructor */
        @Override
        public String getSerializationLib() {
            return serializationLib;
        }

        /** @return the input format name taken from the metadata passed to the constructor */
        @Override
        public String getInputFormat() {
            return inputFormat;
        }
    }

    /**
     * A single table partition: the storage attributes of {@link JdbcStorable} plus the partition values.
     */
    static class JdbcPartition extends JdbcStorable implements Partition {

        // Partition values; getDescribePartitionQuery pairs them by index with the table's partition keys.
        private final List values;

        /**
         * @param meta   storage attributes of this partition
         * @param values partition values, stored by reference
         */
        JdbcPartition(StorableMetadata meta, List values) {
            super(meta);
            this.values = values;
        }

        /** @return the list of partition values given to the constructor */
        @Override
        public List getValues() {
            return values;
        }
    }

    /**
     * Immutable name/type pair describing one column (regular column or partition key).
     */
    static class JdbcColumn implements Column {

        private final String name;
        private final String type;

        /**
         * @param name column name as read from the "name" attribute of the column record
         * @param type column type as read from the "type" attribute of the column record
         */
        JdbcColumn(String name, String type) {
            this.name = name;
            this.type = type;
        }

        /** @return the column name */
        @Override
        public String getName() {
            return name;
        }

        /** @return the column type string */
        @Override
        public String getType() {
            return type;
        }
    }

    /**
     * Immutable description of a table: its storage attributes, columns, partition keys and partitions.
     */
    static class JdbcTable extends JdbcStorable implements Table {

        private final String name;
        // NOTE(review): the raw List types below look like generics lost in extraction - confirm the element types.
        private final List partitions;
        private final List columns;
        private final List partitionKeys;

        /**
         * All lists are stored by reference, not copied.
         *
         * @param name          table name
         * @param meta          storage attributes of the table itself
         * @param columns       regular (non-partition) columns
         * @param partitions    partitions of the table
         * @param partitionKeys partition key columns; an empty list marks the table as not partitioned
         */
        public JdbcTable(
            String name,
            StorableMetadata meta,
            List columns,
            List partitions,
            List partitionKeys
        ) {
            super(meta);
            this.name = name;
            this.partitions = partitions;
            this.columns = columns;
            this.partitionKeys = partitionKeys;
        }

        /** @return the table name */
        @Override
        public String getName() {
            return name;
        }

        /** @return true when at least one partition key is defined (the partitions list itself is not consulted) */
        @Override
        public boolean hasPartitions() {
            return !partitionKeys.isEmpty();
        }

        /** @return the partitions given to the constructor */
        @Override
        public List getPartitions() {
            return partitions;
        }

        /** @return the regular columns given to the constructor */
        @Override
        public List getColumns() {
            return columns;
        }

        /** @return the partition key columns given to the constructor */
        @Override
        public List getPartitionKeys() {
            return partitionKeys;
        }
    }
    
    /**
     * Runs a query and returns the first column of its first row as a string.
     *
     * @param conn  open connection to run the query on
     * @param query SQL text to execute
     * @return column 1 of the first result row, as returned by {@code ResultSet.getString}
     * @throws SQLException if the query fails or yields no rows
     */
    private String executeQuery(Connection conn, String query) throws SQLException {
        try (Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(query)) {
            // Checked explicitly rather than with assert, so the guard stays active when assertions are disabled.
            if (!rs.next()) {
                throw new SQLException("Query has no result rows: " + query);
            }
            return rs.getString(1);
        }
    }

    /**
     * Inserts the table name into a query pattern, runs the query and parses the returned string as JSON.
     *
     * @param conn         open connection to run the query on
     * @param queryPattern {@code String.format} pattern; the table name is its only argument
     * @param tableName    name substituted into the pattern
     * @return the parsed JSON document
     * @throws SQLException if the query cannot be executed
     */
    private Map executeAndParseJsonResultSet(
        Connection conn, String queryPattern, String tableName
    ) throws SQLException {
        final String sql = String.format(queryPattern, tableName);
        LOG.info("Executing Hive metadata query " + sql);
        return JSONUtils.parse(executeQuery(conn, sql));
    }

    /**
     * Reads the metadata of a table over a connection that is opened for this call and closed before returning.
     *
     * @param tableName name of the table to describe
     * @return table description including columns, partition keys and partitions
     * @throws SQLException if the connection or any of the metadata queries fails
     */
    @Override
    public Table getTable(String tableName) throws SQLException {
        try (Connection connection = SQLManager.getConnectionSafe(url, null, null)) {
            // request JSON output before any DESCRIBE/SHOW statement is issued on this connection
            try (Statement formatSwitch = connection.createStatement()) {
                formatSwitch.execute(SQL_SET_JSON_OUTPUT);
            }
            final Table described = getTable(connection, tableName);
            return described;
        }
    }

    private Table getTable(Connection conn, String name) throws SQLException {
        Map tableData = executeAndParseJsonResultSet(conn, SQL_DESCRIBE_TABLE, name);
        List columns = readColumns((List>) tableData.get("columns"));
        Map tableInfo = (Map) tableData.get("tableInfo");
        List partitionKeys = readPartitionKeys(tableInfo);
        columns = columns.subList(0, columns.size() - partitionKeys.size()); // remove partition keys from the end
        List partitions = readPartitions(conn, name, partitionKeys);
        StorableMetadata storableData = readStorableMetadata(tableInfo);
        return new JdbcTable(name, storableData, columns, partitions, partitionKeys);
    }
    
    /**
     * Determines the major Hive version by querying {@code version()}.
     *
     * @param conn open connection to query
     * @return the leading run of digits of the reported version (e.g. "2" for "2.3.9", "10" for "10.0.1");
     *         "1" when the query fails or reports nothing; the first character when the version
     *         does not start with a digit
     */
    private String getHiveVersionMajor(Connection conn) {
        final String versionStr;
        try {
            versionStr = executeQuery(conn, SQL_GET_VERSION);
        } catch (SQLException e) {
            return "1"; // older hive versions do not support version() function
        }
        if (versionStr == null || versionStr.isEmpty()) {
            return "1"; // version could not be determined - same fallback as for a failed query
        }
        int end = 0;
        while (end < versionStr.length() && Character.isDigit(versionStr.charAt(end))) {
            end++;
        }
        // take the whole digit run so that a multi-digit major is not truncated to "1";
        // without leading digits fall back to the first character
        return versionStr.substring(0, Math.max(end, 1));
    }

    /**
     * Extracts the storage attributes from a parsed "tableInfo" or "partitionInfo" record.
     *
     * @param tableInfo parsed record containing an "sd" object with "location", "inputFormat" and "serdeInfo"
     * @return holder with location, input format, serialization library and SerDe parameters;
     *         the SerDe parameters stay an empty map when the record has none
     */
    private StorableMetadata readStorableMetadata(Map tableInfo) {
        StorableMetadata res = new StorableMetadata();
        Map sd = (Map) tableInfo.get("sd");
        res.location = (String) sd.get("location");
        res.inputFormat = (String) sd.get("inputFormat");
        Map serDeInfo = (Map) sd.get("serdeInfo");
        res.serializationLib = (String) serDeInfo.get("serializationLib");
        Map serDeParams = (Map) serDeInfo.get("parameters");
        if (serDeParams != null) {
            // only override the empty-map default when parameters are actually present
            res.serDeParams = serDeParams;
        }
        return res;
    }

    private List readPartitions(
        Connection conn,
        String tableName,
        List partitionKeys
    ) throws SQLException {
        if (partitionKeys.isEmpty()) {
            return emptyList();
        }
        Map partitionsResult = executeAndParseJsonResultSet(conn, SQL_SHOW_PARTS, tableName);
        String hiveVersion = getHiveVersionMajor(conn);
        List partitions = new ArrayList<>();
        List> partitionsData = (List>) partitionsResult.get("partitions");
        for (Map partition : partitionsData) {
            List values = parsePartitionValues(partition, hiveVersion);
            StorableMetadata data = readPartitionMetadata(conn, tableName, partitionKeys, values);
            partitions.add(new JdbcPartition(data, values));
        }
        return partitions;
    }

    /**
     * Describes a single partition and extracts its storage attributes.
     *
     * @param conn          open connection with JSON DDL output switched on
     * @param tableName     name of the table owning the partition
     * @param partitionKeys partition key columns of the table
     * @param values        values of the partition, paired by index with {@code partitionKeys}
     * @return storage attributes read from the "partitionInfo" part of the DESCRIBE output
     * @throws SQLException if the DESCRIBE query fails
     */
    private StorableMetadata readPartitionMetadata(
        Connection conn,
        String tableName,
        List partitionKeys,
        List values
    ) throws SQLException {
        final String describePattern = getDescribePartitionQuery(partitionKeys, values);
        final Map described = executeAndParseJsonResultSet(conn, describePattern, tableName);
        return readStorableMetadata((Map) described.get("partitionInfo"));
    }
    
    /**
     * Builds the DESCRIBE pattern for one partition: the {@code SQL_DESCRIBE_PARTITION} prefix followed by
     * {@code (key1="value1", key2="value2", ...)}.
     *
     * <p>The result still contains the {@code %s} placeholder for the table name and is meant to be passed
     * through {@code String.format}; any literal percent sign in a key or value is therefore doubled.
     *
     * @param partitionKeys partition key columns
     * @param values        partition values, paired by index with {@code partitionKeys}
     * @return format pattern of the DESCRIBE query for the given partition
     */
    private String getDescribePartitionQuery(List<Column> partitionKeys, List<String> values) {
        StringBuilder sb = new StringBuilder();
        sb.append(SQL_DESCRIBE_PARTITION).append("(");
        for (int i = 0; i < partitionKeys.size(); i++) {
            if (i > 0) sb.append(", ");
            String escapedValue = values.get(i).replace("\"", "\\\"");
            String keySpec = partitionKeys.get(i).getName() + "=\"" + escapedValue + "\"";
            // a bare '%' would be read as a format specifier when the table name is substituted
            sb.append(keySpec.replace("%", "%%"));
        }
        sb.append(")");
        return sb.toString();
    }
    
    /**
     * Removes the backslash escaping of double quotes from a partition value when talking to Hive 1.
     *
     * @param value       partition value as reported by Hive
     * @param hiveVersion major Hive version; only the exact string "1" triggers un-escaping
     * @return the value with every {@code \"} replaced by {@code "} for Hive 1, otherwise the value unchanged
     */
    private String unescapePartitionValue(String value, String hiveVersion) {
        final boolean isHive1 = "1".equals(hiveVersion);
        // hive 2+ does the un-escaping automatically
        return isHive1 ? value.replace("\\\"", "\"") : value;
    }
    
    private List parsePartitionValues(Map partition, String hiveVersion) {
        List values = new ArrayList<>();
        List> valuesData = (List>) partition.get("values");
        for (Map valueRecord : valuesData) {
            String value = unescapePartitionValue((String) valueRecord.get("columnValue"), hiveVersion);
            values.add(value);
        }
        return values;
    }

    private List readPartitionKeys(Map tableInfo) {
        if (!tableInfo.containsKey("partitionKeys")) {
            return emptyList();
        } else {
            List> partitionColumns = (List>) tableInfo.get("partitionKeys");
            return readColumns(partitionColumns);
        }
    }

    private List readColumns(List> columnDataList) {
        List columns = new ArrayList<>();
        for (Map column : columnDataList) {
            columns.add(new JdbcColumn((String) column.get("name"), (String) column.get("type")));
        }
        return columns;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy