All downloads are free. The search and download features use the official Maven repository.

org.apache.paimon.hive.PaimonMetaHook Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.hive;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.AbstractCatalog;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.hive.mapred.PaimonInputFormat;
import org.apache.paimon.hive.mapred.PaimonOutputFormat;
import org.apache.paimon.hive.utils.HiveUtils;
import org.apache.paimon.options.Options;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.schema.TableSchema;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import static org.apache.hadoop.hive.metastore.Warehouse.getDnsPath;
import static org.apache.paimon.CoreOptions.METASTORE_PARTITIONED_TABLE;
import static org.apache.paimon.catalog.Catalog.COMMENT_PROP;
import static org.apache.paimon.hive.HiveTypeUtils.toPaimonType;

/**
 * {@link HiveMetaHook} for paimon.
 *
 * <p>When a Hive table is created, this hook sets the Paimon input and output formats on the
 * table, lower-cases its database and table names, resolves the table location, and creates a
 * Paimon table schema at that location if none exists there yet. When the creation is rolled
 * back, it deletes the table directory, unless the Paimon table was already present before the
 * Hive table was created.
 */
public class PaimonMetaHook implements HiveMetaHook {

    private static final Logger LOG = LoggerFactory.getLogger(PaimonMetaHook.class);

    private final Configuration conf;

    // Identifiers of paimon tables that already existed when preCreateTable ran for them.
    // rollbackCreateTable must not delete the data of these tables.
    private final Set<Identifier> existingPaimonTable = new HashSet<>();

    public PaimonMetaHook(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Prepares the Hive table and makes sure a Paimon table schema exists at its location.
     *
     * <p>The given {@code table} is mutated: input/output formats are set, database and table
     * names are lower-cased, and the storage descriptor location is filled in when no Paimon
     * location could be extracted from the table.
     *
     * @param table the Hive table about to be created
     * @throws MetaException declared by {@link HiveMetaHook}; failures while obtaining the
     *     {@link FileIO} or creating the schema are rethrown as {@link RuntimeException}
     */
    @Override
    public void preCreateTable(Table table) throws MetaException {

        // hive ql parse cannot recognize input near '$' in table name, no need to add paimon system
        // table verification.

        table.getSd().setInputFormat(PaimonInputFormat.class.getCanonicalName());
        table.getSd().setOutputFormat(PaimonOutputFormat.class.getCanonicalName());
        table.setDbName(table.getDbName().toLowerCase());
        table.setTableName(table.getTableName().toLowerCase());
        String location = LocationKeyExtractor.getPaimonLocation(conf, table);
        Identifier identifier = Identifier.create(table.getDbName(), table.getTableName());
        if (location == null) {
            // No explicit location: derive one under the Hive metastore warehouse directory
            // and record it on the table.
            String warehouse = conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname);
            org.apache.hadoop.fs.Path hadoopPath =
                    getDnsPath(new org.apache.hadoop.fs.Path(warehouse), conf);
            warehouse = hadoopPath.toUri().toString();
            location = AbstractCatalog.newTableLocation(warehouse, identifier).toUri().toString();
            table.getSd().setLocation(location);
        }

        Path path = new Path(location);
        CatalogContext context = catalogContext(table, location);
        FileIO fileIO;
        try {
            fileIO = FileIO.get(path, context);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        SchemaManager schemaManager = new SchemaManager(fileIO, path);
        Optional<TableSchema> tableSchema = schemaManager.latest();
        if (tableSchema.isPresent()) {
            // paimon table already exists; remember it so that a rollback keeps its data
            existingPaimonTable.add(identifier);
            return;
        }

        // create paimon table
        Schema.Builder schemaBuilder =
                Schema.newBuilder().comment(table.getParameters().get(COMMENT_PROP));
        addColumns(schemaBuilder, table.getSd().getCols());

        // partition columns
        if (table.getPartitionKeysSize() > 0) {
            // set metastore.partitioned-table = true
            context.options().set(METASTORE_PARTITIONED_TABLE, true);

            List<FieldSchema> partitionFields = table.getPartitionKeys();
            addColumns(schemaBuilder, partitionFields);

            List<String> partitionKeys =
                    partitionFields.stream()
                            .map(FieldSchema::getName)
                            .map(String::toLowerCase)
                            .collect(Collectors.toList());
            schemaBuilder.partitionKeys(partitionKeys);
        }
        schemaBuilder.options(context.options().toMap());

        try {
            schemaManager.createTable(schemaBuilder.build());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes the table directory created by {@link #preCreateTable(Table)}.
     *
     * <p>Nothing is deleted when the Paimon table already existed before the Hive table was
     * created. I/O failures are logged and not propagated.
     *
     * @param table the Hive table whose creation is being rolled back
     */
    @Override
    public void rollbackCreateTable(Table table) throws MetaException {
        Identifier identifier = Identifier.create(table.getDbName(), table.getTableName());
        if (existingPaimonTable.contains(identifier)) {
            return;
        }

        // we have created a paimon table, so we delete it to roll back;
        String location = LocationKeyExtractor.getPaimonLocation(conf, table);
        if (location == null) {
            // Without a location there is nothing we can delete; do not fail the rollback.
            LOG.warn(
                    "Cannot determine the location of paimon table {}.{}, skip deleting its directory.",
                    table.getDbName(),
                    table.getTableName());
            return;
        }

        Path path = new Path(location);
        CatalogContext context = catalogContext(table, location);
        try {
            FileIO fileIO = FileIO.get(path, context);
            if (fileIO.exists(path)) {
                fileIO.deleteDirectoryQuietly(path);
            }
        } catch (IOException e) {
            LOG.error("Delete directory [{}] fail for the paimon table.", path, e);
        }
    }

    @Override
    public void commitCreateTable(Table table) throws MetaException {}

    @Override
    public void preDropTable(Table table) throws MetaException {}

    @Override
    public void rollbackDropTable(Table table) throws MetaException {}

    @Override
    public void commitDropTable(Table table, boolean b) throws MetaException {}

    /** Adds one paimon column per Hive field, using the lower-cased field name. */
    private static void addColumns(Schema.Builder schemaBuilder, List<FieldSchema> fields) {
        for (FieldSchema field : fields) {
            schemaBuilder.column(
                    field.getName().toLowerCase(),
                    toPaimonType(field.getType()),
                    field.getComment());
        }
    }

    /**
     * Builds the {@link CatalogContext} for the given table.
     *
     * <p>Options start from {@link HiveUtils#extractCatalogConfig(Configuration)}, then {@link
     * CoreOptions#PATH} is set to {@code location}, and finally every table parameter is set on
     * top.
     */
    private CatalogContext catalogContext(Table table, String location) {
        Options options = HiveUtils.extractCatalogConfig(conf);
        options.set(CoreOptions.PATH, location);
        table.getParameters().forEach(options::set);
        return CatalogContext.create(options, conf);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy