/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netease.arctic.hive.utils;

import com.netease.arctic.hive.HMSClient;
import com.netease.arctic.hive.HMSClientPool;
import com.netease.arctic.shade.org.apache.iceberg.ClientPool;
import com.netease.arctic.shade.org.apache.iceberg.DataFile;
import com.netease.arctic.shade.org.apache.iceberg.DataFiles;
import com.netease.arctic.shade.org.apache.iceberg.PartitionSpec;
import com.netease.arctic.shade.org.apache.iceberg.StructLike;
import com.netease.arctic.shade.org.apache.iceberg.exceptions.NoSuchTableException;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;
import com.netease.arctic.shade.org.apache.iceberg.types.Type;
import com.netease.arctic.shade.org.apache.iceberg.types.Types;
import com.netease.arctic.shade.org.apache.thrift.TException;
import com.netease.arctic.table.ArcticTable;
import com.netease.arctic.table.TableIdentifier;
import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.List;
import java.util.stream.Collectors;

public class HivePartitionUtil {

  private static final Logger LOG = LoggerFactory.getLogger(HivePartitionUtil.class);

  /**
   * Converts Iceberg partition data into the list of Hive partition value strings, one per field.
   */
  public static List<String> partitionValuesAsList(StructLike partitionData, Types.StructType partitionSchema) {
    List<Types.NestedField> fields = partitionSchema.fields();
    List<String> values = Lists.newArrayList();
    for (int i = 0; i < fields.size(); i++) {
      Type type = fields.get(i).type();
      Object value = partitionData.get(i, type.typeId().javaClass());
      values.add(value.toString());
    }
    return values;
  }
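
  // Illustrative usage (a sketch; "dataFile" and "spec" are hypothetical caller-side variables):
  //   PartitionSpec spec = ...;   // partition spec of the Iceberg table
  //   DataFile dataFile = ...;    // a file whose partition we want in Hive value form
  //   List<String> hiveValues =
  //       HivePartitionUtil.partitionValuesAsList(dataFile.partition(), spec.partitionType());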

  /**
   * Builds Iceberg partition data from Hive partition values by assembling a
   * "field1=value1/field2=value2" partition path and parsing it with {@link DataFiles#data}.
   */
  public static StructLike buildPartitionData(List<String> partitionValues, PartitionSpec spec) {
    StringBuilder pathBuilder = new StringBuilder();
    for (int i = 0; i < spec.partitionType().fields().size(); i++) {
      Types.NestedField field = spec.partitionType().fields().get(i);
      pathBuilder.append(field.name()).append("=").append(partitionValues.get(i));
      if (i < spec.partitionType().fields().size() - 1) {
        pathBuilder.append("/");
      }
    }
    return DataFiles.data(spec, pathBuilder.toString());
  }
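
  // Illustrative round-trip (a sketch; assumes a hypothetical spec with identity partitions
  // "dt" and "region"):
  //   StructLike data = HivePartitionUtil.buildPartitionData(
  //       Lists.newArrayList("2022-01-01", "cn"), spec);  // parsed from "dt=2022-01-01/region=cn"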

  /**
   * Creates a new Hive {@link Partition} object for the given table, copying the table's
   * storage descriptor and privileges and pointing it at the given location.
   */
  public static Partition newPartition(
      Table hiveTable,
      List<String> values,
      String location,
      List<DataFile> dataFiles,
      int createTimeInSeconds) {
    StorageDescriptor tableSd = hiveTable.getSd();
    PrincipalPrivilegeSet privilegeSet = hiveTable.getPrivileges();
    Partition p = new Partition();
    p.setValues(values);
    p.setDbName(hiveTable.getDbName());
    p.setTableName(hiveTable.getTableName());
    p.setCreateTime(createTimeInSeconds);
    p.setLastAccessTime(createTimeInSeconds);
    StorageDescriptor sd = tableSd.deepCopy();
    sd.setLocation(location);
    p.setSd(sd);

    HiveTableUtil.generateTableProperties(createTimeInSeconds, dataFiles)
        .forEach(p::putToParameters);

    if (privilegeSet != null) {
      p.setPrivileges(privilegeSet.deepCopy());
    }
    return p;
  }
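
  // Illustrative usage (a sketch; "hiveTable", "files" and the location are hypothetical):
  //   Partition p = HivePartitionUtil.newPartition(
  //       hiveTable,
  //       Lists.newArrayList("2022-01-01"),
  //       "hdfs://ns/warehouse/db/tbl/dt=2022-01-01",
  //       files,
  //       (int) (System.currentTimeMillis() / 1000));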

  /**
   * Fetches a Hive partition by its values, returning null if the partition does not exist.
   */
  public static Partition getPartition(
      HMSClientPool hmsClient,
      ArcticTable arcticTable,
      List<String> partitionValues) {
    String db = arcticTable.id().getDatabase();
    String tableName = arcticTable.id().getTableName();

    try {
      return hmsClient.run(client -> client.getPartition(db, tableName, partitionValues));
    } catch (NoSuchObjectException e) {
      return null;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
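
  // Illustrative usage (a sketch): a null result means the partition is absent.
  //   Partition existing = HivePartitionUtil.getPartition(hmsClient, arcticTable, values);
  //   if (existing == null) { /* partition does not exist yet; e.g. create it */ }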

  /**
   * Points an existing Hive partition at a new location and refreshes its parameters.
   */
  public static void rewriteHivePartitions(
      Partition partition, String location, List<DataFile> dataFiles,
      int accessTimestamp) {
    partition.getSd().setLocation(location);
    partition.setLastAccessTime(accessTimestamp);
    HiveTableUtil.generateTableProperties(accessTimestamp, dataFiles)
        .forEach(partition::putToParameters);
  }
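
  // Illustrative usage (a sketch; the caller still persists the partition through the
  // metastore client afterwards):
  //   HivePartitionUtil.rewriteHivePartitions(partition, newLocation, files, accessTime);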

  /**
   * Gets all partition objects of the Hive table.
   *
   * @param hiveClient      Hive client from ArcticHiveCatalog
   * @param tableIdentifier A table identifier
   * @return A List of Hive partition objects
   */
  public static List<Partition> getHiveAllPartitions(HMSClientPool hiveClient, TableIdentifier tableIdentifier) {
    try {
      return hiveClient.run(client ->
          client.listPartitions(tableIdentifier.getDatabase(), tableIdentifier.getTableName(), Short.MAX_VALUE));
    } catch (NoSuchObjectException e) {
      throw new NoSuchTableException(e, "Hive table does not exist: %s", tableIdentifier.getTableName());
    } catch (TException e) {
      throw new RuntimeException("Failed to get partitions " + tableIdentifier.getTableName(), e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted in call to listPartitions", e);
    }
  }

  /**
   * Gets all partition names of the Hive table.
   *
   * @param hiveClient      Hive client from ArcticHiveCatalog
   * @param tableIdentifier A table identifier
   * @return A List of Hive partition names
   */
  public static List<String> getHivePartitionNames(HMSClientPool hiveClient, TableIdentifier tableIdentifier) {
    try {
      return hiveClient.run(client -> client.listPartitionNames(
          tableIdentifier.getDatabase(),
          tableIdentifier.getTableName(),
          Short.MAX_VALUE));
    } catch (NoSuchObjectException e) {
      throw new NoSuchTableException(e, "Hive table does not exist: %s", tableIdentifier.getTableName());
    } catch (TException e) {
      throw new RuntimeException("Failed to get partitions " + tableIdentifier.getTableName(), e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted in call to listPartitionNames", e);
    }
  }

  /**
   * Gets all partition locations of the Hive table.
   *
   * @param hiveClient      Hive client from ArcticHiveCatalog
   * @param tableIdentifier A table identifier
   * @return A List of Hive partition locations
   */
  public static List<String> getHivePartitionLocations(HMSClientPool hiveClient, TableIdentifier tableIdentifier) {
    try {
      return hiveClient.run(client -> client.listPartitions(
              tableIdentifier.getDatabase(),
              tableIdentifier.getTableName(),
              Short.MAX_VALUE))
          .stream()
          .map(partition -> partition.getSd().getLocation())
          .collect(Collectors.toList());
    } catch (NoSuchObjectException e) {
      throw new NoSuchTableException(e, "Hive table does not exist: %s", tableIdentifier.getTableName());
    } catch (TException e) {
      throw new RuntimeException("Failed to get partitions " + tableIdentifier.getTableName(), e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted in call to listPartitions", e);
    }
  }
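
  // Illustrative usage of the three listing helpers above (a sketch; "hiveClient" and
  // "identifier" are hypothetical caller-side variables):
  //   List<Partition> partitions = HivePartitionUtil.getHiveAllPartitions(hiveClient, identifier);
  //   List<String> names = HivePartitionUtil.getHivePartitionNames(hiveClient, identifier);
  //   List<String> locations = HivePartitionUtil.getHivePartitionLocations(hiveClient, identifier);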

  /**
   * Change the Hive partition location.
   *
   * @param hiveClient      Hive client from ArcticHiveCatalog
   * @param tableIdentifier A table identifier
   * @param partition       A Hive partition name
   * @param newPath         Target partition location
   */
  public static void alterPartition(
      HMSClientPool hiveClient, TableIdentifier tableIdentifier,
      String partition, String newPath) throws IOException {
    try {
      LOG.info("alter table {} hive partition {} to new location {}",
          tableIdentifier, partition, newPath);
      Partition oldPartition = hiveClient.run(
          client -> client.getPartition(
              tableIdentifier.getDatabase(),
              tableIdentifier.getTableName(),
              partition));
      Partition newPartition = new Partition(oldPartition);
      newPartition.getSd().setLocation(newPath);
      hiveClient.run((ClientPool.Action<Void, HMSClient, TException>) client -> {
        try {
          client.alterPartition(tableIdentifier.getDatabase(),
              tableIdentifier.getTableName(),
              newPartition, null);
        } catch (ClassNotFoundException | NoSuchMethodException |
                 InvocationTargetException | IllegalAccessException e) {
          throw new RuntimeException(e);
        }
        return null;
      });
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
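
  // Illustrative usage (a sketch; the partition name follows Hive's "k1=v1/k2=v2" form,
  // and the identifier/path values are hypothetical):
  //   HivePartitionUtil.alterPartition(hiveClient, identifier, "dt=2022-01-01",
  //       "hdfs://ns/warehouse/db/tbl/dt=2022-01-01-v2");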

  /**
   * Creates the given Hive partition if it does not already exist; an existing partition is
   * left untouched.
   */
  public static void createPartitionIfAbsent(
      HMSClientPool hmsClient,
      ArcticTable arcticTable,
      List<String> partitionValues,
      String partitionLocation,
      List<DataFile> dataFiles,
      int accessTimestamp) {
    String db = arcticTable.id().getDatabase();
    String tableName = arcticTable.id().getTableName();

    try {
      hmsClient.run(client -> {
        try {
          return client.getPartition(db, tableName, partitionValues);
        } catch (NoSuchObjectException e) {
          Table hiveTable = client.getTable(db, tableName);
          Partition partition = newPartition(hiveTable, partitionValues, partitionLocation,
              dataFiles, accessTimestamp);
          client.addPartition(partition);
          return partition;
        }
      });
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
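
  // Illustrative usage (a sketch; idempotent, so it is safe to call for a partition that
  // may already exist):
  //   HivePartitionUtil.createPartitionIfAbsent(hmsClient, arcticTable,
  //       Lists.newArrayList("2022-01-01"), location, files, accessTime);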

  /**
   * Drops a Hive partition from the metastore without deleting or purging its data files.
   */
  public static void dropPartition(
      HMSClientPool hmsClient,
      ArcticTable arcticTable,
      Partition hivePartition) {
    try {
      hmsClient.run(client -> {
        PartitionDropOptions options = PartitionDropOptions.instance()
            .deleteData(false)
            .ifExists(true)
            .purgeData(false)
            .returnResults(false);
        return client.dropPartition(arcticTable.id().getDatabase(),
            arcticTable.id().getTableName(), hivePartition.getValues(), options);
      });
    } catch (TException | InterruptedException e) {
      throw new RuntimeException(e);
    }
  }
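
  // Illustrative usage (a sketch): removes only the metastore entry; the data files are kept
  // because deleteData(false) and purgeData(false) are set above.
  //   HivePartitionUtil.dropPartition(hmsClient, arcticTable, partition);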

  /**
   * Updates a Hive partition's location by dropping the partition (keeping its data) and
   * re-creating it at the new location. Note that the two metastore calls are not atomic.
   */
  public static void updatePartitionLocation(
      HMSClientPool hmsClient,
      ArcticTable arcticTable,
      Partition hivePartition,
      String newLocation,
      List<DataFile> dataFiles,
      int accessTimestamp) {
    dropPartition(hmsClient, arcticTable, hivePartition);
    createPartitionIfAbsent(hmsClient, arcticTable, hivePartition.getValues(), newLocation, dataFiles, accessTimestamp);
  }
}