com.facebook.presto.iceberg.HiveTableOperations (presto-iceberg: Presto - Iceberg Connector)
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg;
import com.facebook.airlift.log.Logger;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.hive.metastore.HivePrivilegeInfo;
import com.facebook.presto.hive.metastore.MetastoreContext;
import com.facebook.presto.hive.metastore.PartitionStatistics;
import com.facebook.presto.hive.metastore.PrestoTableType;
import com.facebook.presto.hive.metastore.PrincipalPrivileges;
import com.facebook.presto.hive.metastore.StorageFormat;
import com.facebook.presto.hive.metastore.Table;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.security.PrestoPrincipal;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Sets;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.iceberg.LocationProviders;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableMetadata.MetadataLogEntry;
import org.apache.iceberg.TableMetadataParser;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.LocationProvider;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.util.Tasks;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.DELETE;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.INSERT;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.SELECT;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.UPDATE;
import static com.facebook.presto.hive.metastore.MetastoreUtil.TABLE_COMMENT;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isPrestoView;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA;
import static com.facebook.presto.iceberg.IcebergUtil.isIcebergTable;
import static com.facebook.presto.iceberg.IcebergUtil.toHiveColumns;
import static com.facebook.presto.spi.security.PrincipalType.USER;
import static com.google.common.base.Preconditions.checkState;
import static java.lang.Integer.parseInt;
import static java.lang.String.format;
import static java.util.Collections.emptyList;
import static java.util.Objects.requireNonNull;
import static java.util.UUID.randomUUID;
import static org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE;
import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP;
import static org.apache.iceberg.TableMetadataParser.getFileExtension;
import static org.apache.iceberg.TableProperties.METADATA_COMPRESSION;
import static org.apache.iceberg.TableProperties.METADATA_COMPRESSION_DEFAULT;
import static org.apache.iceberg.TableProperties.WRITE_METADATA_LOCATION;
@NotThreadSafe
public class HiveTableOperations
implements TableOperations
{
private static final Logger log = Logger.get(HiveTableOperations.class);
public static final String METADATA_LOCATION = "metadata_location";
public static final String PREVIOUS_METADATA_LOCATION = "previous_metadata_location";
private static final String METADATA_FOLDER_NAME = "metadata";
public static final StorageFormat STORAGE_FORMAT = StorageFormat.create(
LazySimpleSerDe.class.getName(),
FileInputFormat.class.getName(),
FileOutputFormat.class.getName());
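    // Note: this storage format is effectively a placeholder. FileInputFormat and FileOutputFormat are
    // abstract Hadoop classes, so Hive itself cannot read the table through this descriptor; the actual
    // file formats live in Iceberg's own metadata, and engines are expected to resolve them from there.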
private final ExtendedHiveMetastore metastore;
private final MetastoreContext metastoreContext;
private final String database;
private final String tableName;
    private final Optional<String> owner;
    private final Optional<String> location;
private final FileIO fileIO;
private final IcebergHiveTableOperationsConfig config;
    private TableMetadata currentMetadata;
    private String currentMetadataLocation;
    private boolean shouldRefresh = true;
    private int version = -1; // stays -1 until a versioned metadata file has been loaded

    private static LoadingCache<String, ReentrantLock> commitLockCache;
public HiveTableOperations(
ExtendedHiveMetastore metastore,
MetastoreContext metastoreContext,
HdfsEnvironment hdfsEnvironment,
HdfsContext hdfsContext,
IcebergHiveTableOperationsConfig config,
String database,
String table)
{
this(new HdfsFileIO(hdfsEnvironment, hdfsContext),
metastore,
metastoreContext,
config,
database,
table,
Optional.empty(),
Optional.empty());
}
public HiveTableOperations(
ExtendedHiveMetastore metastore,
MetastoreContext metastoreContext,
HdfsEnvironment hdfsEnvironment,
HdfsContext hdfsContext,
IcebergHiveTableOperationsConfig config,
String database,
String table,
String owner,
String location)
{
this(new HdfsFileIO(hdfsEnvironment, hdfsContext),
metastore,
metastoreContext,
config,
database,
table,
Optional.of(requireNonNull(owner, "owner is null")),
Optional.of(requireNonNull(location, "location is null")));
}
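    // Hypothetical usage sketch (metastore, metastoreContext, hdfsEnvironment, hdfsContext, and config
    // are assumed to come from the connector's wiring; the schema and table names are illustrative):
    //
    //   TableOperations operations = new HiveTableOperations(
    //           metastore, metastoreContext, hdfsEnvironment, hdfsContext, config, "tpch", "orders");
    //   TableMetadata metadata = operations.current(); // first call triggers refresh() against the metastore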
private HiveTableOperations(
FileIO fileIO,
ExtendedHiveMetastore metastore,
MetastoreContext metastoreContext,
IcebergHiveTableOperationsConfig config,
String database,
String table,
            Optional<String> owner,
            Optional<String> location)
{
this.fileIO = requireNonNull(fileIO, "fileIO is null");
this.metastore = requireNonNull(metastore, "metastore is null");
this.metastoreContext = requireNonNull(metastoreContext, "metastore context is null");
this.database = requireNonNull(database, "database is null");
this.tableName = requireNonNull(table, "table is null");
this.owner = requireNonNull(owner, "owner is null");
this.location = requireNonNull(location, "location is null");
this.config = requireNonNull(config, "config is null");
        // TODO: read the lock-cache eviction duration from config
        initTableLevelLockCache(TimeUnit.MINUTES.toMillis(10));
}
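    /**
     * Lazily initializes the process-wide cache of per-table commit locks. Locks are keyed by
     * "database.table" and evicted after the given timeout of inactivity, so idle tables do not
     * pin a ReentrantLock forever.
     */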
private static synchronized void initTableLevelLockCache(long evictionTimeout)
{
if (commitLockCache == null) {
            commitLockCache = CacheBuilder.newBuilder()
                    .expireAfterAccess(evictionTimeout, TimeUnit.MILLISECONDS)
                    .build(new CacheLoader<String, ReentrantLock>()
                    {
                        @Override
                        public ReentrantLock load(String fullName)
                        {
                            return new ReentrantLock();
                        }
                    });
}
}
@Override
public TableMetadata current()
{
if (shouldRefresh) {
return refresh();
}
return currentMetadata;
}
@Override
public TableMetadata refresh()
{
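        // A constructor-supplied location means the table is being created and does not exist in the
        // metastore yet, so there is no stored metadata location to look up.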
if (location.isPresent()) {
refreshFromMetadataLocation(null);
return currentMetadata;
}
Table table = getTable();
if (!isIcebergTable(table)) {
throw new UnknownTableTypeException(getSchemaTableName());
}
if (isPrestoView(table)) {
throw new TableNotFoundException(new SchemaTableName(database, tableName));
}
String metadataLocation = table.getParameters().get(METADATA_LOCATION);
if (metadataLocation == null) {
throw new PrestoException(ICEBERG_INVALID_METADATA, format("Table is missing [%s] property: %s", METADATA_LOCATION, getSchemaTableName()));
}
refreshFromMetadataLocation(metadataLocation);
return currentMetadata;
}
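    /**
     * Commits by writing a new metadata file and then atomically swapping the table's
     * {@code metadata_location} property in the Hive metastore. Concurrency control is two-layered:
     * a JVM-level ReentrantLock (from commitLockCache) serializes committers within this process,
     * and a metastore lock guards against other writers. If the stored metadata location no longer
     * matches what this operation last read, the commit fails with CommitFailedException and the
     * newly written metadata file is deleted.
     */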
@Override
public void commit(@Nullable TableMetadata base, TableMetadata metadata)
{
requireNonNull(metadata, "metadata is null");
// if the metadata is already out of date, reject it
if (!Objects.equals(base, current())) {
throw new CommitFailedException("Cannot commit: stale table metadata for %s", getSchemaTableName());
}
// if the metadata is not changed, return early
if (Objects.equals(base, metadata)) {
return;
}
String newMetadataLocation = writeNewMetadata(metadata, version + 1);
Table table;
        // Take a process-level lock per table to avoid concurrent commit attempts to the same table from
        // the same JVM process, which would otherwise trigger unnecessary and costly HMS lock acquisitions
        Optional<Long> lockId = Optional.empty();
        ReentrantLock tableLevelMutex = commitLockCache.getUnchecked(database + "." + tableName);
tableLevelMutex.lock();
try {
try {
lockId = metastore.lock(metastoreContext, database, tableName);
if (base == null) {
String tableComment = metadata.properties().get(TABLE_COMMENT);
                    Map<String, String> parameters = new HashMap<>();
parameters.put("EXTERNAL", "TRUE");
parameters.put(TABLE_TYPE_PROP, ICEBERG_TABLE_TYPE_VALUE);
parameters.put(METADATA_LOCATION, newMetadataLocation);
if (tableComment != null) {
parameters.put(TABLE_COMMENT, tableComment);
}
Table.Builder builder = Table.builder()
.setDatabaseName(database)
.setTableName(tableName)
.setOwner(owner.orElseThrow(() -> new IllegalStateException("Owner not set")))
.setTableType(PrestoTableType.EXTERNAL_TABLE)
.setDataColumns(toHiveColumns(metadata.schema().columns()))
.withStorage(storage -> storage.setLocation(metadata.location()))
.withStorage(storage -> storage.setStorageFormat(STORAGE_FORMAT))
.setParameters(parameters);
table = builder.build();
}
else {
Table currentTable = getTable();
checkState(currentMetadataLocation != null, "No current metadata location for existing table");
String metadataLocation = currentTable.getParameters().get(METADATA_LOCATION);
if (!currentMetadataLocation.equals(metadataLocation)) {
throw new CommitFailedException("Metadata location [%s] is not same as table metadata location [%s] for %s", currentMetadataLocation, metadataLocation, getSchemaTableName());
}
table = Table.builder(currentTable)
.setDataColumns(toHiveColumns(metadata.schema().columns()))
.withStorage(storage -> storage.setLocation(metadata.location()))
.setParameter(METADATA_LOCATION, newMetadataLocation)
.setParameter(PREVIOUS_METADATA_LOCATION, currentMetadataLocation)
.build();
}
}
catch (RuntimeException e) {
try {
io().deleteFile(newMetadataLocation);
}
catch (RuntimeException exception) {
e.addSuppressed(exception);
}
throw e;
}
PrestoPrincipal owner = new PrestoPrincipal(USER, table.getOwner());
            PrincipalPrivileges privileges = new PrincipalPrivileges(
                    ImmutableMultimap.<String, HivePrivilegeInfo>builder()
                            .put(table.getOwner(), new HivePrivilegeInfo(SELECT, true, owner, owner))
                            .put(table.getOwner(), new HivePrivilegeInfo(INSERT, true, owner, owner))
                            .put(table.getOwner(), new HivePrivilegeInfo(UPDATE, true, owner, owner))
                            .put(table.getOwner(), new HivePrivilegeInfo(DELETE, true, owner, owner))
                            .build(),
                    ImmutableMultimap.of());
if (base == null) {
metastore.createTable(metastoreContext, table, privileges, emptyList());
}
else {
PartitionStatistics tableStats = metastore.getTableStatistics(metastoreContext, database, tableName);
metastore.replaceTable(metastoreContext, database, tableName, table, privileges);
// attempt to put back previous table statistics
metastore.updateTableStatistics(metastoreContext, database, tableName, oldStats -> tableStats);
}
deleteRemovedMetadataFiles(base, metadata);
}
finally {
shouldRefresh = true;
try {
lockId.ifPresent(id -> metastore.unlock(metastoreContext, id));
}
catch (Exception e) {
log.error(e, "Failed to unlock: %s", lockId.orElse(null));
}
finally {
tableLevelMutex.unlock();
}
}
}
@Override
public FileIO io()
{
return fileIO;
}
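    /**
     * Resolves where a metadata file with the given name should live: the {@code write.metadata.path}
     * table property (WRITE_METADATA_LOCATION) wins if set; otherwise the file goes under "metadata/"
     * beneath the table location. For brand-new tables with no metadata yet, the constructor-supplied
     * location is used.
     */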
@Override
public String metadataFileLocation(String filename)
{
TableMetadata metadata = current();
String location;
if (metadata != null) {
String writeLocation = metadata.properties().get(WRITE_METADATA_LOCATION);
if (writeLocation != null) {
return format("%s/%s", writeLocation, filename);
}
location = metadata.location();
}
else {
location = this.location.orElseThrow(() -> new IllegalStateException("Location not set"));
}
return format("%s/%s/%s", location, METADATA_FOLDER_NAME, filename);
}
@Override
public LocationProvider locationProvider()
{
TableMetadata metadata = current();
return LocationProviders.locationsFor(metadata.location(), metadata.properties());
}
private Table getTable()
{
return metastore.getTable(metastoreContext, database, tableName)
.orElseThrow(() -> new TableNotFoundException(getSchemaTableName()));
}
private SchemaTableName getSchemaTableName()
{
return new SchemaTableName(database, tableName);
}
private String writeNewMetadata(TableMetadata metadata, int newVersion)
{
String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
OutputFile newMetadataLocation = fileIO.newOutputFile(newTableMetadataFilePath);
// write the new metadata
TableMetadataParser.write(metadata, newMetadataLocation);
return newTableMetadataFilePath;
}
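    /**
     * Reloads table metadata from the given location, retrying transient read failures with the
     * exponential backoff configured in IcebergHiveTableOperationsConfig. Also verifies that the
     * table UUID has not changed across refreshes, which would indicate the table was dropped and
     * recreated underneath us.
     */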
private void refreshFromMetadataLocation(String newLocation)
{
// use null-safe equality check because new tables have a null metadata location
if (Objects.equals(currentMetadataLocation, newLocation)) {
shouldRefresh = false;
return;
}
        AtomicReference<TableMetadata> newMetadata = new AtomicReference<>();
try {
Tasks.foreach(newLocation)
.retry(config.getTableRefreshRetries())
.shouldRetryTest(this::shouldRetry)
.exponentialBackoff(
config.getTableRefreshBackoffMinSleepTime().toMillis(),
config.getTableRefreshBackoffMaxSleepTime().toMillis(),
config.getTableRefreshMaxRetryTime().toMillis(),
config.getTableRefreshBackoffScaleFactor())
.run(metadataLocation -> newMetadata.set(
TableMetadataParser.read(fileIO, io().newInputFile(metadataLocation))));
}
catch (RuntimeException e) {
throw new TableNotFoundException(getSchemaTableName(), "Table metadata is missing", e);
}
if (newMetadata.get() == null) {
throw new TableNotFoundException(getSchemaTableName(), "failed to retrieve table metadata from " + newLocation);
}
String newUUID = newMetadata.get().uuid();
if (currentMetadata != null) {
checkState(newUUID == null || newUUID.equals(currentMetadata.uuid()),
"Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
}
currentMetadata = newMetadata.get();
currentMetadataLocation = newLocation;
version = parseVersion(newLocation);
shouldRefresh = false;
}
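    // A FileNotFoundException cause means the metadata file is permanently gone (e.g. already replaced
    // or cleaned up), so retrying cannot help; fail fast instead.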
private boolean shouldRetry(Exception exception)
{
return !(exception.getCause() instanceof FileNotFoundException);
}
private static String newTableMetadataFilePath(TableMetadata meta, int newVersion)
{
String codec = meta.property(METADATA_COMPRESSION, METADATA_COMPRESSION_DEFAULT);
return metadataFileLocation(meta, format("%05d-%s%s", newVersion, randomUUID(), getFileExtension(codec)));
}
private static String metadataFileLocation(TableMetadata metadata, String filename)
{
String location = metadata.properties().get(WRITE_METADATA_LOCATION);
if (location != null) {
return format("%s/%s", location, filename);
}
return format("%s/%s/%s", metadata.location(), METADATA_FOLDER_NAME, filename);
}
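    // Metadata file names produced above follow the "%05d-%s%s" pattern, e.g.
    // "00042-<uuid>.metadata.json", so the version is the digits between the last '/' and the first '-'
    // after it; -1 is returned when the name does not match that shape.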
private static int parseVersion(String metadataLocation)
{
int versionStart = metadataLocation.lastIndexOf('/') + 1; // if '/' isn't found, this will be 0
int versionEnd = metadataLocation.indexOf('-', versionStart);
try {
return parseInt(metadataLocation.substring(versionStart, versionEnd));
}
catch (NumberFormatException | IndexOutOfBoundsException e) {
log.warn(e, "Unable to parse version from metadata location: %s", metadataLocation);
return -1;
}
}
    /**
     * Deletes metadata files that are no longer needed, except for the most recent ones
     * specified by {@code TableProperties.METADATA_PREVIOUS_VERSIONS_MAX}.
     *
     * @param base the base TableMetadata
     * @param metadata the current TableMetadata
     */
private void deleteRemovedMetadataFiles(TableMetadata base, TableMetadata metadata)
{
if (base == null) {
return;
}
boolean deleteAfterCommit =
metadata.propertyAsBoolean(
TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT);
if (deleteAfterCommit) {
            Set<MetadataLogEntry> metadataFilesToRemove = Sets.newHashSet(base.previousFiles());
            // TableMetadata#addPreviousFile builds up the metadata log and uses
            // TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many files should stay in
            // the log, so we exclude metadata.previousFiles() from deletion; everything else can be removed
            metadataFilesToRemove.removeAll(metadata.previousFiles());
Tasks.foreach(metadataFilesToRemove)
.noRetry()
.suppressFailureWhenFinished()
.onFailure((previousMetadataFile, exc) ->
log.warn("Delete failed for previous metadata file: %s", previousMetadataFile, exc))
.run(previousMetadataFile -> io().deleteFile(previousMetadataFile.file()));
}
}
}