/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.ConsistencyGuard;
import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility;
import org.apache.hudi.common.fs.ConsistencyGuardConfig;
import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
import org.apache.hudi.common.fs.OptimisticConsistencyGuard;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieInsertException;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Abstract implementation of a HoodieTable.
*
* @param <T> Sub type of HoodieRecordPayload
* @param <I> Type of inputs
* @param <K> Type of keys
* @param <O> Type of outputs
*/
public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implements Serializable {
private static final Logger LOG = LogManager.getLogger(HoodieTable.class);
protected final HoodieWriteConfig config;
protected final HoodieTableMetaClient metaClient;
protected final HoodieIndex<T, I, K, O> index;
private SerializableConfiguration hadoopConfiguration;
protected final TaskContextSupplier taskContextSupplier;
private final HoodieTableMetadata metadata;
private transient FileSystemViewManager viewManager;
protected final transient HoodieEngineContext context;
protected HoodieTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
this.config = config;
this.hadoopConfiguration = context.getHadoopConf();
this.context = context;
HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().fromProperties(config.getMetadataConfig().getProps())
.build();
this.metadata = HoodieTableMetadata.create(context, metadataConfig, config.getBasePath(),
FileSystemViewStorageConfig.SPILLABLE_DIR.defaultValue());
this.viewManager = FileSystemViewManager.createViewManager(context, config.getMetadataConfig(), config.getViewStorageConfig(), config.getCommonConfig(), () -> metadata);
this.metaClient = metaClient;
this.index = getIndex(config, context);
this.taskContextSupplier = context.getTaskContextSupplier();
}
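// Usage sketch (illustrative): concrete tables are obtained through engine-specific
// factories rather than through this constructor directly; e.g. the Spark client
// module exposes a factory along these lines (names assumed from that module):
//
//   HoodieTable table = HoodieSparkTable.create(writeConfig, engineContext);
//
// Subclasses invoke this constructor and supply the engine-specific index via getIndex().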
protected abstract HoodieIndex<T, I, K, O> getIndex(HoodieWriteConfig config, HoodieEngineContext context);
private synchronized FileSystemViewManager getViewManager() {
if (null == viewManager) {
viewManager = FileSystemViewManager.createViewManager(getContext(), config.getMetadataConfig(), config.getViewStorageConfig(), config.getCommonConfig(), () -> metadata);
}
return viewManager;
}
/**
* Upsert a batch of new records into Hoodie table at the supplied instantTime.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param records hoodieRecords to upsert
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> upsert(HoodieEngineContext context, String instantTime,
I records);
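// Usage sketch for the write path (illustrative; "taggedRecords" is a hypothetical
// input of the engine-specific type I):
//
//   HoodieWriteMetadata<O> result = table.upsert(engineContext, instantTime, taggedRecords);
//   result.getWriteStats().ifPresent(stats -> stats.forEach(s -> LOG.info(s.getPath())));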
/**
* Insert a batch of new records into Hoodie table at the supplied instantTime.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param records hoodieRecords to insert
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> insert(HoodieEngineContext context, String instantTime,
I records);
/**
* Bulk Insert a batch of new records into Hoodie table at the supplied instantTime.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param records hoodieRecords to bulk insert
* @param bulkInsertPartitioner User Defined Partitioner
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> bulkInsert(HoodieEngineContext context, String instantTime,
I records, Option<BulkInsertPartitioner<I>> bulkInsertPartitioner);
/**
* Deletes a list of {@link HoodieKey}s from the Hoodie table at the supplied instantTime. {@link HoodieKey}s will be
* de-duped and non-existent keys will be removed before deleting.
*
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param keys {@link List} of {@link HoodieKey}s to be deleted
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> delete(HoodieEngineContext context, String instantTime, K keys);
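// Usage sketch for delete (illustrative; for the Spark engine K is JavaRDD<HoodieKey>,
// assembled from record key / partition path pairs):
//
//   HoodieKey key = new HoodieKey("recordKey1", "2021/01/01");
//   HoodieWriteMetadata<O> deleted = table.delete(engineContext, instantTime, keys);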
/**
* Deletes all data in the given partitions.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param partitions {@link List} of partitions to be deleted
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> deletePartitions(HoodieEngineContext context, String instantTime, List<String> partitions);
/**
* Upserts the given prepared records into the Hoodie table, at the supplied instantTime.
*
* This implementation requires that the input records are already tagged, and de-duped if needed.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param preppedRecords hoodieRecords to upsert
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> upsertPrepped(HoodieEngineContext context, String instantTime,
I preppedRecords);
/**
* Inserts the given prepared records into the Hoodie table, at the supplied instantTime.
*
* This implementation requires that the input records are already tagged, and de-duped if needed.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param preppedRecords hoodieRecords to insert
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> insertPrepped(HoodieEngineContext context, String instantTime,
I preppedRecords);
/**
* Bulk Insert the given prepared records into the Hoodie table, at the supplied instantTime.
*
* This implementation requires that the input records are already tagged, and de-duped if needed.
* @param context HoodieEngineContext
* @param instantTime Instant Time for the action
* @param preppedRecords hoodieRecords to bulk insert
* @param bulkInsertPartitioner User Defined Partitioner
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> bulkInsertPrepped(HoodieEngineContext context, String instantTime,
I preppedRecords, Option<BulkInsertPartitioner<I>> bulkInsertPartitioner);
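// The "prepped" variants above skip index lookup and de-duplication: callers are
// expected to have already tagged incoming records with their file locations
// (e.g. via HoodieIndex#tagLocation in the engine client). Sketch (illustrative):
//
//   HoodieWriteMetadata<O> result = table.upsertPrepped(engineContext, instantTime, preppedRecords);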
/**
* Replaces all the existing records and inserts the specified new records into Hoodie table at the supplied instantTime,
* for the partition paths contained in input records.
*
* @param context HoodieEngineContext
* @param instantTime Instant time for the replace action
* @param records input records
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> insertOverwrite(HoodieEngineContext context, String instantTime, I records);
/**
* Deletes all the existing records of the Hoodie table and inserts the specified new records into the Hoodie table at the supplied instantTime,
* for the partition paths contained in input records.
*
* @param context HoodieEngineContext
* @param instantTime Instant time for the replace action
* @param records input records
* @return HoodieWriteMetadata
*/
public abstract HoodieWriteMetadata<O> insertOverwriteTable(HoodieEngineContext context, String instantTime, I records);
/**
* Updates Metadata Indexes (like Z-Index)
* TODO rebase onto metadata table (post RFC-27)
*
* @param context instance of {@link HoodieEngineContext}
* @param stats {@link List} of {@link HoodieWriteStat} produced by the committed operation
* @param instantTime instant of the carried operation triggering the update
*/
public abstract void updateMetadataIndexes(
@Nonnull HoodieEngineContext context,
@Nonnull List<HoodieWriteStat> stats,
@Nonnull String instantTime
) throws Exception;
public HoodieWriteConfig getConfig() {
return config;
}
public HoodieTableMetaClient getMetaClient() {
return metaClient;
}
public Configuration getHadoopConf() {
return metaClient.getHadoopConf();
}
/**
* Get the view of the file system for this table.
*/
public TableFileSystemView getFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitsTimeline());
}
/**
* Get the base file only view of the file system for this table.
*/
public BaseFileOnlyView getBaseFileOnlyView() {
return getViewManager().getFileSystemView(metaClient);
}
/**
* Get the full view of the file system for this table.
*/
public SliceView getSliceView() {
return getViewManager().getFileSystemView(metaClient);
}
/**
* Get complete view of the file system for this table with ability to force sync.
*/
public SyncableFileSystemView getHoodieView() {
return getViewManager().getFileSystemView(metaClient);
}
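// Usage sketch: listing the latest base files of a partition through the
// base-file-only view ("partitionPath" is a hypothetical relative partition path):
//
//   table.getBaseFileOnlyView()
//        .getLatestBaseFiles(partitionPath)
//        .forEach(baseFile -> LOG.info("latest base file: " + baseFile.getPath()));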
/**
* Get only the completed (no-inflights) commit + deltacommit timeline.
*/
public HoodieTimeline getCompletedCommitsTimeline() {
return metaClient.getCommitsTimeline().filterCompletedInstants();
}
/**
* Get only the completed (no-inflights) commit timeline.
*/
public HoodieTimeline getCompletedCommitTimeline() {
return metaClient.getCommitTimeline().filterCompletedInstants();
}
/**
* Get only the inflight (not yet completed) commit timeline.
*/
public HoodieTimeline getPendingCommitTimeline() {
return metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
}
/**
* Get only the completed (no-inflights) clean timeline.
*/
public HoodieTimeline getCompletedCleanTimeline() {
return getActiveTimeline().getCleanerTimeline().filterCompletedInstants();
}
/**
* Get clean timeline.
*/
public HoodieTimeline getCleanTimeline() {
return getActiveTimeline().getCleanerTimeline();
}
/**
* Get rollback timeline.
*/
public HoodieTimeline getRollbackTimeline() {
return getActiveTimeline().getRollbackTimeline();
}
/**
* Get only the completed (no-inflights) savepoint timeline.
*/
public HoodieTimeline getCompletedSavepointTimeline() {
return getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
}
/**
* Get the list of savepoints in this table.
*/
public List<String> getSavepoints() {
return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
}
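// Usage sketch: savepointed instants are protected from cleaning and rollback;
// a quick membership check (illustrative):
//
//   boolean savepointed = table.getSavepoints().contains(commitTime);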
public HoodieActiveTimeline getActiveTimeline() {
return metaClient.getActiveTimeline();
}
/**
* Return the index.
*/
public HoodieIndex getIndex() {
return index;
}
/**
* Schedule compaction for the instant time.
*
* @param context HoodieEngineContext
* @param instantTime Instant Time for scheduling compaction
* @param extraMetadata additional metadata to write into plan
* @return the generated compaction plan, or {@code Option.empty()} if no compaction was scheduled
*/
public abstract Option<HoodieCompactionPlan> scheduleCompaction(HoodieEngineContext context,
String instantTime,
Option<Map<String, String>> extraMetadata);
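// Usage sketch (merge-on-read tables; an empty Option means no compaction was
// scheduled for this instant):
//
//   Option<HoodieCompactionPlan> plan =
//       table.scheduleCompaction(engineContext, instantTime, Option.empty());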