/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table;
import org.apache.hudi.avro.AvroSchemaUtils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.common.HoodiePendingRollbackInfo;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.fs.ConsistencyGuard;
import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility;
import org.apache.hudi.common.fs.ConsistencyGuardConfig;
import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
import org.apache.hudi.common.fs.OptimisticConsistencyGuard;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
import org.apache.hudi.common.util.Functions;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieInsertException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.exception.SchemaCompatibilityException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
import org.apache.hudi.table.action.commit.HoodieMergeHelper;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.hudi.table.storage.HoodieLayoutFactory;
import org.apache.hudi.table.storage.HoodieStorageLayout;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeoutException;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.EAGER;
import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.LAZY;
import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS;
import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataPartition;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getMetadataPartitionsNeedingWriteStatusTracking;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.metadataPartitionExists;
/**
 * Abstract implementation of a HoodieTable.
 *
 * @param <T> Sub type of HoodieRecordPayload
 * @param <I> Type of inputs
 * @param <K> Type of keys
 * @param <O> Type of outputs
 */
public abstract class HoodieTable implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(HoodieTable.class);
// Write config driving every action on this table.
protected final HoodieWriteConfig config;
// Meta client giving access to the table's timeline and storage.
protected final HoodieTableMetaClient metaClient;
// NOTE(review): generic type arguments appear stripped here (likely HoodieIndex<?, ?>) — restore before compiling.
protected final HoodieIndex, ?> index;
// NOTE(review): likely StorageConfiguration<?> — generic argument appears stripped.
private final StorageConfiguration> storageConf;
// Supplies task/stage identifiers from the executing engine.
protected final TaskContextSupplier taskContextSupplier;
// Reader over the internal metadata table (file listings, etc.).
private final HoodieTableMetadata metadata;
// Strategy controlling how files are laid out in storage.
private final HoodieStorageLayout storageLayout;
// True when this table instance is itself a metadata table.
private final boolean isMetadataTable;
// Lazily created on access; transient so it is rebuilt after deserialization.
private transient FileSystemViewManager viewManager;
protected final transient HoodieEngineContext context;
/**
 * Constructs the table handle for the given write config and meta client.
 *
 * <p>Eagerly creates the metadata-table reader and the file-system view manager,
 * and resolves the index and storage-layout implementations from the config.
 *
 * @param config     write config used for all operations on this table
 * @param context    engine context; supplies the storage configuration and task context supplier
 * @param metaClient meta client for the table's timeline and storage
 */
protected HoodieTable(HoodieWriteConfig config, HoodieEngineContext context, HoodieTableMetaClient metaClient) {
this.config = config;
this.storageConf = context.getStorageConf();
this.context = context;
// Determined from the base path: metadata tables live under a reserved folder.
this.isMetadataTable = HoodieTableMetadata.isMetadataTable(config.getBasePath());
// Rebuild the metadata config from the write config's props so defaults are re-applied.
HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().fromProperties(config.getMetadataConfig().getProps())
.build();
this.metadata = HoodieTableMetadata.create(context, metaClient.getStorage(), metadataConfig, config.getBasePath());
// NOTE(review): viewManager is initialized before this.metaClient is assigned below;
// getViewManager() must therefore not read this.metaClient — confirm.
this.viewManager = getViewManager();
this.metaClient = metaClient;
this.index = getIndex(config, context);
this.storageLayout = getStorageLayout(config);
this.taskContextSupplier = context.getTaskContextSupplier();
}
/**
 * @return true if this table instance is itself a metadata table (decided from the base path at construction).
 */
public boolean isMetadataTable() {
return isMetadataTable;
}
/**
 * Resolves the engine-specific index implementation used to tag and locate records.
 * NOTE(review): generic arguments appear stripped from the return type (likely HoodieIndex&lt;?, ?&gt;) — confirm.
 *
 * @param config  write config
 * @param context engine context
 * @return the index for this table
 */
protected abstract HoodieIndex, ?> getIndex(HoodieWriteConfig config, HoodieEngineContext context);
/**
 * Resolves the storage layout for this table from the write config via the layout factory.
 *
 * @param config write config
 * @return the storage layout implementation
 */
protected HoodieStorageLayout getStorageLayout(HoodieWriteConfig config) {
return HoodieLayoutFactory.createLayout(config);
}
/**
 * Lazily creates and caches the {@link FileSystemViewManager} for this table.
 *
 * <p>Synchronized so that concurrent callers observe a single instance; the field is
 * transient, so this re-creates the manager after deserialization as well.
 *
 * @return the (possibly freshly created) view manager
 */
private synchronized FileSystemViewManager getViewManager() {
  if (viewManager == null) {
    viewManager = FileSystemViewManager.createViewManager(
        getContext(), config.getViewStorageConfig(), config.getCommonConfig(), unused -> metadata);
  }
  return viewManager;
}
/**
 * Upserts a batch of new records into the Hoodie table at the supplied instantTime.
 * NOTE(review): return type likely HoodieWriteMetadata&lt;O&gt; — generic argument appears stripped.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the action
 * @param records     records to upsert
 * @return write metadata describing the result of the upsert
 */
public abstract HoodieWriteMetadata upsert(HoodieEngineContext context, String instantTime, I records);
/**
 * Inserts a batch of new records into the Hoodie table at the supplied instantTime.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the action
 * @param records     records to insert
 * @return write metadata describing the result of the insert
 */
public abstract HoodieWriteMetadata insert(HoodieEngineContext context, String instantTime, I records);
/**
 * Bulk-inserts a batch of new records into the Hoodie table at the supplied instantTime.
 * NOTE(review): the partitioner parameter likely reads Option&lt;BulkInsertPartitioner&lt;I&gt;&gt; — generics appear stripped.
 *
 * @param context               HoodieEngineContext
 * @param instantTime           instant time for the action
 * @param records               records to bulk-insert
 * @param bulkInsertPartitioner user-defined partitioner, if any
 * @return write metadata describing the result of the bulk insert
 */
public abstract HoodieWriteMetadata bulkInsert(HoodieEngineContext context, String instantTime,
I records, Option bulkInsertPartitioner);
/**
 * Deletes a list of {@link HoodieKey}s from the Hoodie table at the supplied instantTime.
 * Keys will be de-duped and non-existent keys will be removed before deleting.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the action
 * @param keys        {@link List} of {@link HoodieKey}s to be deleted
 * @return write metadata describing the result of the delete
 */
public abstract HoodieWriteMetadata delete(HoodieEngineContext context, String instantTime, K keys);
/**
 * Deletes records from the Hoodie table based on the {@link HoodieKey} and
 * {@link org.apache.hudi.common.model.HoodieRecordLocation} already set on preppedRecords.
 *
 * @param context        {@link HoodieEngineContext}
 * @param instantTime    instant time for the action
 * @param preppedRecords empty records with key and location already set
 * @return write metadata describing the result of the delete
 */
public abstract HoodieWriteMetadata deletePrepped(HoodieEngineContext context, String instantTime, I preppedRecords);
/**
 * Deletes all data in the given partitions.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the action
 * @param partitions  {@link List} of partitions to be deleted
 * @return write metadata describing the result of the partition delete
 */
public abstract HoodieWriteMetadata deletePartitions(HoodieEngineContext context, String instantTime, List partitions);
/**
 * Upserts the given prepared records into the Hoodie table at the supplied instantTime.
 *
 * <p>Requires that the input records are already tagged, and de-duped if needed.
 *
 * @param context        HoodieEngineContext
 * @param instantTime    instant time for the action
 * @param preppedRecords prepared records to upsert
 * @return write metadata describing the result of the upsert
 */
public abstract HoodieWriteMetadata upsertPrepped(HoodieEngineContext context, String instantTime,
I preppedRecords);
/**
 * Inserts the given prepared records into the Hoodie table at the supplied instantTime.
 *
 * <p>Requires that the input records are already tagged, and de-duped if needed.
 *
 * @param context        HoodieEngineContext
 * @param instantTime    instant time for the action
 * @param preppedRecords prepared records to insert
 * @return write metadata describing the result of the insert
 */
public abstract HoodieWriteMetadata insertPrepped(HoodieEngineContext context, String instantTime,
I preppedRecords);
/**
 * Bulk-inserts the given prepared records into the Hoodie table at the supplied instantTime.
 *
 * <p>Requires that the input records are already tagged, and de-duped if needed.
 *
 * @param context               HoodieEngineContext
 * @param instantTime           instant time for the action
 * @param preppedRecords        prepared records to bulk-insert
 * @param bulkInsertPartitioner user-defined partitioner, if any
 * @return write metadata describing the result of the bulk insert
 */
public abstract HoodieWriteMetadata bulkInsertPrepped(HoodieEngineContext context, String instantTime,
I preppedRecords, Option bulkInsertPartitioner);
/**
 * Replaces all the existing records and inserts the specified new records into the Hoodie table
 * at the supplied instantTime, for the partition paths contained in the input records.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the replace action
 * @param records     input records
 * @return write metadata describing the result of the replace
 */
public abstract HoodieWriteMetadata insertOverwrite(HoodieEngineContext context, String instantTime, I records);
/**
 * Deletes all existing records of the Hoodie table and inserts the specified new records
 * at the supplied instantTime. Unlike {@code insertOverwrite}, this replaces the whole table,
 * not only the partition paths contained in the input records.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the replace action
 * @param records     input records
 * @return write metadata describing the result of the replace
 */
public abstract HoodieWriteMetadata insertOverwriteTable(HoodieEngineContext context, String instantTime, I records);
/**
 * Deletes expired partitions according to the table's partition-TTL configuration.
 *
 * @param context     HoodieEngineContext
 * @param instantTime instant time for the action
 * @return write metadata describing the result of the TTL management
 */
public abstract HoodieWriteMetadata managePartitionTTL(HoodieEngineContext context, String instantTime);
/**
 * @return the write config for this table.
 */
public HoodieWriteConfig getConfig() {
return config;
}
/**
 * @return the meta client giving access to this table's timeline and storage.
 */
public HoodieTableMetaClient getMetaClient() {
return metaClient;
}
/**
 * @return true if the table is physically partitioned, based on the partition fields stored in the table config.
 */
public boolean isPartitioned() {
return getMetaClient().getTableConfig().isTablePartitioned();
}
/**
 * @return the storage configuration from the meta client.
 * NOTE(review): return type likely StorageConfiguration&lt;?&gt; — generic argument appears stripped.
 */
public StorageConfiguration> getStorageConf() {
return metaClient.getStorageConf();
}
/**
 * @return the storage abstraction backing this table, from the meta client.
 */
public HoodieStorage getStorage() {
return metaClient.getStorage();
}
/**
 * Builds a fresh file-system view over this table, restricted to the completed
 * commit + deltacommit timeline.
 *
 * @return a new {@link HoodieTableFileSystemView} for this table
 */
public TableFileSystemView getFileSystemView() {
  HoodieTimeline completedCommits = getCompletedCommitsTimeline();
  return new HoodieTableFileSystemView(metaClient, completedCommits);
}
/**
 * Get the base file only view of the file system for this table,
 * served from the cached view manager.
 */
public BaseFileOnlyView getBaseFileOnlyView() {
return getViewManager().getFileSystemView(metaClient);
}
/**
 * Get the full (file-slice) view of the file system for this table,
 * served from the cached view manager.
 */
public SliceView getSliceView() {
return getViewManager().getFileSystemView(metaClient);
}
/**
 * Get the complete, syncable view of the file system for this table
 * with the ability to force a sync, served from the cached view manager.
 */
public SyncableFileSystemView getHoodieView() {
return getViewManager().getFileSystemView(metaClient);
}
/**
 * Returns the commit + deltacommit timeline restricted to completed instants
 * (inflight and requested instants are filtered out).
 */
public HoodieTimeline getCompletedCommitsTimeline() {
  HoodieTimeline commitsAndDeltaCommits = metaClient.getCommitsTimeline();
  return commitsAndDeltaCommits.filterCompletedInstants();
}
/**
 * Returns the commit-only timeline (no deltacommits) restricted to completed instants.
 */
public HoodieTimeline getCompletedCommitTimeline() {
  HoodieTimeline commitsOnly = metaClient.getCommitTimeline();
  return commitsOnly.filterCompletedInstants();
}
/**
 * Returns the commit + deltacommit timeline restricted to pending instants,
 * excluding major and minor compaction instants.
 */
public HoodieTimeline getPendingCommitTimeline() {
  HoodieTimeline commitsAndDeltaCommits = metaClient.getCommitsTimeline();
  return commitsAndDeltaCommits.filterPendingExcludingMajorAndMinorCompaction();
}
/**
 * Returns the cleaner timeline restricted to completed instants (no inflights).
 */
public HoodieTimeline getCompletedCleanTimeline() {
  HoodieTimeline cleanerTimeline = getActiveTimeline().getCleanerTimeline();
  return cleanerTimeline.filterCompletedInstants();
}
/**
 * Returns the full cleaner timeline of this table, completed and pending instants alike.
 */
public HoodieTimeline getCleanTimeline() {
  HoodieActiveTimeline activeTimeline = getActiveTimeline();
  return activeTimeline.getCleanerTimeline();
}
/**
 * Returns the rollback timeline of this table.
 */
public HoodieTimeline getRollbackTimeline() {
  HoodieActiveTimeline activeTimeline = getActiveTimeline();
  return activeTimeline.getRollbackTimeline();
}
/**
 * Returns the restore timeline of this table.
 */
public HoodieTimeline getRestoreTimeline() {
  HoodieActiveTimeline activeTimeline = getActiveTimeline();
  return activeTimeline.getRestoreTimeline();
}
/**
 * Returns the savepoint timeline restricted to completed instants (no inflights).
 */
public HoodieTimeline getCompletedSavepointTimeline() {
  HoodieTimeline savepoints = getActiveTimeline().getSavePointTimeline();
  return savepoints.filterCompletedInstants();
}
/**
 * Returns the timestamps of all completed savepoints on this table.
 * NOTE(review): return type likely Set&lt;String&gt; — generic argument appears stripped; signature kept as-is.
 */
public Set getSavepointTimestamps() {
  HoodieTimeline completedSavepoints = getCompletedSavepointTimeline();
  return completedSavepoints.getInstantsAsStream()
      .map(HoodieInstant::getTimestamp)
      .collect(Collectors.toSet());
}
/**
 * @return the active timeline of this table, from the meta client.
 */
public HoodieActiveTimeline getActiveTimeline() {
return metaClient.getActiveTimeline();
}
/**
 * Return the index used to tag and locate records for this table.
 * NOTE(review): return type likely HoodieIndex&lt;?, ?&gt; — generic arguments appear stripped.
 */
public HoodieIndex, ?> getIndex() {
return index;
}
/**
 * @return the storage layout resolved for this table at construction time.
 */
public HoodieStorageLayout getStorageLayout() {
return storageLayout;
}
/**
* Schedule compaction for the instant time.
*
* @param context HoodieEngineContext
* @param instantTime Instant Time for scheduling compaction
* @param extraMetadata additional metadata to write into plan
* @return
*/
public abstract Option scheduleCompaction(HoodieEngineContext context,
String instantTime,
Option