All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.table.HoodieTableMetaClient Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.common.table;

import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.fs.ConsistencyGuardConfig;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.fs.NoOpConsistencyGuard;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineLayout;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.CommitUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.TableNotFoundException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * HoodieTableMetaClient allows to access meta-data about a hoodie table It returns meta-data about
 * commits, savepoints, compactions, cleanups as a HoodieTimeline Create an instance of the
 * HoodieTableMetaClient with FileSystem and basePath to start getting the meta-data.
 * 

* All the timelines are computed lazily, once computed the timeline is cached and never refreshed. Use the * HoodieTimeline.reload() to refresh timelines. * * @see HoodieTimeline * @since 0.3.0 */ public class HoodieTableMetaClient implements Serializable { private static final long serialVersionUID = 1L; private static final Logger LOG = LogManager.getLogger(HoodieTableMetaClient.class); public static final String METAFOLDER_NAME = ".hoodie"; public static final String TEMPFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".temp"; public static final String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".aux"; public static final String BOOTSTRAP_INDEX_ROOT_FOLDER_PATH = AUXILIARYFOLDER_NAME + Path.SEPARATOR + ".bootstrap"; public static final String HEARTBEAT_FOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".heartbeat"; public static final String ZINDEX_NAME = ".zindex"; public static final String BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + Path.SEPARATOR + ".partitions"; public static final String BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + Path.SEPARATOR + ".fileids"; public static final String MARKER_EXTN = ".marker"; private String basePath; private transient HoodieWrapperFileSystem fs; private String metaPath; private boolean loadActiveTimelineOnLoad; private SerializableConfiguration hadoopConf; private HoodieTableType tableType; private TimelineLayoutVersion timelineLayoutVersion; private HoodieTableConfig tableConfig; private HoodieActiveTimeline activeTimeline; private HoodieArchivedTimeline archivedTimeline; private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build(); private HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad, ConsistencyGuardConfig consistencyGuardConfig, Option layoutVersion, String payloadClassName) { LOG.info("Loading HoodieTableMetaClient from " + basePath); this.consistencyGuardConfig = consistencyGuardConfig; this.hadoopConf = new SerializableConfiguration(conf); Path basePathDir = new Path(basePath); this.basePath = basePathDir.toString(); this.metaPath = new Path(basePath, METAFOLDER_NAME).toString(); Path metaPathDir = new Path(this.metaPath); this.fs = getFs(); TableNotFoundException.checkTableValidity(fs, basePathDir, metaPathDir); this.tableConfig = new HoodieTableConfig(fs, metaPath, payloadClassName); this.tableType = tableConfig.getTableType(); Option tableConfigVersion = tableConfig.getTimelineLayoutVersion(); if (layoutVersion.isPresent() && tableConfigVersion.isPresent()) { // Ensure layout version passed in config is not lower than the one seen in hoodie.properties ValidationUtils.checkArgument(layoutVersion.get().compareTo(tableConfigVersion.get()) >= 0, "Layout Version defined in hoodie properties has higher version (" + tableConfigVersion.get() + ") than the one passed in config (" + layoutVersion.get() + ")"); } this.timelineLayoutVersion = layoutVersion.orElseGet(() -> tableConfig.getTimelineLayoutVersion().get()); this.loadActiveTimelineOnLoad = loadActiveTimelineOnLoad; LOG.info("Finished Loading Table of type " + tableType + "(version=" + timelineLayoutVersion + ", baseFileFormat=" + this.tableConfig.getBaseFileFormat() + ") from " + basePath); if (loadActiveTimelineOnLoad) { LOG.info("Loading Active commit timeline for " + basePath); getActiveTimeline(); } } /** * For serializing and de-serializing. * * @deprecated */ public HoodieTableMetaClient() {} public static HoodieTableMetaClient reload(HoodieTableMetaClient oldMetaClient) { return HoodieTableMetaClient.builder().setConf(oldMetaClient.hadoopConf.get()).setBasePath(oldMetaClient.basePath).setLoadActiveTimelineOnLoad(oldMetaClient.loadActiveTimelineOnLoad) .setConsistencyGuardConfig(oldMetaClient.consistencyGuardConfig).setLayoutVersion(Option.of(oldMetaClient.timelineLayoutVersion)).setPayloadClassName(null).build(); } /** * This method is only used when this object is de-serialized in a spark executor. * * @deprecated */ private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); fs = null; // will be lazily inited } private void writeObject(java.io.ObjectOutputStream out) throws IOException { out.defaultWriteObject(); } /** * @return Base path */ public String getBasePath() { return basePath; } /** * @return Hoodie Table Type */ public HoodieTableType getTableType() { return tableType; } /** * @return Meta path */ public String getMetaPath() { return metaPath; } /** * @return z-index path */ public String getZindexPath() { return new Path(metaPath, ZINDEX_NAME).toString(); } /** * @return Temp Folder path */ public String getTempFolderPath() { return basePath + Path.SEPARATOR + TEMPFOLDER_NAME; } /** * Returns Marker folder path. * * @param instantTs Instant Timestamp * @return */ public String getMarkerFolderPath(String instantTs) { return String.format("%s%s%s", getTempFolderPath(), Path.SEPARATOR, instantTs); } /** * @return Auxiliary Meta path */ public String getMetaAuxiliaryPath() { return basePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME; } /** * @return Heartbeat folder path. */ public static String getHeartbeatFolderPath(String basePath) { return String.format("%s%s%s", basePath, Path.SEPARATOR, HEARTBEAT_FOLDER_NAME); } /** * @return Bootstrap Index By Partition Folder */ public String getBootstrapIndexByPartitionFolderPath() { return basePath + Path.SEPARATOR + BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH; } /** * @return Bootstrap Index By Hudi File Id Folder */ public String getBootstrapIndexByFileIdFolderNameFolderPath() { return basePath + Path.SEPARATOR + BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH; } /** * @return path where archived timeline is stored */ public String getArchivePath() { String archiveFolder = tableConfig.getArchivelogFolder(); return getMetaPath() + Path.SEPARATOR + archiveFolder; } /** * @return Table Config */ public HoodieTableConfig getTableConfig() { return tableConfig; } public TimelineLayoutVersion getTimelineLayoutVersion() { return timelineLayoutVersion; } /** * Get the FS implementation for this table. */ public HoodieWrapperFileSystem getFs() { if (fs == null) { FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy()); ValidationUtils.checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem), "File System not expected to be that of HoodieWrapperFileSystem"); fs = new HoodieWrapperFileSystem(fileSystem, consistencyGuardConfig.isConsistencyCheckEnabled() ? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig) : new NoOpConsistencyGuard()); } return fs; } /** * Return raw file-system. * * @return fs */ public FileSystem getRawFs() { return getFs().getFileSystem(); } public Configuration getHadoopConf() { return hadoopConf.get(); } /** * Get the active instants as a timeline. * * @return Active instants timeline */ public synchronized HoodieActiveTimeline getActiveTimeline() { if (activeTimeline == null) { activeTimeline = new HoodieActiveTimeline(this); } return activeTimeline; } /** * Reload ActiveTimeline and cache. * * @return Active instants timeline */ public synchronized HoodieActiveTimeline reloadActiveTimeline() { activeTimeline = new HoodieActiveTimeline(this); return activeTimeline; } public ConsistencyGuardConfig getConsistencyGuardConfig() { return consistencyGuardConfig; } /** * Get the archived commits as a timeline. This is costly operation, as all data from the archived files are read. * This should not be used, unless for historical debugging purposes. * * @return Active commit timeline */ public synchronized HoodieArchivedTimeline getArchivedTimeline() { if (archivedTimeline == null) { archivedTimeline = new HoodieArchivedTimeline(this); } return archivedTimeline; } /** * Validate table properties. * @param properties Properties from writeConfig. * @param operationType operation type to be executed. */ public void validateTableProperties(Properties properties, WriteOperationType operationType) { // once meta fields are disabled, it cant be re-enabled for a given table. if (!getTableConfig().populateMetaFields() && Boolean.parseBoolean((String) properties.getOrDefault(HoodieTableConfig.POPULATE_META_FIELDS.key(), HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()))) { throw new HoodieException(HoodieTableConfig.POPULATE_META_FIELDS.key() + " already disabled for the table. Can't be re-enabled back"); } // meta fields can be disabled only with SimpleKeyGenerator if (!getTableConfig().populateMetaFields() && !properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key(), "org.apache.hudi.keygen.SimpleKeyGenerator") .equals("org.apache.hudi.keygen.SimpleKeyGenerator")) { throw new HoodieException("Only simple key generator is supported when meta fields are disabled. KeyGenerator used : " + properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key())); } } /** * Helper method to initialize a given path as a hoodie table with configs passed in as Properties. * * @return Instance of HoodieTableMetaClient */ public static HoodieTableMetaClient initTableAndGetMetaClient(Configuration hadoopConf, String basePath, Properties props) throws IOException { LOG.info("Initializing " + basePath + " as hoodie table " + basePath); Path basePathDir = new Path(basePath); final FileSystem fs = FSUtils.getFs(basePath, hadoopConf); if (!fs.exists(basePathDir)) { fs.mkdirs(basePathDir); } Path metaPathDir = new Path(basePath, METAFOLDER_NAME); if (!fs.exists(metaPathDir)) { fs.mkdirs(metaPathDir); } // if anything other than default archive log folder is specified, create that too String archiveLogPropVal = new HoodieConfig(props).getStringOrDefault(HoodieTableConfig.ARCHIVELOG_FOLDER); if (!StringUtils.isNullOrEmpty(archiveLogPropVal)) { Path archiveLogDir = new Path(metaPathDir, archiveLogPropVal); if (!fs.exists(archiveLogDir)) { fs.mkdirs(archiveLogDir); } } // Always create temporaryFolder which is needed for finalizeWrite for Hoodie tables final Path temporaryFolder = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME); if (!fs.exists(temporaryFolder)) { fs.mkdirs(temporaryFolder); } // Always create auxiliary folder which is needed to track compaction workloads (stats and any metadata in future) final Path auxiliaryFolder = new Path(basePath, HoodieTableMetaClient.AUXILIARYFOLDER_NAME); if (!fs.exists(auxiliaryFolder)) { fs.mkdirs(auxiliaryFolder); } initializeBootstrapDirsIfNotExists(hadoopConf, basePath, fs); HoodieTableConfig.create(fs, metaPathDir, props); // We should not use fs.getConf as this might be different from the original configuration // used to create the fs in unit tests HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build(); LOG.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() + " from " + basePath); return metaClient; } public static void initializeBootstrapDirsIfNotExists(Configuration hadoopConf, String basePath, FileSystem fs) throws IOException { // Create bootstrap index by partition folder if it does not exist final Path bootstrap_index_folder_by_partition = new Path(basePath, HoodieTableMetaClient.BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH); if (!fs.exists(bootstrap_index_folder_by_partition)) { fs.mkdirs(bootstrap_index_folder_by_partition); } // Create bootstrap index by partition folder if it does not exist final Path bootstrap_index_folder_by_fileids = new Path(basePath, HoodieTableMetaClient.BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH); if (!fs.exists(bootstrap_index_folder_by_fileids)) { fs.mkdirs(bootstrap_index_folder_by_fileids); } } /** * Helper method to scan all hoodie-instant metafiles. * * @param fs The file system implementation for this table * @param metaPath The meta path where meta files are stored * @param nameFilter The name filter to filter meta files * @return An array of meta FileStatus * @throws IOException In case of failure */ public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter) throws IOException { return fs.listStatus(metaPath, nameFilter); } /** * @return {@code true} if any commits are found, else {@code false}. */ public boolean isTimelineNonEmpty() { return getCommitsTimeline().filterCompletedInstants().getInstants().collect(Collectors.toList()).size() > 0; } /** * Get the commit timeline visible for this table. */ public HoodieTimeline getCommitsTimeline() { switch (this.getTableType()) { case COPY_ON_WRITE: return getActiveTimeline().getCommitTimeline(); case MERGE_ON_READ: // We need to include the parquet files written out in delta commits // Include commit action to be able to start doing a MOR over a COW table - no // migration required return getActiveTimeline().getCommitsTimeline(); default: throw new HoodieException("Unsupported table type :" + this.getTableType()); } } /** * Get the commit + pending-compaction timeline visible for this table. A RT filesystem view is constructed with this * timeline so that file-slice after pending compaction-requested instant-time is also considered valid. A RT * file-system view for reading must then merge the file-slices before and after pending compaction instant so that * all delta-commits are read. */ public HoodieTimeline getCommitsAndCompactionTimeline() { switch (this.getTableType()) { case COPY_ON_WRITE: return getActiveTimeline().getCommitTimeline(); case MERGE_ON_READ: return getActiveTimeline().getWriteTimeline(); default: throw new HoodieException("Unsupported table type :" + this.getTableType()); } } /** * Get the compacted commit timeline visible for this table. */ public HoodieTimeline getCommitTimeline() { switch (this.getTableType()) { case COPY_ON_WRITE: case MERGE_ON_READ: // We need to include the parquet files written out in delta commits in tagging return getActiveTimeline().getCommitTimeline(); default: throw new HoodieException("Unsupported table type :" + this.getTableType()); } } /** * Gets the commit action type. */ public String getCommitActionType() { return CommitUtils.getCommitActionType(this.getTableType()); } /** * Helper method to scan all hoodie-instant metafiles and construct HoodieInstant objects. * * @param includedExtensions Included hoodie extensions * @param applyLayoutVersionFilters Depending on Timeline layout version, if there are multiple states for the same * action instant, only include the highest state * @return List of Hoodie Instants generated * @throws IOException in case of failure */ public List scanHoodieInstantsFromFileSystem(Set includedExtensions, boolean applyLayoutVersionFilters) throws IOException { return scanHoodieInstantsFromFileSystem(new Path(metaPath), includedExtensions, applyLayoutVersionFilters); } /** * Helper method to scan all hoodie-instant metafiles and construct HoodieInstant objects. * * @param timelinePath MetaPath where instant files are stored * @param includedExtensions Included hoodie extensions * @param applyLayoutVersionFilters Depending on Timeline layout version, if there are multiple states for the same * action instant, only include the highest state * @return List of Hoodie Instants generated * @throws IOException in case of failure */ public List scanHoodieInstantsFromFileSystem(Path timelinePath, Set includedExtensions, boolean applyLayoutVersionFilters) throws IOException { Stream instantStream = Arrays.stream( HoodieTableMetaClient .scanFiles(getFs(), timelinePath, path -> { // Include only the meta files with extensions that needs to be included String extension = HoodieInstant.getTimelineFileExtension(path.getName()); return includedExtensions.contains(extension); })).map(HoodieInstant::new); if (applyLayoutVersionFilters) { instantStream = TimelineLayout.getLayout(getTimelineLayoutVersion()).filterHoodieInstants(instantStream); } return instantStream.sorted().collect(Collectors.toList()); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } HoodieTableMetaClient that = (HoodieTableMetaClient) o; return Objects.equals(basePath, that.basePath) && tableType == that.tableType; } @Override public int hashCode() { return Objects.hash(basePath, tableType); } @Override public String toString() { final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{"); sb.append("basePath='").append(basePath).append('\''); sb.append(", metaPath='").append(metaPath).append('\''); sb.append(", tableType=").append(tableType); sb.append('}'); return sb.toString(); } public void initializeBootstrapDirsIfNotExists() throws IOException { initializeBootstrapDirsIfNotExists(getHadoopConf(), basePath, getFs()); } public void setBasePath(String basePath) { this.basePath = basePath; } public void setMetaPath(String metaPath) { this.metaPath = metaPath; } public void setActiveTimeline(HoodieActiveTimeline activeTimeline) { this.activeTimeline = activeTimeline; } public static Builder builder() { return new Builder(); } /** * Builder for {@link HoodieTableMetaClient}. */ public static class Builder { private Configuration conf; private String basePath; private boolean loadActiveTimelineOnLoad = false; private String payloadClassName = null; private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build(); private Option layoutVersion = Option.of(TimelineLayoutVersion.CURR_LAYOUT_VERSION); public Builder setConf(Configuration conf) { this.conf = conf; return this; } public Builder setBasePath(String basePath) { this.basePath = basePath; return this; } public Builder setLoadActiveTimelineOnLoad(boolean loadActiveTimelineOnLoad) { this.loadActiveTimelineOnLoad = loadActiveTimelineOnLoad; return this; } public Builder setPayloadClassName(String payloadClassName) { this.payloadClassName = payloadClassName; return this; } public Builder setConsistencyGuardConfig(ConsistencyGuardConfig consistencyGuardConfig) { this.consistencyGuardConfig = consistencyGuardConfig; return this; } public Builder setLayoutVersion(Option layoutVersion) { this.layoutVersion = layoutVersion; return this; } public HoodieTableMetaClient build() { ValidationUtils.checkArgument(conf != null, "Configuration needs to be set to init HoodieTableMetaClient"); ValidationUtils.checkArgument(basePath != null, "basePath needs to be set to init HoodieTableMetaClient"); return new HoodieTableMetaClient(conf, basePath, loadActiveTimelineOnLoad, consistencyGuardConfig, layoutVersion, payloadClassName); } } public static PropertyBuilder withPropertyBuilder() { return new PropertyBuilder(); } public static class PropertyBuilder { private HoodieTableType tableType; private String tableName; private String tableCreateSchema; private String recordKeyFields; private String archiveLogFolder; private String payloadClassName; private Integer timelineLayoutVersion; private String baseFileFormat; private String preCombineField; private String partitionFields; private String bootstrapIndexClass; private String bootstrapBasePath; private Boolean bootstrapIndexEnable; private Boolean populateMetaFields; private String keyGeneratorClassProp; private Boolean hiveStylePartitioningEnable; private Boolean urlEncodePartitioning; private PropertyBuilder() { } public PropertyBuilder setTableType(HoodieTableType tableType) { this.tableType = tableType; return this; } public PropertyBuilder setTableType(String tableType) { return setTableType(HoodieTableType.valueOf(tableType)); } public PropertyBuilder setTableName(String tableName) { this.tableName = tableName; return this; } public PropertyBuilder setTableCreateSchema(String tableCreateSchema) { this.tableCreateSchema = tableCreateSchema; return this; } public PropertyBuilder setRecordKeyFields(String recordKeyFields) { this.recordKeyFields = recordKeyFields; return this; } public PropertyBuilder setArchiveLogFolder(String archiveLogFolder) { this.archiveLogFolder = archiveLogFolder; return this; } public PropertyBuilder setPayloadClassName(String payloadClassName) { this.payloadClassName = payloadClassName; return this; } public PropertyBuilder setPayloadClass(Class payloadClass) { return setPayloadClassName(payloadClass.getName()); } public PropertyBuilder setTimelineLayoutVersion(Integer timelineLayoutVersion) { this.timelineLayoutVersion = timelineLayoutVersion; return this; } public PropertyBuilder setBaseFileFormat(String baseFileFormat) { this.baseFileFormat = baseFileFormat; return this; } public PropertyBuilder setPreCombineField(String preCombineField) { this.preCombineField = preCombineField; return this; } public PropertyBuilder setPartitionFields(String partitionFields) { this.partitionFields = partitionFields; return this; } public PropertyBuilder setBootstrapIndexClass(String bootstrapIndexClass) { this.bootstrapIndexClass = bootstrapIndexClass; return this; } public PropertyBuilder setBootstrapBasePath(String bootstrapBasePath) { this.bootstrapBasePath = bootstrapBasePath; return this; } public PropertyBuilder setBootstrapIndexEnable(Boolean bootstrapIndexEnable) { this.bootstrapIndexEnable = bootstrapIndexEnable; return this; } public PropertyBuilder setPopulateMetaFields(boolean populateMetaFields) { this.populateMetaFields = populateMetaFields; return this; } public PropertyBuilder setKeyGeneratorClassProp(String keyGeneratorClassProp) { this.keyGeneratorClassProp = keyGeneratorClassProp; return this; } public PropertyBuilder setHiveStylePartitioningEnable(Boolean hiveStylePartitioningEnable) { this.hiveStylePartitioningEnable = hiveStylePartitioningEnable; return this; } public PropertyBuilder setUrlEncodePartitioning(Boolean urlEncodePartitioning) { this.urlEncodePartitioning = urlEncodePartitioning; return this; } public PropertyBuilder fromMetaClient(HoodieTableMetaClient metaClient) { return setTableType(metaClient.getTableType()) .setTableName(metaClient.getTableConfig().getTableName()) .setArchiveLogFolder(metaClient.getArchivePath()) .setPayloadClassName(metaClient.getTableConfig().getPayloadClass()); } public PropertyBuilder fromProperties(Properties properties) { HoodieConfig hoodieConfig = new HoodieConfig(properties); if (hoodieConfig.contains(HoodieTableConfig.NAME)) { setTableName(hoodieConfig.getString(HoodieTableConfig.NAME)); } if (hoodieConfig.contains(HoodieTableConfig.TYPE)) { setTableType(hoodieConfig.getString(HoodieTableConfig.TYPE)); } if (hoodieConfig.contains(HoodieTableConfig.ARCHIVELOG_FOLDER)) { setArchiveLogFolder( hoodieConfig.getString(HoodieTableConfig.ARCHIVELOG_FOLDER)); } if (hoodieConfig.contains(HoodieTableConfig.PAYLOAD_CLASS_NAME)) { setPayloadClassName( hoodieConfig.getString(HoodieTableConfig.PAYLOAD_CLASS_NAME)); } if (hoodieConfig.contains(HoodieTableConfig.TIMELINE_LAYOUT_VERSION)) { setTimelineLayoutVersion(hoodieConfig.getInt(HoodieTableConfig.TIMELINE_LAYOUT_VERSION)); } if (hoodieConfig.contains(HoodieTableConfig.BASE_FILE_FORMAT)) { setBaseFileFormat( hoodieConfig.getString(HoodieTableConfig.BASE_FILE_FORMAT)); } if (hoodieConfig.contains(HoodieTableConfig.BOOTSTRAP_INDEX_CLASS_NAME)) { setBootstrapIndexClass( hoodieConfig.getString(HoodieTableConfig.BOOTSTRAP_INDEX_CLASS_NAME)); } if (hoodieConfig.contains(HoodieTableConfig.BOOTSTRAP_BASE_PATH)) { setBootstrapBasePath(hoodieConfig.getString(HoodieTableConfig.BOOTSTRAP_BASE_PATH)); } if (hoodieConfig.contains(HoodieTableConfig.BOOTSTRAP_INDEX_ENABLE)) { setBootstrapIndexEnable(hoodieConfig.getBoolean(HoodieTableConfig.BOOTSTRAP_INDEX_ENABLE)); } if (hoodieConfig.contains(HoodieTableConfig.PRECOMBINE_FIELD)) { setPreCombineField(hoodieConfig.getString(HoodieTableConfig.PRECOMBINE_FIELD)); } if (hoodieConfig.contains(HoodieTableConfig.PARTITION_FIELDS)) { setPartitionFields( hoodieConfig.getString(HoodieTableConfig.PARTITION_FIELDS)); } if (hoodieConfig.contains(HoodieTableConfig.RECORDKEY_FIELDS)) { setRecordKeyFields(hoodieConfig.getString(HoodieTableConfig.RECORDKEY_FIELDS)); } if (hoodieConfig.contains(HoodieTableConfig.CREATE_SCHEMA)) { setTableCreateSchema(hoodieConfig.getString(HoodieTableConfig.CREATE_SCHEMA)); } if (hoodieConfig.contains(HoodieTableConfig.POPULATE_META_FIELDS)) { setPopulateMetaFields(hoodieConfig.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS)); } if (hoodieConfig.contains(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME)) { setKeyGeneratorClassProp(hoodieConfig.getString(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME)); } if (hoodieConfig.contains(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE)) { setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE)); } if (hoodieConfig.contains(HoodieTableConfig.URL_ENCODE_PARTITIONING)) { setUrlEncodePartitioning(hoodieConfig.getBoolean(HoodieTableConfig.URL_ENCODE_PARTITIONING)); } return this; } public Properties build() { ValidationUtils.checkArgument(tableType != null, "tableType is null"); ValidationUtils.checkArgument(tableName != null, "tableName is null"); HoodieTableConfig tableConfig = new HoodieTableConfig(); tableConfig.setValue(HoodieTableConfig.NAME, tableName); tableConfig.setValue(HoodieTableConfig.TYPE, tableType.name()); tableConfig.setValue(HoodieTableConfig.VERSION, String.valueOf(HoodieTableVersion.current().versionCode())); if (tableType == HoodieTableType.MERGE_ON_READ && payloadClassName != null) { tableConfig.setValue(HoodieTableConfig.PAYLOAD_CLASS_NAME, payloadClassName); } if (null != tableCreateSchema) { tableConfig.setValue(HoodieTableConfig.CREATE_SCHEMA, tableCreateSchema); } if (!StringUtils.isNullOrEmpty(archiveLogFolder)) { tableConfig.setValue(HoodieTableConfig.ARCHIVELOG_FOLDER, archiveLogFolder); } else { tableConfig.setDefaultValue(HoodieTableConfig.ARCHIVELOG_FOLDER); } if (null != timelineLayoutVersion) { tableConfig.setValue(HoodieTableConfig.TIMELINE_LAYOUT_VERSION, String.valueOf(timelineLayoutVersion)); } if (null != baseFileFormat) { tableConfig.setValue(HoodieTableConfig.BASE_FILE_FORMAT, baseFileFormat.toUpperCase()); } if (null != bootstrapIndexClass) { tableConfig.setValue(HoodieTableConfig.BOOTSTRAP_INDEX_CLASS_NAME, bootstrapIndexClass); } if (null != bootstrapIndexEnable) { tableConfig.setValue(HoodieTableConfig.BOOTSTRAP_INDEX_ENABLE, Boolean.toString(bootstrapIndexEnable)); } if (null != bootstrapBasePath) { tableConfig.setValue(HoodieTableConfig.BOOTSTRAP_BASE_PATH, bootstrapBasePath); } if (null != preCombineField) { tableConfig.setValue(HoodieTableConfig.PRECOMBINE_FIELD, preCombineField); } if (null != partitionFields) { tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, partitionFields); } if (null != recordKeyFields) { tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, recordKeyFields); } if (null != populateMetaFields) { tableConfig.setValue(HoodieTableConfig.POPULATE_META_FIELDS, Boolean.toString(populateMetaFields)); } if (null != keyGeneratorClassProp) { tableConfig.setValue(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME, keyGeneratorClassProp); } if (null != hiveStylePartitioningEnable) { tableConfig.setValue(HoodieTableConfig.HIVE_STYLE_PARTITIONING_ENABLE, Boolean.toString(hiveStylePartitioningEnable)); } if (null != urlEncodePartitioning) { tableConfig.setValue(HoodieTableConfig.URL_ENCODE_PARTITIONING, Boolean.toString(urlEncodePartitioning)); } return tableConfig.getProps(); } /** * Init Table with the properties build by this builder. * * @param configuration The hadoop config. * @param basePath The base path for hoodie table. */ public HoodieTableMetaClient initTable(Configuration configuration, String basePath) throws IOException { return HoodieTableMetaClient.initTableAndGetMetaClient(configuration, basePath, build()); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy