All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.finra.herd.service.helper.BusinessObjectDataDdlPartitionsHelper Maven / Gradle / Ivy

Go to download

This project contains the business service code. This is a classic service tier where business logic is defined along with it's associated transaction management configuration.

There is a newer version: 0.160.0
Show newest version
/*
 * Copyright 2015 herd contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.finra.herd.service.helper;

import static org.finra.herd.service.helper.DdlGenerator.NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.datatype.XMLGregorianCalendar;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MultiValuedMap;
import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;

import org.finra.herd.core.helper.ConfigurationHelper;
import org.finra.herd.dao.StorageFileDao;
import org.finra.herd.dao.StorageUnitDao;
import org.finra.herd.model.ObjectNotFoundException;
import org.finra.herd.model.api.xml.BusinessObjectDataDdlRequest;
import org.finra.herd.model.api.xml.BusinessObjectDataKey;
import org.finra.herd.model.api.xml.BusinessObjectDataPartitionsRequest;
import org.finra.herd.model.api.xml.BusinessObjectDefinitionKey;
import org.finra.herd.model.api.xml.BusinessObjectFormat;
import org.finra.herd.model.api.xml.BusinessObjectFormatKey;
import org.finra.herd.model.api.xml.Partition;
import org.finra.herd.model.api.xml.PartitionColumn;
import org.finra.herd.model.api.xml.SchemaColumn;
import org.finra.herd.model.dto.ConfigurationValue;
import org.finra.herd.model.dto.HivePartitionDto;
import org.finra.herd.model.dto.StorageUnitAvailabilityDto;
import org.finra.herd.model.jpa.BusinessObjectDataStatusEntity;
import org.finra.herd.model.jpa.BusinessObjectDefinitionEntity;
import org.finra.herd.model.jpa.BusinessObjectFormatEntity;
import org.finra.herd.model.jpa.CustomDdlEntity;
import org.finra.herd.model.jpa.FileTypeEntity;
import org.finra.herd.model.jpa.StorageEntity;
import org.finra.herd.model.jpa.StoragePlatformEntity;

/**
 * Helper methods used for both DDL generator and Partitions generator
 */
@Component
public class BusinessObjectDataDdlPartitionsHelper
{
    /**
     * The partition key value for business object data without partitioning.
     */
    public static final String NO_PARTITIONING_PARTITION_KEY = "partition";

    /**
     * The partition value for business object data without partitioning.
     */
    public static final String NO_PARTITIONING_PARTITION_VALUE = "none";

    /**
     * The regular expression that represents an empty partition in S3, this is because hadoop file system implements directory support in S3 by creating empty
     * files with the "directoryname_$folder$" suffix.
     */
    public static final String REGEX_S3_EMPTY_PARTITION = "_\\$folder\\$";

    @Autowired
    private BusinessObjectDataDaoHelper businessObjectDataDaoHelper;

    @Autowired
    private BusinessObjectDataHelper businessObjectDataHelper;

    @Autowired
    private BusinessObjectDefinitionDaoHelper businessObjectDefinitionDaoHelper;

    @Autowired
    private BusinessObjectFormatDaoHelper businessObjectFormatDaoHelper;

    @Autowired
    private BusinessObjectFormatHelper businessObjectFormatHelper;

    @Autowired
    private BusinessObjectDataStatusDaoHelper businessObjectDataStatusDaoHelper;

    @Autowired
    private ConfigurationHelper configurationHelper;

    @Autowired
    private S3KeyPrefixHelper s3KeyPrefixHelper;

    @Autowired
    private StorageDaoHelper storageDaoHelper;

    @Autowired
    private StoragePlatformHelper storagePlatformHelper;

    @Autowired
    private StorageUnitDao storageUnitDao;

    @Autowired
    private StorageFileDao storageFileDao;

    @Autowired
    private StorageFileHelper storageFileHelper;

    @Autowired
    private StorageHelper storageHelper;

    @Autowired
    private StorageUnitHelper storageUnitHelper;

    /**
     * Builds business object data DDL request object instance and initialise it with the business object data partitions request field values.
     *
     * @param request the business object data partitions request
     *
     * @return the newly created BusinessObjectDataDdlRequest object instance
     */
    public BusinessObjectDataDdlRequest buildBusinessObjectDataDdlRequest(BusinessObjectDataPartitionsRequest request)
    {
        BusinessObjectDataDdlRequest ddlRequest = new BusinessObjectDataDdlRequest();
        ddlRequest.setNamespace(request.getNamespace());
        ddlRequest.setBusinessObjectDefinitionName(request.getBusinessObjectDefinitionName());
        ddlRequest.setBusinessObjectFormatUsage(request.getBusinessObjectFormatUsage());
        ddlRequest.setBusinessObjectFormatFileType(request.getBusinessObjectFormatFileType());
        ddlRequest.setBusinessObjectFormatVersion(request.getBusinessObjectFormatVersion());
        ddlRequest.setPartitionValueFilters(request.getPartitionValueFilters());
        ddlRequest.setPartitionValueFilter(null);
        ddlRequest.setBusinessObjectDataVersion(request.getBusinessObjectDataVersion());
        ddlRequest.setStorageNames(request.getStorageNames());
        ddlRequest.setStorageName(null);
        ddlRequest.setAllowMissingData(request.isAllowMissingData());
        ddlRequest.setIncludeAllRegisteredSubPartitions(request.isIncludeAllRegisteredSubPartitions());
        ddlRequest.setSuppressScanForUnregisteredSubPartitions(request.isSuppressScanForUnregisteredSubPartitions());
        return ddlRequest;
    }

    /**
     * Generates the create table Hive 13 DDL as per specified business object data DDL request.
     *
     * @param request the business object data DDL request
     * @param businessObjectFormatEntity the business object format entity
     * @param customDdlEntity the optional custom DDL entity
     * @param storageNames the list of storage names
     * @param requestedStorageEntities the list of storage entities per storage names specified in the request
     * @param cachedStorageEntities the map of storage names in upper case to the relative storage entities
     * @param cachedS3BucketNames the map of storage names in upper case to the relative S3 bucket names
     *
     * @return the generateDDL and generatePartitions Wrapper class object
     */
    protected GenerateDdlRequestWrapper buildGenerateDdlPartitionsWrapper(BusinessObjectDataDdlRequest request,
        BusinessObjectFormatEntity businessObjectFormatEntity, CustomDdlEntity customDdlEntity, List storageNames,
        List requestedStorageEntities, Map cachedStorageEntities, Map cachedS3BucketNames)
    {
        // Get business object format key from the request.
        BusinessObjectFormatKey businessObjectFormatKey =
            new BusinessObjectFormatKey(request.getNamespace(), request.getBusinessObjectDefinitionName(), request.getBusinessObjectFormatUsage(),
                request.getBusinessObjectFormatFileType(), request.getBusinessObjectFormatVersion());

        // Get storage platform entity for S3 storage platform type.
        StoragePlatformEntity s3StoragePlatformEntity = storagePlatformHelper.getStoragePlatformEntity(StoragePlatformEntity.S3);

        // Build partition filters based on the specified partition value filters.
        // We do validate that all specified storage entities are of "S3" storage platform type, so we specify S3 storage platform type in
        // the call below, so we select storage units only from all S3 storage, when the specified list of storage entities is empty.
        List> partitionFilters = businessObjectDataDaoHelper
            .buildPartitionFilters(request.getPartitionValueFilters(), request.getPartitionValueFilter(), businessObjectFormatKey,
                request.getBusinessObjectDataVersion(), requestedStorageEntities, s3StoragePlatformEntity, null, businessObjectFormatEntity);

        // If the partitionKey="partition" and partitionValue="none", then DDL should
        // return a DDL which treats business object data as a table, not a partition.
        boolean isPartitioned =
            !businessObjectFormatEntity.getPartitionKey().equalsIgnoreCase(NO_PARTITIONING_PARTITION_KEY) || partitionFilters.size() != 1 ||
                !partitionFilters.get(0).get(0).equalsIgnoreCase(NO_PARTITIONING_PARTITION_VALUE);

        // Generate the create table Hive 13 DDL.
        BusinessObjectDataDdlPartitionsHelper.GenerateDdlRequestWrapper generateDdlRequest = getGenerateDdlRequestWrapperInstance();
        generateDdlRequest.allowMissingData = request.isAllowMissingData();
        generateDdlRequest.businessObjectDataVersion = request.getBusinessObjectDataVersion();
        generateDdlRequest.businessObjectFormatEntity = businessObjectFormatEntity;
        generateDdlRequest.businessObjectFormatVersion = request.getBusinessObjectFormatVersion();
        generateDdlRequest.customDdlEntity = customDdlEntity;
        generateDdlRequest.includeAllRegisteredSubPartitions = request.isIncludeAllRegisteredSubPartitions();
        generateDdlRequest.includeDropPartitions = request.isIncludeDropPartitions();
        generateDdlRequest.includeDropTableStatement = request.isIncludeDropTableStatement();
        generateDdlRequest.includeIfNotExistsOption = request.isIncludeIfNotExistsOption();
        generateDdlRequest.isPartitioned = isPartitioned;
        generateDdlRequest.partitionFilters = partitionFilters;
        generateDdlRequest.cachedS3BucketNames = cachedS3BucketNames;
        generateDdlRequest.cachedStorageEntities = cachedStorageEntities;
        generateDdlRequest.storageNames = storageNames;
        generateDdlRequest.requestedStorageEntities = requestedStorageEntities;
        generateDdlRequest.suppressScanForUnregisteredSubPartitions = request.isSuppressScanForUnregisteredSubPartitions();
        generateDdlRequest.combineMultiplePartitionsInSingleAlterTable = request.isCombineMultiplePartitionsInSingleAlterTable();
        generateDdlRequest.tableName = request.getTableName();
        generateDdlRequest.asOfTime = request.getAsOfTime();

        // getOutputFormat == null, means it's used in generatePartitions request since getOutputFormat must not be null for generateDDL request
        if (request.getOutputFormat() == null)
        {
            generateDdlRequest.isGeneratePartitionsRequest = true;
            Assert.isTrue(isPartitioned, "Generate-partitions request does not support singleton partitions.");
        }
        else
        {
            generateDdlRequest.isGeneratePartitionsRequest = false;
        }
        return generateDdlRequest;
    }

    /**
     * Validate partionFilters and business format
     *
     * @param generateDdlRequest the generateDdlRequestWrapper object
     *
     * @return BusinessObjectFormat object
     */
    public BusinessObjectFormat validatePartitionFiltersAndFormat(GenerateDdlRequestWrapper generateDdlRequest)
    {
        // Validate that partition values passed in the list of partition filters do not contain '/' character.
        if (generateDdlRequest.isPartitioned && !CollectionUtils.isEmpty(generateDdlRequest.partitionFilters))
        {
            // Validate that partition values do not contain '/' characters.
            for (List partitionFilter : generateDdlRequest.partitionFilters)
            {
                for (String partitionValue : partitionFilter)
                {
                    Assert.doesNotContain(partitionValue, "/", String.format("Partition value \"%s\" can not contain a '/' character.", partitionValue));
                }
            }
        }

        // Get business object format model object to directly access schema columns and partitions.
        BusinessObjectFormat businessObjectFormat =
            businessObjectFormatHelper.createBusinessObjectFormatFromEntity(generateDdlRequest.businessObjectFormatEntity);

        // Validate that we have at least one column specified in the business object format schema.
        assertSchemaColumnsNotEmpty(businessObjectFormat, generateDdlRequest.businessObjectFormatEntity);

        if (generateDdlRequest.isPartitioned)
        {
            // Validate that we have at least one partition column specified in the business object format schema.
            Assert.notEmpty(businessObjectFormat.getSchema().getPartitions(), String.format("No schema partitions specified for business object format {%s}.",
                businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity)));

            // Validate that partition column names do not contain '/' characters.
            for (SchemaColumn partitionColumn : businessObjectFormat.getSchema().getPartitions())
            {
                Assert.doesNotContain(partitionColumn.getName(), "/", String
                    .format("Partition column name \"%s\" can not contain a '/' character. Business object format: {%s}", partitionColumn.getName(),
                        businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity)));
            }
        }
        return businessObjectFormat;
    }

    /**
     * Process partitionFilters to retrieve storage unit availability dtos
     *
     * @param generateDdlRequest the generateDdlRequestWrapper object
     *
     * @return the list of storage unit availability dtos
     */
    public List processPartitionFiltersForGenerateDdlPartitions(GenerateDdlRequestWrapper generateDdlRequest)
    {
        // Get the business object format key from the entity.
        BusinessObjectFormatKey businessObjectFormatKey = businessObjectFormatHelper.getBusinessObjectFormatKey(generateDdlRequest.businessObjectFormatEntity);

        // Override the business object format version with the original (optional) value from the request.
        businessObjectFormatKey.setBusinessObjectFormatVersion(generateDdlRequest.businessObjectFormatVersion);

        // Get business object data status entity for the VALID status.
        BusinessObjectDataStatusEntity validBusinessObjectDataStatusEntity =
            businessObjectDataStatusDaoHelper.getBusinessObjectDataStatusEntity(BusinessObjectDataStatusEntity.VALID);

        // Get storage platform entity for S3 storage platform type.
        StoragePlatformEntity s3StoragePlatformEntity = storagePlatformHelper.getStoragePlatformEntity(StoragePlatformEntity.S3);

        // Retrieve a list of storage unit availability DTOs for the specified list of partition filters. The list will be sorted by partition values and
        // storage names. For a non-partitioned table, there should only exist a single business object data entity (with partitionValue equals to "none").
        // We do validate that all specified storage entities are of "S3" storage platform type, so we specify S3 storage platform type in the herdDao call
        // below, so we select storage units only from all S3 storage entities, when the specified list of storage names is empty. We also specify to select
        // only "available" storage units.
        List storageUnitAvailabilityDtos = storageUnitDao
            .getStorageUnitsByPartitionFilters(generateDdlRequest.businessObjectFormatEntity.getBusinessObjectDefinition(),
                businessObjectFormatKey.getBusinessObjectFormatUsage(), generateDdlRequest.businessObjectFormatEntity.getFileType(),
                businessObjectFormatKey.getBusinessObjectFormatVersion(), generateDdlRequest.partitionFilters, generateDdlRequest.businessObjectDataVersion,
                validBusinessObjectDataStatusEntity, generateDdlRequest.requestedStorageEntities, s3StoragePlatformEntity, null, true,
                generateDdlRequest.getAsOfTime());

        // Exclude duplicate business object data per specified list of storage names.
        // If storage names are not specified, the method fails on business object data instances registered with multiple storage.
        storageUnitAvailabilityDtos = excludeDuplicateBusinessObjectData(storageUnitAvailabilityDtos, generateDdlRequest.storageNames);

        // Build a list of matched partition filters. Please note that each request partition
        // filter might result in multiple available business object data entities.
        List> matchedAvailablePartitionFilters = new ArrayList<>();
        List> availablePartitions = new ArrayList<>();
        for (StorageUnitAvailabilityDto storageUnitAvailabilityDto : storageUnitAvailabilityDtos)
        {
            BusinessObjectDataKey businessObjectDataKey = storageUnitAvailabilityDto.getBusinessObjectDataKey();
            matchedAvailablePartitionFilters
                .add(businessObjectDataHelper.getPartitionFilter(businessObjectDataKey, generateDdlRequest.partitionFilters.get(0)));
            availablePartitions.add(businessObjectDataHelper.getPrimaryAndSubPartitionValues(businessObjectDataKey));
        }

        // If request specifies to include all registered sub-partitions, fail if any of "non-available" registered sub-partitions are found.
        if (generateDdlRequest.businessObjectDataVersion == null && BooleanUtils.isTrue(generateDdlRequest.includeAllRegisteredSubPartitions) &&
            !CollectionUtils.isEmpty(matchedAvailablePartitionFilters))
        {
            notAllowNonAvailableRegisteredSubPartitions(generateDdlRequest.businessObjectFormatEntity.getBusinessObjectDefinition(),
                businessObjectFormatKey.getBusinessObjectFormatUsage(), generateDdlRequest.businessObjectFormatEntity.getFileType(),
                businessObjectFormatKey.getBusinessObjectFormatVersion(), matchedAvailablePartitionFilters, availablePartitions,
                generateDdlRequest.storageNames, generateDdlRequest.requestedStorageEntities, s3StoragePlatformEntity);
        }

        // Fail on any missing business object data unless the flag is set to allow missing business object data.
        if (!BooleanUtils.isTrue(generateDdlRequest.allowMissingData))
        {
            // Get a list of unmatched partition filters.
            List> unmatchedPartitionFilters = new ArrayList<>(generateDdlRequest.partitionFilters);
            unmatchedPartitionFilters.removeAll(matchedAvailablePartitionFilters);

            // Throw an exception if we have any unmatched partition filters.
            if (!unmatchedPartitionFilters.isEmpty())
            {
                // Get the first unmatched partition filter and throw exception.
                List unmatchedPartitionFilter = getFirstUnmatchedPartitionFilter(unmatchedPartitionFilters);
                throw new ObjectNotFoundException(String.format(
                    "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " +
                        "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " +
                        "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).",
                    businessObjectFormatKey.getNamespace(), businessObjectFormatKey.getBusinessObjectDefinitionName(),
                    businessObjectFormatKey.getBusinessObjectFormatUsage(), businessObjectFormatKey.getBusinessObjectFormatFileType(),
                    businessObjectFormatKey.getBusinessObjectFormatVersion(), unmatchedPartitionFilter.get(0),
                    StringUtils.join(unmatchedPartitionFilter.subList(1, unmatchedPartitionFilter.size()), ","), generateDdlRequest.businessObjectDataVersion,
                    StringUtils.join(generateDdlRequest.storageNames, ",")));
            }
        }
        return storageUnitAvailabilityDtos;
    }

    /**
     * Adds the relative "alter table add partition" statements for each storage unit entity. Please note that each request partition value might result in
     * multiple available storage unit entities (subpartitions).
     *
     * @param generateDdlRequest the generate ddl request wrapper object
     * @param sb the string builder to be updated with the "alter table add partition" statements
     * @param partitions the list to store business object data partitions
     * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
     * @param businessObjectFormatForSchema the business object format to be used for schema
     * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
     * @param storageUnitAvailabilityDtos the list of storage unit availability DTOs
     */
    public void processStorageUnitsForGenerateDdlPartitions(GenerateDdlRequestWrapper generateDdlRequest, StringBuilder sb, List partitions,
        HashMap replacements, BusinessObjectFormat businessObjectFormatForSchema, String ifNotExistsOption,
        List storageUnitAvailabilityDtos)
    {
        // If flag is not set to suppress scan for unregistered sub-partitions, retrieve all storage
        // file paths for the relative storage units loaded in a multi-valued map for easy access.
        MultiValuedMap storageUnitIdToStorageFilePathsMap =
            BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions) ? new ArrayListValuedHashMap<>() :
                storageFileDao.getStorageFilePathsByStorageUnitIds(storageUnitHelper.getStorageUnitIds(storageUnitAvailabilityDtos));

        // Crete a map of storage names in upper case to their relative S3 key prefix velocity templates.
        Map s3KeyPrefixVelocityTemplates = new HashMap<>();

        // Crete a map of business object format keys to their relative business object format instances.
        Map businessObjectFormats = new HashMap<>();

        // Get data provider for the business object definition.
        BusinessObjectDefinitionEntity businessObjectDefinitionEntity = businessObjectDefinitionDaoHelper.getBusinessObjectDefinitionEntity(
            new BusinessObjectDefinitionKey(businessObjectFormatForSchema.getNamespace(), businessObjectFormatForSchema.getBusinessObjectDefinitionName()));
        String dataProviderName = businessObjectDefinitionEntity.getDataProvider().getName();

        // Generate the beginning of the alter table statement.
        String alterTableFirstToken = null;
        if (!generateDdlRequest.isGeneratePartitionsRequest)
        {
            alterTableFirstToken = String.format("ALTER TABLE `%s` ADD %s", generateDdlRequest.tableName, ifNotExistsOption).trim();
        }

        // Process all available business object data instances.
        List addPartitionStatements = new ArrayList<>();
        for (StorageUnitAvailabilityDto storageUnitAvailabilityDto : storageUnitAvailabilityDtos)
        {
            // Get storage name in upper case for this storage unit.
            String upperCaseStorageName = storageUnitAvailabilityDto.getStorageName().toUpperCase();

            // Get storage entity for this storage unit.
            StorageEntity storageEntity = getStorageEntity(upperCaseStorageName, generateDdlRequest.cachedStorageEntities);

            // Get business object data key for this business object data.
            BusinessObjectDataKey businessObjectDataKey = storageUnitAvailabilityDto.getBusinessObjectDataKey();

            // Get business object format key for this business object data.
            BusinessObjectFormatKey businessObjectFormatKey = businessObjectFormatHelper.getBusinessObjectFormatKey(businessObjectDataKey);

            // Retrieve s3 key prefix velocity template for this storage.
            String s3KeyPrefixVelocityTemplate = getS3KeyPrefixVelocityTemplate(upperCaseStorageName, storageEntity, s3KeyPrefixVelocityTemplates);

            // Retrieve business object format for this business object data.
            BusinessObjectFormat businessObjectFormat = getBusinessObjectFormat(businessObjectFormatKey, businessObjectFormats);

            // Build the expected S3 key prefix for this storage unit.
            String s3KeyPrefix = s3KeyPrefixHelper.buildS3KeyPrefix(s3KeyPrefixVelocityTemplate, dataProviderName, businessObjectFormat, businessObjectDataKey,
                storageUnitAvailabilityDto.getStorageName());

            // If flag is set to suppress scan for unregistered sub-partitions, use the directory path or the S3 key prefix
            // as the partition's location, otherwise, use storage files to discover all unregistered sub-partitions.
            Collection storageFilePaths = new ArrayList<>();
            if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions))
            {
                // Validate the directory path value if it is present.
                if (StringUtils.isNotBlank(storageUnitAvailabilityDto.getStorageUnitDirectoryPath()))
                {
                    Assert.isTrue(storageUnitAvailabilityDto.getStorageUnitDirectoryPath().equals(s3KeyPrefix), String.format(
                        "Storage directory path \"%s\" registered with business object data {%s} " +
                            "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitAvailabilityDto.getStorageUnitDirectoryPath(),
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), storageUnitAvailabilityDto.getStorageName(),
                        s3KeyPrefix));
                }

                // Add the S3 key prefix to the list of storage files.
                // We add a trailing '/' character to the prefix, since it represents a directory.
                storageFilePaths.add(StringUtils.appendIfMissing(s3KeyPrefix, "/"));
            }
            else
            {
                // Retrieve storage file paths registered with this business object data in the specified storage.
                storageFilePaths = storageUnitIdToStorageFilePathsMap.containsKey(storageUnitAvailabilityDto.getStorageUnitId()) ?
                    storageUnitIdToStorageFilePathsMap.get(storageUnitAvailabilityDto.getStorageUnitId()) : new ArrayList<>();

                // Validate storage file paths registered with this business object data in the specified storage.
                // The validation check below is required even if we have no storage files registered.
                storageFileHelper.validateStorageFilePaths(storageFilePaths, s3KeyPrefix, businessObjectDataKey, storageUnitAvailabilityDto.getStorageName());

                // If there are no storage files registered for this storage unit, we should use the storage directory path value.
                if (storageFilePaths.isEmpty())
                {
                    // Validate that directory path value is present and it matches the S3 key prefix.
                    Assert.isTrue(storageUnitAvailabilityDto.getStorageUnitDirectoryPath() != null &&
                        storageUnitAvailabilityDto.getStorageUnitDirectoryPath().startsWith(s3KeyPrefix), String.format(
                        "Storage directory path \"%s\" registered with business object data {%s} " +
                            "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitAvailabilityDto.getStorageUnitDirectoryPath(),
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), storageUnitAvailabilityDto.getStorageName(),
                        s3KeyPrefix));
                    // Add storage directory path the empty storage files list.
                    // We add a trailing '/' character to the path, since it represents a directory.
                    storageFilePaths.add(storageUnitAvailabilityDto.getStorageUnitDirectoryPath() + "/");
                }
            }

            // Retrieve the s3 bucket name.
            String s3BucketName = getS3BucketName(upperCaseStorageName, storageEntity, generateDdlRequest.cachedS3BucketNames);

            // For partitioned table, add the relative partitions to the generated DDL.
            if (generateDdlRequest.isPartitioned)
            {
                // If flag is set to suppress scan for unregistered sub-partitions, validate that the number of primary and sub-partition values specified for
                // the business object data equals to the number of partition columns defined in schema for the format selected for DDL/Partitions generation.
                if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions))
                {
                    int businessObjectDataRegisteredPartitions = 1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues());
                    Assert.isTrue(businessObjectFormatForSchema.getSchema().getPartitions().size() == businessObjectDataRegisteredPartitions, String.format(
                        "Number of primary and sub-partition values (%d) specified for the business object data is not equal to the number of partition " +
                            "columns (%d) defined in the schema of the business object format selected for DDL/Partitions generation. " +
                            "Business object data: {%s},  business object format: {%s}", businessObjectDataRegisteredPartitions,
                        businessObjectFormatForSchema.getSchema().getPartitions().size(),
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper
                            .businessObjectFormatKeyToString(businessObjectFormatHelper.getBusinessObjectFormatKey(businessObjectFormatForSchema))));
                }
                // Otherwise, since the format version selected for DDL/Partitions generation might not match the relative business object format version that
                // business bject data is registered against, validate that the number of sub-partition values specified for the business object data is less
                // than the number of partition columns defined in schema for the format selected for DDL/Partitions generation.
                else
                {
                    Assert.isTrue(
                        businessObjectFormatForSchema.getSchema().getPartitions().size() > CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()),
                        String.format("Number of subpartition values specified for the business object data is greater than or equal to " +
                                "the number of partition columns defined in the schema of the business object format selected for DDL/Partitions generation. " +
                                "Business object data: {%s},  business object format: {%s}",
                            businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper
                                .businessObjectFormatKeyToString(businessObjectFormatHelper.getBusinessObjectFormatKey(businessObjectFormatForSchema))));
                }

                // Get partition information. For multiple level partitioning, auto-discover subpartitions (subdirectories) not already included into the S3 key
                // prefix. Each discovered partition requires a standalone "add partition" clause. Please note that due to the above validation check, there
                // should be no auto discoverable sub-partition columns, when flag is set to suppress scan for unregistered sub-partitions.
                List autoDiscoverableSubPartitionColumns = businessObjectFormatForSchema.getSchema().getPartitions()
                    .subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()),
                        businessObjectFormatForSchema.getSchema().getPartitions().size());

                // Get and process Hive partitions.
                for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, s3KeyPrefix,
                    storageFilePaths, storageUnitAvailabilityDto.getStorageName()))
                {
                    if (!generateDdlRequest.isGeneratePartitionsRequest)
                    {
                        // Build an add partition statement for this hive partition.
                        StringBuilder addPartitionStatement = new StringBuilder();
                        addPartitionStatement.append(String.format("%s PARTITION (",
                            BooleanUtils.isTrue(generateDdlRequest.combineMultiplePartitionsInSingleAlterTable) ? "   " : alterTableFirstToken));
                        // Specify all partition column values.
                        List partitionKeyValuePairs = new ArrayList<>();
                        for (int i = 0; i < businessObjectFormatForSchema.getSchema().getPartitions().size(); i++)
                        {
                            String partitionColumnName = businessObjectFormatForSchema.getSchema().getPartitions().get(i).getName();
                            String partitionValue = hivePartition.getPartitionValues().get(i);
                            partitionKeyValuePairs.add(String.format("`%s`='%s'", partitionColumnName, partitionValue));
                        }
                        addPartitionStatement.append(StringUtils.join(partitionKeyValuePairs, ", "));
                        addPartitionStatement.append(String.format(") LOCATION 's3n://%s/%s%s'", s3BucketName, s3KeyPrefix,
                            StringUtils.isNotBlank(hivePartition.getPath()) ? hivePartition.getPath() : ""));

                        // Add this add partition statement to the list.
                        addPartitionStatements.add(addPartitionStatement.toString());
                    }
                    else
                    {
                        // Specify all partition column values.
                        Partition partition = new Partition();
                        List partitionColumns = new ArrayList<>();
                        partition.setPartitionColumns(partitionColumns);
                        for (int i = 0; i < businessObjectFormatForSchema.getSchema().getPartitions().size(); i++)
                        {
                            String partitionColumnName = businessObjectFormatForSchema.getSchema().getPartitions().get(i).getName();
                            String partitionValue = hivePartition.getPartitionValues().get(i);
                            partitionColumns.add(new PartitionColumn(partitionColumnName, partitionValue));
                        }
                        partition.setPartitionLocation(String
                            .format("%s/%s%s", s3BucketName, s3KeyPrefix, StringUtils.isNotBlank(hivePartition.getPath()) ? hivePartition.getPath() : ""));
                        partitions.add(partition);
                    }
                }
            }
            else // This is a non-partitioned table.
            {
                // Get location for this non-partitioned table.
                String tableLocation = String.format("s3n://%s/%s", s3BucketName, s3KeyPrefix);

                if (generateDdlRequest.customDdlEntity == null)
                {
                    // Since custom DDL was not specified and this table is not partitioned, add a LOCATION clause.
                    // This is the last line in the non-partitioned table DDL.
                    sb.append(String.format("LOCATION '%s';", tableLocation));
                }
                else
                {
                    // Since custom DDL was used for a non-partitioned table, substitute the relative custom DDL token with the actual table location.
                    replacements.put(NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN, tableLocation);
                }
            }
        }

        // Add all add partition statements to the main string builder.
        if (CollectionUtils.isNotEmpty(addPartitionStatements))
        {
            // If specified, combine adding multiple partitions in a single ALTER TABLE statement.
            if (BooleanUtils.isTrue(generateDdlRequest.combineMultiplePartitionsInSingleAlterTable))
            {
                sb.append(alterTableFirstToken).append('\n');
            }

            sb.append(
                StringUtils.join(addPartitionStatements, BooleanUtils.isTrue(generateDdlRequest.combineMultiplePartitionsInSingleAlterTable) ? ",\n" : ";\n"))
                .append(";\n");
        }
    }

    /**
     * Asserts that there exists at least one column specified in the business object format schema.
     *
     * @param businessObjectFormat The {@link BusinessObjectFormat} containing schema columns.
     * @param businessObjectFormatEntity The entity used to generate the error message.
     */

    private void assertSchemaColumnsNotEmpty(BusinessObjectFormat businessObjectFormat, BusinessObjectFormatEntity businessObjectFormatEntity)
    {
        Assert.notEmpty(businessObjectFormat.getSchema().getColumns(), String.format("No schema columns specified for business object format {%s}.",
            businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity)));
    }

    /**
     * Eliminate storage units that belong to the same business object data by picking storage unit registered in a storage listed earlier in the list of
     * storage names specified in the request. If storage names are not specified, simply fail on business object data instances registered with multiple
     * storage.
     *
     * @param storageUnitAvailabilityDtos the list of storage unit availability DTOs
     * @param storageNames the list of storage names
     *
     * @return the updated list of storage unit availability DTOs
     * @throws IllegalArgumentException on business object data being registered in multiple storage and storage names are not specified to resolve this
     */
    protected List excludeDuplicateBusinessObjectData(List storageUnitAvailabilityDtos,
        List storageNames) throws IllegalArgumentException
    {
        // Convert the list of storage names to upper case.
        List upperCaseStorageNames = new ArrayList(storageNames);
        upperCaseStorageNames.replaceAll(String::toUpperCase);

        // If storage names are not specified, fail on business object data instance registered with multiple storage.
        // Otherwise, in a case when the same business object data is registered with multiple storage,
        // pick storage unit registered in a storage listed earlier in the list of storage names specified in the request.
        Map businessObjectDataToStorageUnitMap = new LinkedHashMap<>();
        for (StorageUnitAvailabilityDto storageUnitAvailabilityDto : storageUnitAvailabilityDtos)
        {
            BusinessObjectDataKey businessObjectDataKey = storageUnitAvailabilityDto.getBusinessObjectDataKey();

            if (businessObjectDataToStorageUnitMap.containsKey(businessObjectDataKey))
            {
                // Duplicate business object data is found, so check if storage names are specified.
                if (CollectionUtils.isEmpty(upperCaseStorageNames))
                {
                    // Fail on business object data registered in multiple storage.
                    throw new IllegalArgumentException(String.format("Found business object data registered in more than one storage. " +
                            "Please specify storage(s) in the request to resolve this. Business object data {%s}",
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey)));
                }
                else
                {
                    // Replace the storage unit entity if it belongs to a "higher priority" storage.
                    String currentUpperCaseStorageName = businessObjectDataToStorageUnitMap.get(businessObjectDataKey).getStorageName().toUpperCase();
                    int currentStorageIndex = upperCaseStorageNames.indexOf(currentUpperCaseStorageName);
                    int newStorageIndex = upperCaseStorageNames.indexOf(storageUnitAvailabilityDto.getStorageName().toUpperCase());
                    if (newStorageIndex < currentStorageIndex)
                    {
                        businessObjectDataToStorageUnitMap.put(businessObjectDataKey, storageUnitAvailabilityDto);
                    }
                }
            }
            else
            {
                businessObjectDataToStorageUnitMap.put(businessObjectDataKey, storageUnitAvailabilityDto);
            }
        }

        return new ArrayList<>(businessObjectDataToStorageUnitMap.values());
    }

    /**
     * Searches for and fails on any of "non-available" registered sub-partitions as per list of "matched" partition filters.
     *
     * @param businessObjectDefinitionEntity the business object definition entity
     * @param businessObjectFormatUsage the business object format usage (case-insensitive)
     * @param fileTypeEntity the file type entity
     * @param businessObjectFormatVersion the optional business object format version. If a business object format version isn't specified, the latest
     * available format version for each partition value will be used
     * @param matchedAvailablePartitionFilters the list of "matched" partition filters
     * @param availablePartitions the list of already discovered "available" partitions, where each partition consists of primary and optional sub-partition
     * values
     * @param storageNames the list of storage names
     * @param storageEntities the list of storage entities
     * @param s3StoragePlatformEntity the S3 storage platform entity
     */
    protected void notAllowNonAvailableRegisteredSubPartitions(BusinessObjectDefinitionEntity businessObjectDefinitionEntity, String businessObjectFormatUsage,
        FileTypeEntity fileTypeEntity, Integer businessObjectFormatVersion, List> matchedAvailablePartitionFilters,
        List> availablePartitions, List storageNames, List storageEntities, StoragePlatformEntity s3StoragePlatformEntity)
    {
        // Query all matched partition filters to discover any non-available registered sub-partitions. Retrieve latest business object data per list of
        // matched filters regardless of business object data and/or storage unit statuses. This is done to discover all registered sub-partitions regardless
        // of business object data or storage unit statuses. We do validate that all specified storage entities are of "S3" storage platform type, so we
        // specify S3 storage platform in the herdDao call below to select storage units only from S3 storage (when the list of storage names is empty).
        // We want to select any existing storage units regardless of their status, so we pass "false" for selectOnlyAvailableStorageUnits parameter.
        List matchedNotAvailableStorageUnitAvailabilityDtos = storageUnitDao
            .getStorageUnitsByPartitionFilters(businessObjectDefinitionEntity, businessObjectFormatUsage, fileTypeEntity, businessObjectFormatVersion,
                matchedAvailablePartitionFilters, null, null, storageEntities, s3StoragePlatformEntity, null, false, null);

        // Exclude all storage units with business object data having "DELETED" status.
        matchedNotAvailableStorageUnitAvailabilityDtos =
            storageUnitHelper.excludeBusinessObjectDataStatus(matchedNotAvailableStorageUnitAvailabilityDtos, BusinessObjectDataStatusEntity.DELETED);

        // Exclude all already discovered "available" partitions. Please note that, since we got here, the list of matched partitions can not be empty.
        matchedNotAvailableStorageUnitAvailabilityDtos =
            storageUnitHelper.excludePartitions(matchedNotAvailableStorageUnitAvailabilityDtos, availablePartitions);

        // Fail on any "non-available" registered sub-partitions.
        if (!CollectionUtils.isEmpty(matchedNotAvailableStorageUnitAvailabilityDtos))
        {
            // Get the business object data key for the first "non-available" registered sub-partition.
            BusinessObjectDataKey businessObjectDataKey = matchedNotAvailableStorageUnitAvailabilityDtos.get(0).getBusinessObjectDataKey();

            // Throw an exception.
            throw new ObjectNotFoundException(String.format(
                "Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " +
                    "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " +
                    "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).",
                businessObjectDefinitionEntity.getNamespace().getCode(), businessObjectDefinitionEntity.getName(), businessObjectFormatUsage,
                fileTypeEntity.getCode(), businessObjectFormatVersion, businessObjectDataKey.getPartitionValue(),
                StringUtils.join(businessObjectDataKey.getSubPartitionValues(), ","), businessObjectDataKey.getBusinessObjectDataVersion(),
                StringUtils.join(storageNames, ",")));
        }
    }

    /**
     * Gets a first unmatched partition filter from the list of unmatched filters.
     *
     * @param unmatchedPartitionFilters the list of unmatchedPartitionFilters
     *
     * @return the first unmatched partition filter
     */
    private List getFirstUnmatchedPartitionFilter(List> unmatchedPartitionFilters)
    {
        // Get the first unmatched partition filter from the list.
        List unmatchedPartitionFilter = unmatchedPartitionFilters.get(0);

        // Replace all null partition values with an empty string.
        for (int i = 0; i < unmatchedPartitionFilter.size(); i++)
        {
            if (unmatchedPartitionFilter.get(i) == null)
            {
                unmatchedPartitionFilter.set(i, "");
            }
        }

        return unmatchedPartitionFilter;
    }

    /**
     * Gets a storage entity per specified storage name. The method memorizes the responses for performance reasons.
     *
     * @param upperCaseStorageName the storage name in upper case
     * @param storageEntities the map of storage names in upper case to their relative storage entities
     *
     * @return the storage entity
     */
    private StorageEntity getStorageEntity(String upperCaseStorageName, Map storageEntities)
    {
        StorageEntity storageEntity;

        // If storage entity was already retrieved, use it. Otherwise, retrieve and store it in the map.
        if (storageEntities.containsKey(upperCaseStorageName))
        {
            storageEntity = storageEntities.get(upperCaseStorageName);
        }
        else
        {
            storageEntity = storageDaoHelper.getStorageEntity(upperCaseStorageName);
            storageEntities.put(upperCaseStorageName, storageEntity);
        }

        return storageEntity;
    }

    /**
     * Gets S3 key prefix velocity template for the specified storage entity. The method memorizes the responses for performance reasons.
     *
     * @param upperCaseStorageName the storage name in upper case
     * @param storageEntity the storage entity
     * @param s3KeyPrefixVelocityTemplates the map of storage names in upper case to their relative S3 key prefix velocity templates
     *
     * @return the S3 key prefix velocity template
     */
    private String getS3KeyPrefixVelocityTemplate(String upperCaseStorageName, StorageEntity storageEntity, Map s3KeyPrefixVelocityTemplates)
    {
        String s3KeyPrefixVelocityTemplate;

        // If S3 key prefix velocity template was already retrieved for this storage, use it.
        if (s3KeyPrefixVelocityTemplates.containsKey(upperCaseStorageName))
        {
            s3KeyPrefixVelocityTemplate = s3KeyPrefixVelocityTemplates.get(upperCaseStorageName);
        }
        // Otherwise, retrieve the S3 3 key prefix velocity template attribute value and store it in memory.
        else
        {
            // Retrieve S3 key prefix velocity template storage attribute value and store it in memory.
            s3KeyPrefixVelocityTemplate = storageHelper
                .getStorageAttributeValueByName(configurationHelper.getProperty(ConfigurationValue.S3_ATTRIBUTE_NAME_KEY_PREFIX_VELOCITY_TEMPLATE),
                    storageEntity, false);

            // Validate that S3 key prefix velocity template is configured.
            Assert.isTrue(StringUtils.isNotBlank(s3KeyPrefixVelocityTemplate),
                String.format("Storage \"%s\" has no S3 key prefix velocity template configured.", storageEntity.getName()));

            // Store the retrieved value in memory.
            s3KeyPrefixVelocityTemplates.put(upperCaseStorageName, s3KeyPrefixVelocityTemplate);
        }

        return s3KeyPrefixVelocityTemplate;
    }

    /**
     * Gets a business object format for the specified business  object format key. The method memorizes the responses for performance reasons.
     *
     * @param businessObjectFormatKey the business object format key
     * @param businessObjectFormats the map of business object keys to their relative business object format instances
     *
     * @return the business object format
     */
    private BusinessObjectFormat getBusinessObjectFormat(BusinessObjectFormatKey businessObjectFormatKey,
        Map businessObjectFormats)
    {
        BusinessObjectFormat businessObjectFormat;

        // If business object format was already retrieved, use it. Otherwise, retrieve and store it in the map.
        if (businessObjectFormats.containsKey(businessObjectFormatKey))
        {
            businessObjectFormat = businessObjectFormats.get(businessObjectFormatKey);
        }
        else
        {
            BusinessObjectFormatEntity businessObjectFormatEntity = businessObjectFormatDaoHelper.getBusinessObjectFormatEntity(businessObjectFormatKey);
            businessObjectFormat = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(businessObjectFormatEntity);
            businessObjectFormats.put(businessObjectFormatKey, businessObjectFormat);
        }

        return businessObjectFormat;
    }

    /**
     * Gets an S3 bucket name for the specified storage entity. The method memorizes the responses for performance reasons.
     *
     * @param upperCaseStorageName the storage name in upper case
     * @param storageEntity the storage entity
     * @param s3BucketNames the map of storage names in upper case to their relative S3 bucket names
     *
     * @return the S3 bucket name
     */
    private String getS3BucketName(String upperCaseStorageName, StorageEntity storageEntity, Map s3BucketNames)
    {
        String s3BucketName;

        // If bucket name was already retrieved for this storage, use it.
        if (s3BucketNames.containsKey(upperCaseStorageName))
        {
            s3BucketName = s3BucketNames.get(upperCaseStorageName);
        }
        // Otherwise, retrieve the S3 bucket name attribute value and store it in memory. Please note that it is required, so we pass in a "true" flag.
        else
        {
            s3BucketName = storageHelper
                .getStorageAttributeValueByName(configurationHelper.getProperty(ConfigurationValue.S3_ATTRIBUTE_NAME_BUCKET_NAME), storageEntity, true);
            s3BucketNames.put(upperCaseStorageName, s3BucketName);
        }

        return s3BucketName;
    }

    /**
     * Gets a list of Hive partitions. For single level partitioning, no auto-discovery of sub-partitions (sub-directories) is needed - the business object
     * data will be represented by a single Hive partition instance. For multiple level partitioning, this method performs an auto-discovery of all
     * sub-partitions (sub-directories) and creates a Hive partition object instance for each partition.
     *
     * @param businessObjectDataKey the business object data key
     * @param autoDiscoverableSubPartitionColumns the auto-discoverable sub-partition columns
     * @param s3KeyPrefix the S3 key prefix
     * @param storageFiles the storage files
     * @param storageName the storage name
     *
     * @return the list of Hive partitions
     */
    public List getHivePartitions(BusinessObjectDataKey businessObjectDataKey, List autoDiscoverableSubPartitionColumns,
        String s3KeyPrefix, Collection storageFiles, String storageName)
    {
        // We are using linked hash map to preserve the order of the discovered partitions.
        Map, HivePartitionDto> linkedHashMap = new LinkedHashMap<>();

        Pattern pattern = getHivePathPattern(autoDiscoverableSubPartitionColumns);
        for (String storageFile : storageFiles)
        {
            // Remove S3 key prefix from the file path. Please note that the storage files are already validated to start with S3 key prefix.
            String relativeFilePath = storageFile.substring(s3KeyPrefix.length());

            // Try to match the relative file path to the expected subpartition folders.
            Matcher matcher = pattern.matcher(relativeFilePath);
            Assert.isTrue(matcher.matches(), String.format("Registered storage file or directory does not match the expected Hive sub-directory pattern. " +
                    "Storage: {%s}, file/directory: {%s}, business object data: {%s}, S3 key prefix: {%s}, pattern: {%s}", storageName, storageFile,
                businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), s3KeyPrefix, pattern.pattern()));

            // Create a list of partition values.
            List partitionValues = new ArrayList<>();

            // Add partition values per business object data key.
            partitionValues.add(businessObjectDataKey.getPartitionValue());
            partitionValues.addAll(businessObjectDataKey.getSubPartitionValues());

            // Extract relative partition values.
            for (int i = 1; i <= matcher.groupCount() - 1; i++)
            {
                partitionValues.add(matcher.group(i));
            }

            // Get the matched trailing folder markers and an optional file name.
            String partitionPathTrailingPart = matcher.group(matcher.groupCount());

            // If we did not match all expected partition values along with the trailing folder markers and an optional file name, then this storage file
            // path is not part of a fully qualified partition (this is an intermediate folder marker) and we ignore it.
            if (!partitionValues.contains(null))
            {
                // Get path for this partition by removing trailing "/" followed by an optional file name or "_$folder$" which represents an empty folder in S3.
                String partitionPath = relativeFilePath.substring(0, relativeFilePath.length() - StringUtils.length(partitionPathTrailingPart));

                // Check if we already have that partition discovered - that would happen if partition contains multiple data files.
                HivePartitionDto hivePartition = linkedHashMap.get(partitionValues);

                if (hivePartition != null)
                {
                    // Partition is already discovered, so just validate that the relative paths match.
                    Assert.isTrue(hivePartition.getPath().equals(partitionPath), String.format(
                        "Found two different locations for the same Hive partition. Storage: {%s}, business object data: {%s}, " +
                            "S3 key prefix: {%s}, path[1]: {%s}, path[2]: {%s}", storageName,
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), s3KeyPrefix, hivePartition.getPath(), partitionPath));
                }
                else
                {
                    // Add this partition to the hash map of discovered partitions.
                    linkedHashMap.put(partitionValues, new HivePartitionDto(partitionPath, partitionValues));
                }
            }
        }

        List hivePartitions = new ArrayList<>();
        hivePartitions.addAll(linkedHashMap.values());

        return hivePartitions;
    }

    /**
     * Gets a pattern to match Hive partition sub-directories.
     *
     * @param partitionColumns the list of partition columns
     *
     * @return the newly created pattern to match Hive partition sub-directories.
     */
    private Pattern getHivePathPattern(List partitionColumns)
    {
        return Pattern.compile(getHivePathRegex(partitionColumns));
    }

    /**
     * Gets a regex to match Hive partition sub-directories.
     *
     * @param partitionColumns the list of partition columns
     *
     * @return the newly created regex to match Hive partition sub-directories.
     */
    private String getHivePathRegex(List partitionColumns)
    {
        StringBuilder sb = new StringBuilder(26);

        sb.append("^(?:");      // Start a non-capturing group for the entire regex.

        // For each partition column, add a regular expression to match "=" sub-directory.
        for (SchemaColumn partitionColumn : partitionColumns)
        {
            sb.append("(?:");   // Start a non-capturing group for the remainder of the regex.
            sb.append("(?:");   // Start a non-capturing group for folder markers.
            sb.append("\\/");   // Add a trailing "/".
            sb.append('|');     // Ann an OR.
            sb.append(REGEX_S3_EMPTY_PARTITION);    // Add a trailing "_$folder$", which represents an empty partition in S3.
            sb.append(')');     // Close the non-capturing group for folder markers.
            sb.append('|');     // Add an OR.
            sb.append("(?:");   // Start a non-capturing group for "/=".
            sb.append("\\/");   // Add a "/".
            // We are using a non-capturing group for the partition column names here - this is done by adding "?:" to the beginning of a capture group.
            sb.append("(?:");   // Start a non-capturing group for column name.
            sb.append("(?i)");  // Match partition column names case insensitive.
            sb.append(Matcher.quoteReplacement(partitionColumn.getName()));
            sb.append('|');     // Add an OR.
            // For sub-partition folder, we do support partition column names having all underscores replaced with hyphens.
            sb.append(Matcher.quoteReplacement(partitionColumn.getName().replace("_", "-")));
            sb.append(')');     // Close the non-capturing group for column name.
            sb.append("=([^/]+)"); // Add a capturing group for a column value.
        }

        // Add additional regular expression for the trailing empty folder marker and/or "/" followed by an optional file name.
        sb.append("(");     // Start a capturing group for folder markers and an optional file name.
        sb.append("\\/");   // Add a trailing "/".
        sb.append("[^/]*"); // Add an optional file name.
        sb.append('|');     // Add an OR.
        sb.append(REGEX_S3_EMPTY_PARTITION);    // Add a trailing "_$folder$", which represents an empty partition in S3.
        sb.append(")");     // Close the capturing group for folder markers and an optional file name.

        // Close all non-capturing groups that are still open.
        for (int i = 0; i < 2 * partitionColumns.size(); i++)
        {
            sb.append(')');
        }

        sb.append(')');     // Close the non-capturing group for the entire regex.
        sb.append('$');

        return sb.toString();
    }

    static class GenerateDdlRequestWrapper
    {
        private Boolean isGeneratePartitionsRequest;

        private Boolean allowMissingData;

        private Integer businessObjectDataVersion;

        private BusinessObjectFormatEntity businessObjectFormatEntity;

        private Integer businessObjectFormatVersion;

        private CustomDdlEntity customDdlEntity;

        private Boolean includeAllRegisteredSubPartitions;

        private Boolean includeDropPartitions;

        private Boolean includeDropTableStatement;

        private Boolean includeIfNotExistsOption;

        private Boolean isPartitioned;

        private List> partitionFilters;

        private Map cachedS3BucketNames;

        private Map cachedStorageEntities;

        private List storageNames;

        private List requestedStorageEntities;

        private Boolean suppressScanForUnregisteredSubPartitions;

        private Boolean combineMultiplePartitionsInSingleAlterTable;

        private String tableName;

        private XMLGregorianCalendar asOfTime;

        public Boolean getGeneratePartitionsRequest()
        {
            return isGeneratePartitionsRequest;
        }

        public void setGeneratePartitionsRequest(Boolean generatePartitionsRequest)
        {
            isGeneratePartitionsRequest = generatePartitionsRequest;
        }

        public Boolean getAllowMissingData()
        {
            return allowMissingData;
        }

        public void setAllowMissingData(Boolean allowMissingData)
        {
            this.allowMissingData = allowMissingData;
        }

        public Integer getBusinessObjectDataVersion()
        {
            return businessObjectDataVersion;
        }

        public void setBusinessObjectDataVersion(Integer businessObjectDataVersion)
        {
            this.businessObjectDataVersion = businessObjectDataVersion;
        }

        public BusinessObjectFormatEntity getBusinessObjectFormatEntity()
        {
            return businessObjectFormatEntity;
        }

        public void setBusinessObjectFormatEntity(BusinessObjectFormatEntity businessObjectFormatEntity)
        {
            this.businessObjectFormatEntity = businessObjectFormatEntity;
        }

        public Integer getBusinessObjectFormatVersion()
        {
            return businessObjectFormatVersion;
        }

        public void setBusinessObjectFormatVersion(Integer businessObjectFormatVersion)
        {
            this.businessObjectFormatVersion = businessObjectFormatVersion;
        }

        public CustomDdlEntity getCustomDdlEntity()
        {
            return customDdlEntity;
        }

        public void setCustomDdlEntity(CustomDdlEntity customDdlEntity)
        {
            this.customDdlEntity = customDdlEntity;
        }

        public Boolean getIncludeAllRegisteredSubPartitions()
        {
            return includeAllRegisteredSubPartitions;
        }

        public void setIncludeAllRegisteredSubPartitions(Boolean includeAllRegisteredSubPartitions)
        {
            this.includeAllRegisteredSubPartitions = includeAllRegisteredSubPartitions;
        }

        public Boolean getIncludeDropPartitions()
        {
            return includeDropPartitions;
        }

        public void setIncludeDropPartitions(Boolean includeDropPartitions)
        {
            this.includeDropPartitions = includeDropPartitions;
        }

        public Boolean getIncludeDropTableStatement()
        {
            return includeDropTableStatement;
        }

        public void setIncludeDropTableStatement(Boolean includeDropTableStatement)
        {
            this.includeDropTableStatement = includeDropTableStatement;
        }

        public Boolean getIncludeIfNotExistsOption()
        {
            return includeIfNotExistsOption;
        }

        public void setIncludeIfNotExistsOption(Boolean includeIfNotExistsOption)
        {
            this.includeIfNotExistsOption = includeIfNotExistsOption;
        }

        public Boolean getPartitioned()
        {
            return isPartitioned;
        }

        public void setPartitioned(Boolean partitioned)
        {
            isPartitioned = partitioned;
        }

        public List> getPartitionFilters()
        {
            return partitionFilters;
        }

        public void setPartitionFilters(List> partitionFilters)
        {
            this.partitionFilters = partitionFilters;
        }

        public Map getCachedS3BucketNames()
        {
            return cachedS3BucketNames;
        }

        public void setCachedS3BucketNames(Map cachedS3BucketNames)
        {
            this.cachedS3BucketNames = cachedS3BucketNames;
        }

        public Map getCachedStorageEntities()
        {
            return cachedStorageEntities;
        }

        public void setCachedStorageEntities(Map cachedStorageEntities)
        {
            this.cachedStorageEntities = cachedStorageEntities;
        }

        public List getStorageNames()
        {
            return storageNames;
        }

        public void setStorageNames(List storageNames)
        {
            this.storageNames = storageNames;
        }

        public List getRequestedStorageEntities()
        {
            return requestedStorageEntities;
        }

        public void setRequestedStorageEntities(List requestedStorageEntities)
        {
            this.requestedStorageEntities = requestedStorageEntities;
        }

        public Boolean getSuppressScanForUnregisteredSubPartitions()
        {
            return suppressScanForUnregisteredSubPartitions;
        }

        public void setSuppressScanForUnregisteredSubPartitions(Boolean suppressScanForUnregisteredSubPartitions)
        {
            this.suppressScanForUnregisteredSubPartitions = suppressScanForUnregisteredSubPartitions;
        }

        public Boolean getCombineMultiplePartitionsInSingleAlterTable()
        {
            return combineMultiplePartitionsInSingleAlterTable;
        }

        public void setCombineMultiplePartitionsInSingleAlterTable(Boolean combineMultiplePartitionsInSingleAlterTable)
        {
            this.combineMultiplePartitionsInSingleAlterTable = combineMultiplePartitionsInSingleAlterTable;
        }

        public String getTableName()
        {
            return tableName;
        }

        public void setTableName(String tableName)
        {
            this.tableName = tableName;
        }

        public XMLGregorianCalendar getAsOfTime()
        {
            return asOfTime;
        }

        public void setAsOfTime(XMLGregorianCalendar asOfTime)
        {
            this.asOfTime = asOfTime;
        }
    }

    GenerateDdlRequestWrapper getGenerateDdlRequestWrapperInstance()
    {
        return new GenerateDdlRequestWrapper();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy