// META-INF/smithy/glue.smithy
$version: "2.0"

metadata suppressions = [
    {
        id: "HttpMethodSemantics"
        namespace: "*"
    }
    {
        id: "HttpResponseCodeSemantics"
        namespace: "*"
    }
    {
        id: "PaginatedTrait"
        namespace: "*"
    }
    {
        id: "HttpHeaderTrait"
        namespace: "*"
    }
    {
        id: "HttpUriConflict"
        namespace: "*"
    }
    {
        id: "Service"
        namespace: "*"
    }
]

namespace com.amazonaws.glue

use aws.api#service
use aws.auth#sigv4
use aws.protocols#awsJson1_1

/// Glue
///
/// Defines the public endpoint for the Glue service.
@service( sdkId: "Glue" arnNamespace: "glue" cloudFormationName: "Glue" cloudTrailEventSource: "glue.amazonaws.com" endpointPrefix: "glue" ) @sigv4( name: "glue" ) @awsJson1_1 @title("AWS Glue") service AWSGlue { version: "2017-03-31" operations: [ BatchCreatePartition BatchDeleteConnection BatchDeletePartition BatchDeleteTable BatchDeleteTableVersion BatchGetBlueprints BatchGetCrawlers BatchGetCustomEntityTypes BatchGetDataQualityResult BatchGetDevEndpoints BatchGetJobs BatchGetPartition BatchGetTriggers BatchGetWorkflows BatchStopJobRun BatchUpdatePartition CancelDataQualityRuleRecommendationRun CancelDataQualityRulesetEvaluationRun CancelMLTaskRun CancelStatement CheckSchemaVersionValidity CreateBlueprint CreateClassifier CreateConnection CreateCrawler CreateCustomEntityType CreateDatabase CreateDataQualityRuleset CreateDevEndpoint CreateJob CreateMLTransform CreatePartition CreatePartitionIndex CreateRegistry CreateSchema CreateScript CreateSecurityConfiguration CreateSession CreateTable CreateTrigger CreateUserDefinedFunction CreateWorkflow DeleteBlueprint DeleteClassifier DeleteColumnStatisticsForPartition DeleteColumnStatisticsForTable DeleteConnection DeleteCrawler DeleteCustomEntityType DeleteDatabase DeleteDataQualityRuleset DeleteDevEndpoint DeleteJob DeleteMLTransform DeletePartition DeletePartitionIndex DeleteRegistry DeleteResourcePolicy DeleteSchema DeleteSchemaVersions DeleteSecurityConfiguration DeleteSession DeleteTable DeleteTableVersion DeleteTrigger DeleteUserDefinedFunction DeleteWorkflow GetBlueprint GetBlueprintRun GetBlueprintRuns GetCatalogImportStatus GetClassifier GetClassifiers GetColumnStatisticsForPartition GetColumnStatisticsForTable GetConnection GetConnections GetCrawler GetCrawlerMetrics GetCrawlers GetCustomEntityType GetDatabase GetDatabases GetDataCatalogEncryptionSettings GetDataflowGraph GetDataQualityResult GetDataQualityRuleRecommendationRun GetDataQualityRuleset GetDataQualityRulesetEvaluationRun GetDevEndpoint GetDevEndpoints GetJob GetJobBookmark GetJobRun GetJobRuns GetJobs GetMapping GetMLTaskRun GetMLTaskRuns GetMLTransform GetMLTransforms GetPartition GetPartitionIndexes GetPartitions GetPlan GetRegistry GetResourcePolicies GetResourcePolicy GetSchema GetSchemaByDefinition GetSchemaVersion GetSchemaVersionsDiff GetSecurityConfiguration GetSecurityConfigurations GetSession GetStatement GetTable GetTables GetTableVersion GetTableVersions GetTags GetTrigger GetTriggers GetUnfilteredPartitionMetadata GetUnfilteredPartitionsMetadata GetUnfilteredTableMetadata GetUserDefinedFunction GetUserDefinedFunctions GetWorkflow GetWorkflowRun GetWorkflowRunProperties GetWorkflowRuns ImportCatalogToGlue ListBlueprints ListCrawlers ListCrawls ListCustomEntityTypes ListDataQualityResults ListDataQualityRuleRecommendationRuns ListDataQualityRulesetEvaluationRuns ListDataQualityRulesets ListDevEndpoints ListJobs ListMLTransforms ListRegistries ListSchemas ListSchemaVersions ListSessions ListStatements ListTriggers ListWorkflows PutDataCatalogEncryptionSettings PutResourcePolicy PutSchemaVersionMetadata PutWorkflowRunProperties QuerySchemaVersionMetadata RegisterSchemaVersion RemoveSchemaVersionMetadata ResetJobBookmark ResumeWorkflowRun RunStatement SearchTables StartBlueprintRun StartCrawler StartCrawlerSchedule StartDataQualityRuleRecommendationRun StartDataQualityRulesetEvaluationRun StartExportLabelsTaskRun StartImportLabelsTaskRun StartJobRun StartMLEvaluationTaskRun StartMLLabelingSetGenerationTaskRun StartTrigger StartWorkflowRun StopCrawler 
StopCrawlerSchedule StopSession StopTrigger StopWorkflowRun TagResource UntagResource UpdateBlueprint UpdateClassifier UpdateColumnStatisticsForPartition UpdateColumnStatisticsForTable UpdateConnection UpdateCrawler UpdateCrawlerSchedule UpdateDatabase UpdateDataQualityRuleset UpdateDevEndpoint UpdateJob UpdateJobFromSourceControl UpdateMLTransform UpdatePartition UpdateRegistry UpdateSchema UpdateSourceControlFromJob UpdateTable UpdateTrigger UpdateUserDefinedFunction UpdateWorkflow ] } ///

/// Creates one or more partitions in a batch operation.
operation BatchCreatePartition { input: BatchCreatePartitionRequest output: BatchCreatePartitionResponse errors: [ AlreadyExistsException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Deletes a list of connection definitions from the Data Catalog.
operation BatchDeleteConnection { input: BatchDeleteConnectionRequest output: BatchDeleteConnectionResponse errors: [ InternalServiceException OperationTimeoutException ] } ///

/// Deletes one or more partitions in a batch operation.
operation BatchDeletePartition { input: BatchDeletePartitionRequest output: BatchDeletePartitionResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes multiple tables at once.
///
/// After completing this operation, you no longer have access to the table versions and partitions that belong to the deleted table. Glue deletes these "orphaned" resources asynchronously in a timely manner, at the discretion of the service.
///
/// To ensure the immediate deletion of all related resources, before calling BatchDeleteTable, use DeleteTableVersion or BatchDeleteTableVersion, and DeletePartition or BatchDeletePartition, to delete any resources that belong to the table.
operation BatchDeleteTable { input: BatchDeleteTableRequest output: BatchDeleteTableResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNotReadyException ] } ///

/// Deletes a specified batch of versions of a table.
operation BatchDeleteTableVersion { input: BatchDeleteTableVersionRequest output: BatchDeleteTableVersionResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves information about a list of blueprints.
operation BatchGetBlueprints { input: BatchGetBlueprintsRequest output: BatchGetBlueprintsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Returns a list of resource metadata for a given list of crawler names. After calling the ListCrawlers operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetCrawlers { input: BatchGetCrawlersRequest output: BatchGetCrawlersResponse errors: [ InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the details for the custom patterns specified by a list of names.
operation BatchGetCustomEntityTypes { input: BatchGetCustomEntityTypesRequest output: BatchGetCustomEntityTypesResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves a list of data quality results for the specified result IDs.
operation BatchGetDataQualityResult { input: BatchGetDataQualityResultRequest output: BatchGetDataQualityResultResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Returns a list of resource metadata for a given list of development endpoint names. After calling the ListDevEndpoints operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetDevEndpoints { input: BatchGetDevEndpointsRequest output: BatchGetDevEndpointsResponse errors: [ AccessDeniedException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Returns a list of resource metadata for a given list of job names. After calling the ListJobs operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetJobs { input: BatchGetJobsRequest output: BatchGetJobsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves partitions in a batch request.
operation BatchGetPartition { input: BatchGetPartitionRequest output: BatchGetPartitionResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException InvalidStateException OperationTimeoutException ] } ///

/// Returns a list of resource metadata for a given list of trigger names. After calling the ListTriggers operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetTriggers { input: BatchGetTriggersRequest output: BatchGetTriggersResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Returns a list of resource metadata for a given list of workflow names. After calling the ListWorkflows operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetWorkflows { input: BatchGetWorkflowsRequest output: BatchGetWorkflowsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Stops one or more job runs for a specified job definition.
operation BatchStopJobRun { input: BatchStopJobRunRequest output: BatchStopJobRunResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Updates one or more partitions in a batch operation.
operation BatchUpdatePartition { input: BatchUpdatePartitionRequest output: BatchUpdatePartitionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Cancels the specified recommendation run that was being used to generate rules.
operation CancelDataQualityRuleRecommendationRun { input: CancelDataQualityRuleRecommendationRunRequest output: CancelDataQualityRuleRecommendationRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Cancels a run where a ruleset is being evaluated against a data source.
operation CancelDataQualityRulesetEvaluationRun { input: CancelDataQualityRulesetEvaluationRunRequest output: CancelDataQualityRulesetEvaluationRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Cancels (stops) a task run. Machine learning task runs are asynchronous tasks that Glue runs on your behalf as part of various machine learning workflows. You can cancel a machine learning task run at any time by calling CancelMLTaskRun with a task run's parent transform's TransformID and the task run's TaskRunId.
operation CancelMLTaskRun { input: CancelMLTaskRunRequest output: CancelMLTaskRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Cancels the statement.
operation CancelStatement { input: CancelStatementRequest output: CancelStatementResponse errors: [ AccessDeniedException EntityNotFoundException IllegalSessionStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Validates the supplied schema. This call has no side effects; it simply validates the supplied schema using DataFormat as the format. Since it does not take a schema set name, no compatibility checks are performed.
operation CheckSchemaVersionValidity {
    input := {
        /// The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.
        @required
        DataFormat: DataFormat

        /// The definition of the schema that has to be validated.
        @required
        SchemaDefinition: SchemaDefinitionString
    }
    output: CheckSchemaVersionValidityResponse
    errors: [
        AccessDeniedException
        InternalServiceException
        InvalidInputException
    ]
}

/// Registers a blueprint with Glue.
operation CreateBlueprint { input: CreateBlueprintRequest output: CreateBlueprintResponse errors: [ AlreadyExistsException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a classifier in the user's account. This can be a GrokClassifier, an XMLClassifier, a JsonClassifier, or a CsvClassifier, depending on which field of the request is present.
operation CreateClassifier { input: CreateClassifierRequest output: CreateClassifierResponse errors: [ AlreadyExistsException InvalidInputException OperationTimeoutException ] } ///

/// Creates a connection definition in the Data Catalog.
operation CreateConnection { input: CreateConnectionRequest output: CreateConnectionResponse errors: [ AlreadyExistsException GlueEncryptionException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new crawler with specified targets, role, configuration, and optional schedule. At least one crawl target must be specified, in the s3Targets field, the jdbcTargets field, or the DynamoDBTargets field.
operation CreateCrawler { input: CreateCrawlerRequest output: CreateCrawlerResponse errors: [ AlreadyExistsException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a custom pattern that is used to detect sensitive data across the columns and rows of your structured data.
///
/// Each custom pattern you create specifies a regular expression and an optional list of context words. If no context words are passed, only a regular expression is checked.
operation CreateCustomEntityType { input: CreateCustomEntityTypeRequest output: CreateCustomEntityTypeResponse errors: [ AccessDeniedException AlreadyExistsException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new database in a Data Catalog.
operation CreateDatabase { input: CreateDatabaseRequest output: CreateDatabaseResponse errors: [ AlreadyExistsException ConcurrentModificationException FederatedResourceAlreadyExistsException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a data quality ruleset with DQDL rules applied to a specified Glue table.
///
/// You create the ruleset using the Data Quality Definition Language (DQDL). For more information, see the Glue developer guide.
@idempotent operation CreateDataQualityRuleset { input: CreateDataQualityRulesetRequest output: CreateDataQualityRulesetResponse errors: [ AlreadyExistsException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new development endpoint.
operation CreateDevEndpoint { input: CreateDevEndpointRequest output: CreateDevEndpointResponse errors: [ AccessDeniedException AlreadyExistsException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ValidationException ] } ///

/// Creates a new job definition.
operation CreateJob { input: CreateJobRequest output: CreateJobResponse errors: [ AlreadyExistsException ConcurrentModificationException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a Glue machine learning transform. This operation creates the transform and all the necessary parameters to train it.
///
/// Call this operation as the first step in the process of using a machine learning transform (such as the FindMatches transform) for deduplicating data. You can provide an optional Description, in addition to the parameters that you want to use for your algorithm.
///
/// You must also specify certain parameters for the tasks that Glue runs on your behalf as part of learning from your data and creating a high-quality machine learning transform. These parameters include Role, and optionally, AllocatedCapacity, Timeout, and MaxRetries. For more information, see Jobs.
operation CreateMLTransform { input: CreateMLTransformRequest output: CreateMLTransformResponse errors: [ AccessDeniedException AlreadyExistsException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new partition.
operation CreatePartition { input: CreatePartitionRequest output: CreatePartitionResponse errors: [ AlreadyExistsException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a specified partition index in an existing table.
operation CreatePartitionIndex { input: CreatePartitionIndexRequest output: CreatePartitionIndexResponse errors: [ AlreadyExistsException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new registry which may be used to hold a collection of schemas.
operation CreateRegistry {
    input := {
        /// Name of the registry to be created, of max length 255; may only contain letters, numbers, hyphen, underscore, dollar sign, or hash mark. No whitespace.
        @required
        RegistryName: SchemaRegistryNameString

        /// A description of the registry. If a description is not provided, there will not be any default value for it.
        Description: DescriptionString

        /// Amazon Web Services tags that contain a key value pair and may be searched by console, command line, or API.
        Tags: TagsMap
    }
    output: CreateRegistryResponse
    errors: [
        AccessDeniedException
        AlreadyExistsException
        ConcurrentModificationException
        InternalServiceException
        InvalidInputException
        ResourceNumberLimitExceededException
    ]
}

/// Creates a new schema set and registers the schema definition. If the schema set already exists, returns an error without actually registering the version.
///
/// When the schema set is created, a version checkpoint will be set to the first version. Compatibility mode "DISABLED" restricts any additional schema versions from being added after the first schema version. For all other compatibility modes, validation of compatibility settings will be applied only from the second version onwards when the RegisterSchemaVersion API is used.
///
/// When this API is called without a RegistryId, this will create an entry for a "default-registry" in the registry database tables, if it is not already present.
operation CreateSchema {
    input := {
        /// This is a wrapper shape to contain the registry identity fields. If this is not provided, the default registry will be used. The ARN format for the same will be: arn:aws:glue:us-east-2::registry/default-registry:random-5-letter-id.
        RegistryId: RegistryId

        /// Name of the schema to be created, of max length 255; may only contain letters, numbers, hyphen, underscore, dollar sign, or hash mark. No whitespace.
        @required
        SchemaName: SchemaRegistryNameString

        /// The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.
        @required
        DataFormat: DataFormat

        /// The compatibility mode of the schema. The possible values are:
        ///
        /// - NONE: No compatibility mode applies. You can use this choice in development scenarios or if you do not know the compatibility mode that you want to apply to schemas. Any new version added will be accepted without undergoing a compatibility check.
        /// - DISABLED: This compatibility choice prevents versioning for a particular schema. You can use this choice to prevent future versioning of a schema.
        /// - BACKWARD: This compatibility choice is recommended as it allows data receivers to read both the current and one previous schema version. This means that, for instance, a new schema version cannot drop data fields or change the type of these fields, so they can't be read by readers using the previous version.
        /// - BACKWARD_ALL: This compatibility choice allows data receivers to read both the current and all previous schema versions. You can use this choice when you need to delete fields or add optional fields, and check compatibility against all previous schema versions.
        /// - FORWARD: This compatibility choice allows data receivers to read both the current and one next schema version, but not necessarily later versions. You can use this choice when you need to add fields or delete optional fields, but only check compatibility against the last schema version.
        /// - FORWARD_ALL: This compatibility choice allows data receivers to read data written by producers of any new registered schema. You can use this choice when you need to add fields or delete optional fields, and check compatibility against all previous schema versions.
        /// - FULL: This compatibility choice allows data receivers to read data written by producers using the previous or next version of the schema, but not necessarily earlier or later versions. You can use this choice when you need to add or remove optional fields, but only check compatibility against the last schema version.
        /// - FULL_ALL: This compatibility choice allows data receivers to read data written by producers using all previous schema versions. You can use this choice when you need to add or remove optional fields, and check compatibility against all previous schema versions.
        Compatibility: Compatibility

        /// An optional description of the schema. If a description is not provided, there will not be any automatic default value for it.
        Description: DescriptionString

        /// Amazon Web Services tags that contain a key value pair and may be searched by console, command line, or API. If specified, follows the Amazon Web Services tags-on-create pattern.
        Tags: TagsMap

        /// The schema definition using the DataFormat setting for SchemaName.
        SchemaDefinition: SchemaDefinitionString
    }
    output: CreateSchemaResponse
    errors: [
        AccessDeniedException
        AlreadyExistsException
        ConcurrentModificationException
        EntityNotFoundException
        InternalServiceException
        InvalidInputException
        ResourceNumberLimitExceededException
    ]
}

/// Transforms a directed acyclic graph (DAG) into code.
operation CreateScript { input: CreateScriptRequest output: CreateScriptResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Creates a new security configuration. A security configuration is a set of security properties that can be used by Glue. You can use a security configuration to encrypt data at rest. For information about using security configurations in Glue, see Encrypting Data Written by Crawlers, Jobs, and Development Endpoints.
operation CreateSecurityConfiguration { input: CreateSecurityConfigurationRequest output: CreateSecurityConfigurationResponse errors: [ AlreadyExistsException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new session.
operation CreateSession { input: CreateSessionRequest output: CreateSessionResponse errors: [ AccessDeniedException AlreadyExistsException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ValidationException ] } ///

/// Creates a new table definition in the Data Catalog.
operation CreateTable { input: CreateTableRequest output: CreateTableResponse errors: [ AlreadyExistsException ConcurrentModificationException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNotReadyException ResourceNumberLimitExceededException ] } ///

/// Creates a new trigger.
operation CreateTrigger { input: CreateTriggerRequest output: CreateTriggerResponse errors: [ AlreadyExistsException ConcurrentModificationException EntityNotFoundException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new function definition in the Data Catalog.
operation CreateUserDefinedFunction { input: CreateUserDefinedFunctionRequest output: CreateUserDefinedFunctionResponse errors: [ AlreadyExistsException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Creates a new workflow.
operation CreateWorkflow { input: CreateWorkflowRequest output: CreateWorkflowResponse errors: [ AlreadyExistsException ConcurrentModificationException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

/// Deletes an existing blueprint.
operation DeleteBlueprint { input: DeleteBlueprintRequest output: DeleteBlueprintResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Removes a classifier from the Data Catalog.
operation DeleteClassifier { input: DeleteClassifierRequest output: DeleteClassifierResponse errors: [ EntityNotFoundException OperationTimeoutException ] } ///

/// Deletes the partition column statistics of a column.
///
/// The Identity and Access Management (IAM) permission required for this operation is DeletePartition.
operation DeleteColumnStatisticsForPartition { input: DeleteColumnStatisticsForPartitionRequest output: DeleteColumnStatisticsForPartitionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes table statistics of columns.
///
/// The Identity and Access Management (IAM) permission required for this operation is DeleteTable.
operation DeleteColumnStatisticsForTable { input: DeleteColumnStatisticsForTableRequest output: DeleteColumnStatisticsForTableResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a connection from the Data Catalog.
operation DeleteConnection { input: DeleteConnectionRequest output: DeleteConnectionResponse errors: [ EntityNotFoundException OperationTimeoutException ] } ///

/// Removes a specified crawler from the Glue Data Catalog, unless the crawler state is RUNNING.
operation DeleteCrawler { input: DeleteCrawlerRequest output: DeleteCrawlerResponse errors: [ CrawlerRunningException EntityNotFoundException OperationTimeoutException SchedulerTransitioningException ] } ///

/// Deletes a custom pattern by specifying its name.
operation DeleteCustomEntityType { input: DeleteCustomEntityTypeRequest output: DeleteCustomEntityTypeResponse errors: [ AccessDeniedException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Removes a specified database from a Data Catalog.
///
/// After completing this operation, you no longer have access to the tables (and all table versions and partitions that might belong to the tables) and the user-defined functions in the deleted database. Glue deletes these "orphaned" resources asynchronously in a timely manner, at the discretion of the service.
///
/// To ensure the immediate deletion of all related resources, before calling DeleteDatabase, use DeleteTableVersion or BatchDeleteTableVersion, DeletePartition or BatchDeletePartition, DeleteUserDefinedFunction, and DeleteTable or BatchDeleteTable, to delete any resources that belong to the database.
operation DeleteDatabase { input: DeleteDatabaseRequest output: DeleteDatabaseResponse errors: [ ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a data quality ruleset.
operation DeleteDataQualityRuleset { input: DeleteDataQualityRulesetRequest output: DeleteDataQualityRulesetResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a specified development endpoint.
operation DeleteDevEndpoint { input: DeleteDevEndpointRequest output: DeleteDevEndpointResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a specified job definition. If the job definition is not found, no exception is thrown.
operation DeleteJob { input: DeleteJobRequest output: DeleteJobResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a Glue machine learning transform. Machine learning transforms are a special type of transform that use machine learning to learn the details of the transformation to be performed by learning from examples provided by humans. These transformations are then saved by Glue. If you no longer need a transform, you can delete it by calling DeleteMLTransforms. However, any Glue jobs that still reference the deleted transform will no longer succeed.
operation DeleteMLTransform { input: DeleteMLTransformRequest output: DeleteMLTransformResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a specified partition.
operation DeletePartition { input: DeletePartitionRequest output: DeletePartitionResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a specified partition index from an existing table.
operation DeletePartitionIndex { input: DeletePartitionIndexRequest output: DeletePartitionIndexResponse errors: [ ConflictException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes the entire registry, including the schema and all of its versions. To get the status of the delete operation, you can call the GetRegistry API after the asynchronous call. Deleting a registry will deactivate all online operations for the registry, such as the UpdateRegistry, CreateSchema, UpdateSchema, and RegisterSchemaVersion APIs.
operation DeleteRegistry {
    input := {
        /// This is a wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
        @required
        RegistryId: RegistryId
    }
    output: DeleteRegistryResponse
    errors: [
        AccessDeniedException
        ConcurrentModificationException
        EntityNotFoundException
        InvalidInputException
    ]
}

/// Deletes a specified policy.
operation DeleteResourcePolicy { input: DeleteResourcePolicyRequest output: DeleteResourcePolicyResponse errors: [ ConditionCheckFailureException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes the entire schema set, including the schema set and all of its versions. To get the status of the delete operation, you can call the GetSchema API after the asynchronous call. Deleting a registry will deactivate all online operations for the schema, such as the GetSchemaByDefinition and RegisterSchemaVersion APIs.
operation DeleteSchema {
    input := {
        /// This is a wrapper structure that may contain the schema name and Amazon Resource Name (ARN).
        @required
        SchemaId: SchemaId
    }
    output: DeleteSchemaResponse
    errors: [
        AccessDeniedException
        ConcurrentModificationException
        EntityNotFoundException
        InvalidInputException
    ]
}

/// Removes versions from the specified schema. A version number or range may be supplied. If the compatibility mode forbids deleting a version that is necessary, such as BACKWARDS_FULL, an error is returned. Calling the GetSchemaVersions API after this call will list the status of the deleted versions.
///
/// When the range of version numbers contains a checkpointed version, the API will return a 409 conflict and will not proceed with the deletion. You have to remove the checkpoint first using the DeleteSchemaCheckpoint API before using this API.
///
/// You cannot use the DeleteSchemaVersions API to delete the first schema version in the schema set. The first schema version can only be deleted by the DeleteSchema API. This operation will also delete the attached SchemaVersionMetadata under the schema versions. Hard deletes will be enforced on the database.
///
/// If the compatibility mode forbids deleting a version that is necessary, such as BACKWARDS_FULL, an error is returned.
operation DeleteSchemaVersions {
    input := {
        /// This is a wrapper structure that may contain the schema name and Amazon Resource Name (ARN).
        @required
        SchemaId: SchemaId

        /// A version range may be supplied, which may be of the format:
        ///
        /// - a single version number, 5
        /// - a range, 5-8: deletes versions 5, 6, 7, 8
        @required
        Versions: VersionsString
    }
    output: DeleteSchemaVersionsResponse
    errors: [
        AccessDeniedException
        ConcurrentModificationException
        EntityNotFoundException
        InvalidInputException
    ]
}

/// Deletes a specified security configuration.
operation DeleteSecurityConfiguration { input: DeleteSecurityConfigurationRequest output: DeleteSecurityConfigurationResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes the session.
operation DeleteSession { input: DeleteSessionRequest output: DeleteSessionResponse errors: [ AccessDeniedException ConcurrentModificationException IllegalSessionStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Removes a table definition from the Data Catalog.
///
/// After completing this operation, you no longer have access to the table versions and partitions that belong to the deleted table. Glue deletes these "orphaned" resources asynchronously in a timely manner, at the discretion of the service.
///
/// To ensure the immediate deletion of all related resources, before calling DeleteTable, use DeleteTableVersion or BatchDeleteTableVersion, and DeletePartition or BatchDeletePartition, to delete any resources that belong to the table.
operation DeleteTable { input: DeleteTableRequest output: DeleteTableResponse errors: [ ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ResourceNotReadyException ] } ///

/// Deletes a specified version of a table.
operation DeleteTableVersion { input: DeleteTableVersionRequest output: DeleteTableVersionResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a specified trigger. If the trigger is not found, no exception is thrown.
operation DeleteTrigger { input: DeleteTriggerRequest output: DeleteTriggerResponse errors: [ ConcurrentModificationException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes an existing function definition from the Data Catalog.
operation DeleteUserDefinedFunction { input: DeleteUserDefinedFunctionRequest output: DeleteUserDefinedFunctionResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Deletes a workflow.
operation DeleteWorkflow { input: DeleteWorkflowRequest output: DeleteWorkflowResponse errors: [ ConcurrentModificationException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the details of a blueprint.
operation GetBlueprint { input: GetBlueprintRequest output: GetBlueprintResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the details of a blueprint run.
operation GetBlueprintRun { input: GetBlueprintRunRequest output: GetBlueprintRunResponse errors: [ EntityNotFoundException InternalServiceException OperationTimeoutException ] } ///

/// Retrieves the details of blueprint runs for a specified blueprint.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetBlueprintRuns { input: GetBlueprintRunsRequest output: GetBlueprintRunsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the status of a migration operation.
operation GetCatalogImportStatus { input: GetCatalogImportStatusRequest output: GetCatalogImportStatusResponse errors: [ InternalServiceException OperationTimeoutException ] } ///

/// Retrieves a classifier by name.
operation GetClassifier { input: GetClassifierRequest output: GetClassifierResponse errors: [ EntityNotFoundException OperationTimeoutException ] } ///

/// Lists all classifier objects in the Data Catalog.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetClassifiers { input: GetClassifiersRequest output: GetClassifiersResponse errors: [ OperationTimeoutException ] } ///

/// Retrieves partition statistics of columns.
///
/// The Identity and Access Management (IAM) permission required for this operation is GetPartition.
operation GetColumnStatisticsForPartition { input: GetColumnStatisticsForPartitionRequest output: GetColumnStatisticsForPartitionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves table statistics of columns.
///
/// The Identity and Access Management (IAM) permission required for this operation is GetTable.
operation GetColumnStatisticsForTable { input: GetColumnStatisticsForTableRequest output: GetColumnStatisticsForTableResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves a connection definition from the Data Catalog.
operation GetConnection { input: GetConnectionRequest output: GetConnectionResponse errors: [ EntityNotFoundException GlueEncryptionException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves a list of connection definitions from the Data Catalog.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetConnections { input: GetConnectionsRequest output: GetConnectionsResponse errors: [ EntityNotFoundException GlueEncryptionException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves metadata for a specified crawler.
operation GetCrawler { input: GetCrawlerRequest output: GetCrawlerResponse errors: [ EntityNotFoundException OperationTimeoutException ] } ///

/// Retrieves metrics about specified crawlers.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetCrawlerMetrics { input: GetCrawlerMetricsRequest output: GetCrawlerMetricsResponse errors: [ OperationTimeoutException ] } ///

/// Retrieves metadata for all crawlers defined in the customer account.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetCrawlers { input: GetCrawlersRequest output: GetCrawlersResponse errors: [ OperationTimeoutException ] } ///

/// Retrieves the details of a custom pattern by specifying its name.
operation GetCustomEntityType { input: GetCustomEntityTypeRequest output: GetCustomEntityTypeResponse errors: [ AccessDeniedException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the definition of a specified database.
operation GetDatabase { input: GetDatabaseRequest output: GetDatabaseResponse errors: [ EntityNotFoundException FederationSourceException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves all databases defined in a given Data Catalog.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetDatabases { input: GetDatabasesRequest output: GetDatabasesResponse errors: [ GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the security configuration for a specified catalog.
operation GetDataCatalogEncryptionSettings { input: GetDataCatalogEncryptionSettingsRequest output: GetDataCatalogEncryptionSettingsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Transforms a Python script into a directed acyclic graph (DAG).
operation GetDataflowGraph { input: GetDataflowGraphRequest output: GetDataflowGraphResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the result of a data quality rule evaluation.
operation GetDataQualityResult { input: GetDataQualityResultRequest output: GetDataQualityResultResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Gets the specified recommendation run that was used to generate rules.
operation GetDataQualityRuleRecommendationRun { input: GetDataQualityRuleRecommendationRunRequest output: GetDataQualityRuleRecommendationRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Returns an existing ruleset by identifier or name.
operation GetDataQualityRuleset { input: GetDataQualityRulesetRequest output: GetDataQualityRulesetResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves a specific run where a ruleset is evaluated against a data source.
operation GetDataQualityRulesetEvaluationRun { input: GetDataQualityRulesetEvaluationRunRequest output: GetDataQualityRulesetEvaluationRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves information about a specified development endpoint.
///
/// When you create a development endpoint in a virtual private cloud (VPC), Glue returns only a private IP address, and the public IP address field is not populated. When you create a non-VPC development endpoint, Glue returns only a public IP address.
operation GetDevEndpoint { input: GetDevEndpointRequest output: GetDevEndpointResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves all the development endpoints in this Amazon Web Services account.
///
/// When you create a development endpoint in a virtual private cloud (VPC), Glue returns only a private IP address and the public IP address field is not populated. When you create a non-VPC development endpoint, Glue returns only a public IP address.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetDevEndpoints { input: GetDevEndpointsRequest output: GetDevEndpointsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves an existing job definition.
operation GetJob { input: GetJobRequest output: GetJobResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Returns information on a job bookmark entry.
///
/// For more information about enabling and using job bookmarks, see:
operation GetJobBookmark { input: GetJobBookmarkRequest output: GetJobBookmarkResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ValidationException ] }

/// Retrieves the metadata for a given job run.
operation GetJobRun { input: GetJobRunRequest output: GetJobRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves metadata for all runs of a given job definition.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetJobRuns { input: GetJobRunsRequest output: GetJobRunsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves all current job definitions.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetJobs { input: GetJobsRequest output: GetJobsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Creates mappings.
operation GetMapping { input: GetMappingRequest output: GetMappingResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Gets details for a specific task run on a machine learning transform. Machine learning task runs are asynchronous tasks that Glue runs on your behalf as part of various machine learning workflows. You can check the stats of any task run by calling GetMLTaskRun with the TaskRunID and its parent transform's TransformID.
operation GetMLTaskRun { input: GetMLTaskRunRequest output: GetMLTaskRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Gets a list of runs for a machine learning transform. Machine learning task runs are asynchronous tasks that Glue runs on your behalf as part of various machine learning workflows. You can get a sortable, filterable list of machine learning task runs by calling GetMLTaskRuns with their parent transform's TransformID and other optional parameters as documented in this section.
///
/// This operation returns a list of historic runs and must be paginated.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetMLTaskRuns { input: GetMLTaskRunsRequest output: GetMLTaskRunsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Gets a Glue machine learning transform artifact and all its corresponding metadata. Machine learning transforms are a special type of transform that use machine learning to learn the details of the transformation to be performed by learning from examples provided by humans. These transformations are then saved by Glue. You can retrieve their metadata by calling GetMLTransform.
operation GetMLTransform { input: GetMLTransformRequest output: GetMLTransformResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Gets a sortable, filterable list of existing Glue machine learning transforms. Machine learning transforms are a special type of transform that use machine learning to learn the details of the transformation to be performed by learning from examples provided by humans. These transformations are then saved by Glue, and you can retrieve their metadata by calling GetMLTransforms.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetMLTransforms { input: GetMLTransformsRequest output: GetMLTransformsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves information about a specified partition.
operation GetPartition { input: GetPartitionRequest output: GetPartitionResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the partition indexes associated with a table.
@paginated( inputToken: "NextToken" outputToken: "NextToken" items: "PartitionIndexDescriptorList" ) operation GetPartitionIndexes { input: GetPartitionIndexesRequest output: GetPartitionIndexesResponse errors: [ ConflictException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves information about the partitions in a table.
@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetPartitions { input: GetPartitionsRequest output: GetPartitionsResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException InvalidStateException OperationTimeoutException ResourceNotReadyException ] } ///

/// Gets code to perform a specified mapping.
operation GetPlan { input: GetPlanRequest output: GetPlanResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Describes the specified registry in detail.
operation GetRegistry {
    input := {
        /// This is a wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
        @required
        RegistryId: RegistryId
    }
    output: GetRegistryResponse
    errors: [
        AccessDeniedException
        EntityNotFoundException
        InternalServiceException
        InvalidInputException
    ]
}

/// Retrieves the resource policies set on individual resources by Resource Access Manager during cross-account permission grants. Also retrieves the Data Catalog resource policy.
///
/// If you enabled metadata encryption in Data Catalog settings, and you do not have permission on the KMS key, the operation can't return the Data Catalog resource policy.
@paginated( inputToken: "NextToken" outputToken: "NextToken" items: "GetResourcePoliciesResponseList" pageSize: "MaxResults" ) operation GetResourcePolicies { input: GetResourcePoliciesRequest output: GetResourcePoliciesResponse errors: [ GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves a specified resource policy.
operation GetResourcePolicy { input: GetResourcePolicyRequest output: GetResourcePolicyResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Describes the specified schema in detail.
operation GetSchema {
    input := {
        /// This is a wrapper structure to contain schema identity fields. The structure contains:
        ///
        /// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
        /// - SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
        @required
        SchemaId: SchemaId
    }
    output: GetSchemaResponse
    errors: [
        AccessDeniedException
        EntityNotFoundException
        InternalServiceException
        InvalidInputException
    ]
}

/// Retrieves a schema by the SchemaDefinition. The schema definition is sent to the Schema Registry, canonicalized, and hashed. If the hash is matched within the scope of the SchemaName or ARN (or the default registry, if none is supplied), that schema's metadata is returned. Otherwise, a 404 or NotFound error is returned. Schema versions in Deleted statuses will not be included in the results.
operation GetSchemaByDefinition {
    input := {
        /// This is a wrapper structure to contain schema identity fields. The structure contains:
        ///
        /// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.
        /// - SchemaId$SchemaName: The name of the schema. One of SchemaArn or SchemaName has to be provided.
        @required
        SchemaId: SchemaId

        /// The definition of the schema for which schema details are required.
        @required
        SchemaDefinition: SchemaDefinitionString
    }
    output: GetSchemaByDefinitionResponse
    errors: [
        AccessDeniedException
        EntityNotFoundException
        InternalServiceException
        InvalidInputException
    ]
}

/// Gets the specified schema by its unique ID assigned when a version of the schema is created or registered. Schema versions in Deleted status will not be included in the results.
operation GetSchemaVersion {
    input := {
        /// This is a wrapper structure to contain schema identity fields. The structure contains:
        ///
        /// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
        /// - SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
        SchemaId: SchemaId

        /// The SchemaVersionId of the schema version. This field is required for fetching by schema ID. Either this or the SchemaId wrapper has to be provided.
        SchemaVersionId: SchemaVersionIdString

        /// The version number of the schema.
        SchemaVersionNumber: SchemaVersionNumber
    }
    output: GetSchemaVersionResponse
    errors: [
        AccessDeniedException
        EntityNotFoundException
        InternalServiceException
        InvalidInputException
    ]
}

/// Fetches the schema version difference in the specified difference type between two stored schema versions in the Schema Registry.
///
/// This API allows you to compare two schema versions between two schema definitions under the same schema.
operation GetSchemaVersionsDiff {
    input := {
        /// This is a wrapper structure to contain schema identity fields. The structure contains:
        ///
        /// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.
        /// - SchemaId$SchemaName: The name of the schema. One of SchemaArn or SchemaName has to be provided.
        @required
        SchemaId: SchemaId

        /// The first of the two schema versions to be compared.
        @required
        FirstSchemaVersionNumber: SchemaVersionNumber

        /// The second of the two schema versions to be compared.
        @required
        SecondSchemaVersionNumber: SchemaVersionNumber

        /// Refers to SYNTAX_DIFF, which is the currently supported diff type.
        @required
        SchemaDiffType: SchemaDiffType
    }
    output: GetSchemaVersionsDiffResponse
    errors: [
        AccessDeniedException
        EntityNotFoundException
        InternalServiceException
        InvalidInputException
    ]
}

/// Retrieves a specified security configuration.
operation GetSecurityConfiguration { input: GetSecurityConfigurationRequest output: GetSecurityConfigurationResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves a list of all security configurations.
@paginated( inputToken: "NextToken" outputToken: "NextToken" items: "SecurityConfigurations" pageSize: "MaxResults" ) operation GetSecurityConfigurations { input: GetSecurityConfigurationsRequest output: GetSecurityConfigurationsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the session.
operation GetSession { input: GetSessionRequest output: GetSessionResponse errors: [ AccessDeniedException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the statement.
operation GetStatement { input: GetStatementRequest output: GetStatementResponse errors: [ AccessDeniedException EntityNotFoundException IllegalSessionStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

/// Retrieves the Table definition in a Data Catalog for a specified table.
operation GetTable { input: GetTableRequest output: GetTableResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNotReadyException ] } ///

Retrieves the definitions of some or all of the tables in a given /// Database.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetTables { input: GetTablesRequest output: GetTablesResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves a specified version of a table.

operation GetTableVersion { input: GetTableVersionRequest output: GetTableVersionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves a list of strings that identify available versions of /// a specified table.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetTableVersions { input: GetTableVersionsRequest output: GetTableVersionsResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves a list of tags associated with a resource.

operation GetTags { input: GetTagsRequest output: GetTagsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the definition of a trigger.

operation GetTrigger { input: GetTriggerRequest output: GetTriggerResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Gets all the triggers associated with a job.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetTriggers { input: GetTriggersRequest output: GetTriggersResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves partition metadata from the Data Catalog that contains unfiltered /// metadata.

///

For IAM authorization, the public IAM action associated with this API is glue:GetPartition.

operation GetUnfilteredPartitionMetadata { input: GetUnfilteredPartitionMetadataRequest output: GetUnfilteredPartitionMetadataResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException PermissionTypeMismatchException ] } ///

Retrieves partition metadata from the Data Catalog that contains unfiltered /// metadata.

///

For IAM authorization, the public IAM action associated with this API is glue:GetPartitions.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetUnfilteredPartitionsMetadata { input: GetUnfilteredPartitionsMetadataRequest output: GetUnfilteredPartitionsMetadataResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException PermissionTypeMismatchException ] } ///

Retrieves table metadata from the Data Catalog that contains unfiltered /// metadata.

///

For IAM authorization, the public IAM action associated with this API is glue:GetTable.

operation GetUnfilteredTableMetadata { input: GetUnfilteredTableMetadataRequest output: GetUnfilteredTableMetadataResponse errors: [ EntityNotFoundException FederationSourceException FederationSourceRetryableException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException PermissionTypeMismatchException ] } ///

Retrieves a specified function definition from the Data Catalog.

operation GetUserDefinedFunction { input: GetUserDefinedFunctionRequest output: GetUserDefinedFunctionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves multiple function definitions from the Data Catalog.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetUserDefinedFunctions { input: GetUserDefinedFunctionsRequest output: GetUserDefinedFunctionsResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves resource metadata for a workflow.

operation GetWorkflow { input: GetWorkflowRequest output: GetWorkflowResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the metadata for a given workflow run.

operation GetWorkflowRun { input: GetWorkflowRunRequest output: GetWorkflowRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the workflow run properties which were set during the run.

operation GetWorkflowRunProperties { input: GetWorkflowRunPropertiesRequest output: GetWorkflowRunPropertiesResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves metadata for all runs of a given workflow.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation GetWorkflowRuns { input: GetWorkflowRunsRequest output: GetWorkflowRunsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Imports an existing Amazon Athena Data Catalog to Glue.

operation ImportCatalogToGlue { input: ImportCatalogToGlueRequest output: ImportCatalogToGlueResponse errors: [ InternalServiceException OperationTimeoutException ] } ///

Lists all the blueprint names in an account.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListBlueprints { input: ListBlueprintsRequest output: ListBlueprintsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the names of all crawler resources in this Amazon Web Services account, or the /// resources with the specified tag. This operation allows you to see which /// resources are available in your account, and their names.

///

This operation takes the optional Tags field, which you can use as a filter on /// the response so that tagged resources can be retrieved as a group. If you choose to use tags /// filtering, only resources with the tag are retrieved.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListCrawlers { input: ListCrawlersRequest output: ListCrawlersResponse errors: [ OperationTimeoutException ] } ///

Returns all the crawls of a specified crawler. Returns only the crawls that have occurred since the launch date of the crawler history feature, and only retains up to 12 months of crawls. Older crawls will not be returned.

///

You may use this API to:

  • Retrieve all the crawls of a specified crawler.

  • Retrieve all the crawls of a specified crawler within a limited count.

  • Retrieve all the crawls of a specified crawler in a specific time range.

  • Retrieve all the crawls of a specified crawler with a particular state, crawl ID, or DPU hour value.
operation ListCrawls { input: ListCrawlsRequest output: ListCrawlsResponse errors: [ EntityNotFoundException InvalidInputException OperationTimeoutException ] } ///
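For illustration, a minimal boto3 sketch of the filtered case; the crawler name and the exact filter field and operator strings used here are assumptions for this example:

    import boto3

    glue = boto3.client("glue")

    # List up to 10 crawls of one crawler, filtered by crawl state.
    resp = glue.list_crawls(
        CrawlerName="my-crawler",
        MaxResults=10,
        Filters=[{"FieldName": "STATE", "FilterOperator": "EQ", "FieldValue": "COMPLETED"}],
    )
    for crawl in resp["Crawls"]:
        print(crawl["CrawlId"], crawl["State"])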

Lists all the custom patterns that have been created.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListCustomEntityTypes { input: ListCustomEntityTypesRequest output: ListCustomEntityTypesResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Returns all data quality execution results for your account.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListDataQualityResults { input: ListDataQualityResultsRequest output: ListDataQualityResultsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Lists the recommendation runs meeting the filter criteria.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListDataQualityRuleRecommendationRuns { input: ListDataQualityRuleRecommendationRunsRequest output: ListDataQualityRuleRecommendationRunsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Lists all the runs meeting the filter criteria, where a ruleset is evaluated against a data source.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListDataQualityRulesetEvaluationRuns { input: ListDataQualityRulesetEvaluationRunsRequest output: ListDataQualityRulesetEvaluationRunsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Returns a paginated list of rulesets for the specified list of Glue tables.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListDataQualityRulesets { input: ListDataQualityRulesetsRequest output: ListDataQualityRulesetsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the names of all DevEndpoint resources in this Amazon Web Services account, or the /// resources with the specified tag. This operation allows you to see which resources are /// available in your account, and their names.

///

This operation takes the optional Tags field, which you can use as a filter on /// the response so that tagged resources can be retrieved as a group. If you choose to use tags /// filtering, only resources with the tag are retrieved.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListDevEndpoints { input: ListDevEndpointsRequest output: ListDevEndpointsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the names of all job resources in this Amazon Web Services account, or the resources with the specified tag. This operation allows you to see which resources are available in your account, and their names.

///

This operation takes the optional Tags field, which you can use as a filter on /// the response so that tagged resources can be retrieved as a group. If you choose to use tags /// filtering, only resources with the tag are retrieved.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListJobs { input: ListJobsRequest output: ListJobsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves a sortable, filterable list of existing Glue machine learning transforms in this Amazon Web Services account, /// or the resources with the specified tag. This operation takes the optional Tags field, which you can use as /// a filter of the responses so that tagged resources can be retrieved as a group. If you choose to use tag /// filtering, only resources with the tags are retrieved. ///

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListMLTransforms { input: ListMLTransformsRequest output: ListMLTransformsResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Returns a list of registries that you have created, with minimal registry information. Registries in the Deleting status will not be included in the results. Empty results will be returned if there are no registries available.

@paginated( inputToken: "NextToken" outputToken: "NextToken" items: "Registries" pageSize: "MaxResults" ) operation ListRegistries { input := { ///

Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.

MaxResults: MaxResultsNumber ///

A continuation token, if this is a continuation call.

NextToken: SchemaRegistryTokenString } output: ListRegistriesResponse errors: [ AccessDeniedException InternalServiceException InvalidInputException ] } ///

Returns a list of schemas with minimal details. Schemas in Deleting status will not be included in the results. Empty results will be returned if there are no schemas available.

///

When the RegistryId is not provided, all the schemas across registries will be part of the API response.

@paginated( inputToken: "NextToken" outputToken: "NextToken" items: "Schemas" pageSize: "MaxResults" ) operation ListSchemas { input := { ///

A wrapper structure that may contain the registry name and Amazon Resource Name (ARN).

RegistryId: RegistryId ///

Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.

MaxResults: MaxResultsNumber ///

A continuation token, if this is a continuation call.

NextToken: SchemaRegistryTokenString } output: ListSchemasResponse errors: [ AccessDeniedException EntityNotFoundException InternalServiceException InvalidInputException ] } ///

Returns a list of schema versions that you have created, with minimal information. Schema versions in Deleted status will not be included in the results. Empty results will be returned if there are no schema versions available.

@paginated( inputToken: "NextToken" outputToken: "NextToken" items: "Schemas" pageSize: "MaxResults" ) operation ListSchemaVersions { input := { ///

This is a wrapper structure to contain schema identity fields. The structure contains:

  • SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.

  • SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.

@required SchemaId: SchemaId ///

Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.

MaxResults: MaxResultsNumber ///

A continuation token, if this is a continuation call.

NextToken: SchemaRegistryTokenString } output: ListSchemaVersionsResponse errors: [ AccessDeniedException EntityNotFoundException InternalServiceException InvalidInputException ] } ///
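The NextToken/MaxResults pattern above is the same one used by the other paginated operations in this model. A minimal boto3 sketch, with placeholder schema and registry names:

    import boto3

    glue = boto3.client("glue")

    schema_id = {"SchemaName": "orders", "RegistryName": "my-registry"}
    versions, token = [], None
    while True:
        kwargs = {"SchemaId": schema_id, "MaxResults": 25}
        if token:
            kwargs["NextToken"] = token
        resp = glue.list_schema_versions(**kwargs)
        versions.extend(resp.get("Schemas", []))
        token = resp.get("NextToken")
        if not token:
            break
    print(len(versions), "schema versions")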

Retrieve a list of sessions.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListSessions { input: ListSessionsRequest output: ListSessionsResponse errors: [ AccessDeniedException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Lists statements for the session.

operation ListStatements { input: ListStatementsRequest output: ListStatementsResponse errors: [ AccessDeniedException EntityNotFoundException IllegalSessionStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Retrieves the names of all trigger resources in this Amazon Web Services account, or the resources with the specified tag. This operation allows you to see which resources are available in your account, and their names.

///

This operation takes the optional Tags field, which you can use as a filter on /// the response so that tagged resources can be retrieved as a group. If you choose to use tags /// filtering, only resources with the tag are retrieved.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListTriggers { input: ListTriggersRequest output: ListTriggersResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Lists names of workflows created in the account.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation ListWorkflows { input: ListWorkflowsRequest output: ListWorkflowsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Sets the security configuration for a specified catalog. After the configuration has been /// set, the specified encryption is applied to every catalog write thereafter.

operation PutDataCatalogEncryptionSettings { input: PutDataCatalogEncryptionSettingsRequest output: PutDataCatalogEncryptionSettingsResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Sets the Data Catalog resource policy for access control.

operation PutResourcePolicy { input: PutResourcePolicyRequest output: PutResourcePolicyResponse errors: [ ConditionCheckFailureException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Puts the metadata key value pair for a specified schema version ID. A maximum of 10 key value pairs will be allowed per schema version. They can be added over one or more calls.

operation PutSchemaVersionMetadata { input := { ///

The unique ID for the schema.

SchemaId: SchemaId ///

The version number of the schema.

SchemaVersionNumber: SchemaVersionNumber ///

The unique version ID of the schema version.

SchemaVersionId: SchemaVersionIdString ///

The metadata key's corresponding value.

@required MetadataKeyValue: MetadataKeyValuePair } output: PutSchemaVersionMetadataResponse errors: [ AccessDeniedException AlreadyExistsException EntityNotFoundException InvalidInputException ResourceNumberLimitExceededException ] } ///
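A minimal boto3 sketch of attaching one key-value pair by schema version ID; the ID and the key/value are placeholders:

    import boto3

    glue = boto3.client("glue")

    glue.put_schema_version_metadata(
        SchemaVersionId="11111111-2222-3333-4444-555555555555",  # placeholder UUID
        MetadataKeyValue={"MetadataKey": "owner", "MetadataValue": "data-platform"},
    )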

Puts the specified workflow run properties for the given workflow run. If a property already exists for the specified run, its value is overridden; otherwise the property is added to the existing properties.

operation PutWorkflowRunProperties { input: PutWorkflowRunPropertiesRequest output: PutWorkflowRunPropertiesResponse errors: [ AlreadyExistsException ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///
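For example, a boto3 sketch that sets a run property on an in-progress workflow run; the workflow name, run ID, and property are placeholders:

    import boto3

    glue = boto3.client("glue")

    glue.put_workflow_run_properties(
        Name="nightly-etl",
        RunId="wr_0123456789abcdef",  # placeholder run ID
        RunProperties={"target_date": "2024-01-01"},
    )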

Queries for the schema version metadata information.

operation QuerySchemaVersionMetadata { input := { ///

A wrapper structure that may contain the schema name and Amazon Resource Name (ARN).

SchemaId: SchemaId ///

The version number of the schema.

SchemaVersionNumber: SchemaVersionNumber ///

The unique version ID of the schema version.

SchemaVersionId: SchemaVersionIdString ///

Search key-value pairs for metadata, if they are not provided all the metadata information will be fetched.

MetadataList: MetadataList ///

Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.

MaxResults: QuerySchemaVersionMetadataMaxResults = 0 ///

A continuation token, if this is a continuation call.

NextToken: SchemaRegistryTokenString } output: QuerySchemaVersionMetadataResponse errors: [ AccessDeniedException EntityNotFoundException InvalidInputException ] } ///

Adds a new version to the existing schema. Returns an error if the new version of the schema does not meet the compatibility requirements of the schema set. This API will not create a new schema set and will return a 404 error if the schema set is not already present in the Schema Registry.

///

If this is the first schema definition to be registered in the Schema Registry, this API will store the schema version and return immediately. Otherwise, this call has the potential to run longer than other operations due to compatibility modes. You can call the GetSchemaVersion API with the SchemaVersionId to check compatibility modes.

///

If the same schema definition is already stored in Schema Registry as a version, the schema ID of the existing schema is returned to the caller.

operation RegisterSchemaVersion { input := { ///

This is a wrapper structure to contain schema identity fields. The structure contains:

  • SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.

  • SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.

@required SchemaId: SchemaId ///

The schema definition using the DataFormat setting for the SchemaName.

@required SchemaDefinition: SchemaDefinitionString } output: RegisterSchemaVersionResponse errors: [ AccessDeniedException ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException ResourceNumberLimitExceededException ] } ///
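A minimal boto3 sketch of registering a new version and then checking its status with GetSchemaVersion, per the note above; the schema identifiers and the Avro definition are placeholders:

    import boto3, json

    glue = boto3.client("glue")

    new_definition = json.dumps({
        "type": "record", "name": "Order",
        "fields": [{"name": "id", "type": "string"}],
    })
    resp = glue.register_schema_version(
        SchemaId={"SchemaName": "orders", "RegistryName": "my-registry"},
        SchemaDefinition=new_definition,
    )
    # Compatibility checks may still be running; look up the returned version ID.
    version = glue.get_schema_version(SchemaVersionId=resp["SchemaVersionId"])
    print(version["Status"], version["VersionNumber"])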

Removes a key value pair from the schema version metadata for the specified schema version ID.

operation RemoveSchemaVersionMetadata { input := { ///

A wrapper structure that may contain the schema name and Amazon Resource Name (ARN).

SchemaId: SchemaId ///

The version number of the schema.

SchemaVersionNumber: SchemaVersionNumber ///

The unique version ID of the schema version.

SchemaVersionId: SchemaVersionIdString ///

The value of the metadata key.

@required MetadataKeyValue: MetadataKeyValuePair } output: RemoveSchemaVersionMetadataResponse errors: [ AccessDeniedException EntityNotFoundException InvalidInputException ] } ///

Resets a bookmark entry.

///

For more information about enabling and using job bookmarks, see the job bookmarks documentation in the Glue developer guide.

operation ResetJobBookmark { input: ResetJobBookmarkRequest output: ResetJobBookmarkResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///
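A one-call boto3 sketch; the job name is a placeholder:

    import boto3

    glue = boto3.client("glue")

    # Clear the bookmark so the next run reprocesses previously processed data.
    glue.reset_job_bookmark(JobName="my-etl-job")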

Restarts selected nodes of a previous partially completed workflow run and resumes the workflow run. The selected nodes and all nodes that are downstream from the selected nodes are run.

operation ResumeWorkflowRun { input: ResumeWorkflowRunRequest output: ResumeWorkflowRunResponse errors: [ ConcurrentRunsExceededException EntityNotFoundException IllegalWorkflowStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Executes the statement.

operation RunStatement { input: RunStatementRequest output: RunStatementResponse errors: [ AccessDeniedException EntityNotFoundException IllegalSessionStateException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ValidationException ] } ///
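As a sketch of how RunStatement and GetStatement fit together in an interactive session; the session ID and submitted code are placeholders, and the polling states are assumptions for this example:

    import boto3, time

    glue = boto3.client("glue")

    session_id = "my-session"  # placeholder: an existing session created with CreateSession
    stmt = glue.run_statement(SessionId=session_id, Code="print(spark.version)")
    while True:
        result = glue.get_statement(SessionId=session_id, Id=stmt["Id"])["Statement"]
        if result["State"] in ("AVAILABLE", "ERROR", "CANCELLED"):
            break
        time.sleep(2)
    print(result.get("Output"))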

Searches a set of tables based on properties in the table metadata as well as on the parent database. You can search against text or filter conditions.

///

You can only get tables that you have access to based on the security policies defined in Lake Formation. You need at least read-only access to the table for it to be returned. If you do not have access to all the columns in the table, these columns will not be searched against when returning the list of tables back to you. If you have access to the columns but not the data in the columns, those columns and the associated metadata for those columns will be included in the search.

@paginated( inputToken: "NextToken" outputToken: "NextToken" pageSize: "MaxResults" ) operation SearchTables { input: SearchTablesRequest output: SearchTablesResponse errors: [ InternalServiceException InvalidInputException OperationTimeoutException ] } ///
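A minimal boto3 sketch of a free-text search with pagination; the search text is a placeholder:

    import boto3

    glue = boto3.client("glue")

    tables, token = [], None
    while True:
        kwargs = {"SearchText": "customer", "MaxResults": 25}
        if token:
            kwargs["NextToken"] = token
        resp = glue.search_tables(**kwargs)
        tables.extend(resp.get("TableList", []))
        token = resp.get("NextToken")
        if not token:
            break
    print([t["Name"] for t in tables])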

Starts a new run of the specified blueprint.

operation StartBlueprintRun { input: StartBlueprintRunRequest output: StartBlueprintRunResponse errors: [ EntityNotFoundException IllegalBlueprintStateException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

Starts a crawl using the specified crawler, regardless /// of what is scheduled. If the crawler is already running, returns a /// CrawlerRunningException.

operation StartCrawler { input: StartCrawlerRequest output: StartCrawlerResponse errors: [ CrawlerRunningException EntityNotFoundException OperationTimeoutException ] } ///

Changes the schedule state of the specified crawler to /// SCHEDULED, unless the crawler is already running or the /// schedule state is already SCHEDULED.

operation StartCrawlerSchedule { input: StartCrawlerScheduleRequest output: StartCrawlerScheduleResponse errors: [ EntityNotFoundException NoScheduleException OperationTimeoutException SchedulerRunningException SchedulerTransitioningException ] } ///

Starts a recommendation run that is used to generate rules when you don't know what rules to write. Glue Data Quality analyzes the data and comes up with recommendations for a potential ruleset. You can then triage the ruleset and modify the generated ruleset to your liking.

@idempotent operation StartDataQualityRuleRecommendationRun { input: StartDataQualityRuleRecommendationRunRequest output: StartDataQualityRuleRecommendationRunResponse errors: [ ConflictException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Once you have a ruleset definition (either recommended or your own), you call this operation to evaluate the ruleset against a data source (Glue table). The evaluation computes results which you can retrieve with the GetDataQualityResult API.

@idempotent operation StartDataQualityRulesetEvaluationRun { input: StartDataQualityRulesetEvaluationRunRequest output: StartDataQualityRulesetEvaluationRunResponse errors: [ ConflictException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///
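A boto3 sketch of the evaluate-then-fetch flow described above; the table, role ARN, ruleset name, and the terminal status strings are placeholders or assumptions for this example:

    import boto3, time

    glue = boto3.client("glue")

    run = glue.start_data_quality_ruleset_evaluation_run(
        DataSource={"GlueTable": {"DatabaseName": "sales", "TableName": "orders"}},
        Role="arn:aws:iam::123456789012:role/GlueDataQualityRole",  # placeholder ARN
        RulesetNames=["orders-ruleset"],
    )
    while True:
        status = glue.get_data_quality_ruleset_evaluation_run(RunId=run["RunId"])
        if status["Status"] in ("SUCCEEDED", "FAILED", "STOPPED", "TIMEOUT"):
            break
        time.sleep(30)
    for result_id in status.get("ResultIds", []):
        print(glue.get_data_quality_result(ResultId=result_id))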

Begins an asynchronous task to export all labeled data for a particular transform. This /// task is the only label-related API call that is not part of the typical active learning /// workflow. You typically use StartExportLabelsTaskRun when you want to work with /// all of your existing labels at the same time, such as when you want to remove or change labels /// that were previously submitted as truth. This API operation accepts the /// TransformId whose labels you want to export and an Amazon Simple Storage /// Service (Amazon S3) path to export the labels to. The operation returns a /// TaskRunId. You can check on the status of your task run by calling the /// GetMLTaskRun API.

operation StartExportLabelsTaskRun { input: StartExportLabelsTaskRunRequest output: StartExportLabelsTaskRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Enables you to provide additional labels (examples of truth) to be used to teach the /// machine learning transform and improve its quality. This API operation is generally used as /// part of the active learning workflow that starts with the /// StartMLLabelingSetGenerationTaskRun call and that ultimately results in /// improving the quality of your machine learning transform.

///

After the StartMLLabelingSetGenerationTaskRun finishes, Glue machine learning /// will have generated a series of questions for humans to answer. (Answering these questions is /// often called 'labeling' in the machine learning workflows). In the case of the /// FindMatches transform, these questions are of the form, “What is the correct /// way to group these rows together into groups composed entirely of matching records?” After the /// labeling process is finished, users upload their answers/labels with a call to /// StartImportLabelsTaskRun. After StartImportLabelsTaskRun finishes, /// all future runs of the machine learning transform use the new and improved labels and perform /// a higher-quality transformation.

///

By default, StartMLLabelingSetGenerationTaskRun continually learns from and /// combines all labels that you upload unless you set Replace to true. If you set /// Replace to true, StartImportLabelsTaskRun deletes and forgets all /// previously uploaded labels and learns only from the exact set that you upload. Replacing /// labels can be helpful if you realize that you previously uploaded incorrect labels, and you /// believe that they are having a negative effect on your transform quality.

///

You can check on the status of your task run by calling the GetMLTaskRun /// operation.

operation StartImportLabelsTaskRun { input: StartImportLabelsTaskRunRequest output: StartImportLabelsTaskRunResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

Starts a job run using a job definition.

operation StartJobRun { input: StartJobRunRequest output: StartJobRunResponse errors: [ ConcurrentRunsExceededException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///
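For example, a boto3 sketch that starts a run with job arguments; the job name and the argument key are placeholders:

    import boto3

    glue = boto3.client("glue")

    run = glue.start_job_run(
        JobName="my-etl-job",
        Arguments={"--input_path": "s3://my-bucket/raw/2024-01-01/"},
    )
    print(run["JobRunId"])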

Starts a task to estimate the quality of the transform.

///

When you provide label sets as examples of truth, Glue machine learning uses some of /// those examples to learn from them. The rest of the labels are used as a test to estimate /// quality.

///

Returns a unique identifier for the run. You can call GetMLTaskRun to get more /// information about the stats of the EvaluationTaskRun.

operation StartMLEvaluationTaskRun { input: StartMLEvaluationTaskRunRequest output: StartMLEvaluationTaskRunResponse errors: [ ConcurrentRunsExceededException EntityNotFoundException InternalServiceException InvalidInputException MLTransformNotReadyException OperationTimeoutException ] } ///

Starts the active learning workflow for your machine learning transform to improve the /// transform's quality by generating label sets and adding labels.

///

When the StartMLLabelingSetGenerationTaskRun finishes, Glue will have /// generated a "labeling set" or a set of questions for humans to answer.

///

In the case of the FindMatches transform, these questions are of the form, /// “What is the correct way to group these rows together into groups composed entirely of /// matching records?”

///

After the labeling process is finished, you can upload your labels with a call to /// StartImportLabelsTaskRun. After StartImportLabelsTaskRun finishes, /// all future runs of the machine learning transform will use the new and improved labels and /// perform a higher-quality transformation.

operation StartMLLabelingSetGenerationTaskRun { input: StartMLLabelingSetGenerationTaskRunRequest output: StartMLLabelingSetGenerationTaskRunResponse errors: [ ConcurrentRunsExceededException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///
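A boto3 sketch of the sequence described above: generate a labeling set, then import the answered labels; the transform ID and S3 paths are placeholders:

    import boto3

    glue = boto3.client("glue")

    transform_id = "tfm-0123456789abcdef"  # placeholder FindMatches transform ID

    # 1. Generate a labeling set for humans to answer.
    gen = glue.start_ml_labeling_set_generation_task_run(
        TransformId=transform_id,
        OutputS3Path="s3://my-bucket/labeling-sets/",
    )
    print("labeling task:", gen["TaskRunId"])

    # 2. After the answers are uploaded, import them to improve the transform.
    glue.start_import_labels_task_run(
        TransformId=transform_id,
        InputS3Path="s3://my-bucket/labels/answers.csv",
        ReplaceAllLabels=False,  # keep previously uploaded labels
    )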

Starts an existing trigger. See Triggering /// Jobs for information about how different types of trigger are /// started.

operation StartTrigger { input: StartTriggerRequest output: StartTriggerResponse errors: [ ConcurrentRunsExceededException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

Starts a new run of the specified workflow.

operation StartWorkflowRun { input: StartWorkflowRunRequest output: StartWorkflowRunResponse errors: [ ConcurrentRunsExceededException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

If the specified crawler is running, stops the crawl.

operation StopCrawler { input: StopCrawlerRequest output: StopCrawlerResponse errors: [ CrawlerNotRunningException CrawlerStoppingException EntityNotFoundException OperationTimeoutException ] } ///

Sets the schedule state of the specified crawler to /// NOT_SCHEDULED, but does not stop the crawler if it is /// already running.

operation StopCrawlerSchedule { input: StopCrawlerScheduleRequest output: StopCrawlerScheduleResponse errors: [ EntityNotFoundException OperationTimeoutException SchedulerNotRunningException SchedulerTransitioningException ] } ///

Stops the session.

operation StopSession { input: StopSessionRequest output: StopSessionResponse errors: [ AccessDeniedException ConcurrentModificationException IllegalSessionStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Stops a specified trigger.

operation StopTrigger { input: StopTriggerRequest output: StopTriggerResponse errors: [ ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Stops the execution of the specified workflow run.

operation StopWorkflowRun { input: StopWorkflowRunRequest output: StopWorkflowRunResponse errors: [ EntityNotFoundException IllegalWorkflowStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Adds tags to a resource. A tag is a label you can assign to an Amazon Web Services resource. /// In Glue, you can tag only certain resources. For information about what /// resources you can tag, see Amazon Web Services Tags in Glue.

operation TagResource { input: TagResourceRequest output: TagResourceResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Removes tags from a resource.

operation UntagResource { input: UntagResourceRequest output: UntagResourceResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates a registered blueprint.

operation UpdateBlueprint { input: UpdateBlueprintRequest output: UpdateBlueprintResponse errors: [ ConcurrentModificationException EntityNotFoundException IllegalBlueprintStateException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Modifies an existing classifier (a GrokClassifier, /// an XMLClassifier, a JsonClassifier, or a CsvClassifier, depending on /// which field is present).

operation UpdateClassifier { input: UpdateClassifierRequest output: UpdateClassifierResponse errors: [ EntityNotFoundException InvalidInputException OperationTimeoutException VersionMismatchException ] } ///

Creates or updates partition statistics of columns.

///

The Identity and Access Management (IAM) permission required for this operation is UpdatePartition.

operation UpdateColumnStatisticsForPartition { input: UpdateColumnStatisticsForPartitionRequest output: UpdateColumnStatisticsForPartitionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Creates or updates table statistics of columns.

///

The Identity and Access Management (IAM) permission required for this operation is UpdateTable.

operation UpdateColumnStatisticsForTable { input: UpdateColumnStatisticsForTableRequest output: UpdateColumnStatisticsForTableResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates a connection definition in the Data Catalog.

operation UpdateConnection { input: UpdateConnectionRequest output: UpdateConnectionResponse errors: [ EntityNotFoundException GlueEncryptionException InvalidInputException OperationTimeoutException ] } ///

Updates a crawler. If a crawler is /// running, you must stop it using StopCrawler before updating /// it.

operation UpdateCrawler { input: UpdateCrawlerRequest output: UpdateCrawlerResponse errors: [ CrawlerRunningException EntityNotFoundException InvalidInputException OperationTimeoutException VersionMismatchException ] } ///

Updates the schedule of a crawler using a cron expression.

operation UpdateCrawlerSchedule { input: UpdateCrawlerScheduleRequest output: UpdateCrawlerScheduleResponse errors: [ EntityNotFoundException InvalidInputException OperationTimeoutException SchedulerTransitioningException VersionMismatchException ] } ///
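A minimal boto3 sketch; the cron expression below means 12:15 UTC every day, and the crawler name is a placeholder:

    import boto3

    glue = boto3.client("glue")

    glue.update_crawler_schedule(
        CrawlerName="my-crawler",
        Schedule="cron(15 12 * * ? *)",  # every day at 12:15 UTC
    )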

Updates an existing database definition in a Data Catalog.

operation UpdateDatabase { input: UpdateDatabaseRequest output: UpdateDatabaseResponse errors: [ ConcurrentModificationException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates the specified data quality ruleset.

operation UpdateDataQualityRuleset { input: UpdateDataQualityRulesetRequest output: UpdateDataQualityRulesetResponse errors: [ AlreadyExistsException EntityNotFoundException IdempotentParameterMismatchException InternalServiceException InvalidInputException OperationTimeoutException ResourceNumberLimitExceededException ] } ///

Updates a specified development endpoint.

operation UpdateDevEndpoint { input: UpdateDevEndpointRequest output: UpdateDevEndpointResponse errors: [ EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ValidationException ] } ///

Updates an existing job definition. The previous job definition is completely overwritten by this information.

operation UpdateJob { input: UpdateJobRequest output: UpdateJobResponse errors: [ ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Synchronizes a job from the source control repository. This operation takes the job artifacts that are located in the remote repository and updates the Glue internal stores with these artifacts.

///

This API supports optional parameters which take in the repository information.

operation UpdateJobFromSourceControl { input: UpdateJobFromSourceControlRequest output: UpdateJobFromSourceControlResponse errors: [ AccessDeniedException AlreadyExistsException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ValidationException ] } ///

Updates an existing machine learning transform. Call this operation to tune the algorithm parameters to achieve better results.

///

After calling this operation, you can call the StartMLEvaluationTaskRun /// operation to assess how well your new parameters achieved your goals (such as improving the /// quality of your machine learning transform, or making it more cost-effective).

operation UpdateMLTransform { input: UpdateMLTransformRequest output: UpdateMLTransformResponse errors: [ AccessDeniedException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates a partition.

operation UpdatePartition { input: UpdatePartitionRequest output: UpdatePartitionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates an existing registry which is used to hold a collection of schemas. The updated properties relate to the registry, and do not modify any of the schemas within the registry.

operation UpdateRegistry { input := { ///

This is a wrapper structure that may contain the registry name and Amazon Resource Name (ARN).

@required RegistryId: RegistryId ///

A description of the registry. If description is not provided, this field will not be updated.

@required Description: DescriptionString } output: UpdateRegistryResponse errors: [ AccessDeniedException ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException ] } ///

Updates the description, compatibility setting, or version checkpoint for a schema set.

///

For updating the compatibility setting, the call will not validate compatibility for the entire set of schema versions with the new compatibility setting. If the value for Compatibility is provided, the VersionNumber (a checkpoint) is also required. The API will validate the checkpoint version number for consistency.

///

If the value for the VersionNumber (checkpoint) is provided, Compatibility is optional and this can be used to set/reset a checkpoint for the schema.

///

This update will happen only if the schema is in the AVAILABLE state.

operation UpdateSchema { input := { ///

This is a wrapper structure to contain schema identity fields. The structure contains:

  • SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.

  • SchemaId$SchemaName: The name of the schema. One of SchemaArn or SchemaName has to be provided.

@required SchemaId: SchemaId ///

Version number required for check pointing. One of VersionNumber or Compatibility has to be provided.

SchemaVersionNumber: SchemaVersionNumber ///

The new compatibility setting for the schema.

Compatibility: Compatibility ///

The new description for the schema.

Description: DescriptionString } output: UpdateSchemaResponse errors: [ AccessDeniedException ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException ] } ///
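A boto3 sketch of updating the compatibility setting, which, per the note above, also requires the version number used as the checkpoint; the names and version are placeholders:

    import boto3

    glue = boto3.client("glue")

    glue.update_schema(
        SchemaId={"SchemaName": "orders", "RegistryName": "my-registry"},
        SchemaVersionNumber={"VersionNumber": 3},  # checkpoint version
        Compatibility="BACKWARD",
        Description="Orders events schema",
    )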

Synchronizes a job to the source control repository. This operation takes the job artifacts from the Glue internal stores and makes a commit to the remote repository that is configured on the job.

///

This API supports optional parameters which take in the repository information.

operation UpdateSourceControlFromJob { input: UpdateSourceControlFromJobRequest output: UpdateSourceControlFromJobResponse errors: [ AccessDeniedException AlreadyExistsException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ValidationException ] } ///

Updates a metadata table in the Data Catalog.

operation UpdateTable { input: UpdateTableRequest output: UpdateTableResponse errors: [ ConcurrentModificationException EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ResourceNotReadyException ResourceNumberLimitExceededException ] } ///

Updates a trigger definition.

operation UpdateTrigger { input: UpdateTriggerRequest output: UpdateTriggerResponse errors: [ ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates an existing function definition in the Data Catalog.

operation UpdateUserDefinedFunction { input: UpdateUserDefinedFunctionRequest output: UpdateUserDefinedFunctionResponse errors: [ EntityNotFoundException GlueEncryptionException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Updates an existing workflow.

operation UpdateWorkflow { input: UpdateWorkflowRequest output: UpdateWorkflowResponse errors: [ ConcurrentModificationException EntityNotFoundException InternalServiceException InvalidInputException OperationTimeoutException ] } ///

Access to a resource was denied.

@error("client") structure AccessDeniedException { ///

A message describing the problem.

Message: MessageString } ///

Defines an action to be initiated by a trigger.

structure Action { ///

The name of a job to be run.

JobName: NameString ///

The job arguments used when this trigger fires. For this job run, they replace the default arguments set in the job definition itself.

///

You can specify arguments here that your own job-execution script /// consumes, as well as arguments that Glue itself consumes.

///

For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.

///

For information about the key-value pairs that Glue consumes to set up your job, see the Special Parameters Used by Glue topic in the developer guide.

Arguments: GenericMap ///

The JobRun timeout in minutes. This is the maximum time that a job run can /// consume resources before it is terminated and enters TIMEOUT status. The default /// is 2,880 minutes (48 hours). This overrides the timeout value set in the parent job.

Timeout: Timeout ///

The name of the SecurityConfiguration structure to be used with this /// action.

SecurityConfiguration: NameString ///

Specifies configuration properties of a job run notification.

NotificationProperty: NotificationProperty ///

The name of the crawler to be used with this action.

CrawlerName: NameString } ///

Specifies a transform that groups rows by chosen fields and computes the aggregated value by specified function.

structure Aggregate { ///

The name of the transform node.

@required Name: NodeName ///

Specifies the fields and rows to use as inputs for the aggregate transform.

@required Inputs: OneInput ///

Specifies the fields to group by.

@required Groups: GlueStudioPathList ///

Specifies the aggregate functions to be performed on specified fields.

@required Aggs: AggregateOperations } ///

Specifies the set of parameters needed to perform aggregation in the aggregate transform.

structure AggregateOperation { ///

Specifies the column on the data set on which the aggregation function will be applied.

@required Column: EnclosedInStringProperties ///

Specifies the aggregation function to apply.

///

Possible aggregation functions include: avg, countDistinct, count, first, last, kurtosis, max, min, skewness, stddev_samp, stddev_pop, sum, sumDistinct, var_samp, var_pop

@required AggFunc: AggFunction } ///

A resource to be created or added already exists.

@error("client") structure AlreadyExistsException { ///

A message describing the problem.

Message: MessageString } ///

Specifies an optional value when connecting to the Redshift cluster.

structure AmazonRedshiftAdvancedOption { ///

The key for the additional connection option.

Key: GenericString ///

The value for the additional connection option.

Value: GenericString } ///

Specifies an Amazon Redshift node.

structure AmazonRedshiftNodeData { ///

The access type for the Redshift connection. Can be a direct connection or a catalog connection.

AccessType: GenericLimitedString ///

The source type to specify whether a specific table is the source or a custom query.

SourceType: GenericLimitedString ///

The Glue connection to the Redshift cluster.

Connection: Option ///

The Redshift schema name when working with a direct connection.

Schema: Option ///

The Redshift table name when working with a direct connection.

Table: Option ///

The name of the Glue Data Catalog database when working with a data catalog.

CatalogDatabase: Option ///

The Glue Data Catalog table name when working with a data catalog.

CatalogTable: Option ///

The Redshift schema name when working with a data catalog.

CatalogRedshiftSchema: GenericString ///

The database table to read from.

CatalogRedshiftTable: GenericString ///

The Amazon S3 path where temporary data can be staged when copying out of the database.

TempDir: EnclosedInStringProperty ///

Optional. The name of the IAM role to use when connecting to Amazon S3. If left blank, the IAM role defaults to the role on the job.

IamRole: Option ///

Optional values when connecting to the Redshift cluster.

AdvancedOptions: AmazonRedshiftAdvancedOptions ///

The SQL used to fetch the data from Redshift sources when the SourceType is 'query'.

SampleQuery: GenericString ///

The SQL used before a MERGE or APPEND with upsert is run.

PreAction: GenericString ///

The SQL used after a MERGE or APPEND with upsert is run.

PostAction: GenericString ///

Specifies how writing to a Redshift cluster will occur.

Action: GenericString ///

Specifies the prefix to a table.

TablePrefix: GenericLimitedString ///

The action used on Redshift sinks when doing an APPEND.

Upsert: BooleanValue = false ///

The action used to determine how a MERGE in a Redshift sink will be handled.

MergeAction: GenericLimitedString ///

The action used to determine how a MERGE in a Redshift sink will be handled when an existing record matches a new record.

MergeWhenMatched: GenericLimitedString ///

The action used to determine how a MERGE in a Redshift sink will be handled when an existing record doesn't match a new record.

MergeWhenNotMatched: GenericLimitedString ///

The SQL used in a custom merge to deal with matching records.

MergeClause: GenericString ///

Specifies the name of the connection that is associated with the catalog table used.

CrawlerConnection: GenericString ///

The array of schema output for a given node.

TableSchema: OptionList ///

The name of the temporary staging table that is used when doing a MERGE or APPEND with upsert.

StagingTable: GenericString ///

The list of column names used to determine a matching record when doing a MERGE or APPEND with upsert.

SelectedColumns: OptionList } ///

Specifies an Amazon Redshift source.

structure AmazonRedshiftSource { ///

The name of the Amazon Redshift source.

Name: NodeName ///

Specifies the data of the Amazon Redshift source node.

Data: AmazonRedshiftNodeData } ///

Specifies an Amazon Redshift target.

structure AmazonRedshiftTarget { ///

The name of the Amazon Redshift target.

Name: NodeName ///

Specifies the data of the Amazon Redshift target node.

Data: AmazonRedshiftNodeData ///

The nodes that are inputs to the data target.

Inputs: OneInput } ///

Specifies a transform that maps data property keys in the data source to data property keys in the data target. You can rename keys, modify the data types for keys, and choose which keys to drop from the dataset.

structure ApplyMapping { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

Specifies the mapping of data property keys in the data source to data property keys in the data target.

@required Mapping: Mappings } ///

Specifies a connector to an Amazon Athena data source.

structure AthenaConnectorSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the connection that is associated with the connector.

@required ConnectionName: EnclosedInStringProperty ///

The name of a connector that assists with accessing the data store in Glue Studio.

@required ConnectorName: EnclosedInStringProperty ///

The type of connection, such as marketplace.athena or custom.athena, designating a connection to an Amazon Athena data store.

@required ConnectionType: EnclosedInStringProperty ///

The name of the table in the data source.

ConnectionTable: EnclosedInStringPropertyWithQuote ///

The name of the Cloudwatch log group to read from. For example, /aws-glue/jobs/output.

@required SchemaName: EnclosedInStringProperty ///

Specifies the data schema for the custom Athena source.

OutputSchemas: GlueSchemas } ///

A structure containing the Lake Formation audit context.

structure AuditContext { ///

A string containing the additional audit context information.

AdditionalAuditContext: AuditContextString ///

The requested columns for audit.

RequestedColumns: AuditColumnNamesList ///

All columns requested for audit.

AllColumnsRequested: NullableBoolean } ///

A list of errors that can occur when registering partition indexes for an existing table.

///

These errors give the details about why an index registration failed and provide a limited number of partitions in the response, so that you can fix the partitions at fault and try registering the index again. The most common set of errors that can occur are categorized as follows:

  • EncryptedPartitionError: The partitions are encrypted.

  • InvalidPartitionTypeDataError: The partition value doesn't match the data type for that partition column.

  • MissingPartitionValueError: The partition value is missing.

  • UnsupportedPartitionCharacterError: Characters inside the partition value are not supported. For example: U+0000, U+0001, U+0002.

  • InternalError: Any error which does not belong to other error codes.
structure BackfillError { ///

The error code for an error that occurred when registering partition indexes for an existing table.

Code: BackfillErrorCode ///

A list of a limited number of partitions in the response.

Partitions: BackfillErroredPartitionsList } ///

Specifies a target that uses a Glue Data Catalog table.

structure BasicCatalogTarget { ///

The name of your data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The database that contains the table you want to use as the target. This database must already exist in the Data Catalog.

@required Database: EnclosedInStringProperty ///

The table that defines the schema of your output data. This table must already exist in the Data Catalog.

@required Table: EnclosedInStringProperty } @input structure BatchCreatePartitionRequest { ///

The ID of the catalog in which the partition is to be created. Currently, this should be /// the Amazon Web Services account ID.

CatalogId: CatalogIdString ///

The name of the metadata database in which the partition is /// to be created.

@required DatabaseName: NameString ///

The name of the metadata table in which the partition is to be created.

@required TableName: NameString ///

A list of PartitionInput structures that define /// the partitions to be created.

@required PartitionInputList: PartitionInputList } @output structure BatchCreatePartitionResponse { ///

The errors encountered when trying to create the requested partitions.

Errors: PartitionErrors } @input structure BatchDeleteConnectionRequest { ///

The ID of the Data Catalog in which the connections reside. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

A list of names of the connections to delete.

@required ConnectionNameList: DeleteConnectionNameList } @output structure BatchDeleteConnectionResponse { ///

A list of names of the connection definitions that were /// successfully deleted.

Succeeded: NameStringList ///

A map of the names of connections that were not successfully /// deleted to error details.

Errors: ErrorByName } @input structure BatchDeletePartitionRequest { ///

The ID of the Data Catalog where the partition to be deleted resides. If none is provided, /// the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which the table in question /// resides.

@required DatabaseName: NameString ///

The name of the table that contains the partitions to be deleted.

@required TableName: NameString ///

A list of PartitionInput structures that define /// the partitions to be deleted.

@required PartitionsToDelete: BatchDeletePartitionValueList } @output structure BatchDeletePartitionResponse { ///

The errors encountered when trying to delete the requested partitions.

Errors: PartitionErrors } @input structure BatchDeleteTableRequest { ///

The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which the tables to delete reside. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

A list of the tables to delete.

@required TablesToDelete: BatchDeleteTableNameList ///

The transaction ID at which to delete the table contents.

TransactionId: TransactionIdString } @output structure BatchDeleteTableResponse { ///

A list of errors encountered in attempting to delete the specified tables.

Errors: TableErrors } @input structure BatchDeleteTableVersionRequest { ///

The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The database in the catalog in which the table resides. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The name of the table. For Hive compatibility, /// this name is entirely lowercase.

@required TableName: NameString ///

A list of the IDs of versions to be deleted. A VersionId is a string representation of an integer. Each version is incremented by 1.

@required VersionIds: BatchDeleteTableVersionList } @output structure BatchDeleteTableVersionResponse { ///

A list of errors encountered while trying to delete /// the specified table versions.

Errors: TableVersionErrors } @input structure BatchGetBlueprintsRequest { ///

A list of blueprint names.

@required Names: BatchGetBlueprintNames ///

Specifies whether or not to include the blueprint in the response.

IncludeBlueprint: NullableBoolean ///

Specifies whether or not to include the parameters, as a JSON string, for the blueprint in the response.

IncludeParameterSpec: NullableBoolean } @output structure BatchGetBlueprintsResponse { ///

Returns a list of blueprints as a Blueprints object.

Blueprints: Blueprints ///

Returns a list of BlueprintNames that were not found.

MissingBlueprints: BlueprintNames } @input structure BatchGetCrawlersRequest { ///

A list of crawler names, which might be the names returned from the /// ListCrawlers operation.

@required CrawlerNames: CrawlerNameList } @output structure BatchGetCrawlersResponse { ///

A list of crawler definitions.

Crawlers: CrawlerList ///

A list of names of crawlers that were not found.

CrawlersNotFound: CrawlerNameList } @input structure BatchGetCustomEntityTypesRequest { ///

A list of names of the custom patterns that you want to retrieve.

@required Names: CustomEntityTypeNames } @output structure BatchGetCustomEntityTypesResponse { ///

A list of CustomEntityType objects representing the custom patterns that have been created.

CustomEntityTypes: CustomEntityTypes ///

A list of the names of custom patterns that were not found.

CustomEntityTypesNotFound: CustomEntityTypeNames } @input structure BatchGetDataQualityResultRequest { ///

A list of unique result IDs for the data quality results.

@required ResultIds: DataQualityResultIds } @output structure BatchGetDataQualityResultResponse { ///

A list of DataQualityResult objects representing the data quality results.

@required Results: DataQualityResultsList ///

A list of result IDs for which results were not found.

ResultsNotFound: DataQualityResultIds } @input structure BatchGetDevEndpointsRequest { ///

The list of DevEndpoint names, which might be the names returned from the /// ListDevEndpoints operation.

@required DevEndpointNames: DevEndpointNames } @output structure BatchGetDevEndpointsResponse { ///

A list of DevEndpoint definitions.

DevEndpoints: DevEndpointList ///

A list of DevEndpoints not found.

DevEndpointsNotFound: DevEndpointNames } @input structure BatchGetJobsRequest { ///

A list of job names, which might be the names returned from the ListJobs /// operation.

@required JobNames: JobNameList } @output structure BatchGetJobsResponse { ///

A list of job definitions.

Jobs: JobList ///

A list of names of jobs not found.

JobsNotFound: JobNameList } @input structure BatchGetPartitionRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

A list of partition values identifying the partitions to retrieve.

@required PartitionsToGet: BatchGetPartitionValueList } @output structure BatchGetPartitionResponse { ///

A list of the requested partitions.

Partitions: PartitionList ///

A list of the partition values in the request for which partitions were not /// returned.

UnprocessedKeys: BatchGetPartitionValueList } @input structure BatchGetTriggersRequest { ///
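// Illustrative only, not part of the published model: partitions are requested by their partition
// values; any value lists that could not be matched come back in UnprocessedKeys. Names and values
// are hypothetical, and the Partition members shown are assumed from the Glue API.
apply BatchGetPartition @examples([
    {
        title: "Retrieve a partition by its values"
        input: {
            DatabaseName: "sales"
            TableName: "orders"
            PartitionsToGet: [{ Values: ["2023", "07"] }]
        }
        output: {
            Partitions: [{ Values: ["2023", "07"], DatabaseName: "sales", TableName: "orders" }]
            UnprocessedKeys: []
        }
    }
])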

A list of trigger names, which may be the names returned from the ListTriggers operation.

@required TriggerNames: TriggerNameList } @output structure BatchGetTriggersResponse { ///

A list of trigger definitions.

Triggers: TriggerList ///

A list of names of triggers not found.

TriggersNotFound: TriggerNameList } @input structure BatchGetWorkflowsRequest { ///

A list of workflow names, which may be the names returned from the ListWorkflows operation.

@required Names: WorkflowNames ///

Specifies whether to include a graph when returning the workflow resource metadata.

IncludeGraph: NullableBoolean } @output structure BatchGetWorkflowsResponse { ///

A list of workflow resource metadata.

Workflows: Workflows ///

A list of names of workflows not found.

MissingWorkflows: WorkflowNames } ///

Records an error that occurred when attempting to stop a /// specified job run.

structure BatchStopJobRunError { ///

The name of the job definition that is used in the job run in question.

JobName: NameString ///

The JobRunId of the job run in question.

JobRunId: IdString ///

Specifies details about the error that was encountered.

ErrorDetail: ErrorDetail } @input structure BatchStopJobRunRequest { ///

The name of the job definition for which to stop job runs.

@required JobName: NameString ///

A list of the JobRunIds that should be stopped for that job /// definition.

@required JobRunIds: BatchStopJobRunJobRunIdList } @output structure BatchStopJobRunResponse { ///

A list of the JobRuns that were successfully submitted for stopping.

SuccessfulSubmissions: BatchStopJobRunSuccessfulSubmissionList ///

A list of the errors that were encountered in trying to stop JobRuns, /// including the JobRunId for which each error was encountered and details about the /// error.

Errors: BatchStopJobRunErrorList } ///

Records a successful request to stop a specified JobRun.

structure BatchStopJobRunSuccessfulSubmission { ///

The name of the job definition used in the job run that was stopped.

JobName: NameString ///

The JobRunId of the job run that was stopped.

JobRunId: IdString } ///
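// Illustrative only, not part of the published model: one run is accepted for stopping and one
// fails, matching the SuccessfulSubmissions/Errors split described above. The job name, run IDs,
// and error code are hypothetical.
apply BatchStopJobRun @examples([
    {
        title: "Stop two runs of a job"
        input: {
            JobName: "nightly-etl"
            JobRunIds: ["jr_0123456789abcdef", "jr_fedcba9876543210"]
        }
        output: {
            SuccessfulSubmissions: [
                { JobName: "nightly-etl", JobRunId: "jr_0123456789abcdef" }
            ]
            Errors: [
                {
                    JobName: "nightly-etl"
                    JobRunId: "jr_fedcba9876543210"
                    ErrorDetail: { ErrorCode: "JobRunNotRunning", ErrorMessage: "The job run is not in a running state." }
                }
            ]
        }
    }
])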

Contains information about a batch update partition error.

structure BatchUpdatePartitionFailureEntry { ///

A list of values defining the partitions.

PartitionValueList: BoundedPartitionValueList ///

The details about the batch update partition error.

ErrorDetail: ErrorDetail } @input structure BatchUpdatePartitionRequest { ///

The ID of the catalog in which the partition is to be updated. Currently, this should be /// the Amazon Web Services account ID.

CatalogId: CatalogIdString ///

The name of the metadata database in which the partition is /// to be updated.

@required DatabaseName: NameString ///

The name of the metadata table in which the partition is to be updated.

@required TableName: NameString ///

A list of up to 100 BatchUpdatePartitionRequestEntry objects to update.

@required Entries: BatchUpdatePartitionRequestEntryList } ///

A structure that contains the values and structure used to update a partition.

structure BatchUpdatePartitionRequestEntry { ///

A list of values defining the partitions.

@required PartitionValueList: BoundedPartitionValueList ///

The structure used to update a partition.

@required PartitionInput: PartitionInput } @output structure BatchUpdatePartitionResponse { ///

The errors encountered when trying to update the requested partitions. A list of BatchUpdatePartitionFailureEntry objects.

Errors: BatchUpdatePartitionFailureList } ///
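// Illustrative only, not part of the published model: each entry pairs the current partition values
// with the PartitionInput that replaces the partition's metadata. Names are hypothetical, and the
// PartitionInput members shown (Values, Parameters) are assumed from the Glue API.
apply BatchUpdatePartition @examples([
    {
        title: "Update a single partition"
        input: {
            DatabaseName: "sales"
            TableName: "orders"
            Entries: [
                {
                    PartitionValueList: ["2023", "07"]
                    PartitionInput: {
                        Values: ["2023", "07"]
                        Parameters: { classification: "parquet" }
                    }
                }
            ]
        }
        output: {
            Errors: []
        }
    }
])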

Defines column statistics supported for bit sequence data values.

structure BinaryColumnStatisticsData { ///

The size of the longest bit sequence in the column.

@required MaximumLength: NonNegativeLong = 0 ///

The average bit sequence length in the column.

@required AverageLength: NonNegativeDouble = 0 ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 } ///

The details of a blueprint.

structure Blueprint { ///

The name of the blueprint.

Name: OrchestrationNameString ///

The description of the blueprint.

Description: Generic512CharString ///

The date and time the blueprint was registered.

CreatedOn: TimestampValue ///

The date and time the blueprint was last modified.

LastModifiedOn: TimestampValue ///

A JSON string that indicates the list of parameter specifications for the blueprint.

ParameterSpec: BlueprintParameterSpec ///

Specifies the path in Amazon S3 where the blueprint is published.

BlueprintLocation: GenericString ///

Specifies a path in Amazon S3 where the blueprint is copied when you call CreateBlueprint/UpdateBlueprint to register the blueprint in Glue.

BlueprintServiceLocation: GenericString ///

The status of the blueprint registration.

  • Creating — The blueprint registration is in progress.
  • Active — The blueprint has been successfully registered.
  • Updating — An update to the blueprint registration is in progress.
  • Failed — The blueprint registration failed.
Status: BlueprintStatus ///

An error message.

ErrorMessage: ErrorString ///

When there are multiple versions of a blueprint and the latest version has some errors, this attribute indicates the last successful blueprint definition that is available with the service.

LastActiveDefinition: LastActiveDefinition } ///

The details of a blueprint.

structure BlueprintDetails { ///

The name of the blueprint.

BlueprintName: OrchestrationNameString ///

The run ID for this blueprint.

RunId: IdString } ///

The details of a blueprint run.

structure BlueprintRun { ///

The name of the blueprint.

BlueprintName: OrchestrationNameString ///

The run ID for this blueprint run.

RunId: IdString ///

The name of a workflow that is created as a result of a successful blueprint run. If a blueprint run has an error, there will not be a workflow created.

WorkflowName: NameString ///

The state of the blueprint run. Possible values are:

  • Running — The blueprint run is in progress.
  • Succeeded — The blueprint run completed successfully.
  • Failed — The blueprint run failed and rollback is complete.
  • Rolling Back — The blueprint run failed and rollback is in progress.
State: BlueprintRunState ///

The date and time that the blueprint run started.

StartedOn: TimestampValue ///

The date and time that the blueprint run completed.

CompletedOn: TimestampValue ///

Indicates any errors that are seen while running the blueprint.

ErrorMessage: MessageString ///

If there are any errors while creating the entities of a workflow, we try to roll back the created entities until that point and delete them. This attribute indicates the errors seen while trying to delete the entities that are created.

RollbackErrorMessage: MessageString ///

The blueprint parameters as a string. You will have to provide a value for each key that is required from the parameter spec that is defined in the Blueprint$ParameterSpec.

Parameters: BlueprintParameters ///

The role ARN. This role will be assumed by the Glue service and will be used to create the workflow and other entities of a workflow.

RoleArn: OrchestrationIAMRoleArn } ///

Defines column statistics supported for Boolean data columns.

structure BooleanColumnStatisticsData { ///

The number of true values in the column.

@required NumberOfTrues: NonNegativeLong = 0 ///

The number of false values in the column.

@required NumberOfFalses: NonNegativeLong = 0 ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 } @input structure CancelDataQualityRuleRecommendationRunRequest { ///

The unique run identifier associated with this run.

@required RunId: HashString } @output structure CancelDataQualityRuleRecommendationRunResponse {} @input structure CancelDataQualityRulesetEvaluationRunRequest { ///

The unique run identifier associated with this run.

@required RunId: HashString } @output structure CancelDataQualityRulesetEvaluationRunResponse {} @input structure CancelMLTaskRunRequest { ///

The unique identifier of the machine learning transform.

@required TransformId: HashString ///

A unique identifier for the task run.

@required TaskRunId: HashString } @output structure CancelMLTaskRunResponse { ///

The unique identifier of the machine learning transform.

TransformId: HashString ///

The unique identifier for the task run.

TaskRunId: HashString ///

The status for this run.

Status: TaskStatusType } @input structure CancelStatementRequest { ///

The Session ID of the statement to be cancelled.

@required SessionId: NameString ///

The ID of the statement to be cancelled.

@required Id: IntegerValue = 0 ///

The origin of the request to cancel the statement.

RequestOrigin: OrchestrationNameString } @output structure CancelStatementResponse {} ///

Specifies a Delta Lake data source that is registered in the Glue Data Catalog.

structure CatalogDeltaSource { ///

The name of the Delta Lake data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty ///

Specifies additional connection options.

AdditionalDeltaOptions: AdditionalOptions ///

Specifies the data schema for the Delta Lake source.

OutputSchemas: GlueSchemas } ///

Specifies a table definition in the Glue Data Catalog.

structure CatalogEntry { ///

The database in which the table metadata resides.

@required DatabaseName: NameString ///

The name of the table in question.

@required TableName: NameString } ///

Specifies a Hudi data source that is registered in the Glue Data Catalog.

structure CatalogHudiSource { ///

The name of the Hudi data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty ///

Specifies additional connection options.

AdditionalHudiOptions: AdditionalOptions ///

Specifies the data schema for the Hudi source.

OutputSchemas: GlueSchemas } ///

A structure containing migration status information.

structure CatalogImportStatus { ///

/// True if the migration has completed, or False otherwise.

ImportCompleted: Boolean = false ///

The time that the migration was started.

ImportTime: Timestamp ///

The name of the person who initiated the migration.

ImportedBy: NameString } ///

Specifies an Apache Kafka data store in the Data Catalog.

structure CatalogKafkaSource { ///

The name of the data store.

@required Name: NodeName ///

The amount of time to spend processing each micro batch.

WindowSize: BoxedPositiveInt ///

Whether to automatically determine the schema from the incoming data.

DetectSchema: BoxedBoolean ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

Specifies the streaming options.

StreamingOptions: KafkaStreamingSourceOptions ///

Specifies options related to data preview for viewing a sample of your data.

DataPreviewOptions: StreamingDataPreviewOptions } ///

Specifies a Kinesis data source in the Glue Data Catalog.

structure CatalogKinesisSource { ///

The name of the data source.

@required Name: NodeName ///

The amount of time to spend processing each micro batch.

WindowSize: BoxedPositiveInt ///

Whether to automatically determine the schema from the incoming data.

DetectSchema: BoxedBoolean ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

Additional options for the Kinesis streaming data source.

StreamingOptions: KinesisStreamingSourceOptions ///

Additional options for data preview.

DataPreviewOptions: StreamingDataPreviewOptions } ///

A policy that specifies update behavior for the crawler.

structure CatalogSchemaChangePolicy { ///

Whether to use the specified update behavior when the crawler finds a changed schema.

EnableUpdateCatalog: BoxedBoolean ///

The update behavior when the crawler finds a changed schema.

UpdateBehavior: UpdateCatalogBehavior } ///

Specifies a data store in the Glue Data Catalog.

structure CatalogSource { ///

The name of the data store.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } ///

Specifies a Glue Data Catalog target.

structure CatalogTarget { ///

The name of the database to be synchronized.

@required DatabaseName: NameString ///

A list of the tables to be synchronized.

@required Tables: CatalogTablesList ///

The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.

ConnectionName: ConnectionName ///

A valid Amazon SQS ARN. For example, arn:aws:sqs:region:account:sqs.

EventQueueArn: EventQueueArn ///

A valid Amazon dead-letter SQS ARN. For example, arn:aws:sqs:region:account:deadLetterQueue.

DlqEventQueueArn: EventQueueArn } @output structure CheckSchemaVersionValidityResponse { ///

Returns true if the schema is valid, and false otherwise.

Valid: IsVersionValid = false ///

A validation failure error message.

Error: SchemaValidationError } ///

Classifiers are triggered during a crawl task. A classifier checks whether a given file is /// in a format it can handle. If it is, the classifier creates a schema in the form of a /// StructType object that matches that data format.

///

You can use the standard classifiers that Glue provides, or you can write your own /// classifiers to best categorize your data sources and specify the appropriate schemas to use /// for them. A classifier can be a grok classifier, an XML classifier, /// a JSON classifier, or a custom CSV classifier, as specified in one /// of the fields in the Classifier object.

structure Classifier { ///

A classifier that uses grok.

GrokClassifier: GrokClassifier ///

A classifier for XML content.

XMLClassifier: XMLClassifier ///

A classifier for JSON content.

JsonClassifier: JsonClassifier ///

A classifier for comma-separated values (CSV).

CsvClassifier: CsvClassifier } ///

Specifies how Amazon CloudWatch data should be encrypted.

structure CloudWatchEncryption { ///

The encryption mode to use for CloudWatch data.

CloudWatchEncryptionMode: CloudWatchEncryptionMode ///

The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.

KmsKeyArn: KmsKeyArn } ///

/// CodeGenConfigurationNode enumerates all valid Node types. One and only one of its member variables can be populated.

structure CodeGenConfigurationNode { ///

Specifies a connector to an Amazon Athena data source.

AthenaConnectorSource: AthenaConnectorSource ///

Specifies a connector to a JDBC data source.

JDBCConnectorSource: JDBCConnectorSource ///

Specifies a connector to an Apache Spark data source.

SparkConnectorSource: SparkConnectorSource ///

Specifies a data store in the Glue Data Catalog.

CatalogSource: CatalogSource ///

Specifies an Amazon Redshift data store.

RedshiftSource: RedshiftSource ///

Specifies an Amazon S3 data store in the Glue Data Catalog.

S3CatalogSource: S3CatalogSource ///

Specifies a comma-separated values (CSV) data store stored in Amazon S3.

S3CsvSource: S3CsvSource ///

Specifies a JSON data store stored in Amazon S3.

S3JsonSource: S3JsonSource ///

Specifies an Apache Parquet data store stored in Amazon S3.

S3ParquetSource: S3ParquetSource ///

Specifies a relational catalog data store in the Glue Data Catalog.

RelationalCatalogSource: RelationalCatalogSource ///

Specifies a DynamoDB Catalog data store in the Glue Data Catalog.

DynamoDBCatalogSource: DynamoDBCatalogSource ///

Specifies a data target that writes to Amazon S3 in Apache Parquet columnar storage.

JDBCConnectorTarget: JDBCConnectorTarget ///

Specifies a target that uses an Apache Spark connector.

SparkConnectorTarget: SparkConnectorTarget ///

Specifies a target that uses a Glue Data Catalog table.

CatalogTarget: BasicCatalogTarget ///

Specifies a target that uses Amazon Redshift.

RedshiftTarget: RedshiftTarget ///

Specifies a data target that writes to Amazon S3 using the Glue Data Catalog.

S3CatalogTarget: S3CatalogTarget ///

Specifies a data target that writes to Amazon S3 in Apache Parquet columnar storage.

S3GlueParquetTarget: S3GlueParquetTarget ///

Specifies a data target that writes to Amazon S3.

S3DirectTarget: S3DirectTarget ///

Specifies a transform that maps data property keys in the data source to data property keys in the data target. You can rename keys, modify the data types for keys, and choose which keys to drop from the dataset.

ApplyMapping: ApplyMapping ///

Specifies a transform that chooses the data property keys that you want to keep.

SelectFields: SelectFields ///

Specifies a transform that chooses the data property keys that you want to drop.

DropFields: DropFields ///

Specifies a transform that renames a single data property key.

RenameField: RenameField ///

Specifies a transform that writes samples of the data to an Amazon S3 bucket.

Spigot: Spigot ///

Specifies a transform that joins two datasets into one dataset using a comparison phrase on the specified data property keys. You can use inner, outer, left, right, left semi, and left anti joins.

Join: Join ///

Specifies a transform that splits data property keys into two DynamicFrames. The output is a collection of DynamicFrames: one with selected data property keys, and one with the remaining data property keys.

SplitFields: SplitFields ///

Specifies a transform that chooses one DynamicFrame from a collection of DynamicFrames. The output is the selected DynamicFrame.

SelectFromCollection: SelectFromCollection ///

Specifies a transform that locates records in the dataset that have missing values and adds a new field with a value determined by imputation. The input data set is used to train the machine learning model that determines what the missing value should be.

FillMissingValues: FillMissingValues ///

Specifies a transform that splits a dataset into two, based on a filter condition.

Filter: Filter ///

Specifies a transform that uses custom code you provide to perform the data transformation. The output is a collection of DynamicFrames.

CustomCode: CustomCode ///

Specifies a transform where you enter a SQL query using Spark SQL syntax to transform the data. The output is a single DynamicFrame.

SparkSQL: SparkSQL ///

Specifies a direct Amazon Kinesis data source.

DirectKinesisSource: DirectKinesisSource ///

Specifies an Apache Kafka data store.

DirectKafkaSource: DirectKafkaSource ///

Specifies a Kinesis data source in the Glue Data Catalog.

CatalogKinesisSource: CatalogKinesisSource ///

Specifies an Apache Kafka data store in the Data Catalog.

CatalogKafkaSource: CatalogKafkaSource ///

Specifies a transform that removes columns from the dataset if all values in the column are 'null'. By default, Glue Studio will recognize null objects, but some values such as empty strings, strings that are "null", -1 integers or other placeholders such as zeros, are not automatically recognized as nulls.

DropNullFields: DropNullFields ///

Specifies a transform that merges a DynamicFrame with a staging DynamicFrame based on the specified primary keys to identify records. Duplicate records (records with the same primary keys) are not de-duplicated.

Merge: Merge ///

Specifies a transform that combines the rows from two or more datasets into a single result.

Union: Union ///

Specifies a transform that identifies, removes or masks PII data.

PIIDetection: PIIDetection ///

Specifies a transform that groups rows by chosen fields and computes the aggregated value by specified function.

Aggregate: Aggregate ///

Specifies a transform that removes rows of repeating data from a data set.

DropDuplicates: DropDuplicates ///

Specifies a data target that writes to a governed catalog.

GovernedCatalogTarget: GovernedCatalogTarget ///

Specifies a data source in a governed Data Catalog.

GovernedCatalogSource: GovernedCatalogSource ///

Specifies a Microsoft SQL server data source in the Glue Data Catalog.

MicrosoftSQLServerCatalogSource: MicrosoftSQLServerCatalogSource ///

Specifies a MySQL data source in the Glue Data Catalog.

MySQLCatalogSource: MySQLCatalogSource ///

Specifies an Oracle data source in the Glue Data Catalog.

OracleSQLCatalogSource: OracleSQLCatalogSource ///

Specifies a PostgreSQL data source in the Glue Data Catalog.

PostgreSQLCatalogSource: PostgreSQLCatalogSource ///

Specifies a target that uses Microsoft SQL.

MicrosoftSQLServerCatalogTarget: MicrosoftSQLServerCatalogTarget ///

Specifies a target that uses MySQL.

MySQLCatalogTarget: MySQLCatalogTarget ///

Specifies a target that uses Oracle SQL.

OracleSQLCatalogTarget: OracleSQLCatalogTarget ///

Specifies a target that uses PostgreSQL.

PostgreSQLCatalogTarget: PostgreSQLCatalogTarget ///

Specifies a custom visual transform created by a user.

DynamicTransform: DynamicTransform ///

Specifies your data quality evaluation criteria.

EvaluateDataQuality: EvaluateDataQuality ///

Specifies a Hudi data source that is registered in the Glue Data Catalog. The data source must be stored in Amazon S3.

S3CatalogHudiSource: S3CatalogHudiSource ///

Specifies a Hudi data source that is registered in the Glue Data Catalog.

CatalogHudiSource: CatalogHudiSource ///

Specifies a Hudi data source stored in Amazon S3.

S3HudiSource: S3HudiSource ///

Specifies a target that writes to a Hudi data source in the Glue Data Catalog.

S3HudiCatalogTarget: S3HudiCatalogTarget ///

Specifies a target that writes to a Hudi data source in Amazon S3.

S3HudiDirectTarget: S3HudiDirectTarget DirectJDBCSource: DirectJDBCSource ///

Specifies a Delta Lake data source that is registered in the Glue Data Catalog. The data source must be stored in Amazon S3.

S3CatalogDeltaSource: S3CatalogDeltaSource ///

Specifies a Delta Lake data source that is registered in the Glue Data Catalog.

CatalogDeltaSource: CatalogDeltaSource ///

Specifies a Delta Lake data source stored in Amazon S3.

S3DeltaSource: S3DeltaSource ///

Specifies a target that writes to a Delta Lake data source in the Glue Data Catalog.

S3DeltaCatalogTarget: S3DeltaCatalogTarget ///

Specifies a target that writes to a Delta Lake data source in Amazon S3.

S3DeltaDirectTarget: S3DeltaDirectTarget ///

Specifies a target that writes to a data source in Amazon Redshift.

AmazonRedshiftSource: AmazonRedshiftSource ///

Specifies a target that writes to a data target in Amazon Redshift.

AmazonRedshiftTarget: AmazonRedshiftTarget ///

Specifies your data quality evaluation criteria. Accepts multiple input datasets and returns a collection of Dynamic Frames.

EvaluateDataQualityMultiFrame: EvaluateDataQualityMultiFrame ///

Specifies a Glue DataBrew recipe node.

Recipe: Recipe ///

Specifies a Snowflake data source.

SnowflakeSource: SnowflakeSource ///

Specifies a target that writes to a Snowflake data source.

SnowflakeTarget: SnowflakeTarget } ///
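// Illustrative only: because exactly one member of CodeGenConfigurationNode may be populated,
// a single node value carries just one of the source/transform/target members, for example
// (hypothetical names, Smithy node syntax):
//     { CatalogSource: { Name: "read orders", Database: "sales", Table: "orders" } }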

Represents a directional edge in a directed acyclic graph (DAG).

structure CodeGenEdge { ///

The ID of the node at which the edge starts.

@required Source: CodeGenIdentifier ///

The ID of the node at which the edge ends.

@required Target: CodeGenIdentifier ///

The target of the edge.

TargetParameter: CodeGenArgName } ///

Represents a node in a directed acyclic graph (DAG).

structure CodeGenNode { ///

A node identifier that is unique within the node's graph.

@required Id: CodeGenIdentifier ///

The type of node that this is.

@required NodeType: CodeGenNodeType ///

Properties of the node, in the form of name-value pairs.

@required Args: CodeGenNodeArgs ///

The line number of the node.

LineNumber: Integer = 0 } ///

An argument or property of a node.

structure CodeGenNodeArg { ///

The name of the argument or property.

@required Name: CodeGenArgName ///

The value of the argument or property.

@required Value: CodeGenArgValue ///

True if the value is used as a parameter.

Param: Boolean = false } ///

A column in a Table.

structure Column { ///

The name of the Column.

@required Name: NameString ///

The data type of the Column.

Type: ColumnTypeString ///

A free-form text comment.

Comment: CommentString ///

These key-value pairs define properties associated with the column.

Parameters: ParametersMap } ///

Encapsulates a column name that failed and the reason for failure.

structure ColumnError { ///

The name of the column that failed.

ColumnName: NameString ///

An error message with the reason for the failure of an operation.

Error: ErrorDetail } ///

A structure containing the column name and column importance score for a column.

///

Column importance helps you understand how columns contribute to your model, by identifying which columns in your records are more important than others.

structure ColumnImportance { ///

The name of a column.

ColumnName: NameString ///

The column importance score for the column, as a decimal.

Importance: GenericBoundedDouble } ///

A filter that uses both column-level and row-level filtering.

structure ColumnRowFilter { ///

A string containing the name of the column.

ColumnName: NameString ///

A string containing the row-level filter expression.

RowFilterExpression: PredicateString } ///

Represents the generated column-level statistics for a table or partition.

structure ColumnStatistics { ///

The name of the column to which the statistics belong.

@required ColumnName: NameString ///

The data type of the column.

@required ColumnType: TypeString ///

The timestamp of when column statistics were generated.

@required AnalyzedTime: Timestamp ///

A ColumnStatisticData object that contains the statistics data values.

@required StatisticsData: ColumnStatisticsData } ///

Contains the individual types of column statistics data. Only one data object should be set and indicated by the Type attribute.

structure ColumnStatisticsData { ///

The type of column statistics data.

@required Type: ColumnStatisticsType ///

Boolean column statistics data.

BooleanColumnStatisticsData: BooleanColumnStatisticsData ///

Date column statistics data.

DateColumnStatisticsData: DateColumnStatisticsData ///

/// Decimal column statistics data. UnscaledValues within are Base64-encoded /// binary objects storing big-endian, two's complement representations of /// the decimal's unscaled value. ///

DecimalColumnStatisticsData: DecimalColumnStatisticsData ///

Double column statistics data.

DoubleColumnStatisticsData: DoubleColumnStatisticsData ///

Long column statistics data.

LongColumnStatisticsData: LongColumnStatisticsData ///

String column statistics data.

StringColumnStatisticsData: StringColumnStatisticsData ///

Binary column statistics data.

BinaryColumnStatisticsData: BinaryColumnStatisticsData } ///

Encapsulates a ColumnStatistics object that failed and the reason for failure.

structure ColumnStatisticsError { ///

The ColumnStatistics of the column.

ColumnStatistics: ColumnStatistics ///

An error message with the reason for the failure of an operation.

Error: ErrorDetail } ///

Two processes are trying to modify a resource simultaneously.

@error("client") structure ConcurrentModificationException { ///

A message describing the problem.

Message: MessageString } ///

Too many jobs are being run concurrently.

@error("client") structure ConcurrentRunsExceededException { ///

A message describing the problem.

Message: MessageString } ///

Defines a condition under which a trigger fires.

structure Condition { ///

A logical operator.

LogicalOperator: LogicalOperator ///

The name of the job whose JobRuns this condition applies to, and on which /// this trigger waits.

JobName: NameString ///

The condition state. Currently, the only job states that a trigger can listen for are SUCCEEDED, STOPPED, FAILED, and TIMEOUT. The only crawler states that a trigger can listen for are SUCCEEDED, FAILED, and CANCELLED.

State: JobRunState ///

The name of the crawler to which this condition applies.

CrawlerName: NameString ///

The state of the crawler to which this condition applies.

CrawlState: CrawlState } ///
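// Illustrative only: a conditional trigger that waits for a job to finish successfully might use
// a Condition node value like the following (hypothetical job name; EQUALS is assumed to be the
// supported logical operator):
//     { LogicalOperator: "EQUALS", JobName: "nightly-etl", State: "SUCCEEDED" }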

A specified condition was not satisfied.

@error("client") structure ConditionCheckFailureException { ///

A message describing the problem.

Message: MessageString } ///

The CreatePartitions API was called on a table that has indexes enabled.

@error("client") structure ConflictException { ///

A message describing the problem.

Message: MessageString } ///

The confusion matrix shows you what your transform is predicting accurately and what types of errors it is making.

///

For more information, see Confusion matrix in Wikipedia.

structure ConfusionMatrix { ///

The number of matches in the data that the transform correctly found, in the confusion matrix for your transform.

NumTruePositives: RecordsCount ///

The number of nonmatches in the data that the transform incorrectly classified as a match, /// in the confusion matrix for your transform.

NumFalsePositives: RecordsCount ///

The number of nonmatches in the data that the transform correctly rejected, in the /// confusion matrix for your transform.

NumTrueNegatives: RecordsCount ///

The number of matches in the data that the transform didn't find, in the confusion matrix for your transform.

NumFalseNegatives: RecordsCount } ///

Defines a connection to a data source.

structure Connection { ///

The name of the connection definition.

Name: NameString ///

The description of the connection.

Description: DescriptionString ///

The type of the connection. Currently, SFTP is not supported.

ConnectionType: ConnectionType ///

A list of criteria that can be used in selecting this connection.

MatchCriteria: MatchCriteria ///

These key-value pairs define parameters for the connection:

  • HOST - The host URI: either the fully qualified domain name (FQDN) or the IPv4 address of the database host.
  • PORT - The port number, between 1024 and 65535, of the port on which the database host is listening for database connections.
  • USER_NAME - The name under which to log in to the database. The value string for USER_NAME is "USERNAME".
  • PASSWORD - A password, if one is used, for the user name.
  • ENCRYPTED_PASSWORD - When you enable connection password protection by setting ConnectionPasswordEncryption in the Data Catalog encryption settings, this field stores the encrypted password.
  • JDBC_DRIVER_JAR_URI - The Amazon Simple Storage Service (Amazon S3) path of the JAR file that contains the JDBC driver to use.
  • JDBC_DRIVER_CLASS_NAME - The class name of the JDBC driver to use.
  • JDBC_ENGINE - The name of the JDBC engine to use.
  • JDBC_ENGINE_VERSION - The version of the JDBC engine to use.
  • CONFIG_FILES - (Reserved for future use.)
  • INSTANCE_ID - The instance ID to use.
  • JDBC_CONNECTION_URL - The URL for connecting to a JDBC data source.
  • JDBC_ENFORCE_SSL - A Boolean string (true, false) specifying whether Secure Sockets Layer (SSL) with hostname matching is enforced for the JDBC connection on the client. The default is false.
  • CUSTOM_JDBC_CERT - An Amazon S3 location specifying the customer's root certificate. Glue uses this root certificate to validate the customer's certificate when connecting to the customer database. Glue only handles X.509 certificates. The certificate provided must be DER-encoded and supplied in Base64 encoding PEM format.
  • SKIP_CUSTOM_JDBC_CERT_VALIDATION - By default, this is false. Glue validates the Signature algorithm and Subject Public Key Algorithm for the customer certificate. The only permitted algorithms for the Signature algorithm are SHA256withRSA, SHA384withRSA, or SHA512withRSA. For the Subject Public Key Algorithm, the key length must be at least 2048. You can set the value of this property to true to skip Glue's validation of the customer certificate.
  • CUSTOM_JDBC_CERT_STRING - A custom JDBC certificate string which is used for domain match or distinguished name match to prevent a man-in-the-middle attack. In Oracle database, this is used as the SSL_SERVER_CERT_DN; in Microsoft SQL Server, this is used as the hostNameInCertificate.
  • CONNECTION_URL - The URL for connecting to a general (non-JDBC) data source.
  • SECRET_ID - The secret ID used for the secret manager of credentials.
  • CONNECTOR_URL - The connector URL for a MARKETPLACE or CUSTOM connection.
  • CONNECTOR_TYPE - The connector type for a MARKETPLACE or CUSTOM connection.
  • CONNECTOR_CLASS_NAME - The connector class name for a MARKETPLACE or CUSTOM connection.
  • KAFKA_BOOTSTRAP_SERVERS - A comma-separated list of host and port pairs that are the addresses of the Apache Kafka brokers in a Kafka cluster to which a Kafka client will connect and bootstrap itself.
  • KAFKA_SSL_ENABLED - Whether to enable or disable SSL on an Apache Kafka connection. Default value is "true".
  • KAFKA_CUSTOM_CERT - The Amazon S3 URL for the private CA cert file (.pem format). The default is an empty string.
  • KAFKA_SKIP_CUSTOM_CERT_VALIDATION - Whether to skip the validation of the CA cert file or not. Glue validates for three algorithms: SHA256withRSA, SHA384withRSA, and SHA512withRSA. Default value is "false".
  • KAFKA_CLIENT_KEYSTORE - The Amazon S3 location of the client keystore file for Kafka client-side authentication (Optional).
  • KAFKA_CLIENT_KEYSTORE_PASSWORD - The password to access the provided keystore (Optional).
  • KAFKA_CLIENT_KEY_PASSWORD - A keystore can consist of multiple keys, so this is the password to access the client key to be used with the Kafka server-side key (Optional).
  • ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD - The encrypted version of the Kafka client keystore password (if the user has the Glue encrypt passwords setting selected).
  • ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD - The encrypted version of the Kafka client key password (if the user has the Glue encrypt passwords setting selected).
  • KAFKA_SASL_MECHANISM - "SCRAM-SHA-512", "GSSAPI", or "AWS_MSK_IAM". These are the supported SASL mechanisms.
  • KAFKA_SASL_SCRAM_USERNAME - A plaintext username used to authenticate with the "SCRAM-SHA-512" mechanism.
  • KAFKA_SASL_SCRAM_PASSWORD - A plaintext password used to authenticate with the "SCRAM-SHA-512" mechanism.
  • ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD - The encrypted version of the Kafka SASL SCRAM password (if the user has the Glue encrypt passwords setting selected).
  • KAFKA_SASL_SCRAM_SECRETS_ARN - The Amazon Resource Name of a secret in Amazon Web Services Secrets Manager.
  • KAFKA_SASL_GSSAPI_KEYTAB - The S3 location of a Kerberos keytab file. A keytab stores long-term keys for one or more principals. For more information, see MIT Kerberos Documentation: Keytab.
  • KAFKA_SASL_GSSAPI_KRB5_CONF - The S3 location of a Kerberos krb5.conf file. A krb5.conf stores Kerberos configuration information, such as the location of the KDC server. For more information, see MIT Kerberos Documentation: krb5.conf.
  • KAFKA_SASL_GSSAPI_SERVICE - The Kerberos service name, as set with sasl.kerberos.service.name in your Kafka Configuration.
  • KAFKA_SASL_GSSAPI_PRINCIPAL - The name of the Kerberos principal used by Glue. For more information, see Kafka Documentation: Configuring Kafka Brokers.
ConnectionProperties: ConnectionProperties ///

A map of physical connection requirements, such as virtual private cloud (VPC) and /// SecurityGroup, that are needed to make this connection successfully.

PhysicalConnectionRequirements: PhysicalConnectionRequirements ///

The time that this connection definition was created.

CreationTime: Timestamp ///

The last time that this connection definition was updated.

LastUpdatedTime: Timestamp ///

The user, group, or role that last updated this connection definition.

LastUpdatedBy: NameString } ///

A structure that is used to specify a connection to create or update.

structure ConnectionInput { ///

The name of the connection. Connection will not function as expected without a name.

@required Name: NameString ///

The description of the connection.

Description: DescriptionString ///

The type of the connection. Currently, these types are supported:

  • JDBC - Designates a connection to a database through Java Database Connectivity (JDBC).
    JDBC Connections use the following ConnectionParameters.
    • Required: All of (HOST, PORT, JDBC_ENGINE) or JDBC_CONNECTION_URL.
    • Required: All of (USERNAME, PASSWORD) or SECRET_ID.
    • Optional: JDBC_ENFORCE_SSL, CUSTOM_JDBC_CERT, CUSTOM_JDBC_CERT_STRING, SKIP_CUSTOM_JDBC_CERT_VALIDATION. These parameters are used to configure SSL with JDBC.
  • KAFKA - Designates a connection to an Apache Kafka streaming platform.
    KAFKA Connections use the following ConnectionParameters.
    • Required: KAFKA_BOOTSTRAP_SERVERS.
    • Optional: KAFKA_SSL_ENABLED, KAFKA_CUSTOM_CERT, KAFKA_SKIP_CUSTOM_CERT_VALIDATION. These parameters are used to configure SSL with KAFKA.
    • Optional: KAFKA_CLIENT_KEYSTORE, KAFKA_CLIENT_KEYSTORE_PASSWORD, KAFKA_CLIENT_KEY_PASSWORD, ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD, ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD. These parameters are used to configure TLS client configuration with SSL in KAFKA.
    • Optional: KAFKA_SASL_MECHANISM. Can be specified as SCRAM-SHA-512, GSSAPI, or AWS_MSK_IAM.
    • Optional: KAFKA_SASL_SCRAM_USERNAME, KAFKA_SASL_SCRAM_PASSWORD, ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD. These parameters are used to configure SASL/SCRAM-SHA-512 authentication with KAFKA.
    • Optional: KAFKA_SASL_GSSAPI_KEYTAB, KAFKA_SASL_GSSAPI_KRB5_CONF, KAFKA_SASL_GSSAPI_SERVICE, KAFKA_SASL_GSSAPI_PRINCIPAL. These parameters are used to configure SASL/GSSAPI authentication with KAFKA.
  • MONGODB - Designates a connection to a MongoDB document database.
    MONGODB Connections use the following ConnectionParameters.
    • Required: CONNECTION_URL.
    • Required: All of (USERNAME, PASSWORD) or SECRET_ID.
  • NETWORK - Designates a network connection to a data source within an Amazon Virtual Private Cloud environment (Amazon VPC).
    NETWORK Connections do not require ConnectionParameters. Instead, provide a PhysicalConnectionRequirements.
  • MARKETPLACE - Uses configuration settings contained in a connector purchased from Amazon Web Services Marketplace to read from and write to data stores that are not natively supported by Glue.
    MARKETPLACE Connections use the following ConnectionParameters.
    • Required: CONNECTOR_TYPE, CONNECTOR_URL, CONNECTOR_CLASS_NAME, CONNECTION_URL.
    • Required for JDBC CONNECTOR_TYPE connections: All of (USERNAME, PASSWORD) or SECRET_ID.
  • CUSTOM - Uses configuration settings contained in a custom connector to read from and write to data stores that are not natively supported by Glue.
(An illustrative CreateConnection request is sketched after this structure.)
///

/// SFTP is not supported.

///

For more information about how optional ConnectionProperties are used to configure features in Glue, consult Glue connection properties.

///

For more information about how optional ConnectionProperties are used to configure features in Glue Studio, consult Using connectors and connections.

@required ConnectionType: ConnectionType ///

A list of criteria that can be used in selecting this connection.

MatchCriteria: MatchCriteria ///

These key-value pairs define parameters for the connection.

@required ConnectionProperties: ConnectionProperties ///

A map of physical connection requirements, such as virtual private cloud (VPC) and /// SecurityGroup, that are needed to successfully make this connection.

PhysicalConnectionRequirements: PhysicalConnectionRequirements } ///
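// Illustrative only, not part of the published model: a minimal @examples sketch of a JDBC
// ConnectionInput following the parameter requirements listed above. The connection name,
// endpoint, and credentials are hypothetical; in practice a SECRET_ID is preferable to an
// inline PASSWORD.
apply CreateConnection @examples([
    {
        title: "Create a JDBC connection"
        input: {
            ConnectionInput: {
                Name: "orders-postgres"
                ConnectionType: "JDBC"
                ConnectionProperties: {
                    JDBC_CONNECTION_URL: "jdbc:postgresql://db.example.com:5432/orders"
                    USERNAME: "glue_user"
                    PASSWORD: "example-password"
                }
            }
        }
        output: {}
    }
])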

The data structure used by the Data Catalog to encrypt the password as part of /// CreateConnection or UpdateConnection and store it in the /// ENCRYPTED_PASSWORD field in the connection properties. You can enable catalog /// encryption or only password encryption.

///

When a CreateConnection request arrives containing a password, the Data /// Catalog first encrypts the password using your KMS key. It then encrypts the whole /// connection object again if catalog encryption is also enabled.

///

This encryption requires that you set KMS key permissions to enable or restrict access /// on the password key according to your security requirements. For example, you might want only /// administrators to have decrypt permission on the password key.

structure ConnectionPasswordEncryption { ///

When the ReturnConnectionPasswordEncrypted flag is set to "true", passwords remain encrypted in the responses of GetConnection and GetConnections. This encryption takes effect independently from catalog encryption.

@required ReturnConnectionPasswordEncrypted: Boolean = false ///

A KMS key that is used to encrypt the connection password.

///

If connection password protection is enabled, the caller of CreateConnection /// and UpdateConnection needs at least kms:Encrypt permission on the /// specified KMS key, to encrypt passwords before storing them in the Data Catalog.

///

You can set the decrypt permission to enable or restrict access on the password key according to your security requirements.

AwsKmsKeyId: NameString } ///

Specifies the connections used by a job.

structure ConnectionsList { ///

A list of connections used by the job.

Connections: OrchestrationStringList } ///

The details of a crawl in the workflow.

structure Crawl { ///

The state of the crawler.

State: CrawlState ///

The date and time on which the crawl started.

StartedOn: TimestampValue ///

The date and time on which the crawl completed.

CompletedOn: TimestampValue ///

The error message associated with the crawl.

ErrorMessage: DescriptionString ///

The log group associated with the crawl.

LogGroup: LogGroup ///

The log stream associated with the crawl.

LogStream: LogStream } ///

Specifies a crawler program that examines a data source and uses classifiers to try to /// determine its schema. If successful, the crawler records metadata concerning the data source /// in the Glue Data Catalog.

structure Crawler { ///

The name of the crawler.

Name: NameString ///

The Amazon Resource Name (ARN) of an IAM role that's used to access customer resources, /// such as Amazon Simple Storage Service (Amazon S3) data.

Role: Role ///

A collection of targets to crawl.

Targets: CrawlerTargets ///

The name of the database in which the crawler's output is stored.

DatabaseName: DatabaseName ///

A description of the crawler.

Description: DescriptionString ///

A list of UTF-8 strings that specify the custom classifiers that are associated /// with the crawler.

Classifiers: ClassifierNameList ///

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.

RecrawlPolicy: RecrawlPolicy ///

The policy that specifies update and delete behaviors for the crawler.

SchemaChangePolicy: SchemaChangePolicy ///

A configuration that specifies whether data lineage is enabled for the crawler.

LineageConfiguration: LineageConfiguration ///

Indicates whether the crawler is running, or whether a run is pending.

State: CrawlerState ///

The prefix added to the names of tables that are created.

TablePrefix: TablePrefix ///

For scheduled crawlers, the schedule when the crawler runs.

Schedule: Schedule ///

If the crawler is running, contains the total time elapsed since the last crawl /// began.

CrawlElapsedTime: MillisecondsCount = 0 ///

The time that the crawler was created.

CreationTime: Timestamp ///

The time that the crawler was last updated.

LastUpdated: Timestamp ///

The status of the last crawl, and potentially error information if /// an error occurred.

LastCrawl: LastCrawlInfo ///

The version of the crawler.

Version: VersionId = 0 ///

Crawler configuration information. This versioned JSON string allows users to specify /// aspects of a crawler's behavior. For more information, see Setting crawler configuration options.

Configuration: CrawlerConfiguration ///

The name of the SecurityConfiguration structure to be used by this /// crawler.

CrawlerSecurityConfiguration: CrawlerSecurityConfiguration ///

Specifies whether the crawler should use Lake Formation credentials for the crawler instead of the IAM role credentials.

LakeFormationConfiguration: LakeFormationConfiguration } ///

Contains the information for a run of a crawler.

structure CrawlerHistory { ///

A UUID identifier for each crawl.

CrawlId: CrawlId ///

The state of the crawl.

State: CrawlerHistoryState ///

The date and time on which the crawl started.

StartTime: Timestamp ///

The date and time on which the crawl ended.

EndTime: Timestamp ///

A run summary for the specific crawl in JSON. Contains the catalog tables and partitions that were added, updated, or deleted.

Summary: NameString ///

If an error occurred, the error message associated with the crawl.

ErrorMessage: DescriptionString ///

The log group associated with the crawl.

LogGroup: LogGroup ///

The log stream associated with the crawl.

LogStream: LogStream ///

The prefix for a CloudWatch message about this crawl.

MessagePrefix: MessagePrefix ///

The number of data processing units (DPU) used in hours for the crawl.

DPUHour: NonNegativeDouble = 0 } ///

Metrics for a specified crawler.

structure CrawlerMetrics { ///

The name of the crawler.

CrawlerName: NameString ///

The estimated time left to complete a running crawl.

TimeLeftSeconds: NonNegativeDouble = 0 ///

True if the crawler is still estimating how long it will take to complete this run.

StillEstimating: Boolean = false ///

The duration of the crawler's most recent run, in seconds.

LastRuntimeSeconds: NonNegativeDouble = 0 ///

The median duration of this crawler's runs, in seconds.

MedianRuntimeSeconds: NonNegativeDouble = 0 ///

The number of tables created by this crawler.

TablesCreated: NonNegativeInteger = 0 ///

The number of tables updated by this crawler.

TablesUpdated: NonNegativeInteger = 0 ///

The number of tables deleted by this crawler.

TablesDeleted: NonNegativeInteger = 0 } ///

The details of a Crawler node present in the workflow.

structure CrawlerNodeDetails { ///

A list of crawls represented by the crawl node.

Crawls: CrawlList } ///

The specified crawler is not running.

@error("client") structure CrawlerNotRunningException { ///

A message describing the problem.

Message: MessageString } ///

The operation cannot be performed because the crawler is already running.

@error("client") structure CrawlerRunningException { ///

A message describing the problem.

Message: MessageString } ///

The specified crawler is stopping.

@error("client") structure CrawlerStoppingException { ///

A message describing the problem.

Message: MessageString } ///

Specifies data stores to crawl.

structure CrawlerTargets { ///

Specifies Amazon Simple Storage Service (Amazon S3) targets.

S3Targets: S3TargetList ///

Specifies JDBC targets.

JdbcTargets: JdbcTargetList ///

Specifies Amazon DocumentDB or MongoDB targets.

MongoDBTargets: MongoDBTargetList ///

Specifies Amazon DynamoDB targets.

DynamoDBTargets: DynamoDBTargetList ///

Specifies Glue Data Catalog targets.

CatalogTargets: CatalogTargetList ///

Specifies Delta data store targets.

DeltaTargets: DeltaTargetList ///

Specifies Apache Iceberg data store targets.

IcebergTargets: IcebergTargetList ///

Specifies Apache Hudi data store targets.

HudiTargets: HudiTargetList } ///

A list of fields, comparators, and values that you can use to filter the crawler runs for a specified crawler.

structure CrawlsFilter { ///

A key used to filter the crawler runs for a specified crawler. Valid values for each of the field names are:

  • CRAWL_ID: A string representing the UUID identifier for a crawl.
  • STATE: A string representing the state of the crawl.
  • START_TIME and END_TIME: The epoch timestamp in milliseconds.
  • DPU_HOUR: The number of data processing unit (DPU) hours used for the crawl.
FieldName: FieldName ///

A defined comparator that operates on the value. The available operators are:

  • GT: Greater than.
  • GE: Greater than or equal to.
  • LT: Less than.
  • LE: Less than or equal to.
  • EQ: Equal to.
  • NE: Not equal to.
(An illustrative filter value is sketched after this structure.)
FilterOperator: FilterOperator ///

The value provided for comparison on the crawl field.

FieldValue: GenericString } @input structure CreateBlueprintRequest { ///
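// Illustrative only: a CrawlsFilter node value that keeps crawls which consumed more than
// 10 DPU-hours (hypothetical threshold):
//     { FieldName: "DPU_HOUR", FilterOperator: "GT", FieldValue: "10" }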

The name of the blueprint.

@required Name: OrchestrationNameString ///

A description of the blueprint.

Description: Generic512CharString ///

Specifies a path in Amazon S3 where the blueprint is published.

@required BlueprintLocation: OrchestrationS3Location ///

The tags to be applied to this blueprint.

Tags: TagsMap } @output structure CreateBlueprintResponse { ///

Returns the name of the blueprint that was registered.

Name: NameString } @input structure CreateClassifierRequest { ///
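// Illustrative only, not part of the published model: the blueprint is registered from a
// published Amazon S3 location. The name, description, and S3 path are hypothetical.
apply CreateBlueprint @examples([
    {
        title: "Register a blueprint from Amazon S3"
        input: {
            Name: "convert_to_parquet"
            Description: "Converts a crawled table to Parquet."
            BlueprintLocation: "s3://example-bucket/blueprints/convert_to_parquet.zip"
        }
        output: {
            Name: "convert_to_parquet"
        }
    }
])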

A GrokClassifier object specifying the classifier /// to create.

GrokClassifier: CreateGrokClassifierRequest ///

An XMLClassifier object specifying the classifier /// to create.

XMLClassifier: CreateXMLClassifierRequest ///

A JsonClassifier object specifying the classifier /// to create.

JsonClassifier: CreateJsonClassifierRequest ///

A CsvClassifier object specifying the classifier /// to create.

CsvClassifier: CreateCsvClassifierRequest } @output structure CreateClassifierResponse {} @input structure CreateConnectionRequest { ///

The ID of the Data Catalog in which to create the connection. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

A ConnectionInput object defining the connection /// to create.

@required ConnectionInput: ConnectionInput ///

The tags you assign to the connection.

Tags: TagsMap } @output structure CreateConnectionResponse {} @input structure CreateCrawlerRequest { ///

Name of the new crawler.

@required Name: NameString ///

The IAM role or Amazon Resource Name (ARN) of an IAM role used by the new crawler to /// access customer resources.

@required Role: Role ///

The Glue database where results are written, such as: /// arn:aws:daylight:us-east-1::database/sometable/*.

DatabaseName: DatabaseName ///

A description of the new crawler.

Description: DescriptionString ///

A list of collections of targets to crawl.

@required Targets: CrawlerTargets ///

A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run /// something every day at 12:15 UTC, you would specify: /// cron(15 12 * * ? *).

Schedule: CronExpression ///

A list of custom classifiers that the user has registered. By default, all built-in /// classifiers are included in a crawl, but these custom classifiers always override the default /// classifiers for a given classification.

Classifiers: ClassifierNameList ///

The table prefix used for catalog tables that are created.

TablePrefix: TablePrefix ///

The policy for the crawler's update and deletion behavior.

SchemaChangePolicy: SchemaChangePolicy ///

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.

RecrawlPolicy: RecrawlPolicy ///

Specifies data lineage configuration settings for the crawler.

LineageConfiguration: LineageConfiguration ///

Specifies Lake Formation configuration settings for the crawler.

LakeFormationConfiguration: LakeFormationConfiguration ///

Crawler configuration information. This versioned JSON /// string allows users to specify aspects of a crawler's behavior. /// For more information, see Setting crawler configuration options.

Configuration: CrawlerConfiguration ///

The name of the SecurityConfiguration structure to be used by this /// crawler.

CrawlerSecurityConfiguration: CrawlerSecurityConfiguration ///

The tags to use with this crawler request. You may use tags to limit access to the /// crawler. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer /// guide.

Tags: TagsMap } @output structure CreateCrawlerResponse {} ///

Specifies a custom CSV classifier for CreateClassifier to create.

structure CreateCsvClassifierRequest { ///

The name of the classifier.

@required Name: NameString ///

A custom symbol to denote what separates each column entry in the row.

Delimiter: CsvColumnDelimiter ///

A custom symbol to denote what combines content into a single column value. Must be different from the column delimiter.

QuoteSymbol: CsvQuoteSymbol ///

Indicates whether the CSV file contains a header.

ContainsHeader: CsvHeaderOption ///

A list of strings representing column names.

Header: CsvHeader ///

Specifies not to trim values before identifying the type of column values. The default value is true.

DisableValueTrimming: NullableBoolean ///

Enables the processing of files that contain only one column.

AllowSingleColumn: NullableBoolean ///

Enables the configuration of custom datatypes.

CustomDatatypeConfigured: NullableBoolean ///

Creates a list of supported custom datatypes.

CustomDatatypes: CustomDatatypes ///

Sets the SerDe for processing CSV in the classifier, which will be applied in the Data Catalog. Valid values are OpenCSVSerDe, LazySimpleSerDe, and None. You can specify the None value when you want the crawler to do the detection.

Serde: CsvSerdeOption } @input structure CreateCustomEntityTypeRequest { ///

A name for the custom pattern that allows it to be retrieved or deleted later. This name must be unique per Amazon Web Services account.

@required Name: NameString ///

A regular expression string that is used for detecting sensitive data in a custom pattern.

@required RegexString: NameString ///

A list of context words. If none of these context words are found within the vicinity of the regular expression, the data will not be detected as sensitive data.

///

If no context words are passed, only a regular expression is checked.
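
// Illustrative example (not part of the published model): for a RegexString that
// matches account numbers, ContextWords such as ["account", "acct", "invoice"]
// restrict detection to matches that appear near one of those words.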

ContextWords: ContextWords ///

A list of tags applied to the custom entity type.

Tags: TagsMap } @output structure CreateCustomEntityTypeResponse { ///

The name of the custom pattern you created.

Name: NameString } @input structure CreateDatabaseRequest { ///

The ID of the Data Catalog in which to create the database. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The metadata for the database.

@required DatabaseInput: DatabaseInput ///

The tags you assign to the database.

Tags: TagsMap } @output structure CreateDatabaseResponse {} @input structure CreateDataQualityRulesetRequest { ///

A unique name for the data quality ruleset.

@required Name: NameString ///

A description of the data quality ruleset.

Description: DescriptionString ///

A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.
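
// Illustrative example (not part of the published model): a minimal DQDL ruleset
// string, assuming a table with an "order_id" column, could be
//   Rules = [ RowCount > 0, IsComplete "order_id" ]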

@required Ruleset: DataQualityRulesetString ///

A list of tags applied to the data quality ruleset.

Tags: TagsMap ///

A target table associated with the data quality ruleset.

TargetTable: DataQualityTargetTable ///

Used for idempotency and is recommended to be set to a random ID (such as a UUID) to avoid creating or starting multiple instances of the same resource.

ClientToken: HashString } @output structure CreateDataQualityRulesetResponse { ///

A unique name for the data quality ruleset.

Name: NameString } @input structure CreateDevEndpointRequest { ///

The name to be assigned to the new DevEndpoint.

@required EndpointName: GenericString ///

The IAM role for the DevEndpoint.

@required RoleArn: RoleArn ///

Security group IDs for the security groups to be used by the new /// DevEndpoint.

SecurityGroupIds: StringList ///

The subnet ID for the new DevEndpoint to use.

SubnetId: GenericString ///

The public key to be used by this DevEndpoint for authentication. This /// attribute is provided for backward compatibility because the recommended attribute to use is /// public keys.

PublicKey: GenericString ///

A list of public keys to be used by the development endpoints for authentication. The use /// of this attribute is preferred over a single public key because the public keys allow you to /// have a different private key per client.

/// ///

If you previously created an endpoint with a public key, you must remove that key to be able /// to set a list of public keys. Call the UpdateDevEndpoint API with the public /// key content in the deletePublicKeys attribute, and the list of new keys in the /// addPublicKeys attribute.

///
PublicKeys: PublicKeysList ///

The number of Glue Data Processing Units (DPUs) to allocate to this /// DevEndpoint.

NumberOfNodes: IntegerValue = 0 ///

The type of predefined worker that is allocated to the development endpoint. Accepts a value of Standard, G.1X, or G.2X.

///

  • For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
  • For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.
  • For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.

///

Known issue: when a development endpoint is created with the G.2X /// WorkerType configuration, the Spark drivers for the development endpoint will run on 4 vCPU, 16 GB of memory, and a 64 GB disk.

WorkerType: WorkerType ///

Glue version determines the versions of Apache Spark and Python that Glue supports. The Python version indicates the version supported for running your ETL scripts on development endpoints.

///

For more information about the available Glue versions and corresponding Spark and Python versions, see Glue version in the developer guide.

///

Development endpoints that are created without specifying a Glue version default to Glue 0.9.

///

You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.

GlueVersion: GlueVersionString ///

The number of workers of a defined workerType that are allocated to the development endpoint.

///

The maximum number of workers you can define are 299 for G.1X, and 149 for G.2X.

NumberOfWorkers: NullableInteger ///

The paths to one or more Python libraries in an Amazon S3 bucket that should be loaded in /// your DevEndpoint. Multiple values must be complete paths separated by a /// comma.

/// ///

You can only use pure Python libraries with a DevEndpoint. Libraries that rely on /// C extensions, such as the pandas Python data /// analysis library, are not yet supported.

///
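
// Illustrative example (not part of the published model): multiple library paths are
// supplied as a single comma-separated string, e.g.
//   s3://my-bucket/libs/analytics.zip,s3://my-bucket/libs/helpers.zip
// (the bucket and file names here are placeholders).
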
ExtraPythonLibsS3Path: GenericString ///

The path to one or more Java .jar files in an S3 bucket that should be loaded /// in your DevEndpoint.

ExtraJarsS3Path: GenericString ///

The name of the SecurityConfiguration structure to be used with this /// DevEndpoint.

SecurityConfiguration: NameString ///

The tags to use with this DevEndpoint. You may use tags to limit access to the DevEndpoint. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer guide.

Tags: TagsMap ///

A map of arguments used to configure the DevEndpoint.

Arguments: MapValue } @output structure CreateDevEndpointResponse { ///

The name assigned to the new DevEndpoint.

EndpointName: GenericString ///

The current status of the new DevEndpoint.

Status: GenericString ///

The security groups assigned to the new DevEndpoint.

SecurityGroupIds: StringList ///

The subnet ID assigned to the new DevEndpoint.

SubnetId: GenericString ///

The Amazon Resource Name (ARN) of the role assigned to the new /// DevEndpoint.

RoleArn: RoleArn ///

The address of the YARN endpoint used by this DevEndpoint.

YarnEndpointAddress: GenericString ///

The Apache Zeppelin port for the remote Apache Spark interpreter.

ZeppelinRemoteSparkInterpreterPort: IntegerValue = 0 ///

The number of Glue Data Processing Units (DPUs) allocated to this DevEndpoint.

NumberOfNodes: IntegerValue = 0 ///

The type of predefined worker that is allocated to the development endpoint. May be a value of Standard, G.1X, or G.2X.

WorkerType: WorkerType ///

Glue version determines the versions of Apache Spark and Python that Glue supports. The Python version indicates the version supported for running your ETL scripts on development endpoints.

///

For more information about the available Glue versions and corresponding Spark and Python versions, see Glue version in the developer guide.

GlueVersion: GlueVersionString ///

The number of workers of a defined workerType that are allocated to the development endpoint.

NumberOfWorkers: NullableInteger ///

The Amazon Web Services Availability Zone where this DevEndpoint is located.

AvailabilityZone: GenericString ///

The ID of the virtual private cloud (VPC) used by this DevEndpoint.

VpcId: GenericString ///

The paths to one or more Python libraries in an S3 bucket that will be loaded in your /// DevEndpoint.

ExtraPythonLibsS3Path: GenericString ///

Path to one or more Java .jar files in an S3 bucket that will be loaded in /// your DevEndpoint.

ExtraJarsS3Path: GenericString ///

The reason for a current failure in this DevEndpoint.

FailureReason: GenericString ///

The name of the SecurityConfiguration structure being used with this /// DevEndpoint.

SecurityConfiguration: NameString ///

The point in time at which this DevEndpoint was created.

CreatedTimestamp: TimestampValue ///

The map of arguments used to configure this DevEndpoint.

///

Valid arguments are:

///

  • "--enable-glue-datacatalog": ""

///

You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.
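
// Illustrative example (not part of the published model): to request Python 3 on the
// endpoint, the Arguments map is commonly given "GLUE_PYTHON_VERSION": "3" (treat the
// key name as an assumption; check the Glue guide) alongside "--enable-glue-datacatalog": "".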

Arguments: MapValue } ///

Specifies a grok classifier for CreateClassifier /// to create.

structure CreateGrokClassifierRequest { ///

An identifier of the data format that the classifier matches, /// such as Twitter, JSON, Omniture logs, Amazon CloudWatch Logs, and so on.

@required Classification: Classification ///

The name of the new classifier.

@required Name: NameString ///

The grok pattern used by this classifier.
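
// Illustrative example (not part of the published model): a grok pattern for a simple
// application log line could be
//   %{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}
// using built-in grok patterns; the right pattern depends on the data being classified.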

@required GrokPattern: GrokPattern ///

Optional custom grok patterns used by this classifier.

CustomPatterns: CustomPatterns } @input structure CreateJobRequest { ///

The name you assign to this job definition. It must be unique in your account.

@required Name: NameString ///

Description of the job being defined.

Description: DescriptionString ///

This field is reserved for future use.

LogUri: UriString ///

The name or Amazon Resource Name (ARN) of the IAM role associated with this job.

@required Role: RoleString ///

An ExecutionProperty specifying the maximum number of concurrent runs allowed /// for this job.

ExecutionProperty: ExecutionProperty ///

The JobCommand that runs this job.

@required Command: JobCommand ///

The default arguments for every run of this job, specified as name-value pairs.

///

You can specify arguments here that your own job-execution script /// consumes, as well as arguments that Glue itself consumes.

///

Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets /// from a Glue Connection, Secrets Manager or other secret management /// mechanism if you intend to keep them within the Job.

///

For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Spark jobs, /// see the Special Parameters Used by Glue topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Ray /// jobs, see Using /// job parameters in Ray jobs in the developer guide.
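
// Illustrative example (not part of the published model): a DefaultArguments map for a
// Python Spark job might look like
//   { "--job-language": "python", "--TempDir": "s3://my-bucket/glue-temp/", "--enable-metrics": "" }
// "my-bucket" is a placeholder; the keys shown are among the documented Glue special parameters.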

DefaultArguments: GenericMap ///

Arguments for this job that are not overridden when providing job arguments /// in a job run, specified as name-value pairs.

NonOverridableArguments: GenericMap ///

The connections used for this job.

Connections: ConnectionsList ///

The maximum number of times to retry this job if it fails.

MaxRetries: MaxRetries = 0 ///

This parameter is deprecated. Use MaxCapacity instead.

///

The number of Glue data processing units (DPUs) to allocate to this Job. You can /// allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing /// power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more information, /// see the Glue pricing /// page.

@deprecated( message: "This property is deprecated, use MaxCapacity instead." ) AllocatedCapacity: IntegerValue = 0 ///

The job timeout in minutes. This is the maximum time that a job run /// can consume resources before it is terminated and enters TIMEOUT /// status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

For Glue version 1.0 or earlier jobs, using the standard worker type, the number of /// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is /// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB /// of memory. For more information, see the /// Glue pricing page.

///

For Glue version 2.0+ jobs, you cannot specify a Maximum capacity. /// Instead, you should specify a Worker type and the Number of workers.

///

Do not set MaxCapacity if using WorkerType and NumberOfWorkers.

///

The value that can be allocated for MaxCapacity depends on whether you are /// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL /// job:

///

  • When you specify a Python shell job (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
  • When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a fractional DPU allocation.

MaxCapacity: NullableDouble ///

The name of the SecurityConfiguration structure to be used with this /// job.

SecurityConfiguration: NameString ///

The tags to use with this job. You may use tags to limit access to the job. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer guide.

Tags: TagsMap ///

Specifies configuration properties of a job notification.

NotificationProperty: NotificationProperty ///

In Spark jobs, GlueVersion determines the versions of Apache Spark and Python /// that Glue makes available in a job. The Python version indicates the version /// supported for jobs of type Spark.

///

Ray jobs should set GlueVersion to 4.0 or greater. However, /// the versions of Ray, Python and additional libraries available in your Ray job are determined /// by the Runtime parameter of the Job command.

///

For more information about the available Glue versions and corresponding /// Spark and Python versions, see Glue version in the developer /// guide.

///

Jobs that are created without specifying a Glue version default to Glue 0.9.

GlueVersion: GlueVersionString ///

The number of workers of a defined workerType that are allocated when a job runs.

NumberOfWorkers: NullableInteger ///

The type of predefined worker that is allocated when a job runs. Accepts a value of /// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.

///

  • For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries; it offers a scalable and cost-effective way to run most jobs.
  • For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries; it offers a scalable and cost-effective way to run most jobs.
  • For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
  • For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
  • For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
  • For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.

WorkerType: WorkerType ///
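
// Illustrative example (not part of the published model): a typical Spark job pairs
// WorkerType = G.1X with NumberOfWorkers = 10 and leaves MaxCapacity unset, since
// MaxCapacity is mutually exclusive with WorkerType and NumberOfWorkers.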

The representation of a directed acyclic graph on which both the Glue Studio visual component and Glue Studio code generation are based.

CodeGenConfigurationNodes: CodeGenConfigurationNodes ///

Indicates whether the job is run with a standard or flexible execution class. The standard execution-class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.

///

The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.

///

Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.

ExecutionClass: ExecutionClass ///

The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.

SourceControlDetails: SourceControlDetails } @output structure CreateJobResponse { ///

The unique name that was provided for this job definition.

Name: NameString } ///

Specifies a JSON classifier for CreateClassifier to create.

structure CreateJsonClassifierRequest { ///

The name of the classifier.

@required Name: NameString ///

A JsonPath string defining the JSON data for the classifier to classify. /// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom Classifiers.
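
// Illustrative example (not part of the published model): with records nested under a
// top-level "records" array, a JsonPath of $.records[*] classifies each array element
// as its own record (only the JsonPath subset supported by Glue may be used).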

@required JsonPath: JsonPath } @input structure CreateMLTransformRequest { ///

The unique name that you give the transform when you create it.

@required Name: NameString ///

A description of the machine learning transform that is being defined. The default is an /// empty string.

Description: DescriptionString ///

A list of Glue table definitions used by the transform.

@required InputRecordTables: GlueTables ///

The algorithmic parameters that are specific to the transform type used. Conditionally /// dependent on the transform type.

@required Parameters: TransformParameters ///

The name or Amazon Resource Name (ARN) of the IAM role with the required permissions. The required permissions include both Glue service role permissions to Glue resources, and Amazon S3 permissions required by the transform.

///

  • This role needs Glue service role permissions to allow access to resources in Glue. See Attach a Policy to IAM Users That Access Glue.
  • This role needs permission to your Amazon Simple Storage Service (Amazon S3) sources, targets, temporary directory, scripts, and any libraries used by the task run for this transform.

@required Role: RoleString ///

This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.

GlueVersion: GlueVersionString ///

The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of /// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more /// information, see the Glue pricing /// page.

///

/// MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.

///

  • If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
  • If MaxCapacity is set, then neither NumberOfWorkers nor WorkerType can be set.
  • If WorkerType is set, then NumberOfWorkers is required (and vice versa).
  • MaxCapacity and NumberOfWorkers must both be at least 1.

///

When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.

MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when this task runs. Accepts a value of Standard, G.1X, or G.2X.

///

  • For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
  • For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
  • For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.

///

MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.

  • If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
  • If MaxCapacity is set, then neither NumberOfWorkers nor WorkerType can be set.
  • If WorkerType is set, then NumberOfWorkers is required (and vice versa).
  • MaxCapacity and NumberOfWorkers must both be at least 1.

WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when this task runs.

///

If WorkerType is set, then NumberOfWorkers is required (and vice versa).

NumberOfWorkers: NullableInteger ///

The timeout of the task run for this transform in minutes. This is the maximum time that a task run for this transform can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

The maximum number of times to retry a task for this transform after a task run fails.

MaxRetries: NullableInteger ///

The tags to use with this machine learning transform. You may use tags to limit access to the machine learning transform. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer guide.

Tags: TagsMap ///

The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.

TransformEncryption: TransformEncryption } @output structure CreateMLTransformResponse { ///

A unique identifier that is generated for the transform.

TransformId: HashString } @input structure CreatePartitionIndexRequest { ///

The catalog ID where the table resides.

CatalogId: CatalogIdString ///

Specifies the name of a database in which you want to create a partition index.

@required DatabaseName: NameString ///

Specifies the name of a table in which you want to create a partition index.

@required TableName: NameString ///

Specifies a PartitionIndex structure to create a partition index in an existing table.

@required PartitionIndex: PartitionIndex } @output structure CreatePartitionIndexResponse {} @input structure CreatePartitionRequest { ///

The Amazon Web Services account ID of the catalog in which the partition is to be created.

CatalogId: CatalogIdString ///

The name of the metadata database in which the partition is /// to be created.

@required DatabaseName: NameString ///

The name of the metadata table in which the partition is to be created.

@required TableName: NameString ///

A PartitionInput structure defining the partition /// to be created.

@required PartitionInput: PartitionInput } @output structure CreatePartitionResponse {} @output structure CreateRegistryResponse { ///

The Amazon Resource Name (ARN) of the newly created registry.

RegistryArn: GlueResourceArn ///

The name of the registry.

RegistryName: SchemaRegistryNameString ///

A description of the registry.

Description: DescriptionString ///

The tags for the registry.

Tags: TagsMap } @output structure CreateSchemaResponse { ///

The name of the registry.

RegistryName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the registry.

RegistryArn: GlueResourceArn ///

The name of the schema.

SchemaName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

A description of the schema if specified when created.

Description: DescriptionString ///

The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.

DataFormat: DataFormat ///

The schema compatibility mode.

Compatibility: Compatibility ///

The version number of the checkpoint (the last time the compatibility mode was changed).

SchemaCheckpoint: SchemaCheckpointNumber = 0 ///

The latest version of the schema associated with the returned schema definition.

LatestSchemaVersion: VersionLongNumber = 0 ///

The next version of the schema associated with the returned schema definition.

NextSchemaVersion: VersionLongNumber = 0 ///

The status of the schema.

SchemaStatus: SchemaStatus ///

The tags for the schema.

Tags: TagsMap ///

The unique identifier of the first schema version.

SchemaVersionId: SchemaVersionIdString ///

The status of the first schema version created.

SchemaVersionStatus: SchemaVersionStatus } @input structure CreateScriptRequest { ///

A list of the nodes in the DAG.

DagNodes: DagNodes ///

A list of the edges in the DAG.

DagEdges: DagEdges ///

The programming language of the resulting code from the DAG.

Language: Language } @output structure CreateScriptResponse { ///

The Python script generated from the DAG.

PythonScript: PythonScript ///

The Scala code generated from the DAG.

ScalaCode: ScalaCode } @input structure CreateSecurityConfigurationRequest { ///

The name for the new security configuration.

@required Name: NameString ///

The encryption configuration for the new security configuration.

@required EncryptionConfiguration: EncryptionConfiguration } @output structure CreateSecurityConfigurationResponse { ///

The name assigned to the new security configuration.

Name: NameString ///

The time at which the new security configuration was created.

CreatedTimestamp: TimestampValue } ///

Request to create a new session.

@input structure CreateSessionRequest { ///

The ID of the session request.

@required Id: NameString ///

The description of the session.

Description: DescriptionString ///

The IAM Role ARN

@required Role: OrchestrationRoleArn ///

The SessionCommand that runs the job.

@required Command: SessionCommand ///

/// The number of minutes before session times out. Default for Spark ETL /// jobs is 48 hours (2880 minutes), the maximum session lifetime for this job type. /// Consult the documentation for other job types. ///

Timeout: Timeout ///

/// The number of minutes when idle before session times out. Default for /// Spark ETL jobs is value of Timeout. Consult the documentation /// for other job types. ///

IdleTimeout: Timeout ///

A map array of key-value pairs. Max is 75 pairs.

DefaultArguments: OrchestrationArgumentsMap ///

The connections to use for the session.

Connections: ConnectionsList ///

The number of Glue data processing units (DPUs) that can be allocated when the job runs. /// A DPU is a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB memory.

MaxCapacity: NullableDouble ///

The number of workers of a defined WorkerType to use for the session.

NumberOfWorkers: NullableInteger ///

The type of predefined worker that is allocated when a job runs. Accepts a value of /// G.1X, G.2X, G.4X, or G.8X for Spark jobs. Accepts the value Z.2X for Ray notebooks.

///

  • For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries; it offers a scalable and cost-effective way to run most jobs.
  • For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries; it offers a scalable and cost-effective way to run most jobs.
  • For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
  • For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
  • For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.

WorkerType: WorkerType ///

The name of the SecurityConfiguration structure to be used with the session

SecurityConfiguration: NameString ///

The Glue version determines the versions of Apache Spark and Python that Glue supports. /// The GlueVersion must be greater than 2.0.

GlueVersion: GlueVersionString ///

The map of key value pairs (tags) belonging to the session.

Tags: TagsMap ///

The origin of the request.

RequestOrigin: OrchestrationNameString } @output structure CreateSessionResponse { ///

Returns the session object in the response.

Session: Session } @input structure CreateTableRequest { ///

The ID of the Data Catalog in which to create the Table. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The catalog database in which to create the new table. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The TableInput object that defines the metadata table /// to create in the catalog.

@required TableInput: TableInput ///

A list of partition indexes, PartitionIndex structures, to create in the table.

PartitionIndexes: PartitionIndexList ///

The ID of the transaction.

TransactionId: TransactionIdString ///

Specifies an OpenTableFormatInput structure when creating an open format table.

OpenTableFormatInput: OpenTableFormatInput } @output structure CreateTableResponse {} @input structure CreateTriggerRequest { ///

The name of the trigger.

@required Name: NameString ///

The name of the workflow associated with the trigger.

WorkflowName: NameString ///

The type of the new trigger.

@required Type: TriggerType ///

A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run /// something every day at 12:15 UTC, you would specify: /// cron(15 12 * * ? *).

///

This field is required when the trigger type is SCHEDULED.

Schedule: GenericString ///

A predicate to specify when the new trigger should fire.

///

This field is required when the trigger type is CONDITIONAL.
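
// Illustrative example (not part of the published model): a CONDITIONAL trigger commonly
// uses a predicate whose single condition waits for another job to succeed, e.g. in JSON form
//   { "Conditions": [ { "LogicalOperator": "EQUALS", "JobName": "my-upstream-job", "State": "SUCCEEDED" } ] }
// where "my-upstream-job" is a placeholder job name.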

Predicate: Predicate ///

The actions initiated by this trigger when it fires.

@required Actions: ActionList ///

A description of the new trigger.

Description: DescriptionString ///

Set to true to start SCHEDULED and CONDITIONAL /// triggers when created. True is not supported for ON_DEMAND triggers.

StartOnCreation: BooleanValue = false ///

The tags to use with this trigger. You may use tags to limit access to the trigger. /// For more information about tags in Glue, see /// Amazon Web Services Tags in Glue in the developer guide.

Tags: TagsMap ///

Batch condition that must be met (specified number of events received or batch time window expired) /// before EventBridge event trigger fires.

EventBatchingCondition: EventBatchingCondition } @output structure CreateTriggerResponse { ///

The name of the trigger.

Name: NameString } @input structure CreateUserDefinedFunctionRequest { ///

The ID of the Data Catalog in which to create the function. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which to create the function.

@required DatabaseName: NameString ///

A FunctionInput object that defines the function /// to create in the Data Catalog.

@required FunctionInput: UserDefinedFunctionInput } @output structure CreateUserDefinedFunctionResponse {} @input structure CreateWorkflowRequest { ///

The name to be assigned to the workflow. It should be unique within your account.

@required Name: NameString ///

A description of the workflow.

Description: GenericString ///

A collection of properties to be used as part of each execution of the workflow.

DefaultRunProperties: WorkflowRunProperties ///

The tags to be used with this workflow.

Tags: TagsMap ///

You can use this parameter to prevent unwanted multiple updates to data, to control costs, or in some cases, to prevent exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs.

MaxConcurrentRuns: NullableInteger } @output structure CreateWorkflowResponse { ///

The name of the workflow which was provided as part of the request.

Name: NameString } ///

Specifies an XML classifier for CreateClassifier to create.

structure CreateXMLClassifierRequest { ///

An identifier of the data format that the classifier matches.

@required Classification: Classification ///

The name of the classifier.

@required Name: NameString ///

The XML tag designating the element that contains each record in an XML document being /// parsed. This can't identify a self-closing element (closed by />). An empty /// row element that contains only attributes can be parsed as long as it ends with a closing tag /// (for example, <row item_a="A" item_b="B"></row> is okay, but /// <row item_a="A" item_b="B" /> is not).

RowTag: RowTag } ///

A classifier for custom CSV content.

structure CsvClassifier { ///

The name of the classifier.

@required Name: NameString ///

The time that this classifier was registered.

CreationTime: Timestamp ///

The time that this classifier was last updated.

LastUpdated: Timestamp ///

The version of this classifier.

Version: VersionId = 0 ///

A custom symbol to denote what separates each column entry in the row.

Delimiter: CsvColumnDelimiter ///

A custom symbol to denote what combines content into a single column value. It must be /// different from the column delimiter.

QuoteSymbol: CsvQuoteSymbol ///

Indicates whether the CSV file contains a header.

ContainsHeader: CsvHeaderOption ///

A list of strings representing column names.

Header: CsvHeader ///

Specifies not to trim values before identifying the type of column values. The default /// value is true.

DisableValueTrimming: NullableBoolean ///

Enables the processing of files that contain only one column.

AllowSingleColumn: NullableBoolean ///

Enables the custom datatype to be configured.

CustomDatatypeConfigured: NullableBoolean ///

A list of custom datatypes including "BINARY", "BOOLEAN", "DATE", "DECIMAL", "DOUBLE", "FLOAT", "INT", "LONG", "SHORT", "STRING", "TIMESTAMP".

CustomDatatypes: CustomDatatypes ///

Sets the SerDe for processing CSV in the classifier, which will be applied in the Data Catalog. Valid values are OpenCSVSerDe, LazySimpleSerDe, and None. You can specify the None value when you want the crawler to do the detection.

Serde: CsvSerdeOption } ///

Specifies a transform that uses custom code you provide to perform the data transformation. The output is a collection of DynamicFrames.

structure CustomCode { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: ManyInputs ///

The custom code that is used to perform the data transformation.

@required Code: ExtendedString ///

The name defined for the custom code node class.

@required ClassName: EnclosedInStringProperty ///

Specifies the data schema for the custom code transform.

OutputSchemas: GlueSchemas } ///

An object representing a custom pattern for detecting sensitive data across the columns and rows of your structured data.

structure CustomEntityType { ///

A name for the custom pattern that allows it to be retrieved or deleted later. This name must be unique per Amazon Web Services account.

@required Name: NameString ///

A regular expression string that is used for detecting sensitive data in a custom pattern.

@required RegexString: NameString ///

A list of context words. If none of these context words are found within the vicinity of the regular expression, the data will not be detected as sensitive data.

///

If no context words are passed, only a regular expression is checked.

ContextWords: ContextWords } ///

The Database object represents a logical grouping of tables that might reside /// in a Hive metastore or an RDBMS.

structure Database { ///

The name of the database. For Hive compatibility, this is folded to lowercase when it is /// stored.

@required Name: NameString ///

A description of the database.

Description: DescriptionString ///

The location of the database (for example, an HDFS path).

LocationUri: URI ///

These key-value pairs define parameters and properties /// of the database.

Parameters: ParametersMap ///

The time at which the metadata database was created in the catalog.

CreateTime: Timestamp ///

Creates a set of default permissions on the table for principals. Used by Lake Formation. Not used in the normal course of Glue operations.

CreateTableDefaultPermissions: PrincipalPermissionsList ///

A DatabaseIdentifier structure that describes a target database for resource linking.

TargetDatabase: DatabaseIdentifier ///

The ID of the Data Catalog in which the database resides.

CatalogId: CatalogIdString ///

A FederatedDatabase structure that references an entity outside the Glue Data Catalog.

FederatedDatabase: FederatedDatabase } ///

A structure that describes a target database for resource linking.

structure DatabaseIdentifier { ///

The ID of the Data Catalog in which the database resides.

CatalogId: CatalogIdString ///

The name of the catalog database.

DatabaseName: NameString ///

Region of the target database.

Region: NameString } ///

The structure used to create or update a database.

structure DatabaseInput { ///

The name of the database. For Hive compatibility, this is folded to lowercase when it is /// stored.

@required Name: NameString ///

A description of the database.

Description: DescriptionString ///

The location of the database (for example, an HDFS path).

LocationUri: URI ///

These key-value pairs define parameters and properties of the database.

Parameters: ParametersMap ///

Creates a set of default permissions on the table for principals. Used by Lake Formation. Not used in the normal course of Glue operations.

CreateTableDefaultPermissions: PrincipalPermissionsList ///

A DatabaseIdentifier structure that describes a target database for resource linking.

TargetDatabase: DatabaseIdentifier ///

A FederatedDatabase structure that references an entity outside the Glue Data Catalog.

FederatedDatabase: FederatedDatabase } ///

Contains configuration information for maintaining Data Catalog security.

structure DataCatalogEncryptionSettings { ///

Specifies the encryption-at-rest configuration for the Data Catalog.

EncryptionAtRest: EncryptionAtRest ///

When connection password protection is enabled, the Data Catalog uses a customer-provided /// key to encrypt the password as part of CreateConnection or /// UpdateConnection and store it in the ENCRYPTED_PASSWORD field in /// the connection properties. You can enable catalog encryption or only password /// encryption.

ConnectionPasswordEncryption: ConnectionPasswordEncryption } ///

The Lake Formation principal.

structure DataLakePrincipal { ///

An identifier for the Lake Formation principal.

DataLakePrincipalIdentifier: DataLakePrincipalString } ///

Additional run options you can specify for an evaluation run.

structure DataQualityEvaluationRunAdditionalRunOptions { ///

Whether or not to enable CloudWatch metrics.

CloudWatchMetricsEnabled: NullableBoolean ///

Prefix for Amazon S3 to store results.

ResultsS3Prefix: UriString } ///

Describes a data quality result.

structure DataQualityResult { ///

A unique result ID for the data quality result.

ResultId: HashString ///

An aggregate data quality score. Represents the ratio of rules that passed to the total number of rules.

Score: GenericBoundedDouble ///

The table associated with the data quality result, if any.

DataSource: DataSource ///

The name of the ruleset associated with the data quality result.

RulesetName: NameString ///

In the context of a job in Glue Studio, each node in the canvas is typically assigned some sort of name and data quality nodes will have names. In the case of multiple nodes, the evaluationContext can differentiate the nodes.

EvaluationContext: GenericString ///

The date and time when this data quality run started.

StartedOn: Timestamp ///

The date and time when this data quality run completed.

CompletedOn: Timestamp ///

The job name associated with the data quality result, if any.

JobName: NameString ///

The job run ID associated with the data quality result, if any.

JobRunId: HashString ///

The unique run ID for the ruleset evaluation for this data quality result.

RulesetEvaluationRunId: HashString ///

A list of DataQualityRuleResult objects representing the results for each rule.

RuleResults: DataQualityRuleResults } ///

Describes a data quality result.

structure DataQualityResultDescription { ///

The unique result ID for this data quality result.

ResultId: HashString ///

The table name associated with the data quality result.

DataSource: DataSource ///

The job name associated with the data quality result.

JobName: NameString ///

The job run ID associated with the data quality result.

JobRunId: HashString ///

The time that the run started for this data quality result.

StartedOn: Timestamp } ///

Criteria used to return data quality results.

structure DataQualityResultFilterCriteria { ///

Filter results by the specified data source. For example, retrieving all results for a Glue table.

DataSource: DataSource ///

Filter results by the specified job name.

JobName: NameString ///

Filter results by the specified job run ID.

JobRunId: HashString ///

Filter results by runs that started after this time.

StartedAfter: Timestamp ///

Filter results by runs that started before this time.

StartedBefore: Timestamp } ///

Describes the result of a data quality rule recommendation run.

structure DataQualityRuleRecommendationRunDescription { ///

The unique run identifier associated with this run.

RunId: HashString ///

The status for this run.

Status: TaskStatusType ///

The date and time when this run started.

StartedOn: Timestamp ///

The data source (Glue table) associated with the recommendation run.

DataSource: DataSource } ///

A filter for listing data quality recommendation runs.

structure DataQualityRuleRecommendationRunFilter { ///

Filter based on a specified data source (Glue table).

@required DataSource: DataSource ///

Filter based on time for results started before provided time.

StartedBefore: Timestamp ///

Filter based on time for results started after provided time.

StartedAfter: Timestamp } ///

Describes the result of the evaluation of a data quality rule.

structure DataQualityRuleResult { ///

The name of the data quality rule.

Name: NameString ///

A description of the data quality rule.

Description: DescriptionString ///

An evaluation message.

EvaluationMessage: DescriptionString ///

A pass or fail status for the rule.

Result: DataQualityRuleResultStatus ///

A map of metrics associated with the evaluation of the rule.

EvaluatedMetrics: EvaluatedMetricsMap } ///

Describes the result of a data quality ruleset evaluation run.

structure DataQualityRulesetEvaluationRunDescription { ///

The unique run identifier associated with this run.

RunId: HashString ///

The status for this run.

Status: TaskStatusType ///

The date and time when the run started.

StartedOn: Timestamp ///

The data source (a Glue table) associated with the run.

DataSource: DataSource } ///

The filter criteria.

structure DataQualityRulesetEvaluationRunFilter { ///

Filter based on a data source (a Glue table) associated with the run.

@required DataSource: DataSource ///

Filter results by runs that started before this time.

StartedBefore: Timestamp ///

Filter results by runs that started after this time.

StartedAfter: Timestamp } ///

The criteria used to filter data quality rulesets.

structure DataQualityRulesetFilterCriteria { ///

The name of the ruleset filter criteria.

Name: NameString ///

The description of the ruleset filter criteria.

Description: DescriptionString ///

Filter on rulesets created before this date.

CreatedBefore: Timestamp ///

Filter on rulesets created after this date.

CreatedAfter: Timestamp ///

Filter on rulesets last modified before this date.

LastModifiedBefore: Timestamp ///

Filter on rulesets last modified after this date.

LastModifiedAfter: Timestamp ///

The name and database name of the target table.

TargetTable: DataQualityTargetTable } ///

Describes a data quality ruleset returned by GetDataQualityRuleset.

structure DataQualityRulesetListDetails { ///

The name of the data quality ruleset.

Name: NameString ///

A description of the data quality ruleset.

Description: DescriptionString ///

The date and time the data quality ruleset was created.

CreatedOn: Timestamp ///

The date and time the data quality ruleset was last modified.

LastModifiedOn: Timestamp ///

An object representing a Glue table.

TargetTable: DataQualityTargetTable ///

When a ruleset was created from a recommendation run, this run ID is generated to link the two together.

RecommendationRunId: HashString ///

The number of rules in the ruleset.

RuleCount: NullableInteger } ///

An object representing a Glue table.

structure DataQualityTargetTable { ///

The name of the Glue table.

@required TableName: NameString ///

The name of the database where the Glue table exists.

@required DatabaseName: NameString ///

The catalog id where the Glue table exists.

CatalogId: NameString } ///

A data source (a Glue table) for which you want data quality results.

structure DataSource { ///

A Glue table.

@required GlueTable: GlueTable } ///

A structure representing the datatype of the value.

structure Datatype { ///

The datatype of the value.

@required Id: GenericLimitedString ///

A label assigned to the datatype.

@required Label: GenericLimitedString } ///

Defines column statistics supported for timestamp data columns.

structure DateColumnStatisticsData { ///

The lowest value in the column.

MinimumValue: Timestamp ///

The highest value in the column.

MaximumValue: Timestamp ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 ///

The number of distinct values in a column.

@required NumberOfDistinctValues: NonNegativeLong = 0 } ///

Defines column statistics supported for fixed-point number data columns.

structure DecimalColumnStatisticsData { ///

The lowest value in the column.

MinimumValue: DecimalNumber ///

The highest value in the column.

MaximumValue: DecimalNumber ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 ///

The number of distinct values in a column.

@required NumberOfDistinctValues: NonNegativeLong = 0 } ///

Contains a numeric value in decimal format.

structure DecimalNumber { ///

The unscaled numeric value.

@required UnscaledValue: Blob ///

The scale that determines where the decimal point falls in the /// unscaled value.
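
// Illustrative example (not part of the published model): the decimal 123.45 is carried
// as the unscaled integer 12345 (serialized into the UnscaledValue blob) together with
// Scale = 2, i.e. value = UnscaledValue * 10^(-Scale).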

@required Scale: Integer = 0 } @input structure DeleteBlueprintRequest { ///

The name of the blueprint to delete.

@required Name: NameString } @output structure DeleteBlueprintResponse { ///

Returns the name of the blueprint that was deleted.

Name: NameString } @input structure DeleteClassifierRequest { ///

Name of the classifier to remove.

@required Name: NameString } @output structure DeleteClassifierResponse {} @input structure DeleteColumnStatisticsForPartitionRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

A list of partition values identifying the partition.

@required PartitionValues: ValueStringList ///

Name of the column.

@required ColumnName: NameString } @output structure DeleteColumnStatisticsForPartitionResponse {} @input structure DeleteColumnStatisticsForTableRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

The name of the column.

@required ColumnName: NameString } @output structure DeleteColumnStatisticsForTableResponse {} @input structure DeleteConnectionRequest { ///

The ID of the Data Catalog in which the connection resides. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The name of the connection to delete.

@required ConnectionName: NameString } @output structure DeleteConnectionResponse {} @input structure DeleteCrawlerRequest { ///

The name of the crawler to remove.

@required Name: NameString } @output structure DeleteCrawlerResponse {} @input structure DeleteCustomEntityTypeRequest { ///

The name of the custom pattern that you want to delete.

@required Name: NameString } @output structure DeleteCustomEntityTypeResponse { ///

The name of the custom pattern you deleted.

Name: NameString } @input structure DeleteDatabaseRequest { ///

The ID of the Data Catalog in which the database resides. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The name of the database to delete. For Hive compatibility, this must be all /// lowercase.

@required Name: NameString } @output structure DeleteDatabaseResponse {} @input structure DeleteDataQualityRulesetRequest { ///

A name for the data quality ruleset.

@required Name: NameString } @output structure DeleteDataQualityRulesetResponse {} @input structure DeleteDevEndpointRequest { ///

The name of the DevEndpoint.

@required EndpointName: GenericString } @output structure DeleteDevEndpointResponse {} @input structure DeleteJobRequest { ///

The name of the job definition to delete.

@required JobName: NameString } @output structure DeleteJobResponse { ///

The name of the job definition that was deleted.

JobName: NameString } @input structure DeleteMLTransformRequest { ///

The unique identifier of the transform to delete.

@required TransformId: HashString } @output structure DeleteMLTransformResponse { ///

The unique identifier of the transform that was deleted.

TransformId: HashString } @input structure DeletePartitionIndexRequest { ///

The catalog ID where the table resides.

CatalogId: CatalogIdString ///

Specifies the name of a database from which you want to delete a partition index.

@required DatabaseName: NameString ///

Specifies the name of a table from which you want to delete a partition index.

@required TableName: NameString ///

The name of the partition index to be deleted.

@required IndexName: NameString } @output structure DeletePartitionIndexResponse {} @input structure DeletePartitionRequest { ///

The ID of the Data Catalog where the partition to be deleted resides. If none is provided, /// the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which the table in question /// resides.

@required DatabaseName: NameString ///

The name of the table that contains the partition to be deleted.

@required TableName: NameString ///

The values that define the partition.

@required PartitionValues: ValueStringList } @output structure DeletePartitionResponse {} @output structure DeleteRegistryResponse { ///

The name of the registry being deleted.

RegistryName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the registry being deleted.

RegistryArn: GlueResourceArn ///

The status of the registry. A successful operation will return the Deleting status.

Status: RegistryStatus } @input structure DeleteResourcePolicyRequest { ///

The hash value returned when this policy was set.

PolicyHashCondition: HashString ///

The ARN of the Glue resource for the resource policy to be deleted.

ResourceArn: GlueResourceArn } @output structure DeleteResourcePolicyResponse {} @output structure DeleteSchemaResponse { ///

The Amazon Resource Name (ARN) of the schema being deleted.

SchemaArn: GlueResourceArn ///

The name of the schema being deleted.

SchemaName: SchemaRegistryNameString ///

The status of the schema.

Status: SchemaStatus } @output structure DeleteSchemaVersionsResponse { ///

A list of SchemaVersionErrorItem objects, each containing an error and schema version.

SchemaVersionErrors: SchemaVersionErrorList } @input structure DeleteSecurityConfigurationRequest { ///

The name of the security configuration to delete.

@required Name: NameString } @output structure DeleteSecurityConfigurationResponse {} @input structure DeleteSessionRequest { ///

The ID of the session to be deleted.

@required Id: NameString ///

The name of the origin of the delete session request.

RequestOrigin: OrchestrationNameString } @output structure DeleteSessionResponse { ///

Returns the ID of the deleted session.

Id: NameString } @input structure DeleteTableRequest { ///

The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which the table resides. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The name of the table to be deleted. For Hive /// compatibility, this name is entirely lowercase.

@required Name: NameString ///

The transaction ID at which to delete the table contents.

TransactionId: TransactionIdString } @output structure DeleteTableResponse {} @input structure DeleteTableVersionRequest { ///
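As an illustrative aside (not part of the Smithy model itself), a minimal boto3 sketch of a DeleteTable call matching the request shape above; the database and table names are placeholders, and CatalogId is omitted so the Amazon Web Services account ID is used by default.

    import boto3

    glue = boto3.client("glue")

    # For Hive compatibility, both names are expected to be all lowercase.
    glue.delete_table(
        DatabaseName="sales_db",
        Name="orders",
    )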

The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The database in the catalog in which the table resides. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The name of the table. For Hive compatibility, /// this name is entirely lowercase.

@required TableName: NameString ///

The ID of the table version to be deleted. A VersionID is a string representation of an integer. Each version is incremented by 1.

@required VersionId: VersionString } @output structure DeleteTableVersionResponse {} @input structure DeleteTriggerRequest { ///

The name of the trigger to delete.

@required Name: NameString } @output structure DeleteTriggerResponse { ///

The name of the trigger that was deleted.

Name: NameString } @input structure DeleteUserDefinedFunctionRequest { ///

The ID of the Data Catalog where the function to be deleted is /// located. If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the function is located.

@required DatabaseName: NameString ///

The name of the function definition to be deleted.

@required FunctionName: NameString } @output structure DeleteUserDefinedFunctionResponse {} @input structure DeleteWorkflowRequest { ///

Name of the workflow to be deleted.

@required Name: NameString } @output structure DeleteWorkflowResponse { ///

Name of the workflow specified in input.

Name: NameString } ///

Specifies a Delta data store to crawl one or more Delta tables.

structure DeltaTarget { ///

A list of the Amazon S3 paths to the Delta tables.

DeltaTables: PathList ///

The name of the connection to use to connect to the Delta table target.

ConnectionName: ConnectionName ///

Specifies whether to write the manifest files to the Delta table path.

WriteManifest: NullableBoolean ///

Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.

CreateNativeDeltaTable: NullableBoolean } ///
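A hedged sketch of how a DeltaTarget like the one above might be supplied to CreateCrawler through boto3; the crawler name, role, database, and S3 paths are illustrative placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_crawler(
        Name="delta-lake-crawler",
        Role="arn:aws:iam::123456789012:role/GlueCrawlerRole",
        DatabaseName="delta_db",
        Targets={
            "DeltaTargets": [
                {
                    # One or more Amazon S3 paths to Delta tables.
                    "DeltaTables": ["s3://example-bucket/delta/events/"],
                    # Create native tables so engines that read the Delta
                    # transaction log directly can query them.
                    "CreateNativeDeltaTable": True,
                    "WriteManifest": False,
                }
            ]
        },
    )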

A development endpoint where a developer can remotely debug extract, transform, and load /// (ETL) scripts.

structure DevEndpoint { ///

The name of the DevEndpoint.

EndpointName: GenericString ///

The Amazon Resource Name (ARN) of the IAM role used in this /// DevEndpoint.

RoleArn: RoleArn ///

A list of security group identifiers used in this DevEndpoint.

SecurityGroupIds: StringList ///

The subnet ID for this DevEndpoint.

SubnetId: GenericString ///

The YARN endpoint address used by this DevEndpoint.

YarnEndpointAddress: GenericString ///

A private IP address to access the DevEndpoint within a VPC if the /// DevEndpoint is created within one. The PrivateAddress field is /// present only when you create the DevEndpoint within your VPC.

PrivateAddress: GenericString ///

The Apache Zeppelin port for the remote Apache Spark interpreter.

ZeppelinRemoteSparkInterpreterPort: IntegerValue = 0 ///

The public IP address used by this DevEndpoint. The /// PublicAddress field is present only when you create a non-virtual private cloud /// (VPC) DevEndpoint.

PublicAddress: GenericString ///

The current status of this DevEndpoint.

Status: GenericString ///

The type of predefined worker that is allocated to the development endpoint. Accepts a value of Standard, G.1X, or G.2X.

  • For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
  • For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.
  • For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.

Known issue: when a development endpoint is created with the G.2X /// WorkerType configuration, the Spark drivers for the development endpoint will run on 4 vCPU, 16 GB of memory, and a 64 GB disk.

WorkerType: WorkerType ///

Glue version determines the versions of Apache Spark and Python that Glue supports. The Python version indicates the version supported for running your ETL scripts on development endpoints.

///

For more information about the available Glue versions and corresponding Spark and Python versions, see Glue version in the developer guide.

///

Development endpoints that are created without specifying a Glue version default to Glue 0.9.

///

You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.

GlueVersion: GlueVersionString ///

The number of workers of a defined workerType that are allocated to the development endpoint.

///

The maximum number of workers you can define is 299 for G.1X and 149 for G.2X.

NumberOfWorkers: NullableInteger ///

The number of Glue Data Processing Units (DPUs) allocated to this /// DevEndpoint.

NumberOfNodes: IntegerValue = 0 ///

The Amazon Web Services Availability Zone where this DevEndpoint is located.

AvailabilityZone: GenericString ///

The ID of the virtual private cloud (VPC) used by this DevEndpoint.

VpcId: GenericString ///

The paths to one or more Python libraries in an Amazon S3 bucket that should be loaded in /// your DevEndpoint. Multiple values must be complete paths separated by a /// comma.

/// ///

You can only use pure Python libraries with a DevEndpoint. Libraries that rely on /// C extensions, such as the pandas Python data /// analysis library, are not currently supported.

///
ExtraPythonLibsS3Path: GenericString ///

The path to one or more Java .jar files in an S3 bucket that should be loaded /// in your DevEndpoint.

/// ///

You can only use pure Java/Scala libraries with a DevEndpoint.

///
ExtraJarsS3Path: GenericString ///

The reason for a current failure in this DevEndpoint.

FailureReason: GenericString ///

The status of the last update.

LastUpdateStatus: GenericString ///

The point in time at which this DevEndpoint was created.

CreatedTimestamp: TimestampValue ///

The point in time at which this DevEndpoint was last modified.

LastModifiedTimestamp: TimestampValue ///

The public key to be used by this DevEndpoint for authentication. This /// attribute is provided for backward compatibility because the recommended attribute to use is /// public keys.

PublicKey: GenericString ///

A list of public keys to be used by the DevEndpoints for authentication. /// Using this attribute is preferred over a single public key because the public keys allow you /// to have a different private key per client.

/// ///

If you previously created an endpoint with a public key, you must remove that key to be /// able to set a list of public keys. Call the UpdateDevEndpoint API operation /// with the public key content in the deletePublicKeys attribute, and the list of /// new keys in the addPublicKeys attribute.

///
PublicKeys: PublicKeysList ///
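As an illustrative aside, a minimal boto3 sketch of the key rotation described above: remove the previously configured single public key and register a list of per-client keys with UpdateDevEndpoint. The endpoint name and key material are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.update_dev_endpoint(
        EndpointName="my-dev-endpoint",
        # Remove the existing single public key ...
        DeletePublicKeys=["ssh-rsa AAAA...old"],
        # ... and switch to a list of public keys, one per client.
        AddPublicKeys=["ssh-rsa AAAA...alice", "ssh-rsa AAAA...bob"],
    )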

The name of the SecurityConfiguration structure to be used with this /// DevEndpoint.

SecurityConfiguration: NameString ///

A map of arguments used to configure the DevEndpoint.

///

Valid arguments are:

  • "--enable-glue-datacatalog": ""

You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.

Arguments: MapValue } ///
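A hedged boto3 sketch of creating a development endpoint that passes the "--enable-glue-datacatalog" argument listed above and requests Python 3 instead of the Python 2 default; the "GLUE_PYTHON_VERSION" key, endpoint name, and role are assumptions for illustration.

    import boto3

    glue = boto3.client("glue")

    glue.create_dev_endpoint(
        EndpointName="etl-debug-endpoint",
        RoleArn="arn:aws:iam::123456789012:role/GlueDevEndpointRole",
        GlueVersion="1.0",
        WorkerType="G.1X",
        NumberOfWorkers=5,
        Arguments={
            "--enable-glue-datacatalog": "",
            "GLUE_PYTHON_VERSION": "3",
        },
    )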

Custom libraries to be loaded into a development endpoint.

structure DevEndpointCustomLibraries { ///

The paths to one or more Python libraries in an Amazon Simple Storage Service (Amazon S3) /// bucket that should be loaded in your DevEndpoint. Multiple values must be /// complete paths separated by a comma.

/// ///

You can only use pure Python libraries with a DevEndpoint. Libraries that rely on /// C extensions, such as the pandas Python data /// analysis library, are not currently supported.

///
ExtraPythonLibsS3Path: GenericString ///

The path to one or more Java .jar files in an S3 bucket that should be loaded /// in your DevEndpoint.

/// ///

You can only use pure Java/Scala libraries with a DevEndpoint.

///
ExtraJarsS3Path: GenericString } ///

Specifies the direct JDBC source connection.

structure DirectJDBCSource { ///

The name of the JDBC source connection.

@required Name: NodeName ///

The database of the JDBC source connection.

@required Database: EnclosedInStringProperty ///

The table of the JDBC source connection.

@required Table: EnclosedInStringProperty ///

The connection name of the JDBC source.

@required ConnectionName: EnclosedInStringProperty ///

The connection type of the JDBC source.

@required ConnectionType: JDBCConnectionType ///

The temp directory of the JDBC Redshift source.

RedshiftTmpDir: EnclosedInStringProperty } ///

Specifies an Apache Kafka data store.

structure DirectKafkaSource { ///

The name of the data store.

@required Name: NodeName ///

Specifies the streaming options.

StreamingOptions: KafkaStreamingSourceOptions ///

The amount of time to spend processing each micro batch.

WindowSize: BoxedPositiveInt ///

Whether to automatically determine the schema from the incoming data.

DetectSchema: BoxedBoolean ///

Specifies options related to data preview for viewing a sample of your data.

DataPreviewOptions: StreamingDataPreviewOptions } ///

Specifies a direct Amazon Kinesis data source.

structure DirectKinesisSource { ///

The name of the data source.

@required Name: NodeName ///

The amount of time to spend processing each micro batch.

WindowSize: BoxedPositiveInt ///

Whether to automatically determine the schema from the incoming data.

DetectSchema: BoxedBoolean ///

Additional options for the Kinesis streaming data source.

StreamingOptions: KinesisStreamingSourceOptions ///

Additional options for data preview.

DataPreviewOptions: StreamingDataPreviewOptions } ///

A policy that specifies update behavior for the crawler.

structure DirectSchemaChangePolicy { ///

Whether to use the specified update behavior when the crawler finds a changed schema.

EnableUpdateCatalog: BoxedBoolean ///

The update behavior when the crawler finds a changed schema.

UpdateBehavior: UpdateCatalogBehavior ///

Specifies the table in the database that the schema change policy applies to.

Table: EnclosedInStringProperty ///

Specifies the database that the schema change policy applies to.

Database: EnclosedInStringProperty } ///

Defines column statistics supported for floating-point number data columns.

structure DoubleColumnStatisticsData { ///

The lowest value in the column.

MinimumValue: Double = 0 ///

The highest value in the column.

MaximumValue: Double = 0 ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 ///

The number of distinct values in a column.

@required NumberOfDistinctValues: NonNegativeLong = 0 } ///

Options to configure how your data quality evaluation results are published.

structure DQResultsPublishingOptions { ///

The context of the evaluation.

EvaluationContext: GenericLimitedString ///

The Amazon S3 prefix prepended to the results.

ResultsS3Prefix: EnclosedInStringProperty ///

Enable metrics for your data quality results.

CloudWatchMetricsEnabled: BoxedBoolean ///

Enable publishing for your data quality results.

ResultsPublishingEnabled: BoxedBoolean } ///

Options to configure how your job will stop if your data quality evaluation fails.

structure DQStopJobOnFailureOptions { ///

When to stop the job if your data quality evaluation fails. Options are Immediate or AfterDataLoad.

StopJobOnFailureTiming: DQStopJobOnFailureTiming } ///

Specifies a transform that removes rows of repeating data from a data set.

structure DropDuplicates { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

The name of the columns to be merged or removed if repeating.

Columns: LimitedPathList } ///

Specifies a transform that chooses the data property keys that you want to drop.

structure DropFields { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A JSON path to a variable in the data structure.

@required Paths: GlueStudioPathList } ///

Specifies a transform that removes columns from the dataset if all values in the column are 'null'. By default, Glue Studio will recognize null objects, but some values such as empty strings, strings that are "null", -1 integers or other placeholders such as zeros, are not automatically recognized as nulls.

structure DropNullFields { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A structure that represents whether certain values are recognized as null values for removal.

NullCheckBoxList: NullCheckBoxList ///

A structure that specifies a list of NullValueField structures that represent a custom null value such as zero or other value being used as a null placeholder unique to the dataset.

///

The DropNullFields transform removes custom null values only if both the value of the null placeholder and the datatype match the data.

NullTextList: NullValueFields } ///

Specifies the set of parameters needed to perform the dynamic transform.

structure DynamicTransform { ///

Specifies the name of the dynamic transform.

@required Name: EnclosedInStringProperty ///

Specifies the name of the dynamic transform as it appears in the Glue Studio visual editor.

@required TransformName: EnclosedInStringProperty ///

Specifies the inputs for the dynamic transform that are required.

@required Inputs: OneInput ///

Specifies the parameters of the dynamic transform.

Parameters: TransformConfigParameterList ///

Specifies the name of the function of the dynamic transform.

@required FunctionName: EnclosedInStringProperty ///

Specifies the path of the dynamic transform source and config files.

@required Path: EnclosedInStringProperty ///

This field is not used and will be deprecated in a future release.

Version: EnclosedInStringProperty ///

Specifies the data schema for the dynamic transform.

OutputSchemas: GlueSchemas } ///

Specifies a DynamoDB data source in the Glue Data Catalog.

structure DynamoDBCatalogSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } ///

Specifies an Amazon DynamoDB table to crawl.

structure DynamoDBTarget { ///

The name of the DynamoDB table to crawl.

Path: Path ///

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table.

///

A value of true means to scan all records, while a value of false means to sample the records. If no value is specified, the value defaults to true.

scanAll: NullableBoolean ///

The percentage of the configured read capacity units for the Glue crawler to use. Read capacity units is a term defined by DynamoDB; it is a numeric value that acts as a rate limiter for the number of reads that can be performed on that table per second.

///

The valid values are null or a value between 0.1 and 1.5. A null value is used when the user does not provide a value, and defaults to 0.5 of the configured Read Capacity Unit (for provisioned tables), or 0.25 of the max configured Read Capacity Unit (for tables using on-demand mode).

scanRate: NullableDouble } ///
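A sketch, assuming boto3, of a crawler that samples a DynamoDB table rather than scanning it fully and that uses roughly half of the configured read capacity; the crawler, role, database, and table names are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_crawler(
        Name="ddb-orders-crawler",
        Role="arn:aws:iam::123456789012:role/GlueCrawlerRole",
        DatabaseName="ddb_catalog",
        Targets={
            "DynamoDBTargets": [
                {
                    "Path": "orders",   # name of the DynamoDB table to crawl
                    "scanAll": False,   # sample records instead of a full scan
                    "scanRate": 0.5,    # fraction of read capacity units to use
                }
            ]
        },
    )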

An edge represents a directed connection between two Glue components that are part of the workflow the /// edge belongs to.

structure Edge { ///

The unique ID of the node within the workflow where the edge starts.

SourceId: NameString ///

The unique ID of the node within the workflow where the edge ends.

DestinationId: NameString } ///

Specifies the encryption-at-rest configuration for the Data Catalog.

structure EncryptionAtRest { ///

The encryption-at-rest mode for encrypting Data Catalog data.

@required CatalogEncryptionMode: CatalogEncryptionMode ///

The ID of the KMS key to use for encryption at rest.

SseAwsKmsKeyId: NameString } ///

Specifies an encryption configuration.

structure EncryptionConfiguration { ///

The encryption configuration for Amazon Simple Storage Service (Amazon S3) data.

S3Encryption: S3EncryptionList ///

The encryption configuration for Amazon CloudWatch.

CloudWatchEncryption: CloudWatchEncryption ///

The encryption configuration for job bookmarks.

JobBookmarksEncryption: JobBookmarksEncryption } ///

A specified entity does not exist.

@error("client") structure EntityNotFoundException { ///

A message describing the problem.

Message: MessageString ///

Indicates whether or not the exception relates to a federated source.

FromFederationSource: NullableBoolean } ///

Contains details about an error.

structure ErrorDetail { ///

The code associated with this error.

ErrorCode: NameString ///

A message describing the error.

ErrorMessage: DescriptionString } ///

An object containing error details.

structure ErrorDetails { ///

The error code for an error.

ErrorCode: ErrorCodeString ///

The error message for an error.

ErrorMessage: ErrorMessageString } ///

Specifies your data quality evaluation criteria.

structure EvaluateDataQuality { ///

The name of the data quality evaluation.

@required Name: NodeName ///

The inputs of your data quality evaluation.

@required Inputs: OneInput ///

The ruleset for your data quality evaluation.

@required Ruleset: DQDLString ///

The output of your data quality evaluation.

Output: DQTransformOutput ///

Options to configure how your results are published.

PublishingOptions: DQResultsPublishingOptions ///

Options to configure how your job will stop if your data quality evaluation fails.

StopJobOnFailureOptions: DQStopJobOnFailureOptions } ///

Specifies your data quality evaluation criteria.

structure EvaluateDataQualityMultiFrame { ///

The name of the data quality evaluation.

@required Name: NodeName ///

The inputs of your data quality evaluation. The first input in this list is the primary data source.

@required Inputs: ManyInputs ///

The aliases of all data sources except primary.

AdditionalDataSources: DQDLAliases ///

The ruleset for your data quality evaluation.

@required Ruleset: DQDLString ///

Options to configure how your results are published.

PublishingOptions: DQResultsPublishingOptions ///

Options to configure runtime behavior of the transform.

AdditionalOptions: DQAdditionalOptions ///

Options to configure how your job will stop if your data quality evaluation fails.

StopJobOnFailureOptions: DQStopJobOnFailureOptions } ///

Evaluation metrics provide an estimate of the quality of your machine learning transform.

structure EvaluationMetrics { ///

The type of machine learning transform.

@required TransformType: TransformType ///

The evaluation metrics for the find matches algorithm.

FindMatchesMetrics: FindMatchesMetrics } ///

Batch condition that must be met (specified number of events received or batch time window expired) /// before the EventBridge event trigger fires.

structure EventBatchingCondition { ///

Number of events that must be received from Amazon EventBridge before EventBridge event trigger fires.

@required BatchSize: BatchSize = 0 ///

Window of time in seconds after which EventBridge event trigger fires. Window starts when first event is received.

BatchWindow: BatchWindow } ///
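A hedged boto3 sketch of an EventBridge-driven trigger using the batch condition above: the trigger fires after 10 events arrive, or 900 seconds after the first event, whichever comes first. The trigger, workflow, and job names are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_trigger(
        Name="orders-event-trigger",
        WorkflowName="orders-workflow",
        Type="EVENT",
        Actions=[{"JobName": "process-orders"}],
        EventBatchingCondition={
            "BatchSize": 10,     # events required before the trigger fires
            "BatchWindow": 900,  # seconds after the first event before firing anyway
        },
    )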

An execution property of a job.

structure ExecutionProperty { ///

The maximum number of concurrent runs allowed for the job. /// The default is 1. An error is returned when this threshold is reached. /// The maximum value you can specify is controlled by a service limit.

MaxConcurrentRuns: MaxConcurrentRuns = 0 } ///
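A minimal sketch (boto3 assumed) of setting the concurrency limit described above when a job is created; the job name, role, and script location are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_job(
        Name="nightly-etl",
        Role="arn:aws:iam::123456789012:role/GlueJobRole",
        Command={
            "Name": "glueetl",
            "ScriptLocation": "s3://example-bucket/scripts/nightly_etl.py",
        },
        # Allow up to 3 concurrent runs; starting a 4th while 3 are running
        # returns an error.
        ExecutionProperty={"MaxConcurrentRuns": 3},
    )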

Specifies configuration properties for an exporting labels task run.

structure ExportLabelsTaskRunProperties { ///

The Amazon Simple Storage Service (Amazon S3) path where you will export the /// labels.

OutputS3Path: UriString } ///

A database that points to an entity outside the Glue Data Catalog.

structure FederatedDatabase { ///

A unique identifier for the federated database.

Identifier: FederationIdentifier ///

The name of the connection to the external metastore.

ConnectionName: NameString } ///

A federated resource already exists.

@error("client") structure FederatedResourceAlreadyExistsException { ///

The message describing the problem.

Message: MessageString ///

The associated Glue resource already exists.

AssociatedGlueResource: GlueResourceArn } ///

A table that points to an entity outside the Glue Data Catalog.

structure FederatedTable { ///

A unique identifier for the federated table.

Identifier: FederationIdentifier ///

A unique identifier for the federated database.

DatabaseIdentifier: FederationIdentifier ///

The name of the connection to the external metastore.

ConnectionName: NameString } ///

A federation source failed.

@error("client") structure FederationSourceException { ///

The error code of the problem.

FederationSourceErrorCode: FederationSourceErrorCode ///

The message describing the problem.

Message: MessageString } @error("client") structure FederationSourceRetryableException { Message: MessageString } ///

Specifies a transform that locates records in the dataset that have missing values and adds a new field with a value determined by imputation. The input data set is used to train the machine learning model that determines what the missing value should be.

structure FillMissingValues { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A JSON path to a variable in the data structure for the dataset that is imputed.

@required ImputedPath: EnclosedInStringProperty ///

A JSON path to a variable in the data structure for the dataset that is filled.

FilledPath: EnclosedInStringProperty } ///

Specifies a transform that splits a dataset into two, based on a filter condition.

structure Filter { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

The operator used to filter rows by comparing the key value to a specified value.

@required LogicalOperator: FilterLogicalOperator ///

Specifies a filter expression.

@required Filters: FilterExpressions } ///

Specifies a filter expression.

structure FilterExpression { ///

The type of operation to perform in the expression.

@required Operation: FilterOperation ///

Whether the expression is to be negated.

Negated: BoxedBoolean ///

A list of filter values.

@required Values: FilterValues } ///

Represents a single entry in the list of values for a FilterExpression.

structure FilterValue { ///

The type of filter value.

@required Type: FilterValueType ///

The value to be associated.

@required Value: EnclosedInStringProperties } ///

The evaluation metrics for the find matches algorithm. The quality of your machine /// learning transform is measured by getting your transform to predict some matches and comparing /// the results to known matches from the same dataset. The quality metrics are based on a subset /// of your data, so they are not precise.

structure FindMatchesMetrics { ///

The area under the precision/recall curve (AUPRC) is a single number measuring the overall /// quality of the transform, that is independent of the choice made for precision vs. recall. /// Higher values indicate that you have a more attractive precision vs. recall tradeoff.

///

For more information, see Precision and recall in Wikipedia.

AreaUnderPRCurve: GenericBoundedDouble ///

The precision metric indicates how often your transform is correct when it predicts a match. Specifically, it measures how well the transform finds true positives from the total true positives possible.

///

For more information, see Precision and recall in Wikipedia.

Precision: GenericBoundedDouble ///

The recall metric indicates that for an actual match, how often your transform predicts /// the match. Specifically, it measures how well the transform finds true positives from the /// total records in the source data.

///

For more information, see Precision and recall in Wikipedia.

Recall: GenericBoundedDouble ///

The maximum F1 metric indicates the transform's accuracy between 0 and 1, where 1 is the best accuracy.

///

For more information, see F1 score in Wikipedia.

F1: GenericBoundedDouble ///

The confusion matrix shows you what your transform is predicting accurately and what types of errors it is making.

///

For more information, see Confusion matrix in Wikipedia.

ConfusionMatrix: ConfusionMatrix ///

A list of ColumnImportance structures containing column importance metrics, sorted in order of descending importance.

ColumnImportances: ColumnImportanceList } ///

The parameters to configure the find matches transform.

structure FindMatchesParameters { ///

The name of a column that uniquely identifies rows in the source table. Used to help identify matching records.

PrimaryKeyColumnName: ColumnNameString ///

The value selected when tuning your transform for a balance between precision and recall. /// A value of 0.5 means no preference; a value of 1.0 means a bias purely for precision, and a /// value of 0.0 means a bias for recall. Because this is a tradeoff, choosing values close to 1.0 /// means very low recall, and choosing values close to 0.0 results in very low precision.

///

The precision metric indicates how often your model is correct when it predicts a match.

///

The recall metric indicates that for an actual match, how often your model predicts the /// match.

PrecisionRecallTradeoff: GenericBoundedDouble ///

The value that is selected when tuning your transform for a balance between accuracy and /// cost. A value of 0.5 means that the system balances accuracy and cost concerns. A value of 1.0 /// means a bias purely for accuracy, which typically results in a higher cost, sometimes /// substantially higher. A value of 0.0 means a bias purely for cost, which results in a less /// accurate FindMatches transform, sometimes with unacceptable accuracy.

///

Accuracy measures how well the transform finds true positives and true negatives. Increasing accuracy requires more machine resources and cost. But it also results in increased recall.

///

Cost measures how many compute resources, and thus money, are consumed to run the /// transform.

AccuracyCostTradeoff: GenericBoundedDouble ///

The value to switch on or off to force the output to match the provided labels from users. If the value is True, the find matches transform forces the output to match the provided labels. The results override the normal conflation results. If the value is False, the find matches transform does not ensure all the labels provided are respected, and the results rely on the trained model.

///

Note that setting this value to true may increase the conflation execution time.

EnforceProvidedLabels: NullableBoolean } ///
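A hedged sketch of how these parameters might be supplied when creating a FindMatches transform with boto3; the transform name, role, table, and column names are placeholders, and the tradeoff values merely illustrate a precision-leaning configuration.

    import boto3

    glue = boto3.client("glue")

    glue.create_ml_transform(
        Name="dedupe-customers",
        Role="arn:aws:iam::123456789012:role/GlueMLRole",
        GlueVersion="1.0",
        WorkerType="G.1X",
        NumberOfWorkers=10,
        InputRecordTables=[{"DatabaseName": "crm", "TableName": "customers"}],
        Parameters={
            "TransformType": "FIND_MATCHES",
            "FindMatchesParameters": {
                "PrimaryKeyColumnName": "customer_id",
                "PrecisionRecallTradeoff": 0.9,  # bias toward precision
                "AccuracyCostTradeoff": 0.5,     # balance accuracy and cost
                "EnforceProvidedLabels": False,
            },
        },
    )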

Specifies configuration properties for a Find Matches task run.

structure FindMatchesTaskRunProperties { ///

The job ID for the Find Matches task run.

JobId: HashString ///

The name assigned to the job for the Find Matches task run.

JobName: NameString ///

The job run ID for the Find Matches task run.

JobRunId: HashString } @input structure GetBlueprintRequest { ///

The name of the blueprint.

@required Name: NameString ///

Specifies whether or not to include the blueprint in the response.

IncludeBlueprint: NullableBoolean ///

Specifies whether or not to include the parameter specification.

IncludeParameterSpec: NullableBoolean } @output structure GetBlueprintResponse { ///

Returns a Blueprint object.

Blueprint: Blueprint } @input structure GetBlueprintRunRequest { ///

The name of the blueprint.

@required BlueprintName: OrchestrationNameString ///

The run ID for the blueprint run you want to retrieve.

@required RunId: IdString } @output structure GetBlueprintRunResponse { ///

Returns a BlueprintRun object.

BlueprintRun: BlueprintRun } @input structure GetBlueprintRunsRequest { ///

The name of the blueprint.

@required BlueprintName: NameString ///

A continuation token, if this is a continuation request.

NextToken: GenericString ///

The maximum size of a list to return.

MaxResults: PageSize } @output structure GetBlueprintRunsResponse { ///

Returns a list of BlueprintRun objects.

BlueprintRuns: BlueprintRuns ///

A continuation token, if not all blueprint runs have been returned.

NextToken: GenericString } @input structure GetCatalogImportStatusRequest { ///

The ID of the catalog to migrate. Currently, this should be the Amazon Web Services account ID.

CatalogId: CatalogIdString } @output structure GetCatalogImportStatusResponse { ///

The status of the specified catalog migration.

ImportStatus: CatalogImportStatus } @input structure GetClassifierRequest { ///

Name of the classifier to retrieve.

@required Name: NameString } @output structure GetClassifierResponse { ///

The requested classifier.

Classifier: Classifier } @input structure GetClassifiersRequest { ///

The size of the list to return (optional).

MaxResults: PageSize ///

An optional continuation token.

NextToken: Token } @output structure GetClassifiersResponse { ///

The requested list of classifier /// objects.

Classifiers: ClassifierList ///

A continuation token.

NextToken: Token } @input structure GetColumnStatisticsForPartitionRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

A list of partition values identifying the partition.

@required PartitionValues: ValueStringList ///

A list of the column names.

@required ColumnNames: GetColumnNamesList } @output structure GetColumnStatisticsForPartitionResponse { ///

List of ColumnStatistics that were retrieved.

ColumnStatisticsList: ColumnStatisticsList ///

Errors that occurred while retrieving column statistics data.

Errors: ColumnErrors } @input structure GetColumnStatisticsForTableRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

A list of the column names.

@required ColumnNames: GetColumnNamesList } @output structure GetColumnStatisticsForTableResponse { ///

List of ColumnStatistics.

ColumnStatisticsList: ColumnStatisticsList ///

List of ColumnStatistics that failed to be retrieved.

Errors: ColumnErrors } @input structure GetConnectionRequest { ///

The ID of the Data Catalog in which the connection resides. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The name of the connection definition to retrieve.

@required Name: NameString ///

Allows you to retrieve the connection metadata without returning the password. For /// instance, the Glue console uses this flag to retrieve the connection, and does not display /// the password. Set this parameter when the caller might not have permission to use the KMS /// key to decrypt the password, but it does have permission to access the rest of the connection /// properties.

HidePassword: Boolean = false } @output structure GetConnectionResponse { ///

The requested connection definition.

Connection: Connection } ///

Filters the connection definitions that are returned by the GetConnections /// API operation.

structure GetConnectionsFilter { ///

A criteria string that must match the criteria recorded in the /// connection definition for that connection definition to be returned.

MatchCriteria: MatchCriteria ///

The type of connections to return. Currently, SFTP is not supported.

ConnectionType: ConnectionType } @input structure GetConnectionsRequest { ///

The ID of the Data Catalog in which the connections reside. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

A filter that controls which connections are returned.

Filter: GetConnectionsFilter ///

Allows you to retrieve the connection metadata without returning the password. For /// instance, the Glue console uses this flag to retrieve the connection, and does not display /// the password. Set this parameter when the caller might not have permission to use the KMS /// key to decrypt the password, but it does have permission to access the rest of the connection /// properties.

HidePassword: Boolean = false ///

A continuation token, if this is a continuation call.

NextToken: Token ///

The maximum number of connections to return in one response.

MaxResults: PageSize } @output structure GetConnectionsResponse { ///

A list of requested connection definitions.

ConnectionList: ConnectionList ///

A continuation token, if the list of connections returned does not /// include the last of the filtered connections.

NextToken: Token } @input structure GetCrawlerMetricsRequest { ///
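As an illustrative aside, a boto3 sketch of listing JDBC connections without returning passwords, following the HidePassword behaviour described above and paging through results with NextToken.

    import boto3

    glue = boto3.client("glue")

    request_args = {
        "Filter": {"ConnectionType": "JDBC"},
        "HidePassword": True,   # omit the decrypted password from the response
    }

    connections = []
    response = glue.get_connections(**request_args)
    connections.extend(response.get("ConnectionList", []))
    while "NextToken" in response:
        response = glue.get_connections(NextToken=response["NextToken"], **request_args)
        connections.extend(response.get("ConnectionList", []))

    for conn in connections:
        print(conn["Name"], conn.get("ConnectionType"))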

A list of the names of crawlers about which to retrieve metrics.

CrawlerNameList: CrawlerNameList ///

The maximum size of a list to return.

MaxResults: PageSize ///

A continuation token, if this is a continuation call.

NextToken: Token } @output structure GetCrawlerMetricsResponse { ///

A list of metrics for the specified crawler.

CrawlerMetricsList: CrawlerMetricsList ///

A continuation token, if the returned list does not contain the /// last metric available.

NextToken: Token } @input structure GetCrawlerRequest { ///

The name of the crawler to retrieve metadata for.

@required Name: NameString } @output structure GetCrawlerResponse { ///

The metadata for the specified crawler.

Crawler: Crawler } @input structure GetCrawlersRequest { ///

The number of crawlers to return on each call.

MaxResults: PageSize ///

A continuation token, if this is a continuation request.

NextToken: Token } @output structure GetCrawlersResponse { ///

A list of crawler metadata.

Crawlers: CrawlerList ///

A continuation token, if the returned list has not reached the end /// of those defined in this customer account.

NextToken: Token } @input structure GetCustomEntityTypeRequest { ///

The name of the custom pattern that you want to retrieve.

@required Name: NameString } @output structure GetCustomEntityTypeResponse { ///

The name of the custom pattern that you retrieved.

Name: NameString ///

A regular expression string that is used for detecting sensitive data in a custom pattern.

RegexString: NameString ///

A list of context words if specified when you created the custom pattern. If none of these context words are found within the vicinity of the regular expression the data will not be detected as sensitive data.

ContextWords: ContextWords } @input structure GetDatabaseRequest { ///

The ID of the Data Catalog in which the database resides. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The name of the database to retrieve. For Hive compatibility, this /// should be all lowercase.

@required Name: NameString } @output structure GetDatabaseResponse { ///

The definition of the specified database in the Data Catalog.

Database: Database } @input structure GetDatabasesRequest { ///

The ID of the Data Catalog from which to retrieve Databases. If none is /// provided, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

A continuation token, if this is a continuation call.

NextToken: Token ///

The maximum number of databases to return in one response.

MaxResults: CatalogGetterPageSize ///

Allows you to specify that you want to list the databases shared with your account. The allowable values are FEDERATED, FOREIGN or ALL.

  • If set to FEDERATED, will list the federated databases (referencing an external entity) shared with your account.
  • If set to FOREIGN, will list the databases shared with your account.
  • If set to ALL, will list the databases shared with your account, as well as the databases in your local account.
ResourceShareType: ResourceShareType } @output structure GetDatabasesResponse { ///

A list of Database objects from the specified catalog.

@required DatabaseList: DatabaseList ///

A continuation token for paginating the returned list of databases, /// returned if the current segment of the list is not the last.

NextToken: Token } @input structure GetDataCatalogEncryptionSettingsRequest { ///
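A minimal pagination sketch (boto3 assumed) for GetDatabases that includes shared databases by setting ResourceShareType to ALL; the maximum page size here is illustrative.

    import boto3

    glue = boto3.client("glue")

    databases = []
    kwargs = {"ResourceShareType": "ALL", "MaxResults": 100}
    while True:
        response = glue.get_databases(**kwargs)
        databases.extend(response["DatabaseList"])
        token = response.get("NextToken")
        if not token:
            break
        kwargs["NextToken"] = token

    print([db["Name"] for db in databases])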

The ID of the Data Catalog to retrieve the security configuration for. If none is /// provided, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString } @output structure GetDataCatalogEncryptionSettingsResponse { ///

The requested security configuration.

DataCatalogEncryptionSettings: DataCatalogEncryptionSettings } @input structure GetDataflowGraphRequest { ///

The Python script to transform.

PythonScript: PythonScript } @output structure GetDataflowGraphResponse { ///

A list of the nodes in the resulting DAG.

DagNodes: DagNodes ///

A list of the edges in the resulting DAG.

DagEdges: DagEdges } @input structure GetDataQualityResultRequest { ///

A unique result ID for the data quality result.

@required ResultId: HashString } @output structure GetDataQualityResultResponse { ///

A unique result ID for the data quality result.

ResultId: HashString ///

An aggregate data quality score. Represents the ratio of rules that passed to the total number of rules.

Score: GenericBoundedDouble ///

The table associated with the data quality result, if any.

DataSource: DataSource ///

The name of the ruleset associated with the data quality result.

RulesetName: NameString ///

In the context of a job in Glue Studio, each node in the canvas is typically assigned some sort of name and data quality nodes will have names. In the case of multiple nodes, the evaluationContext can differentiate the nodes.

EvaluationContext: GenericString ///

The date and time when the run for this data quality result started.

StartedOn: Timestamp ///

The date and time when the run for this data quality result was completed.

CompletedOn: Timestamp ///

The job name associated with the data quality result, if any.

JobName: NameString ///

The job run ID associated with the data quality result, if any.

JobRunId: HashString ///

The unique run ID associated with the ruleset evaluation.

RulesetEvaluationRunId: HashString ///

A list of DataQualityRuleResult objects representing the results for each rule.

RuleResults: DataQualityRuleResults } @input structure GetDataQualityRuleRecommendationRunRequest { ///

The unique run identifier associated with this run.

@required RunId: HashString } @output structure GetDataQualityRuleRecommendationRunResponse { ///

The unique run identifier associated with this run.

RunId: HashString ///

The data source (a Glue table) associated with this run.

DataSource: DataSource ///

An IAM role supplied to encrypt the results of the run.

Role: RoleString ///

The number of G.1X workers to be used in the run. The default is 5.

NumberOfWorkers: NullableInteger ///

The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

The status for this run.

Status: TaskStatusType ///

The error strings that are associated with the run.

ErrorString: GenericString ///

The date and time when this run started.

StartedOn: Timestamp ///

A timestamp. The last point in time when this data quality rule recommendation run was modified.

LastModifiedOn: Timestamp ///

The date and time when this run was completed.

CompletedOn: Timestamp ///

The amount of time (in seconds) that the run consumed resources.

ExecutionTime: ExecutionTime = 0 ///

When a start rule recommendation run completes, it creates a recommended ruleset (a set of rules). This member has those rules in Data Quality Definition Language (DQDL) format.

RecommendedRuleset: DataQualityRulesetString ///

The name of the ruleset that was created by the run.

CreatedRulesetName: NameString } @input structure GetDataQualityRulesetEvaluationRunRequest { ///

The unique run identifier associated with this run.

@required RunId: HashString } @output structure GetDataQualityRulesetEvaluationRunResponse { ///

The unique run identifier associated with this run.

RunId: HashString ///

The data source (a Glue table) associated with this evaluation run.

DataSource: DataSource ///

An IAM role supplied to encrypt the results of the run.

Role: RoleString ///

The number of G.1X workers to be used in the run. The default is 5.

NumberOfWorkers: NullableInteger ///

The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

Additional run options you can specify for an evaluation run.

AdditionalRunOptions: DataQualityEvaluationRunAdditionalRunOptions ///

The status for this run.

Status: TaskStatusType ///

The error strings that are associated with the run.

ErrorString: GenericString ///

The date and time when this run started.

StartedOn: Timestamp ///

A timestamp. The last point in time when this data quality ruleset evaluation run was modified.

LastModifiedOn: Timestamp ///

The date and time when this run was completed.

CompletedOn: Timestamp ///

The amount of time (in seconds) that the run consumed resources.

ExecutionTime: ExecutionTime = 0 ///

A list of ruleset names for the run.

RulesetNames: RulesetNames ///

A list of result IDs for the data quality results for the run.

ResultIds: DataQualityResultIdList ///

A map of reference strings to additional data sources you can specify for an evaluation run.

AdditionalDataSources: DataSourceMap } @input structure GetDataQualityRulesetRequest { ///
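A hedged boto3 sketch of starting the ruleset evaluation run described above against a Glue table and polling it with GetDataQualityRulesetEvaluationRun; the database, table, role, and ruleset names are placeholders.

    import time
    import boto3

    glue = boto3.client("glue")

    run = glue.start_data_quality_ruleset_evaluation_run(
        DataSource={"GlueTable": {"DatabaseName": "sales_db", "TableName": "orders"}},
        Role="arn:aws:iam::123456789012:role/GlueDataQualityRole",
        RulesetNames=["orders-basic-checks"],
    )

    run_id = run["RunId"]
    while True:
        status = glue.get_data_quality_ruleset_evaluation_run(RunId=run_id)
        if status["Status"] in ("SUCCEEDED", "FAILED", "STOPPED", "TIMEOUT"):
            break
        time.sleep(30)   # poll until the run reaches a terminal status

    print(status["Status"], status.get("ResultIds", []))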

The name of the ruleset.

@required Name: NameString } @output structure GetDataQualityRulesetResponse { ///

The name of the ruleset.

Name: NameString ///

A description of the ruleset.

Description: DescriptionString ///

A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.

Ruleset: DataQualityRulesetString ///

The name and database name of the target table.

TargetTable: DataQualityTargetTable ///

A timestamp. The time and date that this data quality ruleset was created.

CreatedOn: Timestamp ///

A timestamp. The last point in time when this data quality ruleset was modified.

LastModifiedOn: Timestamp ///

When a ruleset was created from a recommendation run, this run ID is generated to link the two together.

RecommendationRunId: HashString } @input structure GetDevEndpointRequest { ///

Name of the DevEndpoint to retrieve information for.

@required EndpointName: GenericString } @output structure GetDevEndpointResponse { ///

A DevEndpoint definition.

DevEndpoint: DevEndpoint } @input structure GetDevEndpointsRequest { ///

The maximum size of information to return.

MaxResults: PageSize ///

A continuation token, if this is a continuation call.

NextToken: GenericString } @output structure GetDevEndpointsResponse { ///

A list of DevEndpoint definitions.

DevEndpoints: DevEndpointList ///

A continuation token, if not all DevEndpoint definitions have yet been /// returned.

NextToken: GenericString } @input structure GetJobBookmarkRequest { ///

The name of the job in question.

@required JobName: JobName ///

The unique run identifier associated with this job run.

RunId: RunId } @output structure GetJobBookmarkResponse { ///

A structure that defines a point that a job can resume processing.

JobBookmarkEntry: JobBookmarkEntry } @input structure GetJobRequest { ///

The name of the job definition to retrieve.

@required JobName: NameString } @output structure GetJobResponse { ///

The requested job definition.

Job: Job } @input structure GetJobRunRequest { ///

Name of the job definition being run.

@required JobName: NameString ///

The ID of the job run.

@required RunId: IdString ///

True if a list of predecessor runs should be returned.

PredecessorsIncluded: BooleanValue = false } @output structure GetJobRunResponse { ///

The requested job-run metadata.

JobRun: JobRun } @input structure GetJobRunsRequest { ///

The name of the job definition for which to retrieve all job runs.

@required JobName: NameString ///

A continuation token, if this is a continuation call.

NextToken: GenericString ///

The maximum size of the response.

MaxResults: PageSize } @output structure GetJobRunsResponse { ///

A list of job-run metadata objects.

JobRuns: JobRunList ///

A continuation token, if not all requested job runs have been returned.

NextToken: GenericString } @input structure GetJobsRequest { ///

A continuation token, if this is a continuation call.

NextToken: GenericString ///

The maximum size of the response.

MaxResults: PageSize } @output structure GetJobsResponse { ///

A list of job definitions.

Jobs: JobList ///

A continuation token, if not all job definitions have yet been returned.

NextToken: GenericString } @input structure GetMappingRequest { ///

Specifies the source table.

@required Source: CatalogEntry ///

A list of target tables.

Sinks: CatalogEntries ///

Parameters for the mapping.

Location: Location } @output structure GetMappingResponse { ///

A list of mappings to the specified targets.

@required Mapping: MappingList } @input structure GetMLTaskRunRequest { ///

The unique identifier of the machine learning transform.

@required TransformId: HashString ///

The unique identifier of the task run.

@required TaskRunId: HashString } @output structure GetMLTaskRunResponse { ///

The unique identifier of the task run.

TransformId: HashString ///

The unique run identifier associated with this run.

TaskRunId: HashString ///

The status for this task run.

Status: TaskStatusType ///

The names of the log groups that are associated with the task run.

LogGroupName: GenericString ///

The list of properties that are associated with the task run.

Properties: TaskRunProperties ///

The error strings that are associated with the task run.

ErrorString: GenericString ///

The date and time when this task run started.

StartedOn: Timestamp ///

The date and time when this task run was last modified.

LastModifiedOn: Timestamp ///

The date and time when this task run was completed.

CompletedOn: Timestamp ///

The amount of time (in seconds) that the task run consumed resources.

ExecutionTime: ExecutionTime = 0 } @input structure GetMLTaskRunsRequest { ///

The unique identifier of the machine learning transform.

@required TransformId: HashString ///

A token for pagination of the results. The default is empty.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize ///

The filter criteria, in the TaskRunFilterCriteria structure, for the task run.

Filter: TaskRunFilterCriteria ///

The sorting criteria, in the TaskRunSortCriteria structure, for the task run.

Sort: TaskRunSortCriteria } @output structure GetMLTaskRunsResponse { ///

A list of task runs that are associated with the transform.

TaskRuns: TaskRunList ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure GetMLTransformRequest { ///

The unique identifier of the transform, generated at the time that the transform was /// created.

@required TransformId: HashString } @output structure GetMLTransformResponse { ///

The unique identifier of the transform, generated at the time that the transform was /// created.

TransformId: HashString ///

The unique name given to the transform when it was created.

Name: NameString ///

A description of the transform.

Description: DescriptionString ///

The last known status of the transform (to indicate whether it can be used or not). One of "NOT_READY", "READY", or "DELETING".

Status: TransformStatusType ///

The date and time when the transform was created.

CreatedOn: Timestamp ///

The date and time when the transform was last modified.

LastModifiedOn: Timestamp ///

A list of Glue table definitions used by the transform.

InputRecordTables: GlueTables ///

The configuration parameters that are specific to the algorithm used.

Parameters: TransformParameters ///

The latest evaluation metrics.

EvaluationMetrics: EvaluationMetrics ///

The number of labels available for this transform.

LabelCount: LabelCount = 0 ///

The Map object that represents the schema that this /// transform accepts. Has an upper bound of 100 columns.

Schema: TransformSchema ///

The name or Amazon Resource Name (ARN) of the IAM role with the required /// permissions.

Role: RoleString ///

This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.

GlueVersion: GlueVersionString ///

The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of /// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more /// information, see the Glue pricing /// page.

///

When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.

MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when this task runs. Accepts a value of Standard, G.1X, or G.2X.

  • For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
  • For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
  • For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.
WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when this task runs.

NumberOfWorkers: NullableInteger ///

The timeout for a task run for this transform in minutes. This is the maximum time that a task run for this transform can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

The maximum number of times to retry a task for this transform after a task run fails.

MaxRetries: NullableInteger ///

The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.

TransformEncryption: TransformEncryption } @input structure GetMLTransformsRequest { ///

A paginated token to offset the results.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize ///

The filter transformation criteria.

Filter: TransformFilterCriteria ///

The sorting criteria.

Sort: TransformSortCriteria } @output structure GetMLTransformsResponse { ///

A list of machine learning transforms.

@required Transforms: TransformList ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure GetPartitionIndexesRequest { ///

The catalog ID where the table resides.

CatalogId: CatalogIdString ///

Specifies the name of a database from which you want to retrieve partition indexes.

@required DatabaseName: NameString ///

Specifies the name of a table for which you want to retrieve the partition indexes.

@required TableName: NameString ///

A continuation token, included if this is a continuation call.

NextToken: Token } @output structure GetPartitionIndexesResponse { ///

A list of index descriptors.

PartitionIndexDescriptorList: PartitionIndexDescriptorList ///

A continuation token, present if the current list segment is not the last.

NextToken: Token } @input structure GetPartitionRequest { ///

The ID of the Data Catalog where the partition in question resides. If none is provided, /// the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partition resides.

@required DatabaseName: NameString ///

The name of the partition's table.

@required TableName: NameString ///

The values that define the partition.

@required PartitionValues: ValueStringList } @output structure GetPartitionResponse { ///

The requested information, in the form of a Partition /// object.

Partition: Partition } @input structure GetPartitionsRequest { ///

The ID of the Data Catalog where the partitions in question reside. If none is provided, /// the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

An expression that filters the partitions to be returned.

///

The expression uses SQL syntax similar to the SQL WHERE filter clause. The /// SQL statement parser JSQLParser parses the expression.

///

/// Operators: The following are the operators that you can use in the /// Expression API call:

  • = : Checks whether the values of the two operands are equal; if yes, then the condition becomes true.
    Example: Assume 'variable a' holds 10 and 'variable b' holds 20. (a = b) is not true.
  • < > : Checks whether the values of two operands are equal; if the values are not equal, then the condition becomes true.
    Example: (a < > b) is true.
  • > : Checks whether the value of the left operand is greater than the value of the right operand; if yes, then the condition becomes true.
    Example: (a > b) is not true.
  • < : Checks whether the value of the left operand is less than the value of the right operand; if yes, then the condition becomes true.
    Example: (a < b) is true.
  • >= : Checks whether the value of the left operand is greater than or equal to the value of the right operand; if yes, then the condition becomes true.
    Example: (a >= b) is not true.
  • <= : Checks whether the value of the left operand is less than or equal to the value of the right operand; if yes, then the condition becomes true.
    Example: (a <= b) is true.
  • AND, OR, IN, BETWEEN, LIKE, NOT, IS NULL : Logical operators.
///

/// Supported Partition Key Types: The following are the supported /// partition keys.

    ///
  • string
  • date
  • timestamp
  • int
  • bigint
  • long
  • tinyint
  • smallint
  • decimal
    ///

If a type that is not valid is encountered, an exception is thrown.

///

The following list shows the valid operators on each type. When you define a crawler, the /// partitionKey type is created as a STRING, to be compatible with the catalog /// partitions.

///

/// Sample API Call:

Expression: PredicateString ///
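As an illustrative sample API call for the expression syntax above (not part of the model itself), a minimal boto3 sketch might look like the following; the database, table, and partition key names are placeholders.

    import boto3

    glue = boto3.client("glue")

    # Filter partitions with the SQL-like Expression syntax described above;
    # 'year' and 'month' are assumed string partition keys.
    resp = glue.get_partitions(
        DatabaseName="sales_db",
        TableName="orders",
        Expression="year = '2023' AND month >= '06'",
        MaxResults=100,
    )
    for partition in resp["Partitions"]:
        print(partition["Values"], partition.get("StorageDescriptor", {}).get("Location"))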

A continuation token, if this is not the first call to retrieve /// these partitions.

NextToken: Token ///

The segment of the table's partitions to scan in this request.

Segment: Segment ///

The maximum number of partitions to return in a single response.

MaxResults: PageSize ///

When true, specifies not returning the partition column schema. Useful when you are interested only in other partition attributes such as partition values or location. This approach avoids the problem of a large response by not returning duplicate data.

ExcludeColumnSchema: BooleanNullable ///

The transaction ID at which to read the partition contents.

TransactionId: TransactionIdString ///

The time as of when to read the partition contents. If not set, the most recent transaction commit time will be used. Cannot be specified along with TransactionId.

QueryAsOfTime: Timestamp } @output structure GetPartitionsResponse { ///

A list of requested partitions.

Partitions: PartitionList ///

A continuation token, if the returned list of partitions does not include the last /// one.

NextToken: Token } @input structure GetPlanRequest { ///

The list of mappings from a source table to target tables.

@required Mapping: MappingList ///

The source table.

@required Source: CatalogEntry ///

The target tables.

Sinks: CatalogEntries ///

The parameters for the mapping.

Location: Location ///

The programming language of the code to perform the mapping.

Language: Language ///

A map to hold additional optional key-value parameters.

///

Currently, these key-value pairs are supported:

///
    ///
  • ///

    /// inferSchema  —  Specifies whether to set inferSchema to true or false for the default script generated by a Glue job. For example, to set inferSchema to true, pass the following key-value pair:

    ///

    /// --additional-plan-options-map '{"inferSchema":"true"}' ///

    ///
  • ///
AdditionalPlanOptionsMap: AdditionalPlanOptionsMap } @output structure GetPlanResponse { ///
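For illustration only, a minimal boto3 sketch of passing the inferSchema key-value pair described above; the mapping, database, and table names are placeholders.

    import boto3

    glue = boto3.client("glue")

    resp = glue.get_plan(
        Mapping=[{
            "SourceTable": "src_orders", "SourcePath": "order_id", "SourceType": "string",
            "TargetTable": "dst_orders", "TargetPath": "order_id", "TargetType": "string",
        }],
        Source={"DatabaseName": "sales_db", "TableName": "src_orders"},
        Language="PYTHON",
        # Ask the generated script to infer the schema, as described above.
        AdditionalPlanOptionsMap={"inferSchema": "true"},
    )
    print(resp.get("PythonScript", ""))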

A Python script to perform the mapping.

PythonScript: PythonScript ///

The Scala code to perform the mapping.

ScalaCode: ScalaCode } @output structure GetRegistryResponse { ///

The name of the registry.

RegistryName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the registry.

RegistryArn: GlueResourceArn ///

A description of the registry.

Description: DescriptionString ///

The status of the registry.

Status: RegistryStatus ///

The date and time the registry was created.

CreatedTime: CreatedTimestamp ///

The date and time the registry was updated.

UpdatedTime: UpdatedTimestamp } @input structure GetResourcePoliciesRequest { ///

A continuation token, if this is a continuation request.

NextToken: Token ///

The maximum size of a list to return.

MaxResults: PageSize } @output structure GetResourcePoliciesResponse { ///

A list of the individual resource policies and the account-level resource policy.

GetResourcePoliciesResponseList: GetResourcePoliciesResponseList ///

A continuation token, if the returned list does not contain the last resource policy available.

NextToken: Token } @input structure GetResourcePolicyRequest { ///

The ARN of the Glue resource for which to retrieve the resource policy. If not /// supplied, the Data Catalog resource policy is returned. Use GetResourcePolicies /// to view all existing resource policies. For more information see Specifying Glue Resource ARNs. ///

ResourceArn: GlueResourceArn } @output structure GetResourcePolicyResponse { ///

Contains the requested policy document, in JSON format.

PolicyInJson: PolicyJsonString ///

Contains the hash value associated with this policy.

PolicyHash: HashString ///

The date and time at which the policy was created.

CreateTime: Timestamp ///

The date and time at which the policy was last updated.

UpdateTime: Timestamp } @output structure GetSchemaByDefinitionResponse { ///

The schema ID of the schema version.

SchemaVersionId: SchemaVersionIdString ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.

DataFormat: DataFormat ///

The status of the schema version.

Status: SchemaVersionStatus ///

The date and time the schema was created.

CreatedTime: CreatedTimestamp } @output structure GetSchemaResponse { ///

The name of the registry.

RegistryName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the registry.

RegistryArn: GlueResourceArn ///

The name of the schema.

SchemaName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

A description of the schema, if one was specified when the schema was created.

Description: DescriptionString ///

The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.

DataFormat: DataFormat ///

The compatibility mode of the schema.

Compatibility: Compatibility ///

The version number of the checkpoint (the last time the compatibility mode was changed).

SchemaCheckpoint: SchemaCheckpointNumber = 0 ///

The latest version of the schema associated with the returned schema definition.

LatestSchemaVersion: VersionLongNumber = 0 ///

The next version of the schema associated with the returned schema definition.

NextSchemaVersion: VersionLongNumber = 0 ///

The status of the schema.

SchemaStatus: SchemaStatus ///

The date and time the schema was created.

CreatedTime: CreatedTimestamp ///

The date and time the schema was updated.

UpdatedTime: UpdatedTimestamp } @output structure GetSchemaVersionResponse { ///

The SchemaVersionId of the schema version.

SchemaVersionId: SchemaVersionIdString ///

The schema definition for the schema ID.

SchemaDefinition: SchemaDefinitionString ///

The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.

DataFormat: DataFormat ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

The version number of the schema.

VersionNumber: VersionLongNumber = 0 ///

The status of the schema version.

Status: SchemaVersionStatus ///

The date and time the schema version was created.

CreatedTime: CreatedTimestamp } @output structure GetSchemaVersionsDiffResponse { ///

The difference between schemas as a string in JsonPatch format.

Diff: SchemaDefinitionDiff } @input structure GetSecurityConfigurationRequest { ///
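For illustration, a boto3 sketch that retrieves the JsonPatch diff between two registered schema versions; the registry and schema names are placeholders.

    import boto3

    glue = boto3.client("glue")

    resp = glue.get_schema_versions_diff(
        SchemaId={"RegistryName": "my-registry", "SchemaName": "my-schema"},
        FirstSchemaVersionNumber={"VersionNumber": 1},
        SecondSchemaVersionNumber={"VersionNumber": 2},
        SchemaDiffType="SYNTAX_DIFF",
    )
    print(resp["Diff"])  # a JsonPatch document as a string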

The name of the security configuration to retrieve.

@required Name: NameString } @output structure GetSecurityConfigurationResponse { ///

The requested security configuration.

SecurityConfiguration: SecurityConfiguration } @input structure GetSecurityConfigurationsRequest { ///

The maximum number of results to return.

MaxResults: PageSize ///

A continuation token, if this is a continuation call.

NextToken: GenericString } @output structure GetSecurityConfigurationsResponse { ///

A list of security configurations.

SecurityConfigurations: SecurityConfigurationList ///

A continuation token, if there are more security /// configurations to return.

NextToken: GenericString } @input structure GetSessionRequest { ///

The ID of the session.

@required Id: NameString ///

The origin of the request.

RequestOrigin: OrchestrationNameString } @output structure GetSessionResponse { ///

The session object is returned in the response.

Session: Session } @input structure GetStatementRequest { ///

The Session ID of the statement.

@required SessionId: NameString ///

The Id of the statement.

@required Id: IntegerValue = 0 ///

The origin of the request.

RequestOrigin: OrchestrationNameString } @output structure GetStatementResponse { ///

Returns the statement.

Statement: Statement } @input structure GetTableRequest { ///

The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The name of the database in the catalog in which the table resides. /// For Hive compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The name of the table for which to retrieve the definition. For Hive /// compatibility, this name is entirely lowercase.

@required Name: NameString ///

The transaction ID at which to read the table contents.

TransactionId: TransactionIdString ///

The time as of when to read the table contents. If not set, the most recent transaction commit time will be used. Cannot be specified along with TransactionId.

QueryAsOfTime: Timestamp } @output structure GetTableResponse { ///
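A boto3 sketch of a time-travel read of a table definition; since QueryAsOfTime and TransactionId are mutually exclusive, only the former is passed here, and the database and table names are placeholders.

    import boto3
    from datetime import datetime, timezone

    glue = boto3.client("glue")

    resp = glue.get_table(
        DatabaseName="sales_db",
        Name="orders",
        QueryAsOfTime=datetime(2023, 6, 1, tzinfo=timezone.utc),
    )
    print(resp["Table"]["Name"], resp["Table"].get("VersionId"))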

The Table object that defines the specified table.

Table: Table } @input structure GetTablesRequest { ///

The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The database in the catalog whose tables to list. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

A regular expression pattern. If present, only those tables /// whose names match the pattern are returned.

Expression: FilterString ///

A continuation token, included if this is a continuation call.

NextToken: Token ///

The maximum number of tables to return in a single response.

MaxResults: CatalogGetterPageSize ///

The transaction ID at which to read the table contents.

TransactionId: TransactionIdString ///

The time as of when to read the table contents. If not set, the most recent transaction commit time will be used. Cannot be specified along with TransactionId.

QueryAsOfTime: Timestamp } @output structure GetTablesResponse { ///

A list of the requested Table objects.

TableList: TableList ///

A continuation token, present if the current list segment is /// not the last.

NextToken: Token } @input structure GetTableVersionRequest { ///
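To show how the Expression pattern and NextToken work together, a boto3 sketch that pages through all matching tables; the database name and the name pattern are placeholders.

    import boto3

    glue = boto3.client("glue")

    tables, next_token = [], None
    while True:
        kwargs = {"DatabaseName": "sales_db", "Expression": "orders_.*"}
        if next_token:
            kwargs["NextToken"] = next_token
        resp = glue.get_tables(**kwargs)
        tables.extend(resp["TableList"])
        next_token = resp.get("NextToken")
        if not next_token:
            break
    print([t["Name"] for t in tables])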

The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The database in the catalog in which the table resides. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The name of the table. For Hive compatibility, /// this name is entirely lowercase.

@required TableName: NameString ///

The ID value of the table version to be retrieved. A VersionID is a string representation of an integer. Each version is incremented by 1.

VersionId: VersionString } @output structure GetTableVersionResponse { ///

The requested table version.

TableVersion: TableVersion } @input structure GetTableVersionsRequest { ///

The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The database in the catalog in which the table resides. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

The name of the table. For Hive /// compatibility, this name is entirely lowercase.

@required TableName: NameString ///

A continuation token, if this is not the first call.

NextToken: Token ///

The maximum number of table versions to return in one response.

MaxResults: CatalogGetterPageSize } @output structure GetTableVersionsResponse { ///

A list of strings identifying available versions of the /// specified table.

TableVersions: GetTableVersionsList ///

A continuation token, if the list of available versions does /// not include the last one.

NextToken: Token } @input structure GetTagsRequest { ///

The Amazon Resource Name (ARN) of the resource for which to retrieve tags.

@required ResourceArn: GlueResourceArn } @output structure GetTagsResponse { ///

The requested tags.

Tags: TagsMap } @input structure GetTriggerRequest { ///

The name of the trigger to retrieve.

@required Name: NameString } @output structure GetTriggerResponse { ///

The requested trigger definition.

Trigger: Trigger } @input structure GetTriggersRequest { ///

A continuation token, if this is a continuation call.

NextToken: GenericString ///

The name of the job to retrieve triggers for. The trigger that can start this job is /// returned, and if there is no such trigger, all triggers are returned.

DependentJobName: NameString ///

The maximum size of the response.

MaxResults: PageSize } @output structure GetTriggersResponse { ///

A list of triggers for the specified job.

Triggers: TriggerList ///

A continuation token, if not all the requested triggers /// have yet been returned.

NextToken: GenericString } @input structure GetUnfilteredPartitionMetadataRequest { ///

The catalog ID where the partition resides.

@required CatalogId: CatalogIdString ///

(Required) Specifies the name of a database that contains the partition.

@required DatabaseName: NameString ///

(Required) Specifies the name of a table that contains the partition.

@required TableName: NameString ///

(Required) A list of partition key values.

@required PartitionValues: ValueStringList ///

A structure containing Lake Formation audit context information.

AuditContext: AuditContext ///

(Required) A list of supported permission types.

@required SupportedPermissionTypes: PermissionTypeList } @output structure GetUnfilteredPartitionMetadataResponse { ///

A Partition object containing the partition metadata.

Partition: Partition ///

A list of column names that the user has been granted access to.

AuthorizedColumns: NameStringList ///

A Boolean value that indicates whether the partition location is registered /// with Lake Formation.

IsRegisteredWithLakeFormation: Boolean = false } @input structure GetUnfilteredPartitionsMetadataRequest { ///
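A hedged boto3 sketch of the single-partition variant above, with Lake Formation filtering applied; the database, table, and partition key values are placeholders.

    import boto3

    glue = boto3.client("glue")
    account_id = boto3.client("sts").get_caller_identity()["Account"]

    resp = glue.get_unfiltered_partition_metadata(
        CatalogId=account_id,
        DatabaseName="sales_db",
        TableName="orders",
        PartitionValues=["2023", "06"],
        SupportedPermissionTypes=["COLUMN_PERMISSION"],
    )
    print(resp.get("AuthorizedColumns"), resp.get("IsRegisteredWithLakeFormation"))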

The ID of the Data Catalog where the partitions in question reside. If none is provided, /// the AWS account ID is used by default.

@required CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the table that contains the partition.

@required TableName: NameString ///

An expression that filters the partitions to be returned.

///

The expression uses SQL syntax similar to the SQL WHERE filter clause. The /// SQL statement parser JSQLParser parses the expression.

///

/// Operators: The following are the operators that you can use in the /// Expression API call:

///
  • = : Checks whether the values of the two operands are equal; if yes, then the condition becomes true. Example: Assume 'variable a' holds 10 and 'variable b' holds 20. (a = b) is not true.
  • < > : Checks whether the values of two operands are equal; if the values are not equal, then the condition becomes true. Example: (a < > b) is true.
  • > : Checks whether the value of the left operand is greater than the value of the right operand; if yes, then the condition becomes true. Example: (a > b) is not true.
  • < : Checks whether the value of the left operand is less than the value of the right operand; if yes, then the condition becomes true. Example: (a < b) is true.
  • >= : Checks whether the value of the left operand is greater than or equal to the value of the right operand; if yes, then the condition becomes true. Example: (a >= b) is not true.
  • <= : Checks whether the value of the left operand is less than or equal to the value of the right operand; if yes, then the condition becomes true. Example: (a <= b) is true.
  • AND, OR, IN, BETWEEN, LIKE, NOT, IS NULL : Logical operators.
///

/// Supported Partition Key Types: The following are the supported /// partition keys.

    ///
  • string
  • date
  • timestamp
  • int
  • bigint
  • long
  • tinyint
  • smallint
  • decimal
    ///

If a type that is not valid is encountered, an exception is thrown.

Expression: PredicateString ///

A structure containing Lake Formation audit context information.

AuditContext: AuditContext ///

A list of supported permission types.

@required SupportedPermissionTypes: PermissionTypeList ///

A continuation token, if this is not the first call to retrieve /// these partitions.

NextToken: Token ///

The segment of the table's partitions to scan in this request.

Segment: Segment ///

The maximum number of partitions to return in a single response.

MaxResults: PageSize } @output structure GetUnfilteredPartitionsMetadataResponse { ///

A list of requested partitions.

UnfilteredPartitions: UnfilteredPartitionList ///

A continuation token, if the returned list of partitions does not include the last /// one.

NextToken: Token } @input structure GetUnfilteredTableMetadataRequest { ///

The catalog ID where the table resides.

@required CatalogId: CatalogIdString ///

(Required) Specifies the name of a database that contains the table.

@required DatabaseName: NameString ///

(Required) Specifies the name of a table for which you are requesting metadata.

@required Name: NameString ///

A structure containing Lake Formation audit context information.

AuditContext: AuditContext ///

(Required) A list of supported permission types.

@required SupportedPermissionTypes: PermissionTypeList } @output structure GetUnfilteredTableMetadataResponse { ///

A Table object containing the table metadata.

Table: Table ///

A list of column names that the user has been granted access to.

AuthorizedColumns: NameStringList ///

A Boolean value that indicates whether the partition location is registered /// with Lake Formation.

IsRegisteredWithLakeFormation: Boolean = false ///

A list of column row filters.

CellFilters: ColumnRowFilterList } @input structure GetUserDefinedFunctionRequest { ///

The ID of the Data Catalog where the function to be retrieved is located. If none is /// provided, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the function is located.

@required DatabaseName: NameString ///

The name of the function.

@required FunctionName: NameString } @output structure GetUserDefinedFunctionResponse { ///

The requested function definition.

UserDefinedFunction: UserDefinedFunction } @input structure GetUserDefinedFunctionsRequest { ///

The ID of the Data Catalog where the functions to be retrieved are located. If none is /// provided, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the functions are located. If none is provided, functions from all the /// databases across the catalog will be returned.

DatabaseName: NameString ///

An optional function-name pattern string that filters the function /// definitions returned.

@required Pattern: NameString ///

A continuation token, if this is a continuation call.

NextToken: Token ///

The maximum number of functions to return in one response.

MaxResults: CatalogGetterPageSize } @output structure GetUserDefinedFunctionsResponse { ///

A list of requested function definitions.

UserDefinedFunctions: UserDefinedFunctionList ///

A continuation token, if the list of functions returned does /// not include the last requested function.

NextToken: Token } @input structure GetWorkflowRequest { ///

The name of the workflow to retrieve.

@required Name: NameString ///

Specifies whether to include a graph when returning the workflow resource metadata.

IncludeGraph: NullableBoolean } @output structure GetWorkflowResponse { ///

The resource metadata for the workflow.

Workflow: Workflow } @input structure GetWorkflowRunPropertiesRequest { ///

Name of the workflow which was run.

@required Name: NameString ///

The ID of the workflow run whose run properties should be returned.

@required RunId: IdString } @output structure GetWorkflowRunPropertiesResponse { ///

The workflow run properties which were set during the specified run.

RunProperties: WorkflowRunProperties } @input structure GetWorkflowRunRequest { ///

Name of the workflow being run.

@required Name: NameString ///

The ID of the workflow run.

@required RunId: IdString ///

Specifies whether to include the workflow graph in response or not.

IncludeGraph: NullableBoolean } @output structure GetWorkflowRunResponse { ///

The requested workflow run metadata.

Run: WorkflowRun } @input structure GetWorkflowRunsRequest { ///

Name of the workflow whose run metadata should be returned.

@required Name: NameString ///

Specifies whether to include the workflow graph in response or not.

IncludeGraph: NullableBoolean ///

A continuation token, if this is a continuation call.

NextToken: GenericString ///

The maximum number of workflow runs to be included in the response.

MaxResults: PageSize } @output structure GetWorkflowRunsResponse { ///

A list of workflow run metadata objects.

Runs: WorkflowRuns ///

A continuation token, if not all requested workflow runs have been returned.

NextToken: GenericString } ///

An encryption operation failed.

@error("client") structure GlueEncryptionException { ///

The message describing the problem.

Message: MessageString } ///

A structure for returning a resource policy.

structure GluePolicy { ///

Contains the requested policy document, in JSON format.

PolicyInJson: PolicyJsonString ///

Contains the hash value associated with this policy.

PolicyHash: HashString ///

The date and time at which the policy was created.

CreateTime: Timestamp ///

The date and time at which the policy was last updated.

UpdateTime: Timestamp } ///

Specifies a user-defined schema when a schema cannot be determined by Glue.

structure GlueSchema { ///

Specifies the column definitions that make up a Glue schema.

Columns: GlueStudioSchemaColumnList } ///

Specifies a single column in a Glue schema definition.

structure GlueStudioSchemaColumn { ///

The name of the column in the Glue Studio schema.

@required Name: GlueStudioColumnNameString ///

The hive type for this column in the Glue Studio schema.

Type: ColumnTypeString } ///

The database and table in the Glue Data Catalog that is used for input or output data.

structure GlueTable { ///

A database name in the Glue Data Catalog.

@required DatabaseName: NameString ///

A table name in the Glue Data Catalog.

@required TableName: NameString ///

A unique identifier for the Glue Data Catalog.

CatalogId: NameString ///

The name of the connection to the Glue Data Catalog.

ConnectionName: NameString ///

Additional options for the table. Currently there are two keys supported:

///
    ///
  • ///

    /// pushDownPredicate: to filter on partitions without having to list and read all the files in your dataset.

    ///
  • ///
  • ///

    /// catalogPartitionPredicate: to use server-side partition pruning using partition indexes in the Glue Data Catalog.

    ///
  • ///
AdditionalOptions: GlueTableAdditionalOptions } ///
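For illustration, a boto3 sketch that passes a GlueTable with one of the supported AdditionalOptions keys as the DataSource of a data quality rule recommendation run; the role ARN, database, and table names are placeholders.

    import boto3

    glue = boto3.client("glue")

    resp = glue.start_data_quality_rule_recommendation_run(
        DataSource={
            "GlueTable": {
                "DatabaseName": "sales_db",
                "TableName": "orders",
                # Filter on partitions without listing and reading all files in the dataset.
                "AdditionalOptions": {"pushDownPredicate": "year = '2023'"},
            }
        },
        Role="arn:aws:iam::123456789012:role/GlueDataQualityRole",
    )
    print(resp["RunId"])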

Specifies the data store in the governed Glue Data Catalog.

structure GovernedCatalogSource { ///

The name of the data store.

@required Name: NodeName ///

The database to read from.

@required Database: EnclosedInStringProperty ///

The database table to read from.

@required Table: EnclosedInStringProperty ///

Partitions satisfying this predicate are deleted. Files within the retention period in these partitions are not deleted. Set to "" – empty by default.

PartitionPredicate: EnclosedInStringProperty ///

Specifies additional connection options.

AdditionalOptions: S3SourceAdditionalOptions } ///

Specifies a data target that writes to Amazon S3 using the Glue Data Catalog.

structure GovernedCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

A policy that specifies update behavior for the governed catalog.

SchemaChangePolicy: CatalogSchemaChangePolicy } ///

A classifier that uses grok patterns.

structure GrokClassifier { ///

The name of the classifier.

@required Name: NameString ///

An identifier of the data format that the classifier matches, such as Twitter, JSON, Omniture logs, and /// so on.

@required Classification: Classification ///

The time that this classifier was registered.

CreationTime: Timestamp ///

The time that this classifier was last updated.

LastUpdated: Timestamp ///

The version of this classifier.

Version: VersionId = 0 ///

The grok pattern applied to a data store by this classifier. /// For more information, see built-in patterns in Writing Custom Classifiers.

@required GrokPattern: GrokPattern ///

Optional custom grok patterns defined by this classifier. /// For more information, see custom patterns in Writing Custom Classifiers.

CustomPatterns: CustomPatterns } ///

Specifies an Apache Hudi data source.

structure HudiTarget { ///

An array of Amazon S3 location strings for Hudi, each indicating the root folder in which the metadata files for a Hudi table reside. The Hudi folder may be located in a child folder of the root folder.

///

The crawler will scan all folders underneath a path for a Hudi folder.

Paths: PathList ///

The name of the connection to use to connect to the Hudi target. If your Hudi files are stored in buckets that require VPC authorization, you can set their connection properties here.

ConnectionName: ConnectionName ///

A list of glob patterns used to exclude from the crawl. /// For more information, see Catalog Tables with a Crawler.

Exclusions: PathList ///

The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time.

MaximumTraversalDepth: NullableInteger } ///
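A boto3 sketch that registers a crawler over a Hudi root folder; the crawler name, role, database, and S3 path are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_crawler(
        Name="hudi-crawler",
        Role="arn:aws:iam::123456789012:role/GlueCrawlerRole",
        DatabaseName="lakehouse_db",
        Targets={
            "HudiTargets": [{
                "Paths": ["s3://my-bucket/hudi/"],
                # Bound how deep the crawler scans for the Hudi metadata folder.
                "MaximumTraversalDepth": 5,
            }]
        },
    )
    glue.start_crawler(Name="hudi-crawler")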

A structure that defines an Apache Iceberg metadata table to create in the catalog.

structure IcebergInput { ///

A required metadata operation. Can only be set to CREATE.

@required MetadataOperation: MetadataOperation ///

The table version for the Iceberg table. Defaults to 2.

Version: VersionString } ///
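A minimal boto3 sketch of creating an Iceberg table through this input; the metadata operation must be CREATE, the database, table, and S3 location are placeholders, and a real call would also supply column definitions.

    import boto3

    glue = boto3.client("glue")

    glue.create_table(
        DatabaseName="lakehouse_db",
        TableInput={
            "Name": "events_iceberg",
            "StorageDescriptor": {"Location": "s3://my-bucket/iceberg/events/"},
        },
        OpenTableFormatInput={
            "IcebergInput": {"MetadataOperation": "CREATE", "Version": "2"}
        },
    )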

Specifies an Apache Iceberg data source where Iceberg tables are stored in Amazon S3.

structure IcebergTarget { ///

One or more Amazon S3 paths that contain Iceberg metadata folders, as s3://bucket/prefix.

Paths: PathList ///

The name of the connection to use to connect to the Iceberg target.

ConnectionName: ConnectionName ///

A list of glob patterns used to exclude from the crawl. /// For more information, see Catalog Tables with a Crawler.

Exclusions: PathList ///

The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time.

MaximumTraversalDepth: NullableInteger } ///

The same unique identifier was associated with two different records.

@error("client") structure IdempotentParameterMismatchException { ///

A message describing the problem.

Message: MessageString } ///

The blueprint is in an invalid state to perform a requested operation.

@error("client") structure IllegalBlueprintStateException { ///

A message describing the problem.

Message: MessageString } ///

The session is in an invalid state to perform a requested operation.

@error("client") structure IllegalSessionStateException { ///

A message describing the problem.

Message: MessageString } ///

The workflow is in an invalid state to perform a requested operation.

@error("client") structure IllegalWorkflowStateException { ///

A message describing the problem.

Message: MessageString } @input structure ImportCatalogToGlueRequest { ///

The ID of the catalog to import. Currently, this should be the Amazon Web Services account ID.

CatalogId: CatalogIdString } @output structure ImportCatalogToGlueResponse {} ///

Specifies configuration properties for an importing labels task run.

structure ImportLabelsTaskRunProperties { ///

The Amazon Simple Storage Service (Amazon S3) path from where you will import the /// labels.

InputS3Path: UriString ///

Indicates whether to overwrite your existing labels.

Replace: ReplaceBoolean = false } ///

An internal service error occurred.

@error("server") structure InternalServiceException { ///

A message describing the problem.

Message: MessageString } ///

The input provided was not valid.

@error("client") structure InvalidInputException { ///

A message describing the problem.

Message: MessageString ///

Indicates whether or not the exception relates to a federated source.

FromFederationSource: NullableBoolean } ///

An error that indicates your data is in an invalid state.

@error("client") structure InvalidStateException { ///

A message describing the problem.

Message: MessageString } ///

Additional connection options for the connector.

structure JDBCConnectorOptions { ///

Extra condition clause to filter data from source. For example:

///

/// BillingCity='Mountain View' ///

///

When using a query instead of a table name, you should validate that the query works with the specified filterPredicate.

FilterPredicate: EnclosedInStringProperty ///

The name of an integer column that is used for partitioning. This option works only when it's included with lowerBound, upperBound, and numPartitions. This option works the same way as in the Spark SQL JDBC reader.

PartitionColumn: EnclosedInStringProperty ///

The minimum value of partitionColumn that is used to decide partition stride.

LowerBound: BoxedNonNegativeLong ///

The maximum value of partitionColumn that is used to decide partition stride.

UpperBound: BoxedNonNegativeLong ///

The number of partitions. This value, along with lowerBound (inclusive) and upperBound (exclusive), forms partition strides for generated WHERE clause expressions that are used to split the partitionColumn.

NumPartitions: BoxedNonNegativeLong ///

The name of the job bookmark keys on which to sort.

JobBookmarkKeys: EnclosedInStringProperties ///

Specifies an ascending or descending sort order.

JobBookmarkKeysSortOrder: EnclosedInStringProperty ///

Custom data type mapping that builds a mapping from a JDBC data type to an Glue data type. For example, the option "dataTypeMapping":{"FLOAT":"STRING"} maps data fields of JDBC type FLOAT into the Java String type by calling the ResultSet.getString() method of the driver, and uses it to build the Glue record. The ResultSet object is implemented by each driver, so the behavior is specific to the driver you use. Refer to the documentation for your JDBC driver to understand how the driver performs the conversions.

DataTypeMapping: JDBCDataTypeMapping } ///
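To make the partitioning options concrete, a rough Python sketch of how lowerBound, upperBound, and numPartitions translate into WHERE-clause strides over partitionColumn, mirroring the behavior of the Spark SQL JDBC reader; the column name and bounds are placeholders.

    # Rough sketch only: compute the per-partition predicates the reader would generate.
    lower, upper, num_partitions, column = 0, 1000, 4, "order_id"
    stride = (upper - lower) // num_partitions
    predicates = []
    for i in range(num_partitions):
        start = lower + i * stride
        if i == 0:
            predicates.append(f"{column} < {start + stride} OR {column} IS NULL")
        elif i == num_partitions - 1:
            predicates.append(f"{column} >= {start}")
        else:
            predicates.append(f"{column} >= {start} AND {column} < {start + stride}")
    print(predicates)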

Specifies a connector to a JDBC data source.

structure JDBCConnectorSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the connection that is associated with the connector.

@required ConnectionName: EnclosedInStringProperty ///

The name of a connector that assists with accessing the data store in Glue Studio.

@required ConnectorName: EnclosedInStringProperty ///

The type of connection, such as marketplace.jdbc or custom.jdbc, designating a connection to a JDBC data store.

@required ConnectionType: EnclosedInStringProperty ///

Additional connection options for the connector.

AdditionalOptions: JDBCConnectorOptions ///

The name of the table in the data source.

ConnectionTable: EnclosedInStringPropertyWithQuote ///

The table or SQL query to get the data from. You can specify either ConnectionTable or query, but not both.

Query: SqlQuery ///

Specifies the data schema for the custom JDBC source.

OutputSchemas: GlueSchemas } ///

Specifies a data target that writes to a JDBC data store using a connector.

structure JDBCConnectorTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of the connection that is associated with the connector.

@required ConnectionName: EnclosedInStringProperty ///

The name of the table in the data target.

@required ConnectionTable: EnclosedInStringPropertyWithQuote ///

The name of a connector that will be used.

@required ConnectorName: EnclosedInStringProperty ///

The type of connection, such as marketplace.jdbc or custom.jdbc, designating a connection to a JDBC data target.

@required ConnectionType: EnclosedInStringProperty ///

Additional connection options for the connector.

AdditionalOptions: AdditionalOptions ///

Specifies the data schema for the JDBC target.

OutputSchemas: GlueSchemas } ///

Specifies a JDBC data store to crawl.

structure JdbcTarget { ///

The name of the connection to use to connect to the JDBC target.

ConnectionName: ConnectionName ///

The path of the JDBC target.

Path: Path ///

A list of glob patterns used to exclude from the crawl. /// For more information, see Catalog Tables with a Crawler.

Exclusions: PathList ///

Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.

///

If you do not need additional metadata, keep the field empty.

EnableAdditionalMetadata: EnableAdditionalMetadata } ///

Specifies a job definition.

structure Job { ///

The name you assign to this job definition.

Name: NameString ///

A description of the job.

Description: DescriptionString ///

This field is reserved for future use.

LogUri: UriString ///

The name or Amazon Resource Name (ARN) of the IAM role associated with this job.

Role: RoleString ///

The time and date that this job definition was created.

CreatedOn: TimestampValue ///

The last point in time when this job definition was modified.

LastModifiedOn: TimestampValue ///

An ExecutionProperty specifying the maximum number of concurrent runs allowed /// for this job.

ExecutionProperty: ExecutionProperty ///

The JobCommand that runs this job.

Command: JobCommand ///

The default arguments for every run of this job, specified as name-value pairs.

///

You can specify arguments here that your own job-execution script /// consumes, as well as arguments that Glue itself consumes.

///

Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets /// from a Glue Connection, Secrets Manager or other secret management /// mechanism if you intend to keep them within the Job.

///

For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Spark jobs, /// see the Special Parameters Used by Glue topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Ray /// jobs, see Using /// job parameters in Ray jobs in the developer guide.

DefaultArguments: GenericMap ///

Arguments for this job that are not overridden when providing job arguments /// in a job run, specified as name-value pairs.

NonOverridableArguments: GenericMap ///

The connections used for this job.

Connections: ConnectionsList ///

The maximum number of times to retry this job after a JobRun fails.

MaxRetries: MaxRetries = 0 ///

This field is deprecated. Use MaxCapacity instead.

///

The number of Glue data processing units (DPUs) allocated to runs of this job. You can /// allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing /// power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more information, /// see the Glue pricing /// page.

///

@deprecated( message: "This property is deprecated, use MaxCapacity instead." ) AllocatedCapacity: IntegerValue = 0 ///

The job timeout in minutes. This is the maximum time that a job run /// can consume resources before it is terminated and enters TIMEOUT /// status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

For Glue version 1.0 or earlier jobs, using the standard worker type, the number of /// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is /// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB /// of memory. For more information, see the /// Glue pricing page.

///

For Glue version 2.0 or later jobs, you cannot specify a Maximum capacity. /// Instead, you should specify a Worker type and the Number of workers.

///

Do not set MaxCapacity if using WorkerType and NumberOfWorkers.

///

The value that can be allocated for MaxCapacity depends on whether you are /// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL /// job:

///
    ///
  • ///

    When you specify a Python shell job (JobCommand.Name="pythonshell"), you can /// allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.

    ///
  • ///
  • ///

    When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache /// Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. /// The default is 10 DPUs. This job type cannot have a fractional DPU allocation.

    ///
  • ///
MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when a job runs. Accepts a value of /// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.

///
    ///
  • ///

    For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.

    ///
  • ///
  • ///

    For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.

    ///
  • ///
  • ///

    For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).

    ///
  • ///
  • ///

    For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.

    ///
  • ///
  • ///

    For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.

    ///
  • ///
  • ///

    For the Z.2X worker type, each worker maps to 2 M-DPU (8 vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.

    ///
  • ///
WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when a job runs.

NumberOfWorkers: NullableInteger ///
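Since these fields mirror the parameters of the CreateJob operation, a boto3 sketch of sizing a Spark job with WorkerType and NumberOfWorkers instead of MaxCapacity; the role, script location, and job name are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_job(
        Name="orders-etl",
        Role="arn:aws:iam::123456789012:role/GlueJobRole",
        Command={
            "Name": "glueetl",
            "ScriptLocation": "s3://my-bucket/scripts/orders_etl.py",
            "PythonVersion": "3",
        },
        GlueVersion="4.0",
        # WorkerType/NumberOfWorkers and MaxCapacity are mutually exclusive.
        WorkerType="G.1X",
        NumberOfWorkers=10,
        MaxRetries=1,
        Timeout=120,
        DefaultArguments={"--job-bookmark-option": "job-bookmark-enable"},
    )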

The name of the SecurityConfiguration structure to be used with this /// job.

SecurityConfiguration: NameString ///

Specifies configuration properties of a job notification.

NotificationProperty: NotificationProperty ///

In Spark jobs, GlueVersion determines the versions of Apache Spark and Python /// that Glue makes available in a job. The Python version indicates the version /// supported for jobs of type Spark.

///

Ray jobs should set GlueVersion to 4.0 or greater. However, /// the versions of Ray, Python and additional libraries available in your Ray job are determined /// by the Runtime parameter of the Job command.

///

For more information about the available Glue versions and corresponding /// Spark and Python versions, see Glue version in the developer /// guide.

///

Jobs that are created without specifying a Glue version default to Glue 0.9.

GlueVersion: GlueVersionString ///

The representation of a directed acyclic graph on which both the Glue Studio visual component and Glue Studio code generation are based.

CodeGenConfigurationNodes: CodeGenConfigurationNodes ///

Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.

///

The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.

///

Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.

ExecutionClass: ExecutionClass ///
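A short boto3 sketch of starting a run of an existing Glue 3.0+ glueetl job on the flexible execution class; the job name is a placeholder.

    import boto3

    glue = boto3.client("glue")

    resp = glue.start_job_run(
        JobName="orders-etl",
        ExecutionClass="FLEX",  # for time-insensitive workloads
    )
    print(resp["JobRunId"])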

The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.

SourceControlDetails: SourceControlDetails } ///

Defines a point that a job can resume processing.

structure JobBookmarkEntry { ///

The name of the job in question.

JobName: JobName ///

The version of the job.

Version: IntegerValue = 0 ///

The run ID number.

Run: IntegerValue = 0 ///

The attempt ID number.

Attempt: IntegerValue = 0 ///

The unique run identifier associated with the previous job run.

PreviousRunId: RunId ///

The run ID number.

RunId: RunId ///

The bookmark itself.

JobBookmark: JsonValue } ///
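For illustration, a boto3 sketch that reads the current bookmark entry for a job and then resets it so the next run reprocesses all data; the job name is a placeholder.

    import boto3

    glue = boto3.client("glue")

    entry = glue.get_job_bookmark(JobName="orders-etl")["JobBookmarkEntry"]
    print(entry.get("Version"), entry.get("Run"), entry.get("JobBookmark"))
    glue.reset_job_bookmark(JobName="orders-etl")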

Specifies how job bookmark data should be encrypted.

structure JobBookmarksEncryption { ///

The encryption mode to use for job bookmarks data.

JobBookmarksEncryptionMode: JobBookmarksEncryptionMode ///

The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.

KmsKeyArn: KmsKeyArn } ///

Specifies code that runs when a job is run.

structure JobCommand { ///

The name of the job command. For an Apache Spark ETL job, this must be /// glueetl. For a Python shell job, it must be pythonshell. /// For an Apache Spark streaming ETL job, this must be gluestreaming. For a Ray job, /// this must be glueray.

Name: GenericString ///

Specifies the Amazon Simple Storage Service (Amazon S3) path to a script that runs a /// job.

ScriptLocation: ScriptLocationString ///

The Python version being used to run a Python shell job. Allowed values are 2 or 3.

PythonVersion: PythonVersionString ///

In Ray jobs, Runtime is used to specify the versions of Ray, Python and additional /// libraries available in your environment. This field is not used in other job types. For /// supported runtime environment values, see Working with Ray jobs /// in the Glue Developer Guide.

Runtime: RuntimeNameString } ///

The details of a Job node present in the workflow.

structure JobNodeDetails { ///

The information for the job runs represented by the job node.

JobRuns: JobRunList } ///

Contains information about a job run.

structure JobRun { ///

The ID of this job run.

Id: IdString ///

The number of the attempt to run this job.

Attempt: AttemptCount = 0 ///

The ID of the previous run of this job. For example, the JobRunId specified /// in the StartJobRun action.

PreviousRunId: IdString ///

The name of the trigger that started this job run.

TriggerName: NameString ///

The name of the job definition being used in this run.

JobName: NameString ///

The date and time at which this job run was started.

StartedOn: TimestampValue ///

The last time that this job run was modified.

LastModifiedOn: TimestampValue ///

The date and time that this job run completed.

CompletedOn: TimestampValue ///

The current state of the job run. For more information about the statuses of jobs that have terminated abnormally, see Glue Job Run Statuses.

JobRunState: JobRunState ///

The job arguments associated with this run. For this job run, they replace the default /// arguments set in the job definition itself.

///

You can specify arguments here that your own job-execution script /// consumes, as well as arguments that Glue itself consumes.

///

Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets /// from a Glue Connection, Secrets Manager or other secret management /// mechanism if you intend to keep them within the Job.

///

For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Spark jobs, /// see the Special Parameters Used by Glue topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Ray /// jobs, see Using /// job parameters in Ray jobs in the developer guide.

Arguments: GenericMap ///

An error message associated with this job run.

ErrorMessage: ErrorString ///

A list of predecessors to this job run.

PredecessorRuns: PredecessorList ///

This field is deprecated. Use MaxCapacity instead.

///

The number of Glue data processing units (DPUs) allocated to this JobRun. /// From 2 to 100 DPUs can be allocated; the default is 10. A DPU is a relative measure /// of processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. /// For more information, see the Glue /// pricing page.

@deprecated( message: "This property is deprecated, use MaxCapacity instead." ) AllocatedCapacity: IntegerValue = 0 ///

The amount of time (in seconds) that the job run consumed resources.

ExecutionTime: ExecutionTime = 0 ///

The JobRun timeout in minutes. This is the maximum time that a job run can /// consume resources before it is terminated and enters TIMEOUT status. This value overrides the timeout value set in the parent job.

///

Streaming jobs do not have a timeout. The default for non-streaming jobs is 2,880 minutes (48 hours).

Timeout: Timeout ///

For Glue version 1.0 or earlier jobs, using the standard worker type, the number of /// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is /// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB /// of memory. For more information, see the /// Glue pricing page.

///

For Glue version 2.0+ jobs, you cannot specify a Maximum capacity. /// Instead, you should specify a Worker type and the Number of workers.

///

Do not set MaxCapacity if using WorkerType and NumberOfWorkers.

///

The value that can be allocated for MaxCapacity depends on whether you are /// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL /// job:

///
    ///
  • ///

    When you specify a Python shell job (JobCommand.Name="pythonshell"), you can /// allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.

    ///
  • ///
  • ///

    When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache /// Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. /// The default is 10 DPUs. This job type cannot have a fractional DPU allocation.

    ///
  • ///
MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when a job runs. Accepts a value of /// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.

///
    ///
  • ///

    For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.

    ///
  • ///
  • ///

    For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.

    ///
  • ///
  • ///

    For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).

    ///
  • ///
  • ///

    For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.

    ///
  • ///
  • ///

    For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.

    ///
  • ///
  • ///

    For the Z.2X worker type, each worker maps to 2 M-DPU (8 vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.

    ///
  • ///
WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when a job runs.

NumberOfWorkers: NullableInteger ///

The name of the SecurityConfiguration structure to be used with this job /// run.

SecurityConfiguration: NameString ///

The name of the log group for secure logging that can be server-side encrypted in Amazon /// CloudWatch using KMS. This name can be /aws-glue/jobs/, in which case the /// default encryption is NONE. If you add a role name and /// SecurityConfiguration name (in other words, /// /aws-glue/jobs-yourRoleName-yourSecurityConfigurationName/), then that security /// configuration is used to encrypt the log group.

LogGroupName: GenericString ///

Specifies configuration properties of a job run notification.

NotificationProperty: NotificationProperty ///

In Spark jobs, GlueVersion determines the versions of Apache Spark and Python /// that Glue makes available in a job. The Python version indicates the version /// supported for jobs of type Spark.

///

Ray jobs should set GlueVersion to 4.0 or greater. However, /// the versions of Ray, Python and additional libraries available in your Ray job are determined /// by the Runtime parameter of the Job command.

///

For more information about the available Glue versions and corresponding /// Spark and Python versions, see Glue version in the developer /// guide.

///

Jobs that are created without specifying a Glue version default to Glue 0.9.

GlueVersion: GlueVersionString ///

This field is populated only for Auto Scaling job runs. It represents the total time each executor ran during the lifecycle of a job run, in seconds, multiplied by a DPU factor (1 for G.1X, 2 for G.2X, or 0.25 for G.025X workers). This value may differ from executionEngineRuntime * MaxCapacity because, for Auto Scaling jobs, the number of executors running at a given time may be less than the MaxCapacity. Therefore, it is possible that the value of DPUSeconds is less than executionEngineRuntime * MaxCapacity.

DPUSeconds: NullableDouble ///

Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.

///

The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.

///

Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.

ExecutionClass: ExecutionClass } ///

Specifies information used to update an existing job definition. The previous job /// definition is completely overwritten by this information.

structure JobUpdate { ///

Description of the job being defined.

Description: DescriptionString ///

This field is reserved for future use.

LogUri: UriString ///

The name or Amazon Resource Name (ARN) of the IAM role associated with this job /// (required).

Role: RoleString ///

An ExecutionProperty specifying the maximum number of concurrent runs allowed /// for this job.

ExecutionProperty: ExecutionProperty ///

The JobCommand that runs this job (required).

Command: JobCommand ///

The default arguments for every run of this job, specified as name-value pairs.

///

You can specify arguments here that your own job-execution script /// consumes, as well as arguments that Glue itself consumes.

///

Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets /// from a Glue Connection, Secrets Manager or other secret management /// mechanism if you intend to keep them within the Job.

///

For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Spark jobs, /// see the Special Parameters Used by Glue topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Ray /// jobs, see Using /// job parameters in Ray jobs in the developer guide.

DefaultArguments: GenericMap ///

Arguments for this job that are not overridden when providing job arguments /// in a job run, specified as name-value pairs.

NonOverridableArguments: GenericMap ///

The connections used for this job.

Connections: ConnectionsList ///

The maximum number of times to retry this job if it fails.

MaxRetries: MaxRetries = 0 ///

This field is deprecated. Use MaxCapacity instead.

///

The number of Glue data processing units (DPUs) to allocate to this job. You can /// allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing /// power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more information, /// see the Glue pricing /// page.

@deprecated( message: "This property is deprecated, use MaxCapacity instead." ) AllocatedCapacity: IntegerValue = 0 ///

The job timeout in minutes. This is the maximum time that a job run /// can consume resources before it is terminated and enters TIMEOUT /// status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

For Glue version 1.0 or earlier jobs, using the standard worker type, the number of /// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is /// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB /// of memory. For more information, see the /// Glue pricing page.

///

For Glue version 2.0+ jobs, you cannot specify a Maximum capacity. /// Instead, you should specify a Worker type and the Number of workers.

///

Do not set MaxCapacity if using WorkerType and NumberOfWorkers.

///

The value that can be allocated for MaxCapacity depends on whether you are /// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL /// job:

///
  • When you specify a Python shell job (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
  • When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a fractional DPU allocation.

MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when a job runs. Accepts a value of /// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.

///
  • For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries; it offers a scalable and cost-effective way to run most jobs.
  • For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries; it offers a scalable and cost-effective way to run most jobs.
  • For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
  • For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
  • For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
  • For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.

WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when a job runs.

NumberOfWorkers: NullableInteger ///

The name of the SecurityConfiguration structure to be used with this /// job.

SecurityConfiguration: NameString ///

Specifies the configuration properties of a job notification.

NotificationProperty: NotificationProperty ///

In Spark jobs, GlueVersion determines the versions of Apache Spark and Python /// that Glue supports in a job. The Python version indicates the version /// supported for jobs of type Spark.

///

Ray jobs should set GlueVersion to 4.0 or greater. However, /// the versions of Ray, Python and additional libraries available in your Ray job are determined /// by the Runtime parameter of the Job command.

///

For more information about the available Glue versions and corresponding /// Spark and Python versions, see Glue version in the developer /// guide.

///

Jobs that are created without specifying a Glue version default to Glue 0.9.

GlueVersion: GlueVersionString ///

The representation of a directed acyclic graph on which both the Glue Studio visual component and Glue Studio code generation is based.

CodeGenConfigurationNodes: CodeGenConfigurationNodes ///

Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.

///

The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.

///

Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.

ExecutionClass: ExecutionClass ///

The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.

SourceControlDetails: SourceControlDetails } ///
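
As a rough illustration (not part of this model), the JobUpdate structure above maps to the UpdateJob operation, which completely overwrites the previous job definition. A minimal boto3 sketch; the job name, role ARN, and script location are placeholders:

# Sketch only: updating a job definition with boto3. All names and ARNs are placeholders.
import boto3

glue = boto3.client("glue")

glue.update_job(
    JobName="example-job",  # hypothetical job name
    JobUpdate={
        "Role": "arn:aws:iam::123456789012:role/ExampleGlueRole",  # placeholder role
        "Command": {
            "Name": "glueetl",
            "ScriptLocation": "s3://example-bucket/scripts/job.py",
            "PythonVersion": "3",
        },
        "GlueVersion": "4.0",
        "WorkerType": "G.1X",
        "NumberOfWorkers": 10,
        "ExecutionClass": "FLEX",  # allowed for Glue 3.0+ glueetl jobs, per the notes above
        "DefaultArguments": {"--TempDir": "s3://example-bucket/tmp/"},
    },
)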

Specifies a transform that joins two datasets into one dataset using a comparison phrase on the specified data property keys. You can use inner, outer, left, right, left semi, and left anti joins.

structure Join { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: TwoInputs ///

Specifies the type of join to be performed on the datasets.

@required JoinType: JoinType ///

A list of the two columns to be joined.

@required Columns: JoinColumns } ///

Specifies a column to be joined.

structure JoinColumn { ///

The column to be joined.

@required From: EnclosedInStringProperty ///

The key of the column to be joined.

@required Keys: GlueStudioPathList } ///

A classifier for JSON content.

structure JsonClassifier { ///

The name of the classifier.

@required Name: NameString ///

The time that this classifier was registered.

CreationTime: Timestamp ///

The time that this classifier was last updated.

LastUpdated: Timestamp ///

The version of this classifier.

Version: VersionId = 0 ///

A JsonPath string defining the JSON data for the classifier to classify. /// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom Classifiers.

@required JsonPath: JsonPath } ///
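
A hedged sketch (not part of this model) of how a JsonClassifier is typically registered through CreateClassifier with boto3; the classifier name and JsonPath are illustrative only:

# Sketch only: registering a JSON classifier whose JsonPath selects the records to classify.
import boto3

glue = boto3.client("glue")

glue.create_classifier(
    JsonClassifier={
        "Name": "example-json-classifier",  # hypothetical classifier name
        "JsonPath": "$.records[*]",         # Glue supports a subset of JsonPath
    }
)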

Additional options for streaming.

structure KafkaStreamingSourceOptions { ///

A list of bootstrap server URLs, for example, as b-1.vpc-test-2.o4q88o.c6.kafka.us-east-1.amazonaws.com:9094. This option must be specified in the API call or defined in the table metadata in the Data Catalog.

BootstrapServers: EnclosedInStringProperty ///

The protocol used to communicate with brokers. The possible values are "SSL" or "PLAINTEXT".

SecurityProtocol: EnclosedInStringProperty ///

The name of the connection.

ConnectionName: EnclosedInStringProperty ///

The topic name as specified in Apache Kafka. You must specify at least one of "topicName", "assign" or "subscribePattern".

TopicName: EnclosedInStringProperty ///

The specific TopicPartitions to consume. You must specify at least one of "topicName", "assign" or "subscribePattern".

Assign: EnclosedInStringProperty ///

A Java regex string that identifies the topic list to subscribe to. You must specify at least one of "topicName", "assign" or "subscribePattern".

SubscribePattern: EnclosedInStringProperty ///

An optional classification.

Classification: EnclosedInStringProperty ///

Specifies the delimiter character.

Delimiter: EnclosedInStringProperty ///

The starting position in the Kafka topic to read data from. The possible values are "earliest" or "latest". The default value is "latest".

StartingOffsets: EnclosedInStringProperty ///

The end point when a batch query is ended. Possible values are either "latest" or a JSON string that specifies an ending offset for each TopicPartition.

EndingOffsets: EnclosedInStringProperty ///

The timeout in milliseconds to poll data from Kafka in Spark job executors. The default value is 512.

PollTimeoutMs: BoxedNonNegativeLong ///

The number of times to retry before failing to fetch Kafka offsets. The default value is 3.

NumRetries: BoxedNonNegativeInt ///

The time in milliseconds to wait before retrying to fetch Kafka offsets. The default value is 10.

RetryIntervalMs: BoxedNonNegativeLong ///

The rate limit on the maximum number of offsets that are processed per trigger interval. The specified total number of offsets is proportionally split across topicPartitions of different volumes. The default value is null, which means that the consumer reads all offsets until the known latest offset.

MaxOffsetsPerTrigger: BoxedNonNegativeLong ///

The desired minimum number of partitions to read from Kafka. The default value is null, which means that the number of spark partitions is equal to the number of Kafka partitions.

MinPartitions: BoxedNonNegativeInt ///

Whether to include the Kafka headers. When the option is set to "true", the data output will contain an additional column named "glue_streaming_kafka_headers" /// with type Array[Struct(key: String, value: String)]. The default value is "false". /// This option is available in Glue version 3.0 or later only.

IncludeHeaders: BoxedBoolean ///

When this option is set to 'true', the data output will contain an additional column named "__src_timestamp" that indicates the time when the corresponding record was received by the topic. The default value is 'false'. This option is supported in Glue version 4.0 or later.

AddRecordTimestamp: EnclosedInStringProperty ///

When this option is set to 'true', for each batch, it emits to CloudWatch the metrics for the duration between the oldest record received by the topic and the time it arrives in Glue. The metric's name is "glue.driver.streaming.maxConsumerLagInMs". The default value is 'false'. This option is supported in Glue version 4.0 or later.

EmitConsumerLagMetrics: EnclosedInStringProperty ///

The timestamp of the record in the Kafka topic to start reading data from. The possible values are a timestamp string in UTC format of the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00+08:00").

///

Only one of StartingTimestamp or StartingOffsets must be set.

StartingTimestamp: Iso8601DateTime } ///
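
A rough sketch of how a few of these options typically surface in a Glue streaming script (assumptions: a Glue 4.0 Spark streaming job and an existing Kafka connection; all names are placeholders, not part of this model):

# Sketch only: reading from Kafka in a Glue streaming job.
from awsglue.context import GlueContext
from pyspark.context import SparkContext

glue_ctx = GlueContext(SparkContext.getOrCreate())

kafka_frame = glue_ctx.create_data_frame.from_options(
    connection_type="kafka",
    connection_options={
        "connectionName": "example-kafka-connection",  # hypothetical connection
        "topicName": "example-topic",                  # one of topicName/assign/subscribePattern
        "startingOffsets": "earliest",                 # default is "latest"
        "classification": "json",
        "includeHeaders": "true",                      # adds the glue_streaming_kafka_headers column
    },
)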

A partition key pair consisting of a name and a type.

structure KeySchemaElement { ///

The name of a partition key.

@required Name: NameString ///

The type of a partition key.

@required Type: ColumnTypeString } ///

Additional options for the Amazon Kinesis streaming data source.

structure KinesisStreamingSourceOptions { ///

The URL of the Kinesis endpoint.

EndpointUrl: EnclosedInStringProperty ///

The name of the Kinesis data stream.

StreamName: EnclosedInStringProperty ///

An optional classification.

Classification: EnclosedInStringProperty ///

Specifies the delimiter character.

Delimiter: EnclosedInStringProperty ///

The starting position in the Kinesis data stream to read data from. The possible values are "latest", "trim_horizon", "earliest", or a timestamp string in UTC format in the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00-04:00"). The default value is "latest".

///

Note: Using a value that is a timestamp string in UTC format for "startingPosition" is supported only for Glue version 4.0 or later.

StartingPosition: StartingPosition ///

The maximum time spent in the job executor to fetch a record from the Kinesis data stream per shard, specified in milliseconds (ms). The default value is 1000.

MaxFetchTimeInMs: BoxedNonNegativeLong ///

The maximum number of records to fetch per shard in the Kinesis data stream. The default value is 100000.

MaxFetchRecordsPerShard: BoxedNonNegativeLong ///

The maximum number of records to fetch from the Kinesis data stream in each getRecords operation. The default value is 10000.

MaxRecordPerRead: BoxedNonNegativeLong ///

Adds a time delay between two consecutive getRecords operations. The default value is "False". This option is only configurable for Glue version 2.0 and above.

AddIdleTimeBetweenReads: BoxedBoolean ///

The minimum time delay between two consecutive getRecords operations, specified in ms. The default value is 1000. This option is only configurable for Glue version 2.0 and above.

IdleTimeBetweenReadsInMs: BoxedNonNegativeLong ///

The minimum time interval between two ListShards API calls for your script to consider resharding. The default value is 1s.

DescribeShardInterval: BoxedNonNegativeLong ///

The maximum number of retries for Kinesis Data Streams API requests. The default value is 3.

NumRetries: BoxedNonNegativeInt ///

The cool-off time period (specified in ms) before retrying the Kinesis Data Streams API call. The default value is 1000.

RetryIntervalMs: BoxedNonNegativeLong ///

The maximum cool-off time period (specified in ms) between two retries of a Kinesis Data Streams API call. The default value is 10000.

MaxRetryIntervalMs: BoxedNonNegativeLong ///

Avoids creating an empty microbatch job by checking for unread data in the Kinesis data stream before the batch is started. The default value is "False".

AvoidEmptyBatches: BoxedBoolean ///

The Amazon Resource Name (ARN) of the Kinesis data stream.

StreamArn: EnclosedInStringProperty ///

The Amazon Resource Name (ARN) of the role to assume using AWS Security Token Service (AWS STS). This role must have permissions for describe or read record operations for the Kinesis data stream. You must use this parameter when accessing a data stream in a different account. Used in conjunction with "awsSTSSessionName".

RoleArn: EnclosedInStringProperty ///

An identifier for the session assuming the role using AWS STS. You must use this parameter when accessing a data stream in a different account. Used in conjunction with "awsSTSRoleARN".

RoleSessionName: EnclosedInStringProperty ///

When this option is set to 'true', the data output will contain an additional column named "__src_timestamp" that indicates the time when the corresponding record was received by the stream. The default value is 'false'. This option is supported in Glue version 4.0 or later.

AddRecordTimestamp: EnclosedInStringProperty ///

When this option is set to 'true', for each batch, it emits to CloudWatch the metrics for the duration between the oldest record received by the stream and the time it arrives in Glue. The metric's name is "glue.driver.streaming.maxConsumerLagInMs". The default value is 'false'. This option is supported in Glue version 4.0 or later.

EmitConsumerLagMetrics: EnclosedInStringProperty ///

The timestamp of the record in the Kinesis data stream to start reading data from. The possible values are a timestamp string in UTC format of the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00+08:00").

StartingTimestamp: Iso8601DateTime } ///
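
A hedged sketch of a cross-account Kinesis read in a Glue streaming script, which is the case where the role ARN and session name options above apply (assumptions: Glue 4.0 streaming job; all ARNs and names are placeholders):

# Sketch only: reading a cross-account Kinesis stream in a Glue streaming job.
from awsglue.context import GlueContext
from pyspark.context import SparkContext

glue_ctx = GlueContext(SparkContext.getOrCreate())

kinesis_frame = glue_ctx.create_data_frame.from_options(
    connection_type="kinesis",
    connection_options={
        "streamARN": "arn:aws:kinesis:us-east-1:123456789012:stream/example",        # placeholder
        "startingPosition": "TRIM_HORIZON",
        "classification": "json",
        "awsSTSRoleARN": "arn:aws:iam::123456789012:role/ExampleCrossAccountRole",   # placeholder
        "awsSTSSessionName": "glue-streaming-session",
    },
)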

Specifies configuration properties for a labeling set generation task run.

structure LabelingSetGenerationTaskRunProperties { ///

The Amazon Simple Storage Service (Amazon S3) path where you will generate the labeling /// set.

OutputS3Path: UriString } ///

Specifies Lake Formation configuration settings for the crawler.

structure LakeFormationConfiguration { ///

Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.

UseLakeFormationCredentials: NullableBoolean ///

Required for cross-account crawls. For crawls in the same account as the target data, this can be left as null.

AccountId: AccountId } ///
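
A minimal sketch (not part of this model) of how these two fields appear in a CreateCrawler call with boto3; the crawler name, role, bucket, and account ID are placeholders:

# Sketch only: a crawler that uses Lake Formation credentials for a cross-account crawl.
import boto3

glue = boto3.client("glue")

glue.create_crawler(
    Name="example-crawler",
    Role="arn:aws:iam::123456789012:role/ExampleCrawlerRole",
    DatabaseName="example_db",
    Targets={"S3Targets": [{"Path": "s3://example-bucket/data/"}]},
    LakeFormationConfiguration={
        "UseLakeFormationCredentials": True,
        "AccountId": "111122223333",  # omit (null) for same-account crawls
    },
)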

When there are multiple versions of a blueprint and the latest version has some errors, this attribute indicates the last successful blueprint definition that is available with the service.

structure LastActiveDefinition { ///

The description of the blueprint.

Description: Generic512CharString ///

The date and time the blueprint was last modified.

LastModifiedOn: TimestampValue ///

A JSON string specifying the parameters for the blueprint.

ParameterSpec: BlueprintParameterSpec ///

Specifies a path in Amazon S3 where the blueprint is published by the Glue developer.

BlueprintLocation: GenericString ///

Specifies a path in Amazon S3 where the blueprint is copied when you create or update the blueprint.

BlueprintServiceLocation: GenericString } ///

Status and error information about the most recent crawl.

structure LastCrawlInfo { ///

Status of the last crawl.

Status: LastCrawlStatus ///

If an error occurred, the error information about the last crawl.

ErrorMessage: DescriptionString ///

The log group for the last crawl.

LogGroup: LogGroup ///

The log stream for the last crawl.

LogStream: LogStream ///

The prefix for a message about this crawl.

MessagePrefix: MessagePrefix ///

The time at which the crawl started.

StartTime: Timestamp } ///

Specifies data lineage configuration settings for the crawler.

structure LineageConfiguration { ///

Specifies whether data lineage is enabled for the crawler. Valid values are:

///
  • ENABLE: enables data lineage for the crawler
  • DISABLE: disables data lineage for the crawler

CrawlerLineageSettings: CrawlerLineageSettings } @input structure ListBlueprintsRequest { ///

A continuation token, if this is a continuation request.

NextToken: GenericString ///

The maximum size of a list to return.

MaxResults: PageSize ///

Filters the list by an Amazon Web Services resource tag.

Tags: TagsMap } @output structure ListBlueprintsResponse { ///

List of names of blueprints in the account.

Blueprints: BlueprintNames ///

A continuation token, if not all blueprint names have been returned.

NextToken: GenericString } @input structure ListCrawlersRequest { ///

The maximum size of a list to return.

MaxResults: PageSize ///

A continuation token, if this is a continuation request.

NextToken: Token ///

Specifies to return only these tagged resources.

Tags: TagsMap } @output structure ListCrawlersResponse { ///

The names of all crawlers in the account, or the crawlers with the specified tags.

CrawlerNames: CrawlerNameList ///

A continuation token, if the returned list does not contain the /// last metric available.

NextToken: Token } @input structure ListCrawlsRequest { ///

The name of the crawler whose runs you want to retrieve.

@required CrawlerName: NameString ///

The maximum number of results to return. The default is 20, and maximum is 100.

MaxResults: PageSize ///

Filters the crawls by the criteria you specify in a list of CrawlsFilter objects.

Filters: CrawlsFilterList ///

A continuation token, if this is a continuation call.

NextToken: Token } @output structure ListCrawlsResponse { ///

A list of CrawlerHistory objects representing the crawl runs that meet your criteria.

Crawls: CrawlerHistoryList ///

A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.

NextToken: Token } @input structure ListCustomEntityTypesRequest { ///

A paginated token to offset the results.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize ///

A list of key-value pair tags.

Tags: TagsMap } @output structure ListCustomEntityTypesResponse { ///

A list of CustomEntityType objects representing custom patterns.

CustomEntityTypes: CustomEntityTypes ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure ListDataQualityResultsRequest { ///

The filter criteria.

Filter: DataQualityResultFilterCriteria ///

A paginated token to offset the results.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize } @output structure ListDataQualityResultsResponse { ///

A list of DataQualityResultDescription objects.

@required Results: DataQualityResultDescriptionList ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure ListDataQualityRuleRecommendationRunsRequest { ///

The filter criteria.

Filter: DataQualityRuleRecommendationRunFilter ///

A paginated token to offset the results.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize } @output structure ListDataQualityRuleRecommendationRunsResponse { ///

A list of DataQualityRuleRecommendationRunDescription objects.

Runs: DataQualityRuleRecommendationRunList ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure ListDataQualityRulesetEvaluationRunsRequest { ///

The filter criteria.

Filter: DataQualityRulesetEvaluationRunFilter ///

A paginated token to offset the results.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize } @output structure ListDataQualityRulesetEvaluationRunsResponse { ///

A list of DataQualityRulesetEvaluationRunDescription objects representing data quality ruleset runs.

Runs: DataQualityRulesetEvaluationRunList ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure ListDataQualityRulesetsRequest { ///

A paginated token to offset the results.

NextToken: PaginationToken ///

The maximum number of results to return.

MaxResults: PageSize ///

The filter criteria.

Filter: DataQualityRulesetFilterCriteria ///

A list of key-value pair tags.

Tags: TagsMap } @output structure ListDataQualityRulesetsResponse { ///

A paginated list of rulesets for the specified list of Glue tables.

Rulesets: DataQualityRulesetList ///

A pagination token, if more results are available.

NextToken: PaginationToken } @input structure ListDevEndpointsRequest { ///

A continuation token, if this is a continuation request.

NextToken: GenericString ///

The maximum size of a list to return.

MaxResults: PageSize ///

Specifies to return only these tagged resources.

Tags: TagsMap } @output structure ListDevEndpointsResponse { ///

The names of all the DevEndpoints in the account, or the /// DevEndpoints with the specified tags.

DevEndpointNames: DevEndpointNameList ///

A continuation token, if the returned list does not contain the /// last metric available.

NextToken: GenericString } @input structure ListJobsRequest { ///

A continuation token, if this is a continuation request.

NextToken: GenericString ///

The maximum size of a list to return.

MaxResults: PageSize ///

Specifies to return only these tagged resources.

Tags: TagsMap } @output structure ListJobsResponse { ///

The names of all jobs in the account, or the jobs with the specified tags.

JobNames: JobNameList ///

A continuation token, if the returned list does not contain the /// last metric available.

NextToken: GenericString } @input structure ListMLTransformsRequest { ///

A continuation token, if this is a continuation request.

NextToken: PaginationToken ///

The maximum size of a list to return.

MaxResults: PageSize ///

A TransformFilterCriteria used to filter the machine learning transforms.

Filter: TransformFilterCriteria ///

A TransformSortCriteria used to sort the machine learning transforms.

Sort: TransformSortCriteria ///

Specifies to return only these tagged resources.

Tags: TagsMap } @output structure ListMLTransformsResponse { ///

The identifiers of all the machine learning transforms in the account, or the /// machine learning transforms with the specified tags.

@required TransformIds: TransformIdList ///

A continuation token, if the returned list does not contain the /// last metric available.

NextToken: PaginationToken } @output structure ListRegistriesResponse { ///

An array of RegistryDetailedListItem objects containing minimal details of each registry.

Registries: RegistryListDefinition ///

A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.

NextToken: SchemaRegistryTokenString } @output structure ListSchemasResponse { ///

An array of SchemaListItem objects containing details of each schema.

Schemas: SchemaListDefinition ///

A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.

NextToken: SchemaRegistryTokenString } @output structure ListSchemaVersionsResponse { ///

An array of SchemaVersionList objects containing details of each schema version.

Schemas: SchemaVersionList ///

A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.

NextToken: SchemaRegistryTokenString } @input structure ListSessionsRequest { ///

The token for the next set of results, or null if there are no more results.

NextToken: OrchestrationToken ///

The maximum number of results.

MaxResults: PageSize ///

Tags belonging to the session.

Tags: TagsMap ///

The origin of the request.

RequestOrigin: OrchestrationNameString } @output structure ListSessionsResponse { ///

Returns the ID of the session.

Ids: SessionIdList ///

Returns the session object.

Sessions: SessionList ///

The token for the next set of results, or null if there are no more results.

NextToken: OrchestrationToken } @input structure ListStatementsRequest { ///

The Session ID of the statements.

@required SessionId: NameString ///

The origin of the request to list statements.

RequestOrigin: OrchestrationNameString ///

A continuation token, if this is a continuation call.

NextToken: OrchestrationToken } @output structure ListStatementsResponse { ///

Returns the list of statements.

Statements: StatementList ///

A continuation token, if not all statements have yet been returned.

NextToken: OrchestrationToken } @input structure ListTriggersRequest { ///

A continuation token, if this is a continuation request.

NextToken: GenericString ///

The name of the job for which to retrieve triggers. The trigger that can start this job /// is returned. If there is no such trigger, all triggers are returned.

DependentJobName: NameString ///

The maximum size of a list to return.

MaxResults: PageSize ///

Specifies to return only these tagged resources.

Tags: TagsMap } @output structure ListTriggersResponse { ///

The names of all triggers in the account, or the triggers with the specified tags.

TriggerNames: TriggerNameList ///

A continuation token, if the returned list does not contain the /// last metric available.

NextToken: GenericString } @input structure ListWorkflowsRequest { ///

A continuation token, if this is a continuation request.

NextToken: GenericString ///

The maximum size of a list to return.

MaxResults: PageSize } @output structure ListWorkflowsResponse { ///

List of names of workflows in the account.

Workflows: WorkflowNames ///

A continuation token, if not all workflow names have been returned.

NextToken: GenericString } ///
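
The List* request/response pairs above all share the same NextToken continuation pattern. A short boto3 sketch of that pattern using ListJobs (the tag filter is a hypothetical example, not part of this model):

# Sketch only: paging through ListJobs with NextToken until the last page is reached.
import boto3

glue = boto3.client("glue")

job_names, token = [], None
while True:
    kwargs = {"MaxResults": 100, "Tags": {"team": "data-eng"}}  # hypothetical tag filter
    if token:
        kwargs["NextToken"] = token
    page = glue.list_jobs(**kwargs)
    job_names.extend(page.get("JobNames", []))
    token = page.get("NextToken")
    if not token:
        break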

The location of resources.

structure Location { ///

A JDBC location.

Jdbc: CodeGenNodeArgs ///

An Amazon Simple Storage Service (Amazon S3) location.

S3: CodeGenNodeArgs ///

An Amazon DynamoDB table location.

DynamoDB: CodeGenNodeArgs } ///

Defines column statistics supported for integer data columns.

structure LongColumnStatisticsData { ///

The lowest value in the column.

MinimumValue: Long = 0 ///

The highest value in the column.

MaximumValue: Long = 0 ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 ///

The number of distinct values in a column.

@required NumberOfDistinctValues: NonNegativeLong = 0 } ///

Specifies the mapping of data property keys.

structure Mapping { ///

The name that the column should have after the mapping is applied. Can be the same as FromPath.

ToKey: EnclosedInStringProperty ///

The table or column to be modified.

FromPath: EnclosedInStringProperties ///

The type of the data to be modified.

FromType: EnclosedInStringProperty ///

The data type that the data is to be modified to.

ToType: EnclosedInStringProperty ///

If true, then the column is removed.

Dropped: BoxedBoolean ///

Only applicable to nested data structures. If you want to change the parent structure, but also one of its children, you can fill out this data structure. It is also a Mapping, but its FromPath will be the parent's FromPath plus the FromPath from this structure.

///

For the children part, suppose you have the structure:

///

{
  "FromPath": "OuterStructure",
  "ToKey": "OuterStructure",
  "ToType": "Struct",
  "Dropped": false,
  "Children": [{
    "FromPath": "inner",
    "ToKey": "inner",
    "ToType": "Double",
    "Dropped": false
  }]
} ///

///

You can specify a Mapping that looks like:

///

{
  "FromPath": "OuterStructure",
  "ToKey": "OuterStructure",
  "ToType": "Struct",
  "Dropped": false,
  "Children": [{
    "FromPath": "inner",
    "ToKey": "inner",
    "ToType": "Double",
    "Dropped": false
  }]
} ///

Children: Mappings } ///

Defines a mapping.

structure MappingEntry { ///

The name of the source table.

SourceTable: TableName ///

The source path.

SourcePath: SchemaPathString ///

The source type.

SourceType: FieldType ///

The target table.

TargetTable: TableName ///

The target path.

TargetPath: SchemaPathString ///

The target type.

TargetType: FieldType } ///

Specifies a transform that merges a DynamicFrame with a staging DynamicFrame based on the specified primary keys to identify records. Duplicate records (records with the same primary keys) are not de-duplicated.

structure Merge { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: TwoInputs ///

The source DynamicFrame that will be merged with a staging DynamicFrame.

@required Source: NodeId ///

The list of primary key fields to match records from the source and staging dynamic frames.

@required PrimaryKeys: GlueStudioPathList } ///

A structure containing metadata information for a schema version.

structure MetadataInfo { ///

The metadata key’s corresponding value.

MetadataValue: MetadataValueString ///

The time at which the entry was created.

CreatedTime: CreatedTimestamp ///

Other metadata belonging to the same metadata key.

OtherMetadataValueList: OtherMetadataValueList } ///

A structure containing a key value pair for metadata.

structure MetadataKeyValuePair { ///

A metadata key.

MetadataKey: MetadataKeyString ///

A metadata key’s corresponding value.

MetadataValue: MetadataValueString } ///

Specifies a Microsoft SQL server data source in the Glue Data Catalog.

structure MicrosoftSQLServerCatalogSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } ///

Specifies a target that uses Microsoft SQL.

structure MicrosoftSQLServerCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty } ///

A structure for a machine learning transform.

structure MLTransform { ///

The unique transform ID that is generated for the machine learning transform. The ID is /// guaranteed to be unique and does not change.

TransformId: HashString ///

A user-defined name for the machine learning transform. Names are not guaranteed unique /// and can be changed at any time.

Name: NameString ///

A user-defined, long-form description text for the machine learning transform. /// Descriptions are not guaranteed to be unique and can be changed at any time.

Description: DescriptionString ///

The current status of the machine learning transform.

Status: TransformStatusType ///

A timestamp. The time and date that this machine learning transform was created.

CreatedOn: Timestamp ///

A timestamp. The last point in time when this machine learning transform was modified.

LastModifiedOn: Timestamp ///

A list of Glue table definitions used by the transform.

InputRecordTables: GlueTables ///

A TransformParameters object. You can use parameters to tune (customize) the /// behavior of the machine learning transform by specifying what data it learns from and your /// preference on various tradeoffs (such as precision vs. recall, or accuracy vs. cost).

Parameters: TransformParameters ///

An EvaluationMetrics object. Evaluation metrics provide an estimate of the quality of your machine learning transform.

EvaluationMetrics: EvaluationMetrics ///

A count identifier for the labeling files generated by Glue for this transform. As you create a better transform, you can iteratively download, label, and upload the labeling file.

LabelCount: LabelCount = 0 ///

A map of key-value pairs representing the columns and data types that this transform can /// run against. Has an upper bound of 100 columns.

Schema: TransformSchema ///

The name or Amazon Resource Name (ARN) of the IAM role with the required permissions. The required permissions include both Glue service role permissions to Glue resources, and Amazon S3 permissions required by the transform.

///
  • This role needs Glue service role permissions to allow access to resources in Glue. See Attach a Policy to IAM Users That Access Glue.
  • This role needs permission to your Amazon Simple Storage Service (Amazon S3) sources, targets, temporary directory, scripts, and any libraries used by the task run for this transform.

Role: RoleString ///

This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.

GlueVersion: GlueVersionString ///

The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of /// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more /// information, see the Glue pricing /// page.

///

/// MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.

///
  • If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
  • If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set.
  • If WorkerType is set, then NumberOfWorkers is required (and vice versa).
  • MaxCapacity and NumberOfWorkers must both be at least 1.

///

When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.

MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when a task of this transform runs. Accepts a value of Standard, G.1X, or G.2X.

///
  • For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
  • For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
  • For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.

///

MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.

///
  • If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
  • If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set.
  • If WorkerType is set, then NumberOfWorkers is required (and vice versa).
  • MaxCapacity and NumberOfWorkers must both be at least 1.

WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when a task of the transform runs.

///

If WorkerType is set, then NumberOfWorkers is required (and vice versa).

NumberOfWorkers: NullableInteger ///

The timeout in minutes of the machine learning transform.

Timeout: Timeout ///

The maximum number of times to retry after an MLTaskRun of the machine /// learning transform fails.

MaxRetries: NullableInteger ///

The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.

TransformEncryption: TransformEncryption } ///
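
As a hedged illustration (not part of this model), a machine learning transform with the fields described above is created through CreateMLTransform. A boto3 sketch; all names, the role ARN, and the table are placeholders. Note the mutual exclusivity described above: WorkerType/NumberOfWorkers are set here, so MaxCapacity is not.

# Sketch only: creating a FindMatches ML transform with boto3.
import boto3

glue = boto3.client("glue")

glue.create_ml_transform(
    Name="example-find-matches",
    Role="arn:aws:iam::123456789012:role/ExampleGlueRole",  # placeholder role
    GlueVersion="1.0",           # Glue 1.0 is recommended for most customers
    WorkerType="G.1X",
    NumberOfWorkers=5,
    InputRecordTables=[{"DatabaseName": "example_db", "TableName": "customers"}],
    Parameters={
        "TransformType": "FIND_MATCHES",
        "FindMatchesParameters": {
            "PrimaryKeyColumnName": "customer_id",   # hypothetical primary key column
            "PrecisionRecallTradeoff": 0.9,
        },
    },
)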

The machine learning transform is not ready to run.

@error("client") structure MLTransformNotReadyException { ///

A message describing the problem.

Message: MessageString } ///

The encryption-at-rest settings of the transform that apply to accessing user data.

structure MLUserDataEncryption { ///

The encryption mode applied to user data. Valid values are:

///
  • DISABLED: encryption is disabled
  • SSEKMS: use of server-side encryption with Key Management Service (SSE-KMS) for user data stored in Amazon S3.

@required MlUserDataEncryptionMode: MLUserDataEncryptionModeString ///

The ID for the customer-provided KMS key.

KmsKeyId: NameString } ///

Specifies an Amazon DocumentDB or MongoDB data store to crawl.

structure MongoDBTarget { ///

The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.

ConnectionName: ConnectionName ///

The path of the Amazon DocumentDB or MongoDB target (database/collection).

Path: Path ///

Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table.

///

A value of true means to scan all records, while a value of false means to sample the records. If no value is specified, the value defaults to true.

ScanAll: NullableBoolean } ///

Specifies a MySQL data source in the Glue Data Catalog.

structure MySQLCatalogSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } ///

Specifies a target that uses MySQL.

structure MySQLCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty } ///

A node represents a Glue component (trigger, crawler, or job) on a workflow graph.

structure Node { ///

The type of Glue component represented by the node.

Type: NodeType ///

The name of the Glue component represented by the node.

Name: NameString ///

The unique Id assigned to the node within the workflow.

UniqueId: NameString ///

Details of the Trigger when the node represents a Trigger.

TriggerDetails: TriggerNodeDetails ///

Details of the Job when the node represents a Job.

JobDetails: JobNodeDetails ///

Details of the crawler when the node represents a crawler.

CrawlerDetails: CrawlerNodeDetails } ///

There is no applicable schedule.

@error("client") structure NoScheduleException { ///

A message describing the problem.

Message: MessageString } ///

Specifies configuration properties of a notification.

structure NotificationProperty { ///

After a job run starts, the number of minutes to wait before /// sending a job run delay notification.

NotifyDelayAfter: NotifyDelayAfter } ///

Represents whether certain values are recognized as null values for removal.

structure NullCheckBoxList { ///

Specifies that an empty string is considered as a null value.

IsEmpty: BoxedBoolean ///

Specifies that a value spelling out the word 'null' is considered as a null value.

IsNullString: BoxedBoolean ///

Specifies that an integer value of -1 is considered as a null value.

IsNegOne: BoxedBoolean } ///

Represents a custom null value, such as zeros or another value being used as a null placeholder unique to the dataset.

structure NullValueField { ///

The value of the null placeholder.

@required Value: EnclosedInStringProperty ///

The datatype of the value.

@required Datatype: Datatype } ///

A structure representing an open format table.

structure OpenTableFormatInput { ///

Specifies an IcebergInput structure that defines an Apache Iceberg metadata table.

IcebergInput: IcebergInput } ///

The operation timed out.

@error("client") structure OperationTimeoutException { ///

A message describing the problem.

Message: MessageString } ///

Specifies an option value.

structure Option { ///

Specifies the value of the option.

Value: EnclosedInStringProperty ///

Specifies the label of the option.

Label: EnclosedInStringProperty ///

Specifies the description of the option.

Description: EnclosedInStringProperty } ///

Specifies an Oracle data source in the Glue Data Catalog.

structure OracleSQLCatalogSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } ///

Specifies a target that uses Oracle SQL.

structure OracleSQLCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty } ///

Specifies the sort order of a sorted column.

structure Order { ///

The name of the column.

@required Column: NameString ///

Indicates that the column is sorted in ascending order /// (== 1), or in descending order (==0).

@required SortOrder: IntegerFlag = 0 } ///

A structure containing other metadata for a schema version belonging to the same metadata key.

structure OtherMetadataValueListItem { ///

The metadata key’s corresponding value for the other metadata belonging to the same metadata key.

MetadataValue: MetadataValueString ///

The time at which the entry was created.

CreatedTime: CreatedTimestamp } ///

Represents a slice of table data.

structure Partition { ///

The values of the partition.

Values: ValueStringList ///

The name of the catalog database in which to create the partition.

DatabaseName: NameString ///

The name of the database table in which to create the partition.

TableName: NameString ///

The time at which the partition was created.

CreationTime: Timestamp ///

The last time at which the partition was accessed.

LastAccessTime: Timestamp ///

Provides information about the physical /// location where the partition is stored.

StorageDescriptor: StorageDescriptor ///

These key-value pairs define partition parameters.

Parameters: ParametersMap ///

The last time at which column statistics were computed for this /// partition.

LastAnalyzedTime: Timestamp ///

The ID of the Data Catalog in which the partition resides.

CatalogId: CatalogIdString } ///

Contains information about a partition error.

structure PartitionError { ///

The values that define the partition.

PartitionValues: ValueStringList ///

The details about the partition error.

ErrorDetail: ErrorDetail } ///

A structure for a partition index.

structure PartitionIndex { ///

The keys for the partition index.

@required Keys: KeyList ///

The name of the partition index.

@required IndexName: NameString } ///

A descriptor for a partition index in a table.

structure PartitionIndexDescriptor { ///

The name of the partition index.

@required IndexName: NameString ///

A list of one or more keys, as KeySchemaElement structures, for the partition index.

@required Keys: KeySchemaElementList ///

The status of the partition index.

///

The possible statuses are:

///
  • CREATING: The index is being created. When an index is in a CREATING state, the index or its table cannot be deleted.
  • ACTIVE: The index creation succeeds.
  • FAILED: The index creation fails.
  • DELETING: The index is deleted from the list of indexes.

@required IndexStatus: PartitionIndexStatus ///

A list of errors that can occur when registering partition indexes for an existing table.

BackfillErrors: BackfillErrors } ///

The structure used to create and update a partition.

structure PartitionInput { ///

The values of the partition. Although this parameter is not required by the SDK, you must specify this parameter for a valid input.

///

The values for the keys for the new partition must be passed as an array of String objects that must be ordered in the same order as the partition keys appearing in the Amazon S3 prefix. Otherwise Glue will add the values to the wrong keys.

Values: ValueStringList ///

The last time at which the partition was accessed.

LastAccessTime: Timestamp ///

Provides information about the physical /// location where the partition is stored.

StorageDescriptor: StorageDescriptor ///

These key-value pairs define partition parameters.

Parameters: ParametersMap ///

The last time at which column statistics were computed for this partition.

LastAnalyzedTime: Timestamp } ///
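
A short sketch (not part of this model) of the ordering requirement noted above: the Values array must follow the table's partition keys, for example (year, month, day). All names and paths are placeholders.

# Sketch only: creating a partition whose Values match the table's partition key order.
import boto3

glue = boto3.client("glue")

glue.create_partition(
    DatabaseName="example_db",
    TableName="events",
    PartitionInput={
        "Values": ["2024", "01", "15"],  # same order as the table's partition keys
        "StorageDescriptor": {
            "Location": "s3://example-bucket/events/year=2024/month=01/day=15/",
            "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
            "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
            "SerdeInfo": {"SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe"},
        },
    },
)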

Contains a list of values defining partitions.

structure PartitionValueList { ///

The list of values.

@required Values: ValueStringList } ///

Indicates a mismatch between the SupportedPermissionType used in the query request and the permissions defined on the target table.

@error("client") structure PermissionTypeMismatchException { ///

There is a mismatch between the SupportedPermissionType used in the query request /// and the permissions defined on the target table.

Message: MessageString } ///

Specifies the physical requirements for a connection.

structure PhysicalConnectionRequirements { ///

The subnet ID used by the connection.

SubnetId: NameString ///

The security group ID list used by the connection.

SecurityGroupIdList: SecurityGroupIdList ///

The connection's Availability Zone. This field is redundant because the specified subnet /// implies the Availability Zone to be used. Currently the field must be populated, but it will /// be deprecated in the future.

AvailabilityZone: NameString } ///

Specifies a transform that identifies, removes or masks PII data.

structure PIIDetection { ///

The name of the transform node.

@required Name: NodeName ///

The node ID inputs to the transform.

@required Inputs: OneInput ///

Indicates the type of PIIDetection transform.

@required PiiType: PiiType ///

Indicates the types of entities the PIIDetection transform will identify as PII data.

///

/// PII type entities include: PERSON_NAME, DATE, USA_SSN, EMAIL, USA_ITIN, USA_PASSPORT_NUMBER, PHONE_NUMBER, BANK_ACCOUNT, IP_ADDRESS, MAC_ADDRESS, USA_CPT_CODE, USA_HCPCS_CODE, USA_NATIONAL_DRUG_CODE, USA_MEDICARE_BENEFICIARY_IDENTIFIER, USA_HEALTH_INSURANCE_CLAIM_NUMBER, CREDIT_CARD, USA_NATIONAL_PROVIDER_IDENTIFIER, USA_DEA_NUMBER, USA_DRIVING_LICENSE ///

@required EntityTypesToDetect: EnclosedInStringProperties ///

Indicates the output column name that will contain any entity type detected in that row.

OutputColumnName: EnclosedInStringProperty ///

Indicates the fraction of the data to sample when scanning for PII entities.

SampleFraction: BoxedDoubleFraction ///

Indicates the fraction of the data that must be met in order for a column to be identified as PII data.

ThresholdFraction: BoxedDoubleFraction ///

Indicates the value that will replace the detected entity.

MaskValue: MaskValue } ///

Specifies a PostgreSQL data source in the Glue Data Catalog.

structure PostgreSQLCatalogSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } ///

Specifies a target that uses Postgres SQL.

structure PostgreSQLCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty } ///

A job run that was used in the predicate of a conditional trigger /// that triggered this job run.

structure Predecessor { ///

The name of the job definition used by the predecessor job run.

JobName: NameString ///

The job-run ID of the predecessor job run.

RunId: IdString } ///

Defines the predicate of the trigger, which determines when it fires.

structure Predicate { ///

An optional field if only one condition is listed. If multiple conditions are listed, then /// this field is required.

Logical: Logical ///

A list of the conditions that determine when the trigger will fire.

Conditions: ConditionList } ///
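
A hedged sketch (not part of this model) of how a Predicate with multiple Conditions is supplied to CreateTrigger with boto3; the job names are placeholders. Because more than one condition is listed, Logical is set, as described above.

# Sketch only: a conditional trigger that starts a downstream job after both upstream jobs succeed.
import boto3

glue = boto3.client("glue")

glue.create_trigger(
    Name="example-conditional-trigger",
    Type="CONDITIONAL",
    StartOnCreation=True,
    Predicate={
        "Logical": "AND",
        "Conditions": [
            {"LogicalOperator": "EQUALS", "JobName": "extract-job", "State": "SUCCEEDED"},
            {"LogicalOperator": "EQUALS", "JobName": "cleanse-job", "State": "SUCCEEDED"},
        ],
    },
    Actions=[{"JobName": "load-job"}],
)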

Permissions granted to a principal.

structure PrincipalPermissions { ///

The principal who is granted permissions.

Principal: DataLakePrincipal ///

The permissions that are granted to the principal.

Permissions: PermissionList } ///

Defines a property predicate.

structure PropertyPredicate { ///

The key of the property.

Key: ValueString ///

The value of the property.

Value: ValueString ///

The comparator used to compare this property to others.

Comparator: Comparator } @input structure PutDataCatalogEncryptionSettingsRequest { ///

The ID of the Data Catalog to set the security configuration for. If none is provided, the /// Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The security configuration to set.

@required DataCatalogEncryptionSettings: DataCatalogEncryptionSettings } @output structure PutDataCatalogEncryptionSettingsResponse {} @input structure PutResourcePolicyRequest { ///

Contains the policy document to set, in JSON format.

@required PolicyInJson: PolicyJsonString ///

Do not use. For internal use only.

ResourceArn: GlueResourceArn ///

The hash value returned when the previous policy was set using /// PutResourcePolicy. Its purpose is to prevent concurrent modifications of a /// policy. Do not use this parameter if no previous policy has been set.

PolicyHashCondition: HashString ///

A value of MUST_EXIST is used to update a policy. A value of /// NOT_EXIST is used to create a new policy. If a value of NONE or a /// null value is used, the call does not depend on the existence of a policy.

PolicyExistsCondition: ExistCondition ///

If 'TRUE', indicates that you are using both methods to grant cross-account /// access to Data Catalog resources:

///
  • By directly updating the resource policy with PutResourcePolicy
  • By using the Grant permissions command on the Amazon Web Services Management Console.

///

Must be set to 'TRUE' if you have already used the Management Console to grant cross-account access, otherwise the call fails. Default is 'FALSE'.

EnableHybrid: EnableHybridValues } @output structure PutResourcePolicyResponse { ///

A hash of the policy that has just been set. This must /// be included in a subsequent call that overwrites or updates /// this policy.

PolicyHash: HashString } @output structure PutSchemaVersionMetadataResponse { ///
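
A rough sketch (not part of this model) of the PolicyHash flow described above: the hash returned by one PutResourcePolicy call guards the next update via PolicyHashCondition. The policy document, ARNs, and account IDs are placeholders.

# Sketch only: setting a Data Catalog resource policy, then updating it with a hash condition.
import boto3, json

glue = boto3.client("glue")

policy = json.dumps({
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"AWS": "arn:aws:iam::111122223333:root"},  # placeholder account
        "Action": "glue:GetTables",
        "Resource": "arn:aws:glue:us-east-1:123456789012:*",     # placeholder resource
    }],
})

created = glue.put_resource_policy(PolicyInJson=policy, PolicyExistsCondition="NOT_EXIST")

# Later update: pass the previous hash so concurrent modifications are rejected.
glue.put_resource_policy(
    PolicyInJson=policy,
    PolicyExistsCondition="MUST_EXIST",
    PolicyHashCondition=created["PolicyHash"],
)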

The Amazon Resource Name (ARN) for the schema.

SchemaArn: GlueResourceArn ///

The name for the schema.

SchemaName: SchemaRegistryNameString ///

The name for the registry.

RegistryName: SchemaRegistryNameString ///

The latest version of the schema.

LatestVersion: LatestSchemaVersionBoolean = false ///

The version number of the schema.

VersionNumber: VersionLongNumber = 0 ///

The unique version ID of the schema version.

SchemaVersionId: SchemaVersionIdString ///

The metadata key.

MetadataKey: MetadataKeyString ///

The value of the metadata key.

MetadataValue: MetadataValueString } @input structure PutWorkflowRunPropertiesRequest { ///

Name of the workflow which was run.

@required Name: NameString ///

The ID of the workflow run for which the run properties should be updated.

@required RunId: IdString ///

The properties to put for the specified run.

@required RunProperties: WorkflowRunProperties } @output structure PutWorkflowRunPropertiesResponse {} @output structure QuerySchemaVersionMetadataResponse { ///

A map of a metadata key and associated values.

MetadataInfoMap: MetadataInfoMap ///

The unique version ID of the schema version.

SchemaVersionId: SchemaVersionIdString ///

A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.

NextToken: SchemaRegistryTokenString } ///

A Glue Studio node that uses a Glue DataBrew recipe in Glue jobs.

structure Recipe { ///

The name of the Glue Studio node.

@required Name: NodeName ///

The nodes that are inputs to the recipe node, identified by id.

@required Inputs: OneInput ///

A reference to the DataBrew recipe used by the node.

@required RecipeReference: RecipeReference } ///

A reference to a Glue DataBrew recipe.

structure RecipeReference { ///

The ARN of the DataBrew recipe.

@required RecipeArn: EnclosedInStringProperty ///

The RecipeVersion of the DataBrew recipe.

@required RecipeVersion: RecipeVersion } ///

When crawling an Amazon S3 data source after the first crawl is complete, specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run. For more information, see Incremental Crawls in Glue in the developer guide.

structure RecrawlPolicy { ///

Specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run.

///

A value of CRAWL_EVERYTHING specifies crawling the entire dataset again.

///

A value of CRAWL_NEW_FOLDERS_ONLY specifies crawling only folders that were added since the last crawler run.

///

A value of CRAWL_EVENT_MODE specifies crawling only the changes identified by Amazon S3 events.

RecrawlBehavior: RecrawlBehavior } ///

Specifies an Amazon Redshift data store.

structure RedshiftSource { ///

The name of the Amazon Redshift data store.

@required Name: NodeName ///

The database to read from.

@required Database: EnclosedInStringProperty ///

The database table to read from.

@required Table: EnclosedInStringProperty ///

The Amazon S3 path where temporary data can be staged when copying out of the database.

RedshiftTmpDir: EnclosedInStringProperty ///

The IAM role with permissions.

TmpDirIAMRole: EnclosedInStringProperty } ///

Specifies a target that uses Amazon Redshift.

structure RedshiftTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty ///

The Amazon S3 path where temporary data can be staged when copying out of the database.

RedshiftTmpDir: EnclosedInStringProperty ///

The IAM role with permissions.

TmpDirIAMRole: EnclosedInStringProperty ///

The set of options to configure an upsert operation when writing to a Redshift target.

UpsertRedshiftOptions: UpsertRedshiftTargetOptions } @output structure RegisterSchemaVersionResponse { ///

The unique ID that represents the version of this schema.

SchemaVersionId: SchemaVersionIdString ///

The version of this schema (for sync flow only, in case this is the first version).

VersionNumber: VersionLongNumber = 0 ///

The status of the schema version.

Status: SchemaVersionStatus } ///
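A hedged boto3 sketch of registering a new schema version using the SchemaId lookup described later in this model; the registry name, schema name, and Avro definition are placeholders.

    import json
    import boto3

    glue = boto3.client("glue")

    # Placeholder Avro record; any definition compatible with the schema's data format works.
    definition = json.dumps({
        "type": "record",
        "name": "Example",
        "fields": [{"name": "id", "type": "long"}],
    })

    resp = glue.register_schema_version(
        SchemaId={"RegistryName": "example-registry", "SchemaName": "example-schema"},
        SchemaDefinition=definition,
    )
    print(resp["SchemaVersionId"], resp["VersionNumber"], resp["Status"])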

A wrapper structure that may contain the registry name and Amazon Resource Name (ARN).

structure RegistryId { ///

Name of the registry. Used only for lookup. One of RegistryArn or RegistryName has to be provided.

RegistryName: SchemaRegistryNameString ///

Arn of the registry to be updated. One of RegistryArn or RegistryName has to be provided.

RegistryArn: GlueResourceArn } ///

A structure containing the details for a registry.

structure RegistryListItem { ///

The name of the registry.

RegistryName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) of the registry.

RegistryArn: GlueResourceArn ///

A description of the registry.

Description: DescriptionString ///

The status of the registry.

Status: RegistryStatus ///

The date the registry was created.

CreatedTime: CreatedTimestamp ///

The date the registry was updated.

UpdatedTime: UpdatedTimestamp } ///

Specifies a Relational database data source in the Glue Data Catalog.

structure RelationalCatalogSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty } @output structure RemoveSchemaVersionMetadataResponse { ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

The name of the schema.

SchemaName: SchemaRegistryNameString ///

The name of the registry.

RegistryName: SchemaRegistryNameString ///

The latest version of the schema.

LatestVersion: LatestSchemaVersionBoolean = false ///

The version number of the schema.

VersionNumber: VersionLongNumber = 0 ///

The version ID for the schema version.

SchemaVersionId: SchemaVersionIdString ///

The metadata key.

MetadataKey: MetadataKeyString ///

The value of the metadata key.

MetadataValue: MetadataValueString } ///

Specifies a transform that renames a single data property key.

structure RenameField { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A JSON path to a variable in the data structure for the source data.

@required SourcePath: EnclosedInStringProperties ///

A JSON path to a variable in the data structure for the target data.

@required TargetPath: EnclosedInStringProperties } @input structure ResetJobBookmarkRequest { ///

The name of the job in question.

@required JobName: JobName ///

The unique run identifier associated with this job run.

RunId: RunId } @output structure ResetJobBookmarkResponse { ///

The reset bookmark entry.

JobBookmarkEntry: JobBookmarkEntry } ///

A resource was not ready for a transaction.

@error("client") structure ResourceNotReadyException { ///

A message describing the problem.

Message: MessageString } ///

A resource numerical limit was exceeded.

@error("client") structure ResourceNumberLimitExceededException { ///

A message describing the problem.

Message: MessageString } ///

The URIs for function resources.

structure ResourceUri { ///

The type of the resource.

ResourceType: ResourceType ///

The URI for accessing the resource.

Uri: URI } @input structure ResumeWorkflowRunRequest { ///

The name of the workflow to resume.

@required Name: NameString ///

The ID of the workflow run to resume.

@required RunId: IdString ///

A list of the node IDs for the nodes you want to restart. The nodes that are to be restarted must have a run attempt in the original run.

@required NodeIds: NodeIdList } @output structure ResumeWorkflowRunResponse { ///

The new ID assigned to the resumed workflow run. Each resume of a workflow run will have a new run ID.

RunId: IdString ///

A list of the node IDs for the nodes that were actually restarted.

NodeIds: NodeIdList } @input structure RunStatementRequest { ///
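A hedged boto3 sketch of resuming selected nodes of a workflow run; names and IDs are placeholders, and the node IDs must have a run attempt in the original run, as noted above.

    import boto3

    glue = boto3.client("glue")

    resp = glue.resume_workflow_run(
        Name="example-workflow",          # placeholder
        RunId="original-run-id",          # placeholder
        NodeIds=["node_1", "node_2"],     # placeholder node IDs to restart
    )
    # A new run ID is assigned to the resumed run.
    print(resp["RunId"], resp["NodeIds"])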

The Session Id of the statement to be run.

@required SessionId: NameString ///

The statement code to be run.

@required Code: OrchestrationStatementCodeString ///

The origin of the request.

RequestOrigin: OrchestrationNameString } @output structure RunStatementResponse { ///

Returns the Id of the statement that was run.

Id: IntegerValue = 0 } ///
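As a sketch only, running a statement in an existing interactive session and polling its state; the session ID and code are placeholders.

    import time
    import boto3

    glue = boto3.client("glue")

    resp = glue.run_statement(
        SessionId="example-session",       # placeholder existing session
        Code="df = spark.range(10); df.count()",
    )
    statement_id = resp["Id"]

    # Poll until the statement leaves the WAITING/RUNNING states.
    while True:
        st = glue.get_statement(SessionId="example-session", Id=statement_id)["Statement"]
        if st["State"] not in ("WAITING", "RUNNING"):
            break
        time.sleep(2)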

Specifies a Delta Lake data source that is registered in the Glue Data Catalog. The data source must be stored in Amazon S3.

structure S3CatalogDeltaSource { ///

The name of the Delta Lake data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty ///

Specifies additional connection options.

AdditionalDeltaOptions: AdditionalOptions ///

Specifies the data schema for the Delta Lake source.

OutputSchemas: GlueSchemas } ///

Specifies a Hudi data source that is registered in the Glue Data Catalog. The Hudi data source must be stored in Amazon S3.

structure S3CatalogHudiSource { ///

The name of the Hudi data source.

@required Name: NodeName ///

The name of the database to read from.

@required Database: EnclosedInStringProperty ///

The name of the table in the database to read from.

@required Table: EnclosedInStringProperty ///

Specifies additional connection options.

AdditionalHudiOptions: AdditionalOptions ///

Specifies the data schema for the Hudi source.

OutputSchemas: GlueSchemas } ///

Specifies an Amazon S3 data store in the Glue Data Catalog.

structure S3CatalogSource { ///

The name of the data store.

@required Name: NodeName ///

The database to read from.

@required Database: EnclosedInStringProperty ///

The database table to read from.

@required Table: EnclosedInStringProperty ///

Partitions satisfying this predicate are deleted. Files within the retention period in these partitions are not deleted. Set to "" – empty by default.

PartitionPredicate: EnclosedInStringProperty ///

Specifies additional connection options.

AdditionalOptions: S3SourceAdditionalOptions } ///

Specifies a data target that writes to Amazon S3 using the Glue Data Catalog.

structure S3CatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: CatalogSchemaChangePolicy } ///

Specifies a comma-separated values (CSV) data store stored in Amazon S3.

structure S3CsvSource { ///

The name of the data store.

@required Name: NodeName ///

A list of the Amazon S3 paths to read from.

@required Paths: EnclosedInStringProperties ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

CompressionType: CompressionType ///

A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.

Exclusions: EnclosedInStringProperties ///

The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, "groupFiles" must be set to "inPartition" for this to take effect.

GroupSize: EnclosedInStringProperty ///

Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to "none".

GroupFiles: EnclosedInStringProperty ///

If set to true, recursively reads files in all subdirectories under the specified paths.

Recurse: BoxedBoolean ///

This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.

MaxBand: BoxedNonNegativeInt ///

This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.

MaxFilesInBand: BoxedNonNegativeInt ///

Specifies additional connection options.

AdditionalOptions: S3DirectSourceAdditionalOptions ///

Specifies the delimiter character. The default is a comma: ",", but any other character can be specified.

@required Separator: Separator ///

Specifies a character to use for escaping. This option is used only when reading CSV files. The default value is none. If enabled, the character which immediately follows is used as-is, except for a small set of well-known escapes (\n, \r, \t, and \0).

Escaper: EnclosedInStringPropertyWithQuote ///

Specifies the character to use for quoting. The default is a double quote: '"'. Set this to -1 to turn off quoting entirely.

@required QuoteChar: QuoteChar ///

A Boolean value that specifies whether a single record can span multiple lines. This can occur when a field contains a quoted new-line character. You must set this option to True if any record spans multiple lines. The default value is False, which allows for more aggressive file-splitting during parsing.

Multiline: BoxedBoolean ///

A Boolean value that specifies whether to treat the first line as a header. The default value is False.

WithHeader: BoxedBoolean ///

A Boolean value that specifies whether to write the header to output. The default value is True.

WriteHeader: BoxedBoolean ///

A Boolean value that specifies whether to skip the first data line. The default value is False.

SkipFirst: BoxedBoolean ///

A Boolean value that specifies whether to use the advanced SIMD CSV reader along with Apache Arrow based columnar memory formats. Only available in Glue version 3.0.

OptimizePerformance: BooleanValue = false ///

Specifies the data schema for the S3 CSV source.

OutputSchemas: GlueSchemas } ///

Specifies a target that writes to a Delta Lake data source in the Glue Data Catalog.

structure S3DeltaCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

Specifies additional connection options for the connector.

AdditionalOptions: AdditionalOptions ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: CatalogSchemaChangePolicy } ///

Specifies a target that writes to a Delta Lake data source in Amazon S3.

structure S3DeltaDirectTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

The Amazon S3 path of your Delta Lake data source to write to.

@required Path: EnclosedInStringProperty ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

@required Compression: DeltaTargetCompressionType ///

Specifies the data output format for the target.

@required Format: TargetFormat ///

Specifies additional connection options for the connector.

AdditionalOptions: AdditionalOptions ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: DirectSchemaChangePolicy } ///

Specifies a Delta Lake data source stored in Amazon S3.

structure S3DeltaSource { ///

The name of the Delta Lake source.

@required Name: NodeName ///

A list of the Amazon S3 paths to read from.

@required Paths: EnclosedInStringProperties ///

Specifies additional connection options.

AdditionalDeltaOptions: AdditionalOptions ///

Specifies additional options for the connector.

AdditionalOptions: S3DirectSourceAdditionalOptions ///

Specifies the data schema for the Delta Lake source.

OutputSchemas: GlueSchemas } ///

Specifies additional connection options for the Amazon S3 data store.

structure S3DirectSourceAdditionalOptions { ///

Sets the upper limit for the target size of the dataset in bytes that will be processed.

BoundedSize: BoxedLong ///

Sets the upper limit for the target number of files that will be processed.

BoundedFiles: BoxedLong ///

Sets option to enable a sample path.

EnableSamplePath: BoxedBoolean ///

If enabled, specifies the sample path.

SamplePath: EnclosedInStringProperty } ///

Specifies a data target that writes to Amazon S3.

structure S3DirectTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

A single Amazon S3 path to write to.

@required Path: EnclosedInStringProperty ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

Compression: EnclosedInStringProperty ///

Specifies the data output format for the target.

@required Format: TargetFormat ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: DirectSchemaChangePolicy } ///

Specifies how Amazon Simple Storage Service (Amazon S3) data should be encrypted.

structure S3Encryption { ///

The encryption mode to use for Amazon S3 data.

S3EncryptionMode: S3EncryptionMode ///

The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.

KmsKeyArn: KmsKeyArn } ///
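For illustration, a hedged boto3 sketch that uses S3Encryption inside a security configuration; the configuration name and KMS key ARN are placeholders.

    import boto3

    glue = boto3.client("glue")

    glue.create_security_configuration(
        Name="example-sec-config",    # placeholder
        EncryptionConfiguration={
            "S3Encryption": [{
                "S3EncryptionMode": "SSE-KMS",
                "KmsKeyArn": "arn:aws:kms:us-east-1:123456789012:key/placeholder",
            }],
        },
    )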

Specifies a data target that writes to Amazon S3 in Apache Parquet columnar storage.

structure S3GlueParquetTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

A single Amazon S3 path to write to.

@required Path: EnclosedInStringProperty ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

Compression: ParquetCompressionType ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: DirectSchemaChangePolicy } ///

Specifies a target that writes to a Hudi data source in the Glue Data Catalog.

structure S3HudiCatalogTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

The name of the table in the database to write to.

@required Table: EnclosedInStringProperty ///

The name of the database to write to.

@required Database: EnclosedInStringProperty ///

Specifies additional connection options for the connector.

@required AdditionalOptions: AdditionalOptions ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: CatalogSchemaChangePolicy } ///

Specifies a target that writes to a Hudi data source in Amazon S3.

structure S3HudiDirectTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The Amazon S3 path of your Hudi data source to write to.

@required Path: EnclosedInStringProperty ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

@required Compression: HudiTargetCompressionType ///

Specifies native partitioning using a sequence of keys.

PartitionKeys: GlueStudioPathList ///

Specifies the data output format for the target.

@required Format: TargetFormat ///

Specifies additional connection options for the connector.

@required AdditionalOptions: AdditionalOptions ///

A policy that specifies update behavior for the crawler.

SchemaChangePolicy: DirectSchemaChangePolicy } ///

Specifies a Hudi data source stored in Amazon S3.

structure S3HudiSource { ///

The name of the Hudi source.

@required Name: NodeName ///

A list of the Amazon S3 paths to read from.

@required Paths: EnclosedInStringProperties ///

Specifies additional connection options.

AdditionalHudiOptions: AdditionalOptions ///

Specifies additional options for the connector.

AdditionalOptions: S3DirectSourceAdditionalOptions ///

Specifies the data schema for the Hudi source.

OutputSchemas: GlueSchemas } ///

Specifies a JSON data store stored in Amazon S3.

structure S3JsonSource { ///

The name of the data store.

@required Name: NodeName ///

A list of the Amazon S3 paths to read from.

@required Paths: EnclosedInStringProperties ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

CompressionType: CompressionType ///

A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.

Exclusions: EnclosedInStringProperties ///

The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, "groupFiles" must be set to "inPartition" for this to take effect.

GroupSize: EnclosedInStringProperty ///

Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to "none".

GroupFiles: EnclosedInStringProperty ///

If set to true, recursively reads files in all subdirectories under the specified paths.

Recurse: BoxedBoolean ///

This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.

MaxBand: BoxedNonNegativeInt ///

This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.

MaxFilesInBand: BoxedNonNegativeInt ///

Specifies additional connection options.

AdditionalOptions: S3DirectSourceAdditionalOptions ///

A JsonPath string defining the JSON data.

JsonPath: EnclosedInStringProperty ///

A Boolean value that specifies whether a single record can span multiple lines. This can occur when a field contains a quoted new-line character. You must set this option to True if any record spans multiple lines. The default value is False, which allows for more aggressive file-splitting during parsing.

Multiline: BoxedBoolean ///

Specifies the data schema for the S3 JSON source.

OutputSchemas: GlueSchemas } ///

Specifies an Apache Parquet data store stored in Amazon S3.

structure S3ParquetSource { ///

The name of the data store.

@required Name: NodeName ///

A list of the Amazon S3 paths to read from.

@required Paths: EnclosedInStringProperties ///

Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".

CompressionType: ParquetCompressionType ///

A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.

Exclusions: EnclosedInStringProperties ///

The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, "groupFiles" must be set to "inPartition" for this to take effect.

GroupSize: EnclosedInStringProperty ///

Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to "none".

GroupFiles: EnclosedInStringProperty ///

If set to true, recursively reads files in all subdirectories under the specified paths.

Recurse: BoxedBoolean ///

This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.

MaxBand: BoxedNonNegativeInt ///

This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.

MaxFilesInBand: BoxedNonNegativeInt ///

Specifies additional connection options.

AdditionalOptions: S3DirectSourceAdditionalOptions ///

Specifies the data schema for the S3 Parquet source.

OutputSchemas: GlueSchemas } ///

Specifies additional connection options for the Amazon S3 data store.

structure S3SourceAdditionalOptions { ///

Sets the upper limit for the target size of the dataset in bytes that will be processed.

BoundedSize: BoxedLong ///

Sets the upper limit for the target number of files that will be processed.

BoundedFiles: BoxedLong } ///

Specifies a data store in Amazon Simple Storage Service (Amazon S3).

structure S3Target { ///

The path to the Amazon S3 target.

Path: Path ///

A list of glob patterns used to exclude from the crawl. /// For more information, see Catalog Tables with a Crawler.

Exclusions: PathList ///

The name of a connection which allows a job or crawler to access data in Amazon S3 within an Amazon Virtual Private Cloud environment (Amazon VPC).

ConnectionName: ConnectionName ///

Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

SampleSize: NullableInteger ///

A valid Amazon SQS ARN. For example, arn:aws:sqs:region:account:sqs.

EventQueueArn: EventQueueArn ///

A valid Amazon dead-letter SQS ARN. For example, arn:aws:sqs:region:account:deadLetterQueue.

DlqEventQueueArn: EventQueueArn } ///

A scheduling object using a cron statement to schedule an event.

structure Schedule { ///

A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).

ScheduleExpression: CronExpression ///

The state of the schedule.

State: ScheduleState } ///
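As a usage sketch, the cron expression above is passed as a plain string; for example, attaching the 12:15 UTC schedule to an existing crawler (the crawler name is a placeholder).

    import boto3

    glue = boto3.client("glue")

    # Runs the crawler every day at 12:15 UTC, per the cron(15 12 * * ? *) example above.
    glue.update_crawler(Name="example-crawler", Schedule="cron(15 12 * * ? *)")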

The specified scheduler is not running.

@error("client") structure SchedulerNotRunningException { ///

A message describing the problem.

Message: MessageString } ///

The specified scheduler is already running.

@error("client") structure SchedulerRunningException { ///

A message describing the problem.

Message: MessageString } ///

The specified scheduler is transitioning.

@error("client") structure SchedulerTransitioningException { ///

A message describing the problem.

Message: MessageString } ///

A policy that specifies update and deletion behaviors for the crawler.

structure SchemaChangePolicy { ///

The update behavior when the crawler finds a changed schema.

UpdateBehavior: UpdateBehavior ///

The deletion behavior when the crawler finds a deleted object.

DeleteBehavior: DeleteBehavior } ///

A key-value pair representing a column and data type that this transform can /// run against. The Schema parameter of the MLTransform may contain up to 100 of these structures.

structure SchemaColumn { ///

The name of the column.

Name: ColumnNameString ///

The type of data in the column.

DataType: ColumnTypeString } ///

The unique ID of the schema in the Glue schema registry.

structure SchemaId { ///

The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.

SchemaArn: GlueResourceArn ///

The name of the schema. One of SchemaArn or SchemaName has to be provided.

SchemaName: SchemaRegistryNameString ///

The name of the schema registry that contains the schema.

RegistryName: SchemaRegistryNameString } ///

An object that contains minimal details for a schema.

structure SchemaListItem { ///

The name of the registry where the schema resides.

RegistryName: SchemaRegistryNameString ///

The name of the schema.

SchemaName: SchemaRegistryNameString ///

The Amazon Resource Name (ARN) for the schema.

SchemaArn: GlueResourceArn ///

A description for the schema.

Description: DescriptionString ///

The status of the schema.

SchemaStatus: SchemaStatus ///

The date and time that a schema was created.

CreatedTime: CreatedTimestamp ///

The date and time that a schema was updated.

UpdatedTime: UpdatedTimestamp } ///

An object that references a schema stored in the Glue Schema Registry.

structure SchemaReference { ///

A structure that contains schema identity fields. Either this or the SchemaVersionId has to be provided.

SchemaId: SchemaId ///

The unique ID assigned to a version of the schema. Either this or the SchemaId has to be provided.

SchemaVersionId: SchemaVersionIdString ///

The version number of the schema.

SchemaVersionNumber: VersionLongNumber = null } ///

An object that contains the error details for an operation on a schema version.

structure SchemaVersionErrorItem { ///

The version number of the schema.

VersionNumber: VersionLongNumber = 0 ///

The details of the error for the schema version.

ErrorDetails: ErrorDetails } ///

An object containing the details about a schema version.

structure SchemaVersionListItem { ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

The unique identifier of the schema version.

SchemaVersionId: SchemaVersionIdString ///

The version number of the schema.

VersionNumber: VersionLongNumber = 0 ///

The status of the schema version.

Status: SchemaVersionStatus ///

The date and time the schema version was created.

CreatedTime: CreatedTimestamp } ///

A structure containing the schema version information.

structure SchemaVersionNumber { ///

The latest version available for the schema.

LatestVersion: LatestSchemaVersionBoolean = false ///

The version number of the schema.

VersionNumber: VersionLongNumber = 0 } @input structure SearchTablesRequest { ///

A unique identifier, consisting of account_id.

CatalogId: CatalogIdString ///

A continuation token, included if this is a continuation call.

NextToken: Token ///

A list of key-value pairs, and a comparator used to filter the search results. Returns all entities matching the predicate.

///

The Comparator member of the PropertyPredicate struct is used only for time fields, and can be omitted for other field types. Also, when comparing string values, such as when Key=Name, a fuzzy match algorithm is used. The Key field (for example, the value of the Name field) is split on certain punctuation characters, for example, -, :, #, etc. into tokens. Then each token is exact-match compared with the Value member of PropertyPredicate. For example, if Key=Name and Value=link, tables named customer-link and xx-link-yy are returned, but xxlinkyy is not returned.

Filters: SearchPropertyPredicates ///

A string used for a text search.

///

Specifying a value in quotes filters based on an exact match to the value.

SearchText: ValueString ///

A list of criteria for sorting the results by a field name, in an ascending or descending order.

SortCriteria: SortCriteria ///

The maximum number of tables to return in a single response.

MaxResults: PageSize ///

Allows you to specify that you want to search the tables shared with your account. The allowable values are FOREIGN or ALL.

  • If set to FOREIGN, will search the tables shared with your account.

  • If set to ALL, will search the tables shared with your account, as well as the tables in your local account.
ResourceShareType: ResourceShareType } @output structure SearchTablesResponse { ///

A continuation token, present if the current list segment is not the last.

NextToken: Token ///

A list of the requested Table objects. The SearchTables response returns only the tables that you have access to.

TableList: TableList } ///
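A hedged boto3 sketch of SearchTables using the fuzzy Name filter described above and paginating with NextToken; the search term and sort field are placeholders.

    import boto3

    glue = boto3.client("glue")

    tables, token = [], None
    while True:
        kwargs = {
            "SearchText": "link",                                     # placeholder term
            "Filters": [{"Key": "Name", "Value": "link"}],            # fuzzy match on Name
            "SortCriteria": [{"FieldName": "UpdateTime", "Sort": "DESC"}],
            "MaxResults": 50,
        }
        if token:
            kwargs["NextToken"] = token
        resp = glue.search_tables(**kwargs)
        tables.extend(resp.get("TableList", []))
        token = resp.get("NextToken")
        if not token:
            break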

Specifies a security configuration.

structure SecurityConfiguration { ///

The name of the security configuration.

Name: NameString ///

The time at which this security configuration was created.

CreatedTimeStamp: TimestampValue ///

The encryption configuration associated with this security configuration.

EncryptionConfiguration: EncryptionConfiguration } ///

Defines a non-overlapping region of a table's partitions, allowing /// multiple requests to be run in parallel.

structure Segment { ///

The zero-based index number of the segment. For example, if the total number of segments /// is 4, SegmentNumber values range from 0 through 3.

@required SegmentNumber: NonNegativeInteger = 0 ///

The total number of segments.

@required TotalSegments: TotalSegmentsInteger = 0 } ///
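A hedged sketch of using Segment with GetPartitions so that each worker reads one non-overlapping segment, as described above; database and table names are placeholders.

    import boto3

    glue = boto3.client("glue")
    TOTAL_SEGMENTS = 4

    def list_segment(segment_number):
        # Reads only the partitions in one segment; run one call per SegmentNumber 0..3.
        parts, token = [], None
        while True:
            kwargs = {
                "DatabaseName": "example_db",      # placeholder
                "TableName": "example_table",      # placeholder
                "Segment": {"SegmentNumber": segment_number, "TotalSegments": TOTAL_SEGMENTS},
            }
            if token:
                kwargs["NextToken"] = token
            resp = glue.get_partitions(**kwargs)
            parts.extend(resp.get("Partitions", []))
            token = resp.get("NextToken")
            if not token:
                break
        return parts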

Specifies a transform that chooses the data property keys that you want to keep.

structure SelectFields { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A JSON path to a variable in the data structure.

@required Paths: GlueStudioPathList } ///

Specifies a transform that chooses one DynamicFrame from a collection of DynamicFrames. The output is the selected DynamicFrame.

structure SelectFromCollection { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

The index for the DynamicFrame to be selected.

@required Index: NonNegativeInt = 0 } ///

Information about a serialization/deserialization program (SerDe) that serves as an /// extractor and loader.

structure SerDeInfo { ///

Name of the SerDe.

Name: NameString ///

Usually the class that implements the SerDe. An example is /// org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.

SerializationLibrary: NameString ///

These key-value pairs define initialization parameters for the SerDe.

Parameters: ParametersMap } ///

The period in which a remote Spark runtime environment is running.

structure Session { ///

The ID of the session.

Id: NameString ///

The time and date when the session was created.

CreatedOn: TimestampValue ///

The session status.

Status: SessionStatus ///

The error message displayed during the session.

ErrorMessage: DescriptionString ///

The description of the session.

Description: DescriptionString ///

The name or Amazon Resource Name (ARN) of the IAM role associated with the Session.

Role: OrchestrationRoleArn ///

The command object. See SessionCommand.

Command: SessionCommand ///

A map array of key-value pairs. Max is 75 pairs.

DefaultArguments: OrchestrationArgumentsMap ///

The number of connections used for the session.

Connections: ConnectionsList ///

The code execution progress of the session.

Progress: DoubleValue = 0 ///

The number of Glue data processing units (DPUs) that can be allocated when the job runs. /// A DPU is a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB memory.

MaxCapacity: NullableDouble ///

The name of the SecurityConfiguration structure to be used with the session.

SecurityConfiguration: NameString ///

The Glue version determines the versions of Apache Spark and Python that Glue supports. /// The GlueVersion must be greater than 2.0.

GlueVersion: GlueVersionString ///

The number of workers of a defined WorkerType to use for the session.

NumberOfWorkers: NullableInteger ///

The type of predefined worker that is allocated when a session runs. Accepts a value of G.1X, G.2X, G.4X, or G.8X for Spark sessions. Accepts the value Z.2X for Ray sessions.

WorkerType: WorkerType ///

The date and time that this session is completed.

CompletedOn: TimestampValue ///

The total time the session ran for.

ExecutionTime: NullableDouble ///

The DPUs consumed by the session (formula: ExecutionTime * MaxCapacity).

DPUSeconds: NullableDouble ///

The number of minutes when idle before the session times out.

IdleTimeout: IdleTimeout } ///

The SessionCommand that runs the job.

structure SessionCommand { ///

Specifies the name of the SessionCommand. Can be 'glueetl' or 'gluestreaming'.

Name: NameString ///

Specifies the Python version. The Python version indicates the version supported for jobs of type Spark.

PythonVersion: PythonVersionString } ///

Specifies skewed values in a table. Skewed values are those that occur with very high /// frequency.

structure SkewedInfo { ///

A list of names of columns that contain skewed values.

SkewedColumnNames: NameStringList ///

A list of values that appear so frequently as to be considered /// skewed.

SkewedColumnValues: ColumnValueStringList ///

A mapping of skewed values to the columns that contain them.

SkewedColumnValueLocationMaps: LocationMap } ///

Specifies configuration for Snowflake nodes in Glue Studio.

structure SnowflakeNodeData { ///

Specifies how retrieved data is specified. Valid values: "table", /// "query".

SourceType: GenericLimitedString ///

Specifies a Glue Data Catalog Connection to a Snowflake endpoint.

Connection: Option ///

Specifies a Snowflake database schema for your node to use.

Schema: GenericString ///

Specifies a Snowflake table for your node to use.

Table: GenericString ///

Specifies a Snowflake database for your node to use.

Database: GenericString ///

Not currently used.

TempDir: EnclosedInStringProperty ///

Not currently used.

IamRole: Option ///

Specifies additional options passed to the Snowflake connector. If options are specified /// elsewhere in this node, this will take precedence.

AdditionalOptions: AdditionalOptions ///

A SQL string used to retrieve data with the query sourcetype.

SampleQuery: GenericString ///

A SQL string run before the Snowflake connector performs its standard actions.

PreAction: GenericString ///

A SQL string run after the Snowflake connector performs its standard actions.

PostAction: GenericString ///

Specifies what action to take when writing to a table with preexisting data. Valid values: /// append, merge, truncate, drop.

Action: GenericString ///

Used when Action is append. Specifies the resolution behavior when a row /// already exists. If true, preexisting rows will be updated. If false, those rows will be inserted.

Upsert: BooleanValue = false ///

Specifies a merge action. Valid values: simple, custom. If /// simple, merge behavior is defined by MergeWhenMatched and /// MergeWhenNotMatched. If custom, defined by MergeClause.

MergeAction: GenericLimitedString ///

Specifies how to resolve records that match preexisting data when merging. Valid values: /// update, delete.

MergeWhenMatched: GenericLimitedString ///

Specifies how to process records that do not match preexisting data when merging. Valid /// values: insert, none.

MergeWhenNotMatched: GenericLimitedString ///

A SQL statement that specifies a custom merge behavior.

MergeClause: GenericString ///

The name of a staging table used when performing merge or upsert append /// actions. Data is written to this table, then moved to table by a generated /// postaction.

StagingTable: GenericString ///

Specifies the columns combined to identify a record when detecting matches for merges and /// upserts. A list of structures with value, label and /// description keys. Each structure describes a column.

SelectedColumns: OptionList ///

Specifies whether automatic query pushdown is enabled. If pushdown /// is enabled, then when a query is run on Spark, if part of the query can be "pushed down" to /// the /// Snowflake server, it is pushed down. This improves performance of some queries.

AutoPushdown: BooleanValue = false ///

Manually defines the target schema for the node. A list of structures with value /// , label and description keys. Each structure defines a column.

TableSchema: OptionList } ///

Specifies a Snowflake data source.

structure SnowflakeSource { ///

The name of the Snowflake data source.

@required Name: NodeName ///

Configuration for the Snowflake data source.

@required Data: SnowflakeNodeData ///

Specifies user-defined schemas for your output data.

OutputSchemas: GlueSchemas } ///

Specifies a Snowflake target.

structure SnowflakeTarget { ///

The name of the Snowflake target.

@required Name: NodeName ///

Specifies the data of the Snowflake target node.

@required Data: SnowflakeNodeData ///

The nodes that are inputs to the data target.

Inputs: OneInput } ///

Specifies a field to sort by and a sort order.

structure SortCriterion { ///

The name of the field on which to sort.

FieldName: ValueString ///

An ascending or descending sort.

Sort: Sort } ///

The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.

structure SourceControlDetails { ///

The provider for the remote repository.

Provider: SourceControlProvider ///

The name of the remote repository that contains the job artifacts.

Repository: Generic512CharString ///

The owner of the remote repository that contains the job artifacts.

Owner: Generic512CharString ///

An optional branch in the remote repository.

Branch: Generic512CharString ///

An optional folder in the remote repository.

Folder: Generic512CharString ///

The last commit ID for a commit in the remote repository.

LastCommitId: Generic512CharString ///

The type of authentication, which can be an authentication token stored in Amazon Web Services Secrets Manager, or a personal access token.

AuthStrategy: SourceControlAuthStrategy ///

The value of an authorization token.

AuthToken: Generic512CharString } ///

Specifies a connector to an Apache Spark data source.

structure SparkConnectorSource { ///

The name of the data source.

@required Name: NodeName ///

The name of the connection that is associated with the connector.

@required ConnectionName: EnclosedInStringProperty ///

The name of a connector that assists with accessing the data store in Glue Studio.

@required ConnectorName: EnclosedInStringProperty ///

The type of connection, such as marketplace.spark or custom.spark, designating a connection to an Apache Spark data store.

@required ConnectionType: EnclosedInStringProperty ///

Additional connection options for the connector.

AdditionalOptions: AdditionalOptions ///

Specifies data schema for the custom spark source.

OutputSchemas: GlueSchemas } ///

Specifies a target that uses an Apache Spark connector.

structure SparkConnectorTarget { ///

The name of the data target.

@required Name: NodeName ///

The nodes that are inputs to the data target.

@required Inputs: OneInput ///

The name of a connection for an Apache Spark connector.

@required ConnectionName: EnclosedInStringProperty ///

The name of an Apache Spark connector.

@required ConnectorName: EnclosedInStringProperty ///

The type of connection, such as marketplace.spark or custom.spark, designating a connection to an Apache Spark data store.

@required ConnectionType: EnclosedInStringProperty ///

Additional connection options for the connector.

AdditionalOptions: AdditionalOptions ///

Specifies the data schema for the custom spark target.

OutputSchemas: GlueSchemas } ///

Specifies a transform where you enter a SQL query using Spark SQL syntax to transform the data. The output is a single DynamicFrame.

structure SparkSQL { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names. You can associate a table name with each input node to use in the SQL query. The name you choose must meet the Spark SQL naming restrictions.

@required Inputs: ManyInputs ///

A SQL query that must use Spark SQL syntax and return a single data set.

@required SqlQuery: SqlQuery ///

A list of aliases. An alias allows you to specify what name to use in the SQL for a given input. For example, you have a datasource named "MyDataSource". If you specify From as MyDataSource, and Alias as SqlName, then in your SQL you can do:

select * from SqlName

and that gets data from MyDataSource.

@required SqlAliases: SqlAliases ///

Specifies the data schema for the SparkSQL transform.

OutputSchemas: GlueSchemas } ///
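Purely as a sketch of the shape described above (node names and IDs are placeholders), a SparkSQL node as it might appear among a job's CodeGenConfigurationNodes, with SqlAliases mapping the input node to the name used in the query.

    # Hypothetical fragment of CodeGenConfigurationNodes for a Glue Studio job.
    spark_sql_node = {
        "SparkSQL": {
            "Name": "Filter recent orders",          # placeholder node name
            "Inputs": ["node-1234567890"],           # placeholder ID of the input node
            "SqlQuery": "select * from SqlName where order_date > '2023-01-01'",
            "SqlAliases": [
                {"From": "node-1234567890", "Alias": "SqlName"},
            ],
        }
    }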

Specifies a transform that writes samples of the data to an Amazon S3 bucket.

structure Spigot { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A path in Amazon S3 where the transform will write a subset of records from the dataset to a JSON file in an Amazon S3 bucket.

@required Path: EnclosedInStringProperty ///

Specifies a number of records to write starting from the beginning of the dataset.

Topk: Topk ///

The probability (a decimal value with a maximum value of 1) of picking any given record. A value of 1 indicates that each row read from the dataset should be included in the sample output.

Prob: Prob } ///

Specifies a transform that splits data property keys into two DynamicFrames. The output is a collection of DynamicFrames: one with selected data property keys, and one with the remaining data property keys.

structure SplitFields { ///

The name of the transform node.

@required Name: NodeName ///

The data inputs identified by their node names.

@required Inputs: OneInput ///

A JSON path to a variable in the data structure.

@required Paths: GlueStudioPathList } ///

Represents a single entry in the list of values for SqlAliases.

structure SqlAlias { ///

A table, or a column in a table.

@required From: NodeId ///

A temporary name given to a table, or a column in a table.

@required Alias: EnclosedInStringPropertyWithQuote } @input structure StartBlueprintRunRequest { ///

The name of the blueprint.

@required BlueprintName: OrchestrationNameString ///

Specifies the parameters as a BlueprintParameters object.

Parameters: BlueprintParameters ///

Specifies the IAM role used to create the workflow.

@required RoleArn: OrchestrationIAMRoleArn } @output structure StartBlueprintRunResponse { ///

The run ID for this blueprint run.

RunId: IdString } @input structure StartCrawlerRequest { ///

Name of the crawler to start.

@required Name: NameString } @output structure StartCrawlerResponse {} @input structure StartCrawlerScheduleRequest { ///

Name of the crawler to schedule.

@required CrawlerName: NameString } @output structure StartCrawlerScheduleResponse {} @input structure StartDataQualityRuleRecommendationRunRequest { ///

The data source (Glue table) associated with this run.

@required DataSource: DataSource ///

An IAM role supplied to encrypt the results of the run.

@required Role: RoleString ///

The number of G.1X workers to be used in the run. The default is 5.

NumberOfWorkers: NullableInteger ///

The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

A name for the ruleset.

CreatedRulesetName: NameString ///

Used for idempotency and is recommended to be set to a random ID (such as a UUID) to avoid creating or starting multiple instances of the same resource.

ClientToken: HashString } @output structure StartDataQualityRuleRecommendationRunResponse { ///

The unique run identifier associated with this run.

RunId: HashString } @input structure StartDataQualityRulesetEvaluationRunRequest { ///
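A hedged boto3 sketch of starting a rule recommendation run with an idempotency token; the database, table, role, and ruleset name are placeholders.

    import uuid
    import boto3

    glue = boto3.client("glue")

    resp = glue.start_data_quality_rule_recommendation_run(
        DataSource={"GlueTable": {"DatabaseName": "example_db", "TableName": "example_table"}},
        Role="arn:aws:iam::123456789012:role/GlueDataQualityRole",    # placeholder
        NumberOfWorkers=5,                                            # default per the documentation above
        CreatedRulesetName="example-recommended-ruleset",             # placeholder
        ClientToken=str(uuid.uuid4()),                                # random token for idempotency
    )
    print(resp["RunId"])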

The data source (Glue table) associated with this run.

@required DataSource: DataSource ///

An IAM role supplied to encrypt the results of the run.

@required Role: RoleString ///

The number of G.1X workers to be used in the run. The default is 5.

NumberOfWorkers: NullableInteger ///

The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

Used for idempotency and is recommended to be set to a random ID (such as a UUID) to avoid creating or starting multiple instances of the same resource.

ClientToken: HashString ///

Additional run options you can specify for an evaluation run.

AdditionalRunOptions: DataQualityEvaluationRunAdditionalRunOptions ///

A list of ruleset names.

@required RulesetNames: RulesetNames ///

A map of reference strings to additional data sources you can specify for an evaluation run.

AdditionalDataSources: DataSourceMap } @output structure StartDataQualityRulesetEvaluationRunResponse { ///

The unique run identifier associated with this run.

RunId: HashString } @input structure StartExportLabelsTaskRunRequest { ///

The unique identifier of the machine learning transform.

@required TransformId: HashString ///

The Amazon S3 path where you export the labels.

@required OutputS3Path: UriString } @output structure StartExportLabelsTaskRunResponse { ///

The unique identifier for the task run.

TaskRunId: HashString } @input structure StartImportLabelsTaskRunRequest { ///

The unique identifier of the machine learning transform.

@required TransformId: HashString ///

The Amazon Simple Storage Service (Amazon S3) path from where you import the /// labels.

@required InputS3Path: UriString ///

Indicates whether to overwrite your existing labels.

ReplaceAllLabels: ReplaceBoolean = false } @output structure StartImportLabelsTaskRunResponse { ///

The unique identifier for the task run.

TaskRunId: HashString } ///

The batch condition that started the workflow run. Either the number of events in the batch size arrived, /// in which case the BatchSize member is non-zero, or the batch window expired, in which case the BatchWindow /// member is non-zero.

structure StartingEventBatchCondition { ///

Number of events in the batch.

BatchSize: NullableInteger ///

Duration of the batch window in seconds.

BatchWindow: NullableInteger } @input structure StartJobRunRequest { ///

The name of the job definition to use.

@required JobName: NameString ///

The ID of a previous JobRun to retry.

JobRunId: IdString ///

The job arguments associated with this run. For this job run, they replace the default /// arguments set in the job definition itself.

///

You can specify arguments here that your own job-execution script /// consumes, as well as arguments that Glue itself consumes.

///

Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets /// from a Glue Connection, Secrets Manager or other secret management /// mechanism if you intend to keep them within the Job.

///

For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Spark jobs, /// see the Special Parameters Used by Glue topic in the developer guide.

///

For information about the arguments you can provide to this field when configuring Ray /// jobs, see Using /// job parameters in Ray jobs in the developer guide.

Arguments: GenericMap ///

This field is deprecated. Use MaxCapacity instead.

///

The number of Glue data processing units (DPUs) to allocate to this JobRun. /// You can allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure /// of processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. /// For more information, see the Glue /// pricing page.

@deprecated( message: "This property is deprecated, use MaxCapacity instead." ) AllocatedCapacity: IntegerValue = 0 ///

The JobRun timeout in minutes. This is the maximum time that a job run can /// consume resources before it is terminated and enters TIMEOUT status. This value overrides the timeout value set in the parent job.

///

Streaming jobs do not have a timeout. The default for non-streaming jobs is 2,880 minutes (48 hours).

Timeout: Timeout ///

For Glue version 1.0 or earlier jobs, using the standard worker type, the number of /// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is /// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB /// of memory. For more information, see the /// Glue pricing page.

///

For Glue version 2.0+ jobs, you cannot specify a Maximum capacity. /// Instead, you should specify a Worker type and the Number of workers.

///

Do not set MaxCapacity if using WorkerType and NumberOfWorkers.

///

The value that can be allocated for MaxCapacity depends on whether you are /// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL /// job:

  • When you specify a Python shell job (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.

  • When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a fractional DPU allocation.
MaxCapacity: NullableDouble ///

The name of the SecurityConfiguration structure to be used with this job /// run.

SecurityConfiguration: NameString ///

Specifies configuration properties of a job run notification.

NotificationProperty: NotificationProperty ///

The type of predefined worker that is allocated when a job runs. Accepts a value of /// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.

  • For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.

  • For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.

  • For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).

  • For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.

  • For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.

  • For the Z.2X worker type, each worker maps to 2 M-DPU (8 vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when a job runs.

NumberOfWorkers: NullableInteger ///

Indicates whether the job is run with a standard or flexible execution class. The standard execution-class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.

///

The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.

///

Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.

ExecutionClass: ExecutionClass } @output structure StartJobRunResponse { ///

The ID assigned to this job run.

JobRunId: IdString } @input structure StartMLEvaluationTaskRunRequest { ///
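A hedged boto3 sketch of StartJobRun using worker settings instead of MaxCapacity, as the documentation above requires; the job name and argument values are placeholders.

    import boto3

    glue = boto3.client("glue")

    resp = glue.start_job_run(
        JobName="example-job",                                       # placeholder
        # Overrides the job's default arguments for this run only; avoid plaintext secrets.
        Arguments={"--input_path": "s3://example-bucket/input/"},
        WorkerType="G.1X",
        NumberOfWorkers=10,
        Timeout=120,                                                 # minutes
        # Do not also set MaxCapacity when WorkerType/NumberOfWorkers are provided.
    )
    print(resp["JobRunId"])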

The unique identifier of the machine learning transform.

@required TransformId: HashString } @output structure StartMLEvaluationTaskRunResponse { ///

The unique identifier associated with this run.

TaskRunId: HashString } @input structure StartMLLabelingSetGenerationTaskRunRequest { ///

The unique identifier of the machine learning transform.

@required TransformId: HashString ///

The Amazon Simple Storage Service (Amazon S3) path where you generate the labeling /// set.

@required OutputS3Path: UriString } @output structure StartMLLabelingSetGenerationTaskRunResponse { ///

The unique run identifier that is associated with this task run.

TaskRunId: HashString } @input structure StartTriggerRequest { ///

The name of the trigger to start.

@required Name: NameString } @output structure StartTriggerResponse { ///

The name of the trigger that was started.

Name: NameString } @input structure StartWorkflowRunRequest { ///

The name of the workflow to start.

@required Name: NameString ///

The workflow run properties for the new workflow run.

RunProperties: WorkflowRunProperties } @output structure StartWorkflowRunResponse { ///

An Id for the new run.

RunId: IdString } ///

The statement or request for a particular action to occur in a session.

structure Statement { ///

The ID of the statement.

Id: IntegerValue = 0 ///

The execution code of the statement.

Code: GenericString ///

The state while the request is actioned.

State: StatementState ///

The output in JSON.

Output: StatementOutput ///

The code execution progress.

Progress: DoubleValue = 0 ///

The unix time and date that the job definition was started.

StartedOn: LongValue = 0 ///

The unix time and date that the job definition was completed.

CompletedOn: LongValue = 0 } ///

The code execution output in JSON format.

structure StatementOutput { ///

The code execution output.

Data: StatementOutputData ///

The execution count of the output.

ExecutionCount: IntegerValue = 0 ///

The status of the code execution output.

Status: StatementState ///

The name of the error in the output.

ErrorName: GenericString ///

The error value of the output.

ErrorValue: GenericString ///

The traceback of the output.

Traceback: OrchestrationStringList } ///

The code execution output in JSON format.

structure StatementOutputData { ///

The code execution output in text format.

TextPlain: GenericString } @input structure StopCrawlerRequest { ///
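
The Statement shapes above describe code submitted to an interactive session. A hedged sketch (boto3 and the session ID are assumptions; the session is assumed to already be ready) of submitting a statement and polling it until it reaches a terminal state:

    import time

    import boto3

    glue = boto3.client("glue")
    session_id = "my-session-id"  # placeholder session ID

    # Submit a code statement to the session; RunStatement returns the statement Id.
    stmt = glue.run_statement(SessionId=session_id, Code="print(1 + 1)")

    # Poll GetStatement until the statement reaches a terminal state.
    while True:
        statement = glue.get_statement(SessionId=session_id, Id=stmt["Id"])["Statement"]
        if statement["State"] in ("AVAILABLE", "ERROR", "CANCELLED"):
            break
        time.sleep(1)

    # StatementOutput carries the result; TextPlain holds the plain-text output.
    output = statement.get("Output", {})
    print(output.get("Status"), output.get("Data", {}).get("TextPlain"))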

Name of the crawler to stop.

@required Name: NameString } @output structure StopCrawlerResponse {} @input structure StopCrawlerScheduleRequest { ///

Name of the crawler whose schedule state to set.

@required CrawlerName: NameString } @output structure StopCrawlerScheduleResponse {} @input structure StopSessionRequest { ///

The ID of the session to be stopped.

@required Id: NameString ///

The origin of the request.

RequestOrigin: OrchestrationNameString } @output structure StopSessionResponse { ///

Returns the Id of the stopped session.

Id: NameString } @input structure StopTriggerRequest { ///

The name of the trigger to stop.

@required Name: NameString } @output structure StopTriggerResponse { ///

The name of the trigger that was stopped.

Name: NameString } @input structure StopWorkflowRunRequest { ///

The name of the workflow to stop.

@required Name: NameString ///

The ID of the workflow run to stop.

@required RunId: IdString } @output structure StopWorkflowRunResponse {} ///

Describes the physical storage of table data.

structure StorageDescriptor { ///

A list of the Columns in the table.

Columns: ColumnList ///

The physical location of the table. By default, this takes the form of the warehouse /// location, followed by the database location in the warehouse, followed by the table /// name.

Location: LocationString ///

A list of locations that point to the path where a Delta table is located.

AdditionalLocations: LocationStringList ///

The input format: SequenceFileInputFormat (binary), /// or TextInputFormat, or a custom format.

InputFormat: FormatString ///

The output format: SequenceFileOutputFormat (binary), /// or IgnoreKeyTextOutputFormat, or a custom format.

OutputFormat: FormatString ///

/// True if the data in the table is compressed, or False if /// not.

Compressed: Boolean = false ///

Must be specified if the table contains any dimension columns.

NumberOfBuckets: Integer = 0 ///

The serialization/deserialization (SerDe) information.

SerdeInfo: SerDeInfo ///

A list of reducer grouping columns, clustering columns, and /// bucketing columns in the table.

BucketColumns: NameStringList ///

A list specifying the sort order of each bucket in the table.

SortColumns: OrderList ///

The user-supplied properties in key-value form.

Parameters: ParametersMap ///

The information about values that appear frequently in a column (skewed values).

SkewedInfo: SkewedInfo ///

/// True if the table data is stored in subdirectories, or False if /// not.

StoredAsSubDirectories: Boolean = false ///

An object that references a schema stored in the Glue Schema Registry.

///

When creating a table, you can pass an empty list of columns for the schema, and instead use a schema reference.

SchemaReference: SchemaReference } ///
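
A hedged sketch of how the StorageDescriptor above is typically populated for a comma-delimited text table in Amazon S3 (the S3 path and columns are placeholders; the Hadoop and Hive class names shown are the ones commonly used for text tables, not values mandated by this model):

    # Minimal StorageDescriptor for a CSV table stored in S3.
    storage_descriptor = {
        "Columns": [
            {"Name": "id", "Type": "bigint"},
            {"Name": "name", "Type": "string"},
        ],
        "Location": "s3://my-bucket/my-table/",
        "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
        "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
        "Compressed": False,
        "SerdeInfo": {
            "SerializationLibrary": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
            "Parameters": {"field.delim": ","},
        },
        "Parameters": {"classification": "csv"},
    }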

Specifies options related to data preview for viewing a sample of your data.

structure StreamingDataPreviewOptions { ///

The polling time in milliseconds.

PollingTime: PollingTime ///

The limit to the number of records polled.

RecordPollingLimit: PositiveLong } ///

Defines column statistics supported for character sequence data values.

structure StringColumnStatisticsData { ///

The size of the longest string in the column.

@required MaximumLength: NonNegativeLong = 0 ///

The average string length in the column.

@required AverageLength: NonNegativeDouble = 0 ///

The number of null values in the column.

@required NumberOfNulls: NonNegativeLong = 0 ///

The number of distinct values in a column.

@required NumberOfDistinctValues: NonNegativeLong = 0 } ///

Represents a collection of related data organized in columns and rows.

structure Table { ///

The table name. For Hive compatibility, this must be entirely /// lowercase.

@required Name: NameString ///

The name of the database where the table metadata resides. /// For Hive compatibility, this must be all lowercase.

DatabaseName: NameString ///

A description of the table.

Description: DescriptionString ///

The owner of the table.

Owner: NameString ///

The time when the table definition was created in the Data Catalog.

CreateTime: Timestamp ///

The last time that the table was updated.

UpdateTime: Timestamp ///

The last time that the table was accessed. This is usually taken from HDFS, and might not /// be reliable.

LastAccessTime: Timestamp ///

The last time that column statistics were computed for this table.

LastAnalyzedTime: Timestamp ///

The retention time for this table.

Retention: NonNegativeInteger = 0 ///

A storage descriptor containing information about the physical storage /// of this table.

StorageDescriptor: StorageDescriptor ///

A list of columns by which the table is partitioned. Only primitive /// types are supported as partition keys.

///

When you create a table used by Amazon Athena, and you do not specify any /// partitionKeys, you must at least set the value of partitionKeys to /// an empty list. For example:

///

/// "PartitionKeys": [] ///

PartitionKeys: ColumnList ///

Included for Apache Hive compatibility. Not used in the normal course of Glue operations. /// If the table is a VIRTUAL_VIEW, certain Athena configuration encoded in base64.

ViewOriginalText: ViewTextString ///

Included for Apache Hive compatibility. Not used in the normal course of Glue operations.

ViewExpandedText: ViewTextString ///

The type of this table. /// Glue will create tables with the EXTERNAL_TABLE type. /// Other services, such as Athena, may create tables with additional table types. ///

///

Glue related table types:

  • EXTERNAL_TABLE: Hive compatible attribute - indicates a non-Hive managed table.

  • GOVERNED: Used by Lake Formation. The Glue Data Catalog understands GOVERNED.
TableType: TableTypeString ///

These key-value pairs define properties associated with the table.

Parameters: ParametersMap ///

The person or entity who created the table.

CreatedBy: NameString ///

Indicates whether the table has been registered with Lake Formation.

IsRegisteredWithLakeFormation: Boolean = false ///

A TableIdentifier structure that describes a target table for resource linking.

TargetTable: TableIdentifier ///

The ID of the Data Catalog in which the table resides.

CatalogId: CatalogIdString ///

The ID of the table version.

VersionId: VersionString ///

A FederatedTable structure that references an entity outside the Glue Data Catalog.

FederatedTable: FederatedTable } ///

An error record for table operations.

structure TableError { ///

The name of the table. For Hive compatibility, this must be entirely lowercase.

TableName: NameString ///

The details about the error.

ErrorDetail: ErrorDetail } ///

A structure that describes a target table for resource linking.

structure TableIdentifier { ///

The ID of the Data Catalog in which the table resides.

CatalogId: CatalogIdString ///

The name of the catalog database that contains the target table.

DatabaseName: NameString ///

The name of the target table.

Name: NameString ///

Region of the target table.

Region: NameString } ///

A structure used to define a table.

structure TableInput { ///

The table name. For Hive compatibility, this is folded to /// lowercase when it is stored.

@required Name: NameString ///

A description of the table.

Description: DescriptionString ///

The table owner. Included for Apache Hive compatibility. Not used in the normal course of Glue operations.

Owner: NameString ///

The last time that the table was accessed.

LastAccessTime: Timestamp ///

The last time that column statistics were computed for this table.

LastAnalyzedTime: Timestamp ///

The retention time for this table.

Retention: NonNegativeInteger = 0 ///

A storage descriptor containing information about the physical storage /// of this table.

StorageDescriptor: StorageDescriptor ///

A list of columns by which the table is partitioned. Only primitive /// types are supported as partition keys.

///

When you create a table used by Amazon Athena, and you do not specify any /// partitionKeys, you must at least set the value of partitionKeys to /// an empty list. For example:

///

/// "PartitionKeys": [] ///

PartitionKeys: ColumnList ///

Included for Apache Hive compatibility. Not used in the normal course of Glue operations. /// If the table is a VIRTUAL_VIEW, certain Athena configuration encoded in base64.

ViewOriginalText: ViewTextString ///

Included for Apache Hive compatibility. Not used in the normal course of Glue operations.

ViewExpandedText: ViewTextString ///

The type of this table. /// Glue will create tables with the EXTERNAL_TABLE type. /// Other services, such as Athena, may create tables with additional table types. ///

///

Glue related table types:

  • EXTERNAL_TABLE: Hive compatible attribute - indicates a non-Hive managed table.

  • GOVERNED: Used by Lake Formation. The Glue Data Catalog understands GOVERNED.
TableType: TableTypeString ///

These key-value pairs define properties associated with the table.

Parameters: ParametersMap ///

A TableIdentifier structure that describes a target table for resource linking.

TargetTable: TableIdentifier } ///
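
Tying TableInput together with the PartitionKeys note above, a hedged boto3 sketch (database, table, and S3 names are placeholders) of creating an unpartitioned table intended for use from Athena, with PartitionKeys explicitly set to an empty list:

    import boto3

    glue = boto3.client("glue")

    # Create an unpartitioned Data Catalog table; PartitionKeys is set to an
    # empty list so the table is usable from Athena.
    glue.create_table(
        DatabaseName="analytics",
        TableInput={
            "Name": "events",
            "TableType": "EXTERNAL_TABLE",
            "PartitionKeys": [],
            "Parameters": {"classification": "csv"},
            "StorageDescriptor": {
                "Columns": [{"Name": "id", "Type": "bigint"}],
                "Location": "s3://my-bucket/events/",
                "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
                "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
                "SerdeInfo": {
                    "SerializationLibrary": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"
                },
            },
        },
    )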

Specifies a version of a table.

structure TableVersion { ///

The table in question.

Table: Table ///

The ID value that identifies this table version. A VersionId is a string representation of an integer. Each version is incremented by 1.

VersionId: VersionString } ///

An error record for table-version operations.

structure TableVersionError { ///

The name of the table in question.

TableName: NameString ///

The ID value of the version in question. A VersionID is a string representation of an integer. Each version is incremented by 1.

VersionId: VersionString ///

The details about the error.

ErrorDetail: ErrorDetail } @input structure TagResourceRequest { ///

The ARN of the Glue resource to which to add the tags. For more /// information about Glue resource ARNs, see the Glue ARN string pattern.

@required ResourceArn: GlueResourceArn ///

Tags to add to this resource.

@required TagsToAdd: TagsMap } @output structure TagResourceResponse {} ///
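
A hedged sketch of TagResourceRequest in use (boto3, and the account ID, region, job name, and tag keys in the example are all placeholders):

    import boto3

    glue = boto3.client("glue")

    # Add tags to an existing Glue job, identified by its Glue resource ARN.
    glue.tag_resource(
        ResourceArn="arn:aws:glue:us-east-1:123456789012:job/my-etl-job",
        TagsToAdd={"team": "data-platform", "env": "dev"},
    )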

The sampling parameters that are associated with the machine learning transform.

structure TaskRun { ///

The unique identifier for the transform.

TransformId: HashString ///

The unique identifier for this task run.

TaskRunId: HashString ///

The current status of the requested task run.

Status: TaskStatusType ///

The names of the log group for secure logging, associated with this task run.

LogGroupName: GenericString ///

Specifies configuration properties associated with this task run.

Properties: TaskRunProperties ///

The list of error strings associated with this task run.

ErrorString: GenericString ///

The date and time that this task run started.

StartedOn: Timestamp ///

The last point in time that the requested task run was updated.

LastModifiedOn: Timestamp ///

The last point in time that the requested task run was completed.

CompletedOn: Timestamp ///

The amount of time (in seconds) that the task run consumed resources.

ExecutionTime: ExecutionTime = 0 } ///

The criteria that are used to filter the task runs for the machine learning /// transform.

structure TaskRunFilterCriteria { ///

The type of task run.

TaskRunType: TaskType ///

The current status of the task run.

Status: TaskStatusType ///

Filter on task runs started before this date.

StartedBefore: Timestamp ///

Filter on task runs started after this date.

StartedAfter: Timestamp } ///

The configuration properties for the task run.

structure TaskRunProperties { ///

The type of task run.

TaskType: TaskType ///

The configuration properties for an importing labels task run.

ImportLabelsTaskRunProperties: ImportLabelsTaskRunProperties ///

The configuration properties for an exporting labels task run.

ExportLabelsTaskRunProperties: ExportLabelsTaskRunProperties ///

The configuration properties for a labeling set generation task run.

LabelingSetGenerationTaskRunProperties: LabelingSetGenerationTaskRunProperties ///

The configuration properties for a find matches task run.

FindMatchesTaskRunProperties: FindMatchesTaskRunProperties } ///

The sorting criteria that are used to sort the list of task runs for the machine learning /// transform.

structure TaskRunSortCriteria { ///

The column to be used to sort the list of task runs for the machine learning /// transform.

@required Column: TaskRunSortColumnType ///

The sort direction to be used to sort the list of task runs for the machine learning /// transform.

@required SortDirection: SortDirectionType } ///

Specifies the parameters in the config file of the dynamic transform.

structure TransformConfigParameter { ///

Specifies the name of the parameter in the config file of the dynamic transform.

@required Name: EnclosedInStringProperty ///

Specifies the parameter type in the config file of the dynamic transform.

@required Type: ParamType ///

Specifies the validation rule in the config file of the dynamic transform.

ValidationRule: EnclosedInStringProperty ///

Specifies the validation message in the config file of the dynamic transform.

ValidationMessage: EnclosedInStringProperty ///

Specifies the value of the parameter in the config file of the dynamic transform.

Value: EnclosedInStringProperties ///

Specifies the list type of the parameter in the config file of the dynamic transform.

ListType: ParamType ///

Specifies whether the parameter is optional or not in the config file of the dynamic transform.

IsOptional: BoxedBoolean } ///

The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.

///

Additionally, imported labels and trained transforms can now be encrypted using a customer provided KMS key.

structure TransformEncryption { ///

An MLUserDataEncryption object containing the encryption mode and customer-provided KMS key ID.

MlUserDataEncryption: MLUserDataEncryption ///

The name of the security configuration.

TaskRunSecurityConfigurationName: NameString } ///

The criteria used to filter the machine learning transforms.

structure TransformFilterCriteria { ///

A unique transform name that is used to filter the machine learning transforms.

Name: NameString ///

The type of machine learning transform that is used to filter the machine learning /// transforms.

TransformType: TransformType ///

Filters the list of machine learning transforms by the last known status of the transforms (to indicate whether a transform can be used or not). One of "NOT_READY", "READY", or "DELETING".

Status: TransformStatusType ///

This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.

GlueVersion: GlueVersionString ///

The time and date before which the transforms were created.

CreatedBefore: Timestamp ///

The time and date after which the transforms were created.

CreatedAfter: Timestamp ///

Filter on transforms last modified before this date.

LastModifiedBefore: Timestamp ///

Filter on transforms last modified after this date.

LastModifiedAfter: Timestamp ///

Filters on datasets with a specific schema. The Map /// object is an array of key-value pairs representing the schema this transform accepts, where /// Column is the name of a column, and Type is the type of the data /// such as an integer or string. Has an upper bound of 100 columns.

Schema: TransformSchema } ///

The algorithm-specific parameters that are associated with the machine learning /// transform.

structure TransformParameters { ///

The type of machine learning transform.

///

For information about the types of machine learning transforms, see Creating Machine Learning Transforms.

@required TransformType: TransformType ///

The parameters for the find matches algorithm.

FindMatchesParameters: FindMatchesParameters } ///

The sorting criteria that are associated with the machine learning transform.

structure TransformSortCriteria { ///

The column to be used in the sorting criteria that are associated with the machine /// learning transform.

@required Column: TransformSortColumnType ///

The sort direction to be used in the sorting criteria that are associated with the machine /// learning transform.

@required SortDirection: SortDirectionType } ///

Information about a specific trigger.

structure Trigger { ///

The name of the trigger.

Name: NameString ///

The name of the workflow associated with the trigger.

WorkflowName: NameString ///

Reserved for future use.

Id: IdString ///

The type of trigger that this is.

Type: TriggerType ///

The current state of the trigger.

State: TriggerState ///

A description of this trigger.

Description: DescriptionString ///

A cron expression used to specify the schedule (see Time-Based /// Schedules for Jobs and Crawlers. For example, to run /// something every day at 12:15 UTC, you would specify: /// cron(15 12 * * ? *).

Schedule: GenericString ///

The actions initiated by this trigger.

Actions: ActionList ///

The predicate of this trigger, which defines when it will fire.

Predicate: Predicate ///

Batch condition that must be met (specified number of events received or batch time window expired) /// before EventBridge event trigger fires.

EventBatchingCondition: EventBatchingCondition } ///

The details of a Trigger node present in the workflow.

structure TriggerNodeDetails { ///

The information of the trigger represented by the trigger node.

Trigger: Trigger } ///

A structure used to provide information used to update a trigger. This object updates the /// previous trigger definition by overwriting it completely.

structure TriggerUpdate { ///

Reserved for future use.

Name: NameString ///

A description of this trigger.

Description: DescriptionString ///

A cron expression used to specify the schedule (see Time-Based /// Schedules for Jobs and Crawlers. For example, to run /// something every day at 12:15 UTC, you would specify: /// cron(15 12 * * ? *).

Schedule: GenericString ///

The actions initiated by this trigger.

Actions: ActionList ///

The predicate of this trigger, which defines when it will fire.

Predicate: Predicate ///

Batch condition that must be met (specified number of events received or batch time window expired) /// before EventBridge event trigger fires.

EventBatchingCondition: EventBatchingCondition } ///
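
Because TriggerUpdate overwrites the previous trigger definition completely, every field that should be kept must be restated. A hedged boto3 sketch (trigger and job names are placeholders) of updating a scheduled trigger with the cron expression format described above:

    import boto3

    glue = boto3.client("glue")

    # Replace the definition of an existing scheduled trigger; any field omitted
    # from TriggerUpdate is dropped from the trigger.
    glue.update_trigger(
        Name="daily-trigger",
        TriggerUpdate={
            "Description": "Runs the ETL job every day at 12:15 UTC",
            "Schedule": "cron(15 12 * * ? *)",
            "Actions": [{"JobName": "my-etl-job"}],
        },
    )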

A partition that contains unfiltered metadata.

structure UnfilteredPartition { ///

The partition object.

Partition: Partition ///

The list of columns the user has permissions to access.

AuthorizedColumns: NameStringList ///

A Boolean value indicating that the partition location is registered with Lake Formation.

IsRegisteredWithLakeFormation: Boolean = false } ///

Specifies a transform that combines the rows from two or more datasets into a single result.

structure Union { ///

The name of the transform node.

@required Name: NodeName ///

The node ID inputs to the transform.

@required Inputs: TwoInputs ///

Indicates the type of Union transform.

///

Specify ALL to join all rows from data sources to the resulting DynamicFrame. The resulting union does not remove duplicate rows.

///

Specify DISTINCT to remove duplicate rows in the resulting DynamicFrame.

@required UnionType: UnionType } @input structure UntagResourceRequest { ///

The Amazon Resource Name (ARN) of the resource from which to remove the tags.

@required ResourceArn: GlueResourceArn ///

Tags to remove from this resource.

@required TagsToRemove: TagKeysList } @output structure UntagResourceResponse {} @input structure UpdateBlueprintRequest { ///

The name of the blueprint.

@required Name: OrchestrationNameString ///

A description of the blueprint.

Description: Generic512CharString ///

Specifies a path in Amazon S3 where the blueprint is published.

@required BlueprintLocation: OrchestrationS3Location } @output structure UpdateBlueprintResponse { ///

Returns the name of the blueprint that was updated.

Name: NameString } @input structure UpdateClassifierRequest { ///

A GrokClassifier object with updated fields.

GrokClassifier: UpdateGrokClassifierRequest ///

An XMLClassifier object with updated fields.

XMLClassifier: UpdateXMLClassifierRequest ///

A JsonClassifier object with updated fields.

JsonClassifier: UpdateJsonClassifierRequest ///

A CsvClassifier object with updated fields.

CsvClassifier: UpdateCsvClassifierRequest } @output structure UpdateClassifierResponse {} @input structure UpdateColumnStatisticsForPartitionRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

A list of partition values identifying the partition.

@required PartitionValues: ValueStringList ///

A list of the column statistics.

@required ColumnStatisticsList: UpdateColumnStatisticsList } @output structure UpdateColumnStatisticsForPartitionResponse { ///

Errors that occurred while updating the column statistics data.

Errors: ColumnStatisticsErrors } @input structure UpdateColumnStatisticsForTableRequest { ///

The ID of the Data Catalog where the partitions in question reside. /// If none is supplied, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the partitions reside.

@required DatabaseName: NameString ///

The name of the partitions' table.

@required TableName: NameString ///

A list of the column statistics.

@required ColumnStatisticsList: UpdateColumnStatisticsList } @output structure UpdateColumnStatisticsForTableResponse { ///

List of ColumnStatisticsErrors.

Errors: ColumnStatisticsErrors } @input structure UpdateConnectionRequest { ///

The ID of the Data Catalog in which the connection resides. If none is provided, the Amazon Web Services /// account ID is used by default.

CatalogId: CatalogIdString ///

The name of the connection definition to update.

@required Name: NameString ///

A ConnectionInput object that redefines the connection /// in question.

@required ConnectionInput: ConnectionInput } @output structure UpdateConnectionResponse {} @input structure UpdateCrawlerRequest { ///

Name of the new crawler.

@required Name: NameString ///

The IAM role or Amazon Resource Name (ARN) of an IAM role that is used by the new crawler /// to access customer resources.

Role: Role ///

The Glue database where results are stored, such as: /// arn:aws:daylight:us-east-1::database/sometable/*.

DatabaseName: DatabaseName ///

A description of the new crawler.

Description: DescriptionStringRemovable ///

A list of targets to crawl.

Targets: CrawlerTargets ///

A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers. For example, to run /// something every day at 12:15 UTC, you would specify: /// cron(15 12 * * ? *).

Schedule: CronExpression ///

A list of custom classifiers that the user /// has registered. By default, all built-in classifiers are included in a crawl, /// but these custom classifiers always override the default classifiers /// for a given classification.

Classifiers: ClassifierNameList ///

The table prefix used for catalog tables that are created.

TablePrefix: TablePrefix ///

The policy for the crawler's update and deletion behavior.

SchemaChangePolicy: SchemaChangePolicy ///

A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.

RecrawlPolicy: RecrawlPolicy ///

Specifies data lineage configuration settings for the crawler.

LineageConfiguration: LineageConfiguration ///

Specifies Lake Formation configuration settings for the crawler.

LakeFormationConfiguration: LakeFormationConfiguration ///

Crawler configuration information. This versioned JSON string allows users /// to specify aspects of a crawler's behavior. /// For more information, see Setting crawler configuration options.

Configuration: CrawlerConfiguration ///

The name of the SecurityConfiguration structure to be used by this /// crawler.

CrawlerSecurityConfiguration: CrawlerSecurityConfiguration } @output structure UpdateCrawlerResponse {} @input structure UpdateCrawlerScheduleRequest { ///

The name of the crawler whose schedule to update.

@required CrawlerName: NameString ///

The updated cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers. For example, to run /// something every day at 12:15 UTC, you would specify: /// cron(15 12 * * ? *).

Schedule: CronExpression } @output structure UpdateCrawlerScheduleResponse {} ///
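
A hedged sketch of UpdateCrawlerScheduleRequest (boto3 and the crawler name are assumptions), setting the schedule to run daily at 12:15 UTC:

    import boto3

    glue = boto3.client("glue")

    # Update the cron schedule of an existing crawler.
    glue.update_crawler_schedule(
        CrawlerName="my-crawler",
        Schedule="cron(15 12 * * ? *)",
    )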

Specifies a custom CSV classifier to be updated.

structure UpdateCsvClassifierRequest { ///

The name of the classifier.

@required Name: NameString ///

A custom symbol to denote what separates each column entry in the row.

Delimiter: CsvColumnDelimiter ///

A custom symbol to denote what combines content into a single column value. It must be /// different from the column delimiter.

QuoteSymbol: CsvQuoteSymbol ///

Indicates whether the CSV file contains a header.

ContainsHeader: CsvHeaderOption ///

A list of strings representing column names.

Header: CsvHeader ///

Specifies not to trim values before identifying the type of column values. The default value is true.

DisableValueTrimming: NullableBoolean ///

Enables the processing of files that contain only one column.

AllowSingleColumn: NullableBoolean ///

Specifies the configuration of custom datatypes.

CustomDatatypeConfigured: NullableBoolean ///

Specifies a list of supported custom datatypes.

CustomDatatypes: CustomDatatypes ///

Sets the SerDe for processing CSV in the classifier, which will be applied in the Data Catalog. Valid values are OpenCSVSerDe, LazySimpleSerDe, and None. You can specify the None value when you want the crawler to do the detection.

Serde: CsvSerdeOption } @input structure UpdateDatabaseRequest { ///

The ID of the Data Catalog in which the metadata database resides. If none is provided, /// the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the database to update in the catalog. For Hive /// compatibility, this is folded to lowercase.

@required Name: NameString ///

A DatabaseInput object specifying the new definition /// of the metadata database in the catalog.

@required DatabaseInput: DatabaseInput } @output structure UpdateDatabaseResponse {} @input structure UpdateDataQualityRulesetRequest { ///

The name of the data quality ruleset.

@required Name: NameString ///

A description of the ruleset.

Description: DescriptionString ///

A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.

Ruleset: DataQualityRulesetString } @output structure UpdateDataQualityRulesetResponse { ///

The name of the data quality ruleset.

Name: NameString ///

A description of the ruleset.

Description: DescriptionString ///

A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.

Ruleset: DataQualityRulesetString } @input structure UpdateDevEndpointRequest { ///

The name of the DevEndpoint to be updated.

@required EndpointName: GenericString ///

The public key for the DevEndpoint to use.

PublicKey: GenericString ///

The list of public keys for the DevEndpoint to use.

AddPublicKeys: PublicKeysList ///

The list of public keys to be deleted from the DevEndpoint.

DeletePublicKeys: PublicKeysList ///

Custom Python or Java libraries to be loaded in the DevEndpoint.

CustomLibraries: DevEndpointCustomLibraries ///

/// True if the list of custom libraries to be loaded in the development endpoint /// needs to be updated, or False if otherwise.

UpdateEtlLibraries: BooleanValue = false ///

The list of argument keys to be deleted from the map of arguments used to configure the /// DevEndpoint.

DeleteArguments: StringList ///

The map of arguments to add to the map of arguments used to configure the DevEndpoint.

///

Valid arguments are:

  • "--enable-glue-datacatalog": ""

You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.

AddArguments: MapValue } @output structure UpdateDevEndpointResponse {} ///
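
A hedged sketch of UpdateDevEndpointRequest (boto3 and the endpoint name are assumptions), adding the Data Catalog argument listed above:

    import boto3

    glue = boto3.client("glue")

    # Add an argument to the map of arguments on an existing development endpoint.
    glue.update_dev_endpoint(
        EndpointName="my-dev-endpoint",
        AddArguments={"--enable-glue-datacatalog": ""},
    )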

Specifies a grok classifier to update when passed to /// UpdateClassifier.

structure UpdateGrokClassifierRequest { ///

The name of the GrokClassifier.

@required Name: NameString ///

An identifier of the data format that the classifier matches, such as Twitter, JSON, Omniture logs, /// Amazon CloudWatch Logs, and so on.

Classification: Classification ///

The grok pattern used by this classifier.

GrokPattern: GrokPattern ///

Optional custom grok patterns used by this classifier.

CustomPatterns: CustomPatterns } @input structure UpdateJobFromSourceControlRequest { ///

The name of the Glue job to be synchronized to or from the remote repository.

JobName: NameString ///

The provider for the remote repository.

Provider: SourceControlProvider ///

The name of the remote repository that contains the job artifacts.

RepositoryName: NameString ///

The owner of the remote repository that contains the job artifacts.

RepositoryOwner: NameString ///

An optional branch in the remote repository.

BranchName: NameString ///

An optional folder in the remote repository.

Folder: NameString ///

A commit ID for a commit in the remote repository.

CommitId: CommitIdString ///

The type of authentication, which can be an authentication token stored in Amazon Web Services Secrets Manager, or a personal access token.

AuthStrategy: SourceControlAuthStrategy ///

The value of the authorization token.

AuthToken: AuthTokenString } @output structure UpdateJobFromSourceControlResponse { ///

The name of the Glue job.

JobName: NameString } @input structure UpdateJobRequest { ///

The name of the job definition to update.

@required JobName: NameString ///

Specifies the values with which to update the job definition. Unspecified configuration is removed or reset to default values.

@required JobUpdate: JobUpdate } @output structure UpdateJobResponse { ///

Returns the name of the updated job definition.

JobName: NameString } ///
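
Because unspecified configuration in JobUpdate is removed or reset to default values, an update should restate every setting that must be kept. A hedged boto3 sketch (job name, role ARN, and script path are placeholders):

    import boto3

    glue = boto3.client("glue")

    # Update a job definition; JobUpdate replaces the job's configuration.
    glue.update_job(
        JobName="my-etl-job",
        JobUpdate={
            "Role": "arn:aws:iam::123456789012:role/GlueJobRole",
            "Command": {
                "Name": "glueetl",
                "ScriptLocation": "s3://my-bucket/scripts/etl.py",
                "PythonVersion": "3",
            },
            "GlueVersion": "4.0",
            "WorkerType": "G.1X",
            "NumberOfWorkers": 10,
        },
    )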

Specifies a JSON classifier to be updated.

structure UpdateJsonClassifierRequest { ///

The name of the classifier.

@required Name: NameString ///

A JsonPath string defining the JSON data for the classifier to classify. /// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom Classifiers.

JsonPath: JsonPath } @input structure UpdateMLTransformRequest { ///

A unique identifier that was generated when the transform was created.

@required TransformId: HashString ///

The unique name that you gave the transform when you created it.

Name: NameString ///

A description of the transform. The default is an empty string.

Description: DescriptionString ///

The configuration parameters that are specific to the transform type (algorithm) used. /// Conditionally dependent on the transform type.

Parameters: TransformParameters ///

The name or Amazon Resource Name (ARN) of the IAM role with the required /// permissions.

Role: RoleString ///

This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.

GlueVersion: GlueVersionString ///

The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of /// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more /// information, see the Glue pricing /// page.

///

When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.

MaxCapacity: NullableDouble ///

The type of predefined worker that is allocated when this task runs. Accepts a value of Standard, G.1X, or G.2X.

  • For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.

  • For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.

  • For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.
WorkerType: WorkerType ///

The number of workers of a defined workerType that are allocated when this task runs.

NumberOfWorkers: NullableInteger ///

The timeout for a task run for this transform in minutes. This is the maximum time that a task run for this transform can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).

Timeout: Timeout ///

The maximum number of times to retry a task for this transform after a task run fails.

MaxRetries: NullableInteger } @output structure UpdateMLTransformResponse { ///

The unique identifier for the transform that was updated.

TransformId: HashString } @input structure UpdatePartitionRequest { ///
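
A hedged sketch of UpdateMLTransformRequest (boto3 and the transform ID are assumptions); when WorkerType is set, MaxCapacity is managed automatically as noted above:

    import boto3

    glue = boto3.client("glue")

    # Adjust capacity and timeout settings of an existing ML transform.
    glue.update_ml_transform(
        TransformId="0123456789abcdef0123456789abcdef",  # placeholder transform ID
        WorkerType="G.1X",
        NumberOfWorkers=5,
        Timeout=120,
    )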

The ID of the Data Catalog where the partition to be updated resides. If none is provided, /// the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which the table in question /// resides.

@required DatabaseName: NameString ///

The name of the table in which the partition to be updated is located.

@required TableName: NameString ///

List of partition key values that define the partition to update.

@required PartitionValueList: BoundedPartitionValueList ///

The new partition object to update the partition to.

///

The Values property can't be changed. If you want to change the partition key values for a partition, delete and recreate the partition.

@required PartitionInput: PartitionInput } @output structure UpdatePartitionResponse {} @output structure UpdateRegistryResponse { ///

The name of the updated registry.

RegistryName: SchemaRegistryNameString ///

The Amazon Resource name (ARN) of the updated registry.

RegistryArn: GlueResourceArn } @output structure UpdateSchemaResponse { ///

The Amazon Resource Name (ARN) of the schema.

SchemaArn: GlueResourceArn ///

The name of the schema.

SchemaName: SchemaRegistryNameString ///

The name of the registry that contains the schema.

RegistryName: SchemaRegistryNameString } @input structure UpdateSourceControlFromJobRequest { ///

The name of the Glue job to be synchronized to or from the remote repository.

JobName: NameString ///

The provider for the remote repository.

Provider: SourceControlProvider ///

The name of the remote repository that contains the job artifacts.

RepositoryName: NameString ///

The owner of the remote repository that contains the job artifacts.

RepositoryOwner: NameString ///

An optional branch in the remote repository.

BranchName: NameString ///

An optional folder in the remote repository.

Folder: NameString ///

A commit ID for a commit in the remote repository.

CommitId: CommitIdString ///

The type of authentication, which can be an authentication token stored in Amazon Web Services Secrets Manager, or a personal access token.

AuthStrategy: SourceControlAuthStrategy ///

The value of the authorization token.

AuthToken: AuthTokenString } @output structure UpdateSourceControlFromJobResponse { ///

The name of the Glue job.

JobName: NameString } @input structure UpdateTableRequest { ///

The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account /// ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database in which the table resides. For Hive /// compatibility, this name is entirely lowercase.

@required DatabaseName: NameString ///

An updated TableInput object to define the metadata table /// in the catalog.

@required TableInput: TableInput ///

By default, UpdateTable always creates an archived version of the table /// before updating it. However, if skipArchive is set to true, /// UpdateTable does not create the archived version.

SkipArchive: BooleanNullable ///

The transaction ID at which to update the table contents.

TransactionId: TransactionIdString ///

The version ID at which to update the table contents.

VersionId: VersionString } @output structure UpdateTableResponse {} @input structure UpdateTriggerRequest { ///

The name of the trigger to update.

@required Name: NameString ///

The new values with which to update the trigger.

@required TriggerUpdate: TriggerUpdate } @output structure UpdateTriggerResponse { ///

The resulting trigger definition.

Trigger: Trigger } @input structure UpdateUserDefinedFunctionRequest { ///

The ID of the Data Catalog where the function to be updated is located. If none is /// provided, the Amazon Web Services account ID is used by default.

CatalogId: CatalogIdString ///

The name of the catalog database where the function to be updated is /// located.

@required DatabaseName: NameString ///

The name of the function.

@required FunctionName: NameString ///

A FunctionInput object that redefines the function in the Data /// Catalog.

@required FunctionInput: UserDefinedFunctionInput } @output structure UpdateUserDefinedFunctionResponse {} @input structure UpdateWorkflowRequest { ///

Name of the workflow to be updated.

@required Name: NameString ///

The description of the workflow.

Description: GenericString ///

A collection of properties to be used as part of each execution of the workflow.

DefaultRunProperties: WorkflowRunProperties ///

You can use this parameter to prevent unwanted multiple updates to data, to control costs, or in some cases, to prevent exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs.

MaxConcurrentRuns: NullableInteger } @output structure UpdateWorkflowResponse { ///

The name of the workflow which was specified in input.

Name: NameString } ///

Specifies an XML classifier to be updated.

structure UpdateXMLClassifierRequest { ///

The name of the classifier.

@required Name: NameString ///

An identifier of the data format that the classifier matches.

Classification: Classification ///

The XML tag designating the element that contains each record in an XML document being parsed. This cannot identify a self-closing element (closed by />). An empty row element that contains only attributes can be parsed as long as it ends with a closing tag (for example, <row item_a="A" item_b="B"></row> is okay, but <row item_a="A" item_b="B" /> is not).

RowTag: RowTag } ///

The options to configure an upsert operation when writing to a Redshift target.

structure UpsertRedshiftTargetOptions { ///

The physical location of the Redshift table.

TableLocation: EnclosedInStringProperty ///

The name of the connection to use to write to Redshift.

ConnectionName: EnclosedInStringProperty ///

The keys used to determine whether to perform an update or insert.

UpsertKeys: EnclosedInStringPropertiesMinOne } ///

Represents the equivalent of a Hive user-defined function /// (UDF) definition.

structure UserDefinedFunction { ///

The name of the function.

FunctionName: NameString ///

The name of the catalog database that contains the function.

DatabaseName: NameString ///

The Java class that contains the function code.

ClassName: NameString ///

The owner of the function.

OwnerName: NameString ///

The owner type.

OwnerType: PrincipalType ///

The time at which the function was created.

CreateTime: Timestamp ///

The resource URIs for the function.

ResourceUris: ResourceUriList ///

The ID of the Data Catalog in which the function resides.

CatalogId: CatalogIdString } ///

A structure used to create or update a user-defined function.

structure UserDefinedFunctionInput { ///

The name of the function.

FunctionName: NameString ///

The Java class that contains the function code.

ClassName: NameString ///

The owner of the function.

OwnerName: NameString ///

The owner type.

OwnerType: PrincipalType ///

The resource URIs for the function.

ResourceUris: ResourceUriList } ///

A value could not be validated.

@error("client") structure ValidationException { ///

A message describing the problem.

Message: MessageString } ///

There was a version conflict.

@error("client") structure VersionMismatchException { ///

A message describing the problem.

Message: MessageString } ///

A workflow is a collection of multiple dependent Glue /// jobs and crawlers that are run to complete a complex ETL task. A /// workflow manages the execution and monitoring of all its jobs and crawlers.

structure Workflow { ///

The name of the workflow.

Name: NameString ///

A description of the workflow.

Description: GenericString ///

A collection of properties to be used as part of each execution of the workflow. /// The run properties are made available to each job in the workflow. A job can modify /// the properties for the next jobs in the flow.

DefaultRunProperties: WorkflowRunProperties ///

The date and time when the workflow was created.

CreatedOn: TimestampValue ///

The date and time when the workflow was last modified.

LastModifiedOn: TimestampValue ///

The information about the last execution of the workflow.

LastRun: WorkflowRun ///

The graph representing all the Glue components that belong to the workflow as nodes and directed /// connections between them as edges.

Graph: WorkflowGraph ///

You can use this parameter to prevent unwanted multiple updates to data, to control costs, or in some cases, to prevent exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs.

MaxConcurrentRuns: NullableInteger ///

This structure indicates the details of the blueprint that this particular workflow is created from.

BlueprintDetails: BlueprintDetails } ///

A workflow graph represents the complete workflow containing all the Glue components present in the /// workflow and all the directed connections between them.

structure WorkflowGraph { ///

A list of the Glue components that belong to the workflow, represented as nodes.

Nodes: NodeList ///

A list of all the directed connections between the nodes belonging to the workflow.

Edges: EdgeList } ///

A workflow run is an execution of a workflow providing all the runtime information.

structure WorkflowRun { ///

Name of the workflow that was run.

Name: NameString ///

The ID of this workflow run.

WorkflowRunId: IdString ///

The ID of the previous workflow run.

PreviousRunId: IdString ///

The workflow run properties which were set during the run.

WorkflowRunProperties: WorkflowRunProperties ///

The date and time when the workflow run was started.

StartedOn: TimestampValue ///

The date and time when the workflow run completed.

CompletedOn: TimestampValue ///

The status of the workflow run.

Status: WorkflowRunStatus ///

This error message describes any error that may have occurred in starting the workflow run. Currently the only error message is "Concurrent runs exceeded for workflow: foo."

ErrorMessage: ErrorString ///

The statistics of the run.

Statistics: WorkflowRunStatistics ///

The graph representing all the Glue components that belong to the workflow as nodes and directed /// connections between them as edges.

Graph: WorkflowGraph ///

The batch condition that started the workflow run.

StartingEventBatchCondition: StartingEventBatchCondition } ///

Workflow run statistics provides statistics about the workflow run.

structure WorkflowRunStatistics { ///

Total number of Actions in the workflow run.

TotalActions: IntegerValue = 0 ///

Total number of Actions that timed out.

TimeoutActions: IntegerValue = 0 ///

Total number of Actions that have failed.

FailedActions: IntegerValue = 0 ///

Total number of Actions that have stopped.

StoppedActions: IntegerValue = 0 ///

Total number of Actions that have succeeded.

SucceededActions: IntegerValue = 0 ///

Total number of Actions in running state.

RunningActions: IntegerValue = 0 ///

Indicates the count of job runs in the ERROR state in the workflow run.

ErroredActions: IntegerValue = 0 ///

Indicates the count of job runs in WAITING state in the workflow run.

WaitingActions: IntegerValue = 0 } ///

A classifier for XML content.

structure XMLClassifier { ///

The name of the classifier.

@required Name: NameString ///

An identifier of the data format that the classifier matches.

@required Classification: Classification ///

The time that this classifier was registered.

CreationTime: Timestamp ///

The time that this classifier was last updated.

LastUpdated: Timestamp ///

The version of this classifier.

Version: VersionId = 0 ///

The XML tag designating the element that contains each record in an XML document being parsed. This can't identify a self-closing element (closed by />). An empty row element that contains only attributes can be parsed as long as it ends with a closing tag (for example, <row item_a="A" item_b="B"></row> is okay, but <row item_a="A" item_b="B" /> is not).

RowTag: RowTag } list ActionList { member: Action } @length( min: 1 max: 30 ) list AggregateOperations { member: AggregateOperation } list AmazonRedshiftAdvancedOptions { member: AmazonRedshiftAdvancedOption } list AuditColumnNamesList { member: ColumnNameString } list BackfillErroredPartitionsList { member: PartitionValueList } list BackfillErrors { member: BackfillError } @length( min: 0 max: 25 ) list BatchDeletePartitionValueList { member: PartitionValueList } @length( min: 0 max: 100 ) list BatchDeleteTableNameList { member: NameString } @length( min: 0 max: 100 ) list BatchDeleteTableVersionList { member: VersionString } @length( min: 1 max: 25 ) list BatchGetBlueprintNames { member: OrchestrationNameString } @length( min: 0 max: 1000 ) list BatchGetPartitionValueList { member: PartitionValueList } list BatchStopJobRunErrorList { member: BatchStopJobRunError } @length( min: 1 max: 25 ) list BatchStopJobRunJobRunIdList { member: IdString } list BatchStopJobRunSuccessfulSubmissionList { member: BatchStopJobRunSuccessfulSubmission } list BatchUpdatePartitionFailureList { member: BatchUpdatePartitionFailureEntry } @length( min: 1 max: 100 ) list BatchUpdatePartitionRequestEntryList { member: BatchUpdatePartitionRequestEntry } list BlueprintNames { member: OrchestrationNameString } list BlueprintRuns { member: BlueprintRun } list Blueprints { member: Blueprint } @length( min: 0 max: 100 ) list BoundedPartitionValueList { member: ValueString } list CatalogEntries { member: CatalogEntry } @length( min: 1 ) list CatalogTablesList { member: NameString } list CatalogTargetList { member: CatalogTarget } list ClassifierList { member: Classifier } list ClassifierNameList { member: NameString } @length( min: 0 max: 50 ) list CodeGenNodeArgs { member: CodeGenNodeArg } list ColumnErrors { member: ColumnError } @length( min: 0 max: 100 ) list ColumnImportanceList { member: ColumnImportance } list ColumnList { member: Column } list ColumnRowFilterList { member: ColumnRowFilter } list ColumnStatisticsErrors { member: ColumnStatisticsError } list ColumnStatisticsList { member: ColumnStatistics } list ColumnValueStringList { member: ColumnValuesString } list ConditionList { member: Condition } list ConnectionList { member: Connection } @length( min: 1 max: 20 ) list ContextWords { member: NameString } list CrawlerHistoryList { member: CrawlerHistory } list CrawlerList { member: Crawler } list CrawlerMetricsList { member: CrawlerMetrics } @length( min: 0 max: 100 ) list CrawlerNameList { member: NameString } list CrawlList { member: Crawl } list CrawlsFilterList { member: CrawlsFilter } list CsvHeader { member: NameString } list CustomDatatypes { member: NameString } @length( min: 1 max: 50 ) list CustomEntityTypeNames { member: NameString } list CustomEntityTypes { member: CustomEntityType } list DagEdges { member: CodeGenEdge } list DagNodes { member: CodeGenNode } list DatabaseList { member: Database } list DataQualityResultDescriptionList { member: DataQualityResultDescription } @length( min: 1 max: 10 ) list DataQualityResultIdList { member: HashString } @length( min: 1 max: 100 ) list DataQualityResultIds { member: HashString } list DataQualityResultsList { member: DataQualityResult } list DataQualityRuleRecommendationRunList { member: DataQualityRuleRecommendationRunDescription } @length( min: 1 max: 2000 ) list DataQualityRuleResults { member: DataQualityRuleResult } list DataQualityRulesetEvaluationRunList { member: DataQualityRulesetEvaluationRunDescription } list DataQualityRulesetList { 
member: DataQualityRulesetListDetails } @length( min: 0 max: 25 ) list DeleteConnectionNameList { member: NameString } list DeltaTargetList { member: DeltaTarget } list DevEndpointList { member: DevEndpoint } list DevEndpointNameList { member: NameString } @length( min: 1 max: 25 ) list DevEndpointNames { member: GenericString } list DynamoDBTargetList { member: DynamoDBTarget } list EdgeList { member: Edge } list EnableAdditionalMetadata { member: JdbcMetadataEntry } list EnclosedInStringProperties { member: EnclosedInStringProperty } list EnclosedInStringPropertiesMinOne { member: EnclosedInStringProperty } list FilterExpressions { member: FilterExpression } list FilterValues { member: FilterValue } @length( min: 0 max: 100 ) list GetColumnNamesList { member: NameString } list GetResourcePoliciesResponseList { member: GluePolicy } list GetTableVersionsList { member: TableVersion } list GlueSchemas { member: GlueSchema } list GlueStudioPathList { member: EnclosedInStringProperties } list GlueStudioSchemaColumnList { member: GlueStudioSchemaColumn } @length( min: 0 max: 10 ) list GlueTables { member: GlueTable } list HudiTargetList { member: HudiTarget } list IcebergTargetList { member: IcebergTarget } list JdbcTargetList { member: JdbcTarget } list JobList { member: Job } list JobNameList { member: NameString } list JobRunList { member: JobRun } @length( min: 2 max: 2 ) list JoinColumns { member: JoinColumn } @length( min: 1 ) list KeyList { member: NameString } @length( min: 1 ) list KeySchemaElementList { member: KeySchemaElement } list LimitedPathList { member: LimitedStringList } list LimitedStringList { member: GenericLimitedString } list LocationStringList { member: LocationString } @length( min: 1 ) list ManyInputs { member: NodeId } list MappingList { member: MappingEntry } list Mappings { member: Mapping } @length( min: 0 max: 10 ) list MatchCriteria { member: NameString } list MetadataList { member: MetadataKeyValuePair } list MongoDBTargetList { member: MongoDBTarget } list NameStringList { member: NameString } list NodeIdList { member: NameString } list NodeList { member: Node } @length( min: 0 max: 50 ) list NullValueFields { member: NullValueField } @length( min: 1 max: 1 ) list OneInput { member: NodeId } list OptionList { member: Option } list OrchestrationStringList { member: GenericString } list OrderList { member: Order } list OtherMetadataValueList { member: OtherMetadataValueListItem } list PartitionErrors { member: PartitionError } list PartitionIndexDescriptorList { member: PartitionIndexDescriptor } @length( min: 0 max: 3 ) list PartitionIndexList { member: PartitionIndex } @length( min: 0 max: 100 ) list PartitionInputList { member: PartitionInput } list PartitionList { member: Partition } list PathList { member: Path } list PermissionList { member: Permission } @length( min: 1 max: 255 ) list PermissionTypeList { member: PermissionType } list PredecessorList { member: Predecessor } list PrincipalPermissionsList { member: PrincipalPermissions } @length( min: 0 max: 5 ) list PublicKeysList { member: GenericString } list RegistryListDefinition { member: RegistryListItem } @length( min: 0 max: 1000 ) list ResourceUriList { member: ResourceUri } @length( min: 1 max: 10 ) list RulesetNames { member: NameString } list S3EncryptionList { member: S3Encryption } list S3TargetList { member: S3Target } list SchemaListDefinition { member: SchemaListItem } list SchemaVersionErrorList { member: SchemaVersionErrorItem } list SchemaVersionList { member: SchemaVersionListItem } 
list SearchPropertyPredicates { member: PropertyPredicate } list SecurityConfigurationList { member: SecurityConfiguration } @length( min: 0 max: 50 ) list SecurityGroupIdList { member: NameString } list SessionIdList { member: NameString } list SessionList { member: Session } @length( min: 0 max: 1 ) list SortCriteria { member: SortCriterion } list SqlAliases { member: SqlAlias } list StatementList { member: Statement } list StringList { member: GenericString } list TableErrors { member: TableError } list TableList { member: Table } list TableVersionErrors { member: TableVersionError } @length( min: 0 max: 50 ) list TagKeysList { member: TagKey } list TaskRunList { member: TaskRun } list TransformConfigParameterList { member: TransformConfigParameter } list TransformIdList { member: HashString } list TransformList { member: MLTransform } @length( min: 0 max: 100 ) list TransformSchema { member: SchemaColumn } list TriggerList { member: Trigger } list TriggerNameList { member: NameString } @length( min: 2 max: 2 ) list TwoInputs { member: NodeId } list UnfilteredPartitionList { member: UnfilteredPartition } @length( min: 0 max: 25 ) list UpdateColumnStatisticsList { member: ColumnStatistics } list UserDefinedFunctionList { member: UserDefinedFunction } list ValueStringList { member: ValueString } @length( min: 1 max: 25 ) list WorkflowNames { member: NameString } @length( min: 1 max: 1000 ) list WorkflowRuns { member: WorkflowRun } @length( min: 1 max: 25 ) list Workflows { member: Workflow } map AdditionalOptions { key: EnclosedInStringProperty value: EnclosedInStringProperty } map AdditionalPlanOptionsMap { key: GenericString value: GenericString } @sensitive map CodeGenConfigurationNodes { key: NodeId value: CodeGenConfigurationNode } @length( min: 0 max: 100 ) map ConnectionProperties { key: ConnectionPropertyKey value: ValueString } map DataSourceMap { key: NameString value: DataSource } map DQAdditionalOptions { key: AdditionalOptionKeys value: GenericString } map DQDLAliases { key: NodeName value: EnclosedInStringProperty } map ErrorByName { key: NameString value: ErrorDetail } map EvaluatedMetricsMap { key: NameString value: NullableDouble } map GenericMap { key: GenericString value: GenericString } @length( min: 1 max: 10 ) map GlueTableAdditionalOptions { key: NameString value: DescriptionString } map JDBCDataTypeMapping { key: JDBCDataType value: GlueRecordType } map LocationMap { key: ColumnValuesString value: ColumnValuesString } @length( min: 0 max: 100 ) map MapValue { key: GenericString value: GenericString } map MetadataInfoMap { key: MetadataKeyString value: MetadataInfo } @length( min: 0 max: 75 ) map OrchestrationArgumentsMap { key: OrchestrationNameString value: OrchestrationArgumentsValue } map ParametersMap { key: KeyString value: ParametersMapValue } @length( min: 0 max: 50 ) map TagsMap { key: TagKey value: TagValue } map WorkflowRunProperties { key: IdString value: GenericString } @length( min: 0 max: 12 ) string AccountId enum AdditionalOptionKeys { CacheOption = "performanceTuning.caching" } enum AggFunction { avg countDistinct count first last kurtosis max min skewness stddev_samp stddev_pop sum sumDistinct var_samp var_pop } @default(0) integer AttemptCount @length( min: 0 max: 2048 ) string AuditContextString @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string AuthTokenString enum BackfillErrorCode { ENCRYPTED_PARTITION_ERROR INTERNAL_ERROR INVALID_PARTITION_TYPE_DATA_ERROR 
MISSING_PARTITION_VALUE_ERROR UNSUPPORTED_PARTITION_CHARACTER_ERROR } @default(0) @range( min: 1 max: 100 ) integer BatchSize @range( min: 1 max: 900 ) integer BatchWindow blob Blob @length( min: 1 max: 131072 ) string BlueprintParameters @length( min: 1 max: 131072 ) string BlueprintParameterSpec enum BlueprintRunState { RUNNING SUCCEEDED FAILED ROLLING_BACK } enum BlueprintStatus { CREATING ACTIVE UPDATING FAILED } @default(false) boolean Boolean boolean BooleanNullable @default(false) boolean BooleanValue boolean BoxedBoolean @range( min: 0 max: 1 ) double BoxedDoubleFraction long BoxedLong @range( min: 0 ) integer BoxedNonNegativeInt @range( min: 0 ) long BoxedNonNegativeLong @range( min: 0 ) integer BoxedPositiveInt enum CatalogEncryptionMode { DISABLED SSEKMS = "SSE-KMS" } @range( min: 1 max: 100 ) integer CatalogGetterPageSize @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string CatalogIdString string Classification enum CloudWatchEncryptionMode { DISABLED SSEKMS = "SSE-KMS" } string CodeGenArgName string CodeGenArgValue @length( min: 1 max: 255 ) @pattern("^[A-Za-z_][A-Za-z0-9_]*$") string CodeGenIdentifier string CodeGenNodeType @length( min: 1 max: 1024 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string ColumnNameString enum ColumnStatisticsType { BOOLEAN DATE DECIMAL DOUBLE LONG STRING BINARY } @length( min: 0 max: 131072 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string ColumnTypeString string ColumnValuesString @length( min: 0 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string CommentString @length( min: 1 max: 40 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string CommitIdString enum Comparator { EQUALS GREATER_THAN LESS_THAN GREATER_THAN_EQUALS LESS_THAN_EQUALS } enum Compatibility { NONE DISABLED BACKWARD BACKWARD_ALL FORWARD FORWARD_ALL FULL FULL_ALL } enum CompressionType { GZIP = "gzip" BZIP2 = "bzip2" } string ConnectionName enum ConnectionPropertyKey { HOST PORT USER_NAME = "USERNAME" PASSWORD ENCRYPTED_PASSWORD JDBC_DRIVER_JAR_URI JDBC_DRIVER_CLASS_NAME JDBC_ENGINE JDBC_ENGINE_VERSION CONFIG_FILES INSTANCE_ID JDBC_CONNECTION_URL JDBC_ENFORCE_SSL CUSTOM_JDBC_CERT SKIP_CUSTOM_JDBC_CERT_VALIDATION CUSTOM_JDBC_CERT_STRING CONNECTION_URL KAFKA_BOOTSTRAP_SERVERS KAFKA_SSL_ENABLED KAFKA_CUSTOM_CERT KAFKA_SKIP_CUSTOM_CERT_VALIDATION KAFKA_CLIENT_KEYSTORE KAFKA_CLIENT_KEYSTORE_PASSWORD KAFKA_CLIENT_KEY_PASSWORD ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD SECRET_ID CONNECTOR_URL CONNECTOR_TYPE CONNECTOR_CLASS_NAME KAFKA_SASL_MECHANISM KAFKA_SASL_SCRAM_USERNAME KAFKA_SASL_SCRAM_PASSWORD KAFKA_SASL_SCRAM_SECRETS_ARN ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD KAFKA_SASL_GSSAPI_KEYTAB KAFKA_SASL_GSSAPI_KRB5_CONF KAFKA_SASL_GSSAPI_SERVICE KAFKA_SASL_GSSAPI_PRINCIPAL } enum ConnectionType { JDBC SFTP MONGODB KAFKA NETWORK MARKETPLACE CUSTOM } string CrawlerConfiguration enum CrawlerHistoryState { RUNNING COMPLETED FAILED STOPPED } enum CrawlerLineageSettings { ENABLE DISABLE } @length( min: 0 max: 128 ) string CrawlerSecurityConfiguration enum CrawlerState { READY RUNNING STOPPING } string CrawlId enum CrawlState { RUNNING CANCELLING CANCELLED SUCCEEDED FAILED ERROR } string CreatedTimestamp string CronExpression @length( min: 1 max: 1 ) @pattern("^[^\\r\\n]$") string CsvColumnDelimiter enum CsvHeaderOption { UNKNOWN PRESENT ABSENT } 
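// Illustrative sketch only, not part of the published Glue model: constrained simple
// shapes such as CatalogIdString and ColumnNameString (defined above) are consumed as
// member targets by structures elsewhere in this file. The structure name
// ExampleColumnReference below is hypothetical.
structure ExampleColumnReference {
    CatalogId: CatalogIdString

    @required
    ColumnName: ColumnNameString
}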
@length( min: 1 max: 1 ) @pattern("^[^\\r\\n]$") string CsvQuoteSymbol enum CsvSerdeOption { OpenCSVSerDe LazySimpleSerDe None } @length( min: 0 max: 16000 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string CustomPatterns string DatabaseName enum DataFormat { AVRO JSON PROTOBUF } @length( min: 1 max: 255 ) string DataLakePrincipalString enum DataQualityRuleResultStatus { PASS FAIL ERROR } @length( min: 1 max: 65536 ) string DataQualityRulesetString enum DeleteBehavior { LOG DELETE_FROM_DATABASE DEPRECATE_IN_DATABASE } enum DeltaTargetCompressionType { UNCOMPRESSED = "uncompressed" SNAPPY = "snappy" } @length( min: 0 max: 2048 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string DescriptionString @length( min: 0 max: 2048 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string DescriptionStringRemovable @default(0) double Double @default(0) double DoubleValue @length( min: 1 max: 65536 ) @pattern("^([\\u0020-\\u007E\\r\\s\\n])*$") string DQDLString enum DQStopJobOnFailureTiming { Immediate AfterDataLoad } enum DQTransformOutput { PrimaryInput EvaluationResults } enum EnableHybridValues { TRUE FALSE } @pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF]|[^\\S\\r\\n\"'])*$") string EnclosedInStringProperty @pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF]|[^\\S\\r\\n])*$") string EnclosedInStringPropertyWithQuote string ErrorCodeString string ErrorMessageString string ErrorString string EventQueueArn @length( min: 0 max: 16 ) enum ExecutionClass { FLEX STANDARD } @default(0) integer ExecutionTime enum ExistCondition { MUST_EXIST NOT_EXIST NONE } @pattern("^[\\s\\S]*$") string ExtendedString @length( min: 1 max: 512 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string FederationIdentifier enum FederationSourceErrorCode { InvalidResponseException OperationTimeoutException OperationNotSupportedException InternalServiceException ThrottlingException } enum FieldName { CRAWL_ID STATE START_TIME END_TIME DPU_HOUR } string FieldType enum FilterLogicalOperator { AND OR } enum FilterOperation { EQ LT GT LTE GTE REGEX ISNULL } enum FilterOperator { GT GE LT LE EQ NE } @length( min: 0 max: 2048 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string FilterString enum FilterValueType { COLUMNEXTRACTED CONSTANT } @length( min: 0 max: 128 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string FormatString @length( min: 1 max: 512 ) string Generic512CharString @range( min: 0 max: 1 ) double GenericBoundedDouble @pattern("^[A-Za-z0-9_-]*$") string GenericLimitedString string GenericString enum GlueRecordType { DATE STRING TIMESTAMP INT FLOAT LONG BIGDECIMAL BYTE SHORT DOUBLE } @length( min: 1 max: 10240 ) @pattern("^arn:(aws|aws-us-gov|aws-cn):glue:") string GlueResourceArn @length( min: 0 max: 1024 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string GlueStudioColumnNameString @length( min: 1 max: 255 ) @pattern("^\\w+\\.\\w+$") string GlueVersionString @length( min: 1 max: 2048 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\t]*$") string GrokPattern @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string HashString enum HudiTargetCompressionType { GZIP = "gzip" LZO = "lzo" UNCOMPRESSED = "uncompressed" SNAPPY = 
"snappy" } integer IdleTimeout @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string IdString @default(0) integer Integer @default(0) @range( min: 0 max: 1 ) integer IntegerFlag @default(0) integer IntegerValue @timestampFormat("date-time") timestamp Iso8601DateTime @default(false) boolean IsVersionValid enum JDBCConnectionType { sqlserver mysql oracle postgresql redshift } enum JDBCDataType { ARRAY BIGINT BINARY BIT BLOB BOOLEAN CHAR CLOB DATALINK DATE DECIMAL DISTINCT DOUBLE FLOAT INTEGER JAVA_OBJECT LONGNVARCHAR LONGVARBINARY LONGVARCHAR NCHAR NCLOB NULL NUMERIC NVARCHAR OTHER REAL REF REF_CURSOR ROWID SMALLINT SQLXML STRUCT TIME TIME_WITH_TIMEZONE TIMESTAMP TIMESTAMP_WITH_TIMEZONE TINYINT VARBINARY VARCHAR } enum JdbcMetadataEntry { COMMENTS RAWTYPES } enum JobBookmarksEncryptionMode { DISABLED CSEKMS = "CSE-KMS" } string JobName enum JobRunState { STARTING RUNNING STOPPING STOPPED SUCCEEDED FAILED TIMEOUT ERROR WAITING } enum JoinType { EQUIJOIN = "equijoin" LEFT = "left" RIGHT = "right" OUTER = "outer" LEFT_SEMI = "leftsemi" LEFT_ANTI = "leftanti" } string JsonPath string JsonValue @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string KeyString @pattern("^arn:aws:kms:") string KmsKeyArn @default(0) integer LabelCount enum Language { PYTHON SCALA } enum LastCrawlStatus { SUCCEEDED CANCELLED FAILED } @default(false) boolean LatestSchemaVersionBoolean @length( min: 0 max: 2056 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string LocationString @length( min: 1 max: 512 ) @pattern("^[\\.\\-_/#A-Za-z0-9]+$") string LogGroup enum Logical { AND ANY } enum LogicalOperator { EQUALS } @length( min: 1 max: 512 ) @pattern("^[^:*]*$") string LogStream @default(0) long Long @default(0) long LongValue @length( min: 0 max: 256 ) @pattern("^[*A-Za-z0-9_-]*$") string MaskValue @default(0) integer MaxConcurrentRuns @range( min: 1 max: 100 ) integer MaxResultsNumber @default(0) integer MaxRetries @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string MessagePrefix string MessageString @length( min: 1 max: 128 ) @pattern("^[a-zA-Z0-9+-=._./@]+$") string MetadataKeyString enum MetadataOperation { CREATE } @length( min: 1 max: 256 ) @pattern("^[a-zA-Z0-9+-=._./@]+$") string MetadataValueString @default(0) long MillisecondsCount enum MLUserDataEncryptionModeString { DISABLED SSEKMS = "SSE-KMS" } @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string NameString @pattern("^[A-Za-z0-9_-]*$") string NodeId @pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF]|[^\\r\\n])*$") string NodeName enum NodeType { CRAWLER JOB TRIGGER } @default(0) @range( min: 0 ) double NonNegativeDouble @default(0) @range( min: 0 ) integer NonNegativeInt @default(0) @range( min: 0 ) integer NonNegativeInteger @default(0) @range( min: 0 ) long NonNegativeLong @range( min: 1 ) integer NotifyDelayAfter boolean NullableBoolean double NullableDouble integer NullableInteger @length( min: 0 max: 4096 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string OrchestrationArgumentsValue @length( min: 1 max: 1024 ) @pattern("^arn:aws[^:]*:iam::[0-9]*:role/.+$") string OrchestrationIAMRoleArn @length( min: 1 max: 128 ) @pattern("^[\\.\\-_A-Za-z0-9]+$") string OrchestrationNameString @length( min: 20 max: 2048 ) 
@pattern("^arn:aws[^:]*:iam::[0-9]*:role/.+$") string OrchestrationRoleArn @length( min: 1 max: 8192 ) @pattern("^s3://([^/]+)/([^/]+/)*([^/]+)$") string OrchestrationS3Location @length( min: 0 max: 68000 ) string OrchestrationStatementCodeString @length( min: 0 max: 400000 ) string OrchestrationToken @range( min: 1 max: 1000 ) integer PageSize string PaginationToken @length( min: 0 max: 512000 ) string ParametersMapValue enum ParamType { STR = "str" INT = "int" FLOAT = "float" COMPLEX = "complex" BOOL = "bool" LIST = "list" NULL = "null" } enum ParquetCompressionType { SNAPPY = "snappy" LZO = "lzo" GZIP = "gzip" UNCOMPRESSED = "uncompressed" NONE = "none" } enum PartitionIndexStatus { CREATING ACTIVE DELETING FAILED } string Path enum Permission { ALL SELECT ALTER DROP DELETE INSERT CREATE_DATABASE CREATE_TABLE DATA_LOCATION_ACCESS } enum PermissionType { COLUMN_PERMISSION CELL_FILTER_PERMISSION NESTED_PERMISSION NESTED_CELL_PERMISSION } enum PiiType { RowAudit RowMasking ColumnAudit ColumnMasking } @length( min: 2 ) string PolicyJsonString @range( min: 10 ) long PollingTime @range( min: 1 ) long PositiveLong @length( min: 0 max: 2048 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string PredicateString enum PrincipalType { USER ROLE GROUP } @range( min: 0 max: 1 ) double Prob string PythonScript @pattern("^([2-3]|3[.]9)$") string PythonVersionString @default(0) @range( min: 1 max: 50 ) integer QuerySchemaVersionMetadataMaxResults enum QuoteChar { QUOTE = "quote" QUILLEMET = "quillemet" SINGLE_QUOTE = "single_quote" DISABLED = "disabled" } @length( min: 1 max: 16 ) string RecipeVersion long RecordsCount enum RecrawlBehavior { CRAWL_EVERYTHING CRAWL_NEW_FOLDERS_ONLY CRAWL_EVENT_MODE } enum RegistryStatus { AVAILABLE DELETING } @default(false) boolean ReplaceBoolean enum ResourceShareType { FOREIGN ALL FEDERATED } enum ResourceType { JAR FILE ARCHIVE } string Role @pattern("^arn:aws:iam::\\d{12}:role/") string RoleArn string RoleString string RowTag string RunId @length( min: 0 max: 64 ) @pattern(".*") string RuntimeNameString enum S3EncryptionMode { DISABLED SSEKMS = "SSE-KMS" SSES3 = "SSE-S3" } string ScalaCode enum ScheduleState { SCHEDULED NOT_SCHEDULED TRANSITIONING } @default(0) @range( min: 1 max: 100000 ) long SchemaCheckpointNumber @length( min: 1 max: 340000 ) @pattern("\\S") string SchemaDefinitionDiff @length( min: 1 max: 170000 ) @pattern("\\S") string SchemaDefinitionString enum SchemaDiffType { SYNTAX_DIFF } string SchemaPathString @length( min: 1 max: 255 ) @pattern("^[a-zA-Z0-9-_$#.]+$") string SchemaRegistryNameString string SchemaRegistryTokenString enum SchemaStatus { AVAILABLE PENDING DELETING } @length( min: 1 max: 5000 ) string SchemaValidationError @length( min: 36 max: 36 ) @pattern("^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$") string SchemaVersionIdString enum SchemaVersionStatus { AVAILABLE PENDING FAILURE DELETING } @length( min: 0 max: 400000 ) string ScriptLocationString enum Separator { COMMA = "comma" CTRLA = "ctrla" PIPE = "pipe" SEMICOLON = "semicolon" TAB = "tab" } enum SessionStatus { PROVISIONING READY FAILED TIMEOUT STOPPING STOPPED } enum Sort { ASCENDING = "ASC" DESCENDING = "DESC" } enum SortDirectionType { DESCENDING ASCENDING } enum SourceControlAuthStrategy { PERSONAL_ACCESS_TOKEN AWS_SECRETS_MANAGER } enum SourceControlProvider { GITHUB AWS_CODE_COMMIT } @pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\s])*$") string SqlQuery enum StartingPosition { LATEST = 
"latest" TRIM_HORIZON = "trim_horizon" EARLIEST = "earliest" TIMESTAMP = "timestamp" } enum StatementState { WAITING RUNNING AVAILABLE CANCELLING CANCELLED ERROR } string TableName @length( min: 0 max: 128 ) string TablePrefix @length( min: 0 max: 255 ) string TableTypeString @length( min: 1 max: 128 ) string TagKey @length( min: 0 max: 256 ) string TagValue enum TargetFormat { JSON = "json" CSV = "csv" AVRO = "avro" ORC = "orc" PARQUET = "parquet" HUDI = "hudi" DELTA = "delta" } enum TaskRunSortColumnType { TASK_RUN_TYPE STATUS STARTED } enum TaskStatusType { STARTING RUNNING STOPPING STOPPED SUCCEEDED FAILED TIMEOUT } enum TaskType { EVALUATION LABELING_SET_GENERATION IMPORT_LABELS EXPORT_LABELS FIND_MATCHES } @range( min: 1 ) integer Timeout timestamp Timestamp timestamp TimestampValue string Token @range( min: 0 max: 100 ) integer Topk @default(0) @range( min: 1 max: 10 ) integer TotalSegmentsInteger @length( min: 1 max: 255 ) @pattern("^[\\p{L}\\p{N}\\p{P}]*$") string TransactionIdString enum TransformSortColumnType { NAME TRANSFORM_TYPE STATUS CREATED LAST_MODIFIED } enum TransformStatusType { NOT_READY READY DELETING } enum TransformType { FIND_MATCHES } enum TriggerState { CREATING CREATED ACTIVATING ACTIVATED DEACTIVATING DEACTIVATED DELETING UPDATING } enum TriggerType { SCHEDULED CONDITIONAL ON_DEMAND EVENT } @length( min: 0 max: 20000 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string TypeString enum UnionType { ALL DISTINCT } enum UpdateBehavior { LOG UPDATE_IN_DATABASE } enum UpdateCatalogBehavior { UPDATE_IN_DATABASE LOG } string UpdatedTimestamp @length( min: 1 max: 1024 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$") string URI string UriString @length( min: 0 max: 1024 ) string ValueString @default(0) long VersionId @default(0) @range( min: 1 max: 100000 ) long VersionLongNumber @length( min: 1 max: 100000 ) @pattern("^[1-9][0-9]*|[1-9][0-9]*-[1-9][0-9]*$") string VersionsString @length( min: 1 max: 255 ) @pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$") string VersionString @length( min: 0 max: 409600 ) string ViewTextString enum WorkerType { Standard G_1X = "G.1X" G_2X = "G.2X" G_025X = "G.025X" G_4X = "G.4X" G_8X = "G.8X" Z_2X = "Z.2X" } enum WorkflowRunStatus { RUNNING COMPLETED STOPPING STOPPED ERROR }




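For illustration only, and not part of the published model: a minimal sketch of how the constrained simple shapes defined above (NameString, DescriptionString, TagsMap) are typically referenced as member targets in the model's request structures. The structure name ExampleUpdateInput is a hypothetical placeholder; only the member target shapes come from this file.

// Hypothetical structure for illustration; member targets are shapes defined above.
structure ExampleUpdateInput {
    @required
    Name: NameString

    Description: DescriptionString

    Tags: TagsMap
}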