// META-INF/smithy/glue.smithy (aws-glue-spec)
$version: "2.0"
metadata suppressions = [
{
id: "HttpMethodSemantics"
namespace: "*"
}
{
id: "HttpResponseCodeSemantics"
namespace: "*"
}
{
id: "PaginatedTrait"
namespace: "*"
}
{
id: "HttpHeaderTrait"
namespace: "*"
}
{
id: "HttpUriConflict"
namespace: "*"
}
{
id: "Service"
namespace: "*"
}
]
namespace com.amazonaws.glue
use aws.api#service
use aws.auth#sigv4
use aws.protocols#awsJson1_1
/// Glue
/// Defines the public endpoint for the Glue service.
@service(
sdkId: "Glue"
arnNamespace: "glue"
cloudFormationName: "Glue"
cloudTrailEventSource: "glue.amazonaws.com"
endpointPrefix: "glue"
)
@sigv4(
name: "glue"
)
@awsJson1_1
@title("AWS Glue")
service AWSGlue {
version: "2017-03-31"
operations: [
BatchCreatePartition
BatchDeleteConnection
BatchDeletePartition
BatchDeleteTable
BatchDeleteTableVersion
BatchGetBlueprints
BatchGetCrawlers
BatchGetCustomEntityTypes
BatchGetDataQualityResult
BatchGetDevEndpoints
BatchGetJobs
BatchGetPartition
BatchGetTriggers
BatchGetWorkflows
BatchStopJobRun
BatchUpdatePartition
CancelDataQualityRuleRecommendationRun
CancelDataQualityRulesetEvaluationRun
CancelMLTaskRun
CancelStatement
CheckSchemaVersionValidity
CreateBlueprint
CreateClassifier
CreateConnection
CreateCrawler
CreateCustomEntityType
CreateDatabase
CreateDataQualityRuleset
CreateDevEndpoint
CreateJob
CreateMLTransform
CreatePartition
CreatePartitionIndex
CreateRegistry
CreateSchema
CreateScript
CreateSecurityConfiguration
CreateSession
CreateTable
CreateTrigger
CreateUserDefinedFunction
CreateWorkflow
DeleteBlueprint
DeleteClassifier
DeleteColumnStatisticsForPartition
DeleteColumnStatisticsForTable
DeleteConnection
DeleteCrawler
DeleteCustomEntityType
DeleteDatabase
DeleteDataQualityRuleset
DeleteDevEndpoint
DeleteJob
DeleteMLTransform
DeletePartition
DeletePartitionIndex
DeleteRegistry
DeleteResourcePolicy
DeleteSchema
DeleteSchemaVersions
DeleteSecurityConfiguration
DeleteSession
DeleteTable
DeleteTableVersion
DeleteTrigger
DeleteUserDefinedFunction
DeleteWorkflow
GetBlueprint
GetBlueprintRun
GetBlueprintRuns
GetCatalogImportStatus
GetClassifier
GetClassifiers
GetColumnStatisticsForPartition
GetColumnStatisticsForTable
GetConnection
GetConnections
GetCrawler
GetCrawlerMetrics
GetCrawlers
GetCustomEntityType
GetDatabase
GetDatabases
GetDataCatalogEncryptionSettings
GetDataflowGraph
GetDataQualityResult
GetDataQualityRuleRecommendationRun
GetDataQualityRuleset
GetDataQualityRulesetEvaluationRun
GetDevEndpoint
GetDevEndpoints
GetJob
GetJobBookmark
GetJobRun
GetJobRuns
GetJobs
GetMapping
GetMLTaskRun
GetMLTaskRuns
GetMLTransform
GetMLTransforms
GetPartition
GetPartitionIndexes
GetPartitions
GetPlan
GetRegistry
GetResourcePolicies
GetResourcePolicy
GetSchema
GetSchemaByDefinition
GetSchemaVersion
GetSchemaVersionsDiff
GetSecurityConfiguration
GetSecurityConfigurations
GetSession
GetStatement
GetTable
GetTables
GetTableVersion
GetTableVersions
GetTags
GetTrigger
GetTriggers
GetUnfilteredPartitionMetadata
GetUnfilteredPartitionsMetadata
GetUnfilteredTableMetadata
GetUserDefinedFunction
GetUserDefinedFunctions
GetWorkflow
GetWorkflowRun
GetWorkflowRunProperties
GetWorkflowRuns
ImportCatalogToGlue
ListBlueprints
ListCrawlers
ListCrawls
ListCustomEntityTypes
ListDataQualityResults
ListDataQualityRuleRecommendationRuns
ListDataQualityRulesetEvaluationRuns
ListDataQualityRulesets
ListDevEndpoints
ListJobs
ListMLTransforms
ListRegistries
ListSchemas
ListSchemaVersions
ListSessions
ListStatements
ListTriggers
ListWorkflows
PutDataCatalogEncryptionSettings
PutResourcePolicy
PutSchemaVersionMetadata
PutWorkflowRunProperties
QuerySchemaVersionMetadata
RegisterSchemaVersion
RemoveSchemaVersionMetadata
ResetJobBookmark
ResumeWorkflowRun
RunStatement
SearchTables
StartBlueprintRun
StartCrawler
StartCrawlerSchedule
StartDataQualityRuleRecommendationRun
StartDataQualityRulesetEvaluationRun
StartExportLabelsTaskRun
StartImportLabelsTaskRun
StartJobRun
StartMLEvaluationTaskRun
StartMLLabelingSetGenerationTaskRun
StartTrigger
StartWorkflowRun
StopCrawler
StopCrawlerSchedule
StopSession
StopTrigger
StopWorkflowRun
TagResource
UntagResource
UpdateBlueprint
UpdateClassifier
UpdateColumnStatisticsForPartition
UpdateColumnStatisticsForTable
UpdateConnection
UpdateCrawler
UpdateCrawlerSchedule
UpdateDatabase
UpdateDataQualityRuleset
UpdateDevEndpoint
UpdateJob
UpdateJobFromSourceControl
UpdateMLTransform
UpdatePartition
UpdateRegistry
UpdateSchema
UpdateSourceControlFromJob
UpdateTable
UpdateTrigger
UpdateUserDefinedFunction
UpdateWorkflow
]
}
/// Creates one or more partitions in a batch operation.
operation BatchCreatePartition {
input: BatchCreatePartitionRequest
output: BatchCreatePartitionResponse
errors: [
AlreadyExistsException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Deletes a list of connection definitions from the Data Catalog.
operation BatchDeleteConnection {
input: BatchDeleteConnectionRequest
output: BatchDeleteConnectionResponse
errors: [
InternalServiceException
OperationTimeoutException
]
}
/// Deletes one or more partitions in a batch operation.
operation BatchDeletePartition {
input: BatchDeletePartitionRequest
output: BatchDeletePartitionResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes multiple tables at once.
///
/// After completing this operation, you no longer have access to the table versions and
/// partitions that belong to the deleted table. Glue deletes these "orphaned" resources
/// asynchronously in a timely manner, at the discretion of the service.
/// To ensure the immediate deletion of all related resources, before calling
/// BatchDeleteTable, use DeleteTableVersion or
/// BatchDeleteTableVersion, and DeletePartition or
/// BatchDeletePartition, to delete any resources that belong to the
/// table.
///
operation BatchDeleteTable {
input: BatchDeleteTableRequest
output: BatchDeleteTableResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNotReadyException
]
}
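// Illustrative sketch (not part of the model): one way a caller might follow the guidance
// above using boto3, deleting table versions and partitions before BatchDeleteTable.
// All names, version IDs, and partition values below are placeholder assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     # Remove dependent resources first so nothing is left to be cleaned up asynchronously.
//     glue.batch_delete_table_version(
//         DatabaseName="example_db", TableName="example_table", VersionIds=["1", "2"]
//     )
//     glue.batch_delete_partition(
//         DatabaseName="example_db",
//         TableName="example_table",
//         PartitionsToDelete=[{"Values": ["2024", "01"]}],
//     )
//     glue.batch_delete_table(DatabaseName="example_db", TablesToDelete=["example_table"])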
/// Deletes a specified batch of versions of a table.
operation BatchDeleteTableVersion {
input: BatchDeleteTableVersionRequest
output: BatchDeleteTableVersionResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves information about a list of blueprints.
operation BatchGetBlueprints {
input: BatchGetBlueprintsRequest
output: BatchGetBlueprintsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns a list of resource metadata for a given list of crawler names. After calling the ListCrawlers
/// operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetCrawlers {
input: BatchGetCrawlersRequest
output: BatchGetCrawlersResponse
errors: [
InvalidInputException
OperationTimeoutException
]
}
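// Illustrative sketch (not part of the model): the ListCrawlers-then-BatchGetCrawlers flow
// described above, written against boto3. The client and crawler names are assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     names = glue.list_crawlers()["CrawlerNames"]            # names visible to the caller
//     detail = glue.batch_get_crawlers(CrawlerNames=names)    # full metadata for those names
//     for crawler in detail["Crawlers"]:
//         print(crawler["Name"], crawler.get("State"))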
/// Retrieves the details for the custom patterns specified by a list of names.
operation BatchGetCustomEntityTypes {
input: BatchGetCustomEntityTypesRequest
output: BatchGetCustomEntityTypesResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a list of data quality results for the specified result IDs.
operation BatchGetDataQualityResult {
input: BatchGetDataQualityResultRequest
output: BatchGetDataQualityResultResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns a list of resource metadata for a given list of development endpoint names. After
/// calling the ListDevEndpoints operation, you can call this operation to access the
/// data to which you have been granted permissions. This operation supports all IAM permissions,
/// including permission conditions that use tags.
operation BatchGetDevEndpoints {
input: BatchGetDevEndpointsRequest
output: BatchGetDevEndpointsResponse
errors: [
AccessDeniedException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns a list of resource metadata for a given list of job names. After calling the ListJobs
/// operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
///
operation BatchGetJobs {
input: BatchGetJobsRequest
output: BatchGetJobsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves partitions in a batch request.
operation BatchGetPartition {
input: BatchGetPartitionRequest
output: BatchGetPartitionResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
InvalidStateException
OperationTimeoutException
]
}
/// Returns a list of resource metadata for a given list of trigger names. After calling the ListTriggers
/// operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetTriggers {
input: BatchGetTriggersRequest
output: BatchGetTriggersResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns a list of resource metadata for a given list of workflow names. After calling the ListWorkflows
/// operation, you can call this operation to access the data to which you have been granted permissions. This operation supports all IAM permissions, including permission conditions that use tags.
operation BatchGetWorkflows {
input: BatchGetWorkflowsRequest
output: BatchGetWorkflowsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Stops one or more job runs for a specified job definition.
operation BatchStopJobRun {
input: BatchStopJobRunRequest
output: BatchStopJobRunResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates one or more partitions in a batch operation.
operation BatchUpdatePartition {
input: BatchUpdatePartitionRequest
output: BatchUpdatePartitionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Cancels the specified recommendation run that was being used to generate rules.
operation CancelDataQualityRuleRecommendationRun {
input: CancelDataQualityRuleRecommendationRunRequest
output: CancelDataQualityRuleRecommendationRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Cancels a run where a ruleset is being evaluated against a data source.
operation CancelDataQualityRulesetEvaluationRun {
input: CancelDataQualityRulesetEvaluationRunRequest
output: CancelDataQualityRulesetEvaluationRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Cancels (stops) a task run. Machine learning task runs are asynchronous tasks that Glue runs on your behalf as part of various machine learning workflows. You can cancel a
/// machine learning task run at any time by calling CancelMLTaskRun with a task
/// run's parent transform's TransformID and the task run's TaskRunId.
operation CancelMLTaskRun {
input: CancelMLTaskRunRequest
output: CancelMLTaskRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
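// Illustrative sketch (not part of the model): cancelling a machine learning task run by
// passing the parent transform's ID and the task run's ID, per the description above.
// The boto3 client and both IDs are placeholder assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     glue.cancel_ml_task_run(
//         TransformId="tfm-0123456789abcdef",  # parent transform of the task run
//         TaskRunId="tsk-0123456789abcdef",    # the asynchronous task run to cancel
//     )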
/// Cancels the statement.
operation CancelStatement {
input: CancelStatementRequest
output: CancelStatementResponse
errors: [
AccessDeniedException
EntityNotFoundException
IllegalSessionStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Validates the supplied schema. This call has no side effects; it simply validates the supplied schema, using DataFormat as the format. Since it does not take a schema set name, no compatibility checks are performed.
operation CheckSchemaVersionValidity {
input := {
/// The data format of the schema definition. Currently AVRO, JSON, and PROTOBUF are supported.
@required
DataFormat: DataFormat
/// The definition of the schema that has to be validated.
@required
SchemaDefinition: SchemaDefinitionString
}
output: CheckSchemaVersionValidityResponse
errors: [
AccessDeniedException
InternalServiceException
InvalidInputException
]
}
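// Illustrative sketch (not part of the model): validating an Avro schema definition with
// CheckSchemaVersionValidity via boto3. The schema content is a made-up example.
//
//     import boto3, json
//
//     glue = boto3.client("glue")
//     avro_schema = json.dumps({
//         "type": "record",
//         "name": "User",
//         "fields": [{"name": "id", "type": "long"}],
//     })
//     resp = glue.check_schema_version_validity(DataFormat="AVRO", SchemaDefinition=avro_schema)
//     print(resp["Valid"], resp.get("Error"))  # True/False plus a validation error, if any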
/// Registers a blueprint with Glue.
operation CreateBlueprint {
input: CreateBlueprintRequest
output: CreateBlueprintResponse
errors: [
AlreadyExistsException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a classifier in the user's account. This can be a GrokClassifier, an
/// XMLClassifier, a JsonClassifier, or a CsvClassifier,
/// depending on which field of the request is present.
operation CreateClassifier {
input: CreateClassifierRequest
output: CreateClassifierResponse
errors: [
AlreadyExistsException
InvalidInputException
OperationTimeoutException
]
}
/// Creates a connection definition in the Data Catalog.
operation CreateConnection {
input: CreateConnectionRequest
output: CreateConnectionResponse
errors: [
AlreadyExistsException
GlueEncryptionException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new crawler with specified targets, role, configuration, and optional schedule.
/// At least one crawl target must be specified, in the s3Targets field, the
/// jdbcTargets field, or the DynamoDBTargets field.
operation CreateCrawler {
input: CreateCrawlerRequest
output: CreateCrawlerResponse
errors: [
AlreadyExistsException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
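// Illustrative sketch (not part of the model): creating a crawler with a single S3 crawl
// target, since at least one target (s3Targets, jdbcTargets, or DynamoDBTargets) is
// required. The role ARN, bucket path, and schedule are placeholder assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     glue.create_crawler(
//         Name="example-crawler",
//         Role="arn:aws:iam::111122223333:role/GlueCrawlerRole",  # assumed IAM role
//         DatabaseName="example_db",
//         Targets={"S3Targets": [{"Path": "s3://example-bucket/data/"}]},
//         Schedule="cron(0 2 * * ? *)",  # optional: crawl daily at 02:00 UTC
//     )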
/// Creates a custom pattern that is used to detect sensitive data across the columns and rows of your structured data.
/// Each custom pattern you create specifies a regular expression and an optional list of context words. If no context words are passed, only a regular expression is checked.
operation CreateCustomEntityType {
input: CreateCustomEntityTypeRequest
output: CreateCustomEntityTypeResponse
errors: [
AccessDeniedException
AlreadyExistsException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new database in a Data Catalog.
operation CreateDatabase {
input: CreateDatabaseRequest
output: CreateDatabaseResponse
errors: [
AlreadyExistsException
ConcurrentModificationException
FederatedResourceAlreadyExistsException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a data quality ruleset with DQDL rules applied to a specified Glue table.
/// You create the ruleset using the Data Quality Definition Language (DQDL). For more information, see the Glue developer guide.
@idempotent
operation CreateDataQualityRuleset {
input: CreateDataQualityRulesetRequest
output: CreateDataQualityRulesetResponse
errors: [
AlreadyExistsException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
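// Illustrative sketch (not part of the model): creating a ruleset from a small DQDL rule
// string targeted at a catalog table, as described above. Names and rules are assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     glue.create_data_quality_ruleset(
//         Name="orders-ruleset",
//         Ruleset='Rules = [ IsComplete "order_id", RowCount > 0 ]',  # DQDL rule string
//         TargetTable={"DatabaseName": "example_db", "TableName": "orders"},
//     )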
/// Creates a new development endpoint.
operation CreateDevEndpoint {
input: CreateDevEndpointRequest
output: CreateDevEndpointResponse
errors: [
AccessDeniedException
AlreadyExistsException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
ValidationException
]
}
/// Creates a new job definition.
operation CreateJob {
input: CreateJobRequest
output: CreateJobResponse
errors: [
AlreadyExistsException
ConcurrentModificationException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a Glue machine learning transform. This operation creates the transform and
/// all the necessary parameters to train it.
/// Call this operation as the first step in the process of using a machine learning transform
/// (such as the FindMatches transform) for deduplicating data. You can provide an
/// optional Description, in addition to the parameters that you want to use for your
/// algorithm.
/// You must also specify certain parameters for the tasks that Glue runs on your
/// behalf as part of learning from your data and creating a high-quality machine learning
/// transform. These parameters include Role, and optionally,
/// AllocatedCapacity, Timeout, and MaxRetries. For more
/// information, see Jobs.
operation CreateMLTransform {
input: CreateMLTransformRequest
output: CreateMLTransformResponse
errors: [
AccessDeniedException
AlreadyExistsException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new partition.
operation CreatePartition {
input: CreatePartitionRequest
output: CreatePartitionResponse
errors: [
AlreadyExistsException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a specified partition index in an existing table.
operation CreatePartitionIndex {
input: CreatePartitionIndexRequest
output: CreatePartitionIndexResponse
errors: [
AlreadyExistsException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new registry which may be used to hold a collection of schemas.
operation CreateRegistry {
input := {
/// Name of the registry to be created, with a maximum length of 255; it may only contain letters, numbers, hyphen, underscore, dollar sign, or hash mark. No whitespace.
@required
RegistryName: SchemaRegistryNameString
/// A description of the registry. If description is not provided, there will not be any default value for this.
Description: DescriptionString
/// Amazon Web Services tags that contain a key value pair and may be searched by console, command line, or API.
Tags: TagsMap
}
output: CreateRegistryResponse
errors: [
AccessDeniedException
AlreadyExistsException
ConcurrentModificationException
InternalServiceException
InvalidInputException
ResourceNumberLimitExceededException
]
}
/// Creates a new schema set and registers the schema definition. If the schema set already exists, returns an error without registering the version.
/// When the schema set is created, a version checkpoint will be set to the first version. Compatibility mode "DISABLED" restricts any additional schema versions from being added after the first schema version. For all other compatibility modes, validation of compatibility settings will be applied only from the second version onwards when the RegisterSchemaVersion API is used.
/// When this API is called without a RegistryId, this will create an entry for a "default-registry" in the registry database tables, if it is not already present.
operation CreateSchema {
input := {
/// This is a wrapper shape to contain the registry identity fields. If this is not provided, the default registry will be used. The ARN format for the same will be: arn:aws:glue:us-east-2::registry/default-registry:random-5-letter-id.
RegistryId: RegistryId
/// Name of the schema to be created, with a maximum length of 255; it may only contain letters, numbers, hyphen, underscore, dollar sign, or hash mark. No whitespace.
@required
SchemaName: SchemaRegistryNameString
/// The data format of the schema definition. Currently AVRO, JSON, and PROTOBUF are supported.
@required
DataFormat: DataFormat
/// The compatibility mode of the schema. The possible values are:
///
/// - NONE: No compatibility mode applies. You can use this choice in development scenarios or if you do not know the compatibility mode that you want to apply to schemas. Any new version added will be accepted without undergoing a compatibility check.
/// - DISABLED: This compatibility choice prevents versioning for a particular schema. You can use this choice to prevent future versioning of a schema.
/// - BACKWARD: This compatibility choice is recommended as it allows data receivers to read both the current and one previous schema version. This means that, for instance, a new schema version cannot drop data fields or change the type of these fields, so they can't be read by readers using the previous version.
/// - BACKWARD_ALL: This compatibility choice allows data receivers to read both the current and all previous schema versions. You can use this choice when you need to delete fields or add optional fields, and check compatibility against all previous schema versions.
/// - FORWARD: This compatibility choice allows data receivers to read both the current and one next schema version, but not necessarily later versions. You can use this choice when you need to add fields or delete optional fields, but only check compatibility against the last schema version.
/// - FORWARD_ALL: This compatibility choice allows data receivers to read data written by producers of any new registered schema. You can use this choice when you need to add fields or delete optional fields, and check compatibility against all previous schema versions.
/// - FULL: This compatibility choice allows data receivers to read data written by producers using the previous or next version of the schema, but not necessarily earlier or later versions. You can use this choice when you need to add or remove optional fields, but only check compatibility against the last schema version.
/// - FULL_ALL: This compatibility choice allows data receivers to read data written by producers using all previous schema versions. You can use this choice when you need to add or remove optional fields, and check compatibility against all previous schema versions.
///
Compatibility: Compatibility
/// An optional description of the schema. If description is not provided, there will not be any automatic default value for this.
Description: DescriptionString
/// Amazon Web Services tags that contain a key value pair and may be searched by console, command line, or API. If specified, follows the Amazon Web Services tags-on-create pattern.
Tags: TagsMap
/// The schema definition using the DataFormat setting for SchemaName.
SchemaDefinition: SchemaDefinitionString
}
output: CreateSchemaResponse
errors: [
AccessDeniedException
AlreadyExistsException
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
ResourceNumberLimitExceededException
]
}
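// Illustrative sketch (not part of the model): registering a first schema version with the
// BACKWARD compatibility mode, one of the modes listed above. Registry, schema name, and
// definition are placeholder assumptions; omitting RegistryId targets the default registry.
//
//     import boto3, json
//
//     glue = boto3.client("glue")
//     glue.create_schema(
//         RegistryId={"RegistryName": "default-registry"},
//         SchemaName="user-events",
//         DataFormat="AVRO",
//         Compatibility="BACKWARD",   # readers can also consume the previous version
//         SchemaDefinition=json.dumps({
//             "type": "record",
//             "name": "UserEvent",
//             "fields": [{"name": "id", "type": "long"}],
//         }),
//     )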
/// Transforms a directed acyclic graph (DAG) into code.
operation CreateScript {
input: CreateScriptRequest
output: CreateScriptResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Creates a new security configuration. A security configuration is a set of security properties that can be used by Glue. You can use a security configuration to encrypt data at rest. For information about using security configurations in Glue, see Encrypting Data Written by Crawlers, Jobs, and Development Endpoints.
operation CreateSecurityConfiguration {
input: CreateSecurityConfigurationRequest
output: CreateSecurityConfigurationResponse
errors: [
AlreadyExistsException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new session.
operation CreateSession {
input: CreateSessionRequest
output: CreateSessionResponse
errors: [
AccessDeniedException
AlreadyExistsException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
ValidationException
]
}
/// Creates a new table definition in the Data Catalog.
operation CreateTable {
input: CreateTableRequest
output: CreateTableResponse
errors: [
AlreadyExistsException
ConcurrentModificationException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNotReadyException
ResourceNumberLimitExceededException
]
}
/// Creates a new trigger.
operation CreateTrigger {
input: CreateTriggerRequest
output: CreateTriggerResponse
errors: [
AlreadyExistsException
ConcurrentModificationException
EntityNotFoundException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new function definition in the Data Catalog.
operation CreateUserDefinedFunction {
input: CreateUserDefinedFunctionRequest
output: CreateUserDefinedFunctionResponse
errors: [
AlreadyExistsException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Creates a new workflow.
operation CreateWorkflow {
input: CreateWorkflowRequest
output: CreateWorkflowResponse
errors: [
AlreadyExistsException
ConcurrentModificationException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Deletes an existing blueprint.
operation DeleteBlueprint {
input: DeleteBlueprintRequest
output: DeleteBlueprintResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Removes a classifier from the Data Catalog.
operation DeleteClassifier {
input: DeleteClassifierRequest
output: DeleteClassifierResponse
errors: [
EntityNotFoundException
OperationTimeoutException
]
}
/// Deletes the partition column statistics of a column.
/// The Identity and Access Management (IAM) permission required for this operation is DeletePartition.
operation DeleteColumnStatisticsForPartition {
input: DeleteColumnStatisticsForPartitionRequest
output: DeleteColumnStatisticsForPartitionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes table statistics of columns.
/// The Identity and Access Management (IAM) permission required for this operation is DeleteTable.
operation DeleteColumnStatisticsForTable {
input: DeleteColumnStatisticsForTableRequest
output: DeleteColumnStatisticsForTableResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a connection from the Data Catalog.
operation DeleteConnection {
input: DeleteConnectionRequest
output: DeleteConnectionResponse
errors: [
EntityNotFoundException
OperationTimeoutException
]
}
/// Removes a specified crawler from the Glue Data Catalog, unless the crawler state is
/// RUNNING.
operation DeleteCrawler {
input: DeleteCrawlerRequest
output: DeleteCrawlerResponse
errors: [
CrawlerRunningException
EntityNotFoundException
OperationTimeoutException
SchedulerTransitioningException
]
}
/// Deletes a custom pattern by specifying its name.
operation DeleteCustomEntityType {
input: DeleteCustomEntityTypeRequest
output: DeleteCustomEntityTypeResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Removes a specified database from a Data Catalog.
///
/// After completing this operation, you no longer have access to the tables (and all table
/// versions and partitions that might belong to the tables) and the user-defined functions in
/// the deleted database. Glue deletes these "orphaned" resources asynchronously in a timely
/// manner, at the discretion of the service.
/// To ensure the immediate deletion of all related resources, before calling
/// DeleteDatabase, use DeleteTableVersion or
/// BatchDeleteTableVersion, DeletePartition or
/// BatchDeletePartition, DeleteUserDefinedFunction, and
/// DeleteTable or BatchDeleteTable, to delete any resources that
/// belong to the database.
///
operation DeleteDatabase {
input: DeleteDatabaseRequest
output: DeleteDatabaseResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
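// Illustrative sketch (not part of the model): explicitly deleting a database's tables
// before the database itself, in the spirit of the guidance above. A complete cleanup
// would also remove user-defined functions; names here are placeholder assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     db = "example_db"
//     tables = [t["Name"] for t in glue.get_tables(DatabaseName=db)["TableList"]]
//     if tables:
//         glue.batch_delete_table(DatabaseName=db, TablesToDelete=tables)
//     glue.delete_database(Name=db)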
/// Deletes a data quality ruleset.
operation DeleteDataQualityRuleset {
input: DeleteDataQualityRulesetRequest
output: DeleteDataQualityRulesetResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a specified development endpoint.
operation DeleteDevEndpoint {
input: DeleteDevEndpointRequest
output: DeleteDevEndpointResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a specified job definition. If the job definition
/// is not found, no exception is thrown.
operation DeleteJob {
input: DeleteJobRequest
output: DeleteJobResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a Glue machine learning transform. Machine learning transforms are a special
/// type of transform that use machine learning to learn the details of the transformation to be
/// performed by learning from examples provided by humans. These transformations are then saved
/// by Glue. If you no longer need a transform, you can delete it by calling
/// DeleteMLTransform. However, any Glue jobs that still reference the deleted
/// transform will no longer succeed.
operation DeleteMLTransform {
input: DeleteMLTransformRequest
output: DeleteMLTransformResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a specified partition.
operation DeletePartition {
input: DeletePartitionRequest
output: DeletePartitionResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a specified partition index from an existing table.
operation DeletePartitionIndex {
input: DeletePartitionIndexRequest
output: DeletePartitionIndexResponse
errors: [
ConflictException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes the entire registry, including the schema and all of its versions. To get the status of the delete operation, you can call the GetRegistry API after the asynchronous call. Deleting a registry will deactivate all online operations for the registry, such as the UpdateRegistry, CreateSchema, UpdateSchema, and RegisterSchemaVersion APIs.
operation DeleteRegistry {
input := {
/// This is a wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
@required
RegistryId: RegistryId
}
output: DeleteRegistryResponse
errors: [
AccessDeniedException
ConcurrentModificationException
EntityNotFoundException
InvalidInputException
]
}
/// Deletes a specified policy.
operation DeleteResourcePolicy {
input: DeleteResourcePolicyRequest
output: DeleteResourcePolicyResponse
errors: [
ConditionCheckFailureException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes the entire schema set, including the schema set and all of its versions. To get the status of the delete operation, you can call the GetSchema API after the asynchronous call. Deleting a schema will deactivate all online operations for the schema, such as the GetSchemaByDefinition and RegisterSchemaVersion APIs.
operation DeleteSchema {
input := {
/// This is a wrapper structure that may contain the schema name and Amazon Resource Name (ARN).
@required
SchemaId: SchemaId
}
output: DeleteSchemaResponse
errors: [
AccessDeniedException
ConcurrentModificationException
EntityNotFoundException
InvalidInputException
]
}
/// Removes versions from the specified schema. A version number or range may be supplied. If the compatibility mode forbids deleting a version that is necessary, such as BACKWARDS_FULL, an error is returned. Calling the GetSchemaVersions API after this call will list the status of the deleted versions.
/// When the range of version numbers contains a checkpointed version, the API will return a 409 conflict and will not proceed with the deletion. You have to remove the checkpoint first using the DeleteSchemaCheckpoint API before using this API.
/// You cannot use the DeleteSchemaVersions API to delete the first schema version in the schema set. The first schema version can only be deleted by the DeleteSchema API. This operation will also delete the attached SchemaVersionMetadata under the schema versions. Hard deletes will be enforced on the database.
/// If the compatibility mode forbids deleting a version that is necessary, such as BACKWARDS_FULL, an error is returned.
operation DeleteSchemaVersions {
input := {
/// This is a wrapper structure that may contain the schema name and Amazon Resource Name (ARN).
@required
SchemaId: SchemaId
/// A version range may be supplied, which may be of the format:
///
/// - a single version number, 5
/// - a range, 5-8: deletes versions 5, 6, 7, 8
///
@required
Versions: VersionsString
}
output: DeleteSchemaVersionsResponse
errors: [
AccessDeniedException
ConcurrentModificationException
EntityNotFoundException
InvalidInputException
]
}
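// Illustrative sketch (not part of the model): deleting a contiguous range of schema
// versions using the "5-8" range syntax documented above. Schema and registry names are
// placeholder assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     resp = glue.delete_schema_versions(
//         SchemaId={"SchemaName": "user-events", "RegistryName": "default-registry"},
//         Versions="5-8",   # or a single version, e.g. "5"
//     )
//     print(resp.get("SchemaVersionErrors", []))  # per-version failures, if any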
/// Deletes a specified security configuration.
operation DeleteSecurityConfiguration {
input: DeleteSecurityConfigurationRequest
output: DeleteSecurityConfigurationResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes the session.
operation DeleteSession {
input: DeleteSessionRequest
output: DeleteSessionResponse
errors: [
AccessDeniedException
ConcurrentModificationException
IllegalSessionStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Removes a table definition from the Data Catalog.
///
/// After completing this operation, you no longer have access to the table versions and
/// partitions that belong to the deleted table. Glue deletes these "orphaned" resources
/// asynchronously in a timely manner, at the discretion of the service.
/// To ensure the immediate deletion of all related resources, before calling
/// DeleteTable
, use DeleteTableVersion
or
/// BatchDeleteTableVersion
, and DeletePartition
or
/// BatchDeletePartition
, to delete any resources that belong to the
/// table.
///
operation DeleteTable {
input: DeleteTableRequest
output: DeleteTableResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNotReadyException
]
}
/// Deletes a specified version of a table.
operation DeleteTableVersion {
input: DeleteTableVersionRequest
output: DeleteTableVersionResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a specified trigger. If the trigger is not found, no
/// exception is thrown.
operation DeleteTrigger {
input: DeleteTriggerRequest
output: DeleteTriggerResponse
errors: [
ConcurrentModificationException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes an existing function definition from the Data Catalog.
operation DeleteUserDefinedFunction {
input: DeleteUserDefinedFunctionRequest
output: DeleteUserDefinedFunctionResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Deletes a workflow.
operation DeleteWorkflow {
input: DeleteWorkflowRequest
output: DeleteWorkflowResponse
errors: [
ConcurrentModificationException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the details of a blueprint.
operation GetBlueprint {
input: GetBlueprintRequest
output: GetBlueprintResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the details of a blueprint run.
operation GetBlueprintRun {
input: GetBlueprintRunRequest
output: GetBlueprintRunResponse
errors: [
EntityNotFoundException
InternalServiceException
OperationTimeoutException
]
}
/// Retrieves the details of blueprint runs for a specified blueprint.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetBlueprintRuns {
input: GetBlueprintRunsRequest
output: GetBlueprintRunsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the status of a migration operation.
operation GetCatalogImportStatus {
input: GetCatalogImportStatusRequest
output: GetCatalogImportStatusResponse
errors: [
InternalServiceException
OperationTimeoutException
]
}
/// Retrieve a classifier by name.
operation GetClassifier {
input: GetClassifierRequest
output: GetClassifierResponse
errors: [
EntityNotFoundException
OperationTimeoutException
]
}
/// Lists all classifier objects in the Data Catalog.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetClassifiers {
input: GetClassifiersRequest
output: GetClassifiersResponse
errors: [
OperationTimeoutException
]
}
/// Retrieves partition statistics of columns.
/// The Identity and Access Management (IAM) permission required for this operation is GetPartition.
operation GetColumnStatisticsForPartition {
input: GetColumnStatisticsForPartitionRequest
output: GetColumnStatisticsForPartitionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves table statistics of columns.
/// The Identity and Access Management (IAM) permission required for this operation is GetTable.
operation GetColumnStatisticsForTable {
input: GetColumnStatisticsForTableRequest
output: GetColumnStatisticsForTableResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a connection definition from the Data Catalog.
operation GetConnection {
input: GetConnectionRequest
output: GetConnectionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a list of connection definitions from the Data Catalog.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetConnections {
input: GetConnectionsRequest
output: GetConnectionsResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves metadata for a specified crawler.
operation GetCrawler {
input: GetCrawlerRequest
output: GetCrawlerResponse
errors: [
EntityNotFoundException
OperationTimeoutException
]
}
/// Retrieves metrics about specified crawlers.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetCrawlerMetrics {
input: GetCrawlerMetricsRequest
output: GetCrawlerMetricsResponse
errors: [
OperationTimeoutException
]
}
/// Retrieves metadata for all crawlers defined in the customer
/// account.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetCrawlers {
input: GetCrawlersRequest
output: GetCrawlersResponse
errors: [
OperationTimeoutException
]
}
/// Retrieves the details of a custom pattern by specifying its name.
operation GetCustomEntityType {
input: GetCustomEntityTypeRequest
output: GetCustomEntityTypeResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the definition of a specified database.
operation GetDatabase {
input: GetDatabaseRequest
output: GetDatabaseResponse
errors: [
EntityNotFoundException
FederationSourceException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves all databases defined in a given Data Catalog.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetDatabases {
input: GetDatabasesRequest
output: GetDatabasesResponse
errors: [
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the security configuration for a specified catalog.
operation GetDataCatalogEncryptionSettings {
input: GetDataCatalogEncryptionSettingsRequest
output: GetDataCatalogEncryptionSettingsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Transforms a Python script into a directed acyclic graph (DAG).
operation GetDataflowGraph {
input: GetDataflowGraphRequest
output: GetDataflowGraphResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the result of a data quality rule evaluation.
operation GetDataQualityResult {
input: GetDataQualityResultRequest
output: GetDataQualityResultResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Gets the specified recommendation run that was used to generate rules.
operation GetDataQualityRuleRecommendationRun {
input: GetDataQualityRuleRecommendationRunRequest
output: GetDataQualityRuleRecommendationRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns an existing ruleset by identifier or name.
operation GetDataQualityRuleset {
input: GetDataQualityRulesetRequest
output: GetDataQualityRulesetResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a specific run where a ruleset is evaluated against a data source.
operation GetDataQualityRulesetEvaluationRun {
input: GetDataQualityRulesetEvaluationRunRequest
output: GetDataQualityRulesetEvaluationRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves information about a specified development endpoint.
///
/// When you create a development endpoint in a virtual private cloud (VPC), Glue returns only
/// a private IP address, and the public IP address field is not populated. When you create a
/// non-VPC development endpoint, Glue returns only a public IP address.
///
operation GetDevEndpoint {
input: GetDevEndpointRequest
output: GetDevEndpointResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves all the development endpoints in this Amazon Web Services account.
///
/// When you create a development endpoint in a virtual private cloud (VPC), Glue returns only a private IP address
/// and the public IP address field is not populated. When you create a non-VPC development
/// endpoint, Glue returns only a public IP address.
///
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetDevEndpoints {
input: GetDevEndpointsRequest
output: GetDevEndpointsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves an existing job definition.
operation GetJob {
input: GetJobRequest
output: GetJobResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns information on a job bookmark entry.
/// For more information about enabling and using job bookmarks, see:
///
/// - Job parameters used by Glue
/// - Job structure
///
operation GetJobBookmark {
input: GetJobBookmarkRequest
output: GetJobBookmarkResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ValidationException
]
}
/// Retrieves the metadata for a given job run.
operation GetJobRun {
input: GetJobRunRequest
output: GetJobRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves metadata for all runs of a given job definition.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetJobRuns {
input: GetJobRunsRequest
output: GetJobRunsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves all current job definitions.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetJobs {
input: GetJobsRequest
output: GetJobsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Creates mappings.
operation GetMapping {
input: GetMappingRequest
output: GetMappingResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Gets details for a specific task run on a machine learning transform. Machine learning
/// task runs are asynchronous tasks that Glue runs on your behalf as part of various machine
/// learning workflows. You can check the stats of any task run by calling
/// GetMLTaskRun with the TaskRunID and its parent transform's
/// TransformID.
operation GetMLTaskRun {
input: GetMLTaskRunRequest
output: GetMLTaskRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Gets a list of runs for a machine learning transform. Machine learning task runs are
/// asynchronous tasks that Glue runs on your behalf as part of various machine learning
/// workflows. You can get a sortable, filterable list of machine learning task runs by calling
/// GetMLTaskRuns with their parent transform's TransformID and other
/// optional parameters as documented in this section.
/// This operation returns a list of historic runs and must be paginated.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetMLTaskRuns {
input: GetMLTaskRunsRequest
output: GetMLTaskRunsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
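// Illustrative sketch (not part of the model): since GetMLTaskRuns returns historic runs
// and must be paginated, a caller can loop on NextToken as below. The transform ID is a
// placeholder assumption.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     task_runs, token = [], None
//     while True:
//         kwargs = {"TransformId": "tfm-0123456789abcdef"}
//         if token:
//             kwargs["NextToken"] = token
//         page = glue.get_ml_task_runs(**kwargs)
//         task_runs.extend(page.get("TaskRuns", []))
//         token = page.get("NextToken")
//         if not token:
//             break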
/// Gets a Glue machine learning transform artifact and all its corresponding metadata.
/// Machine learning transforms are a special type of transform that use machine learning to learn
/// the details of the transformation to be performed by learning from examples provided by
/// humans. These transformations are then saved by Glue. You can retrieve their metadata by
/// calling GetMLTransform.
operation GetMLTransform {
input: GetMLTransformRequest
output: GetMLTransformResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Gets a sortable, filterable list of existing Glue machine learning transforms. Machine
/// learning transforms are a special type of transform that use machine learning to learn the
/// details of the transformation to be performed by learning from examples provided by humans.
/// These transformations are then saved by Glue, and you can retrieve their metadata by
/// calling GetMLTransforms.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetMLTransforms {
input: GetMLTransformsRequest
output: GetMLTransformsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves information about a specified partition.
operation GetPartition {
input: GetPartitionRequest
output: GetPartitionResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the partition indexes associated with a table.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
items: "PartitionIndexDescriptorList"
)
operation GetPartitionIndexes {
input: GetPartitionIndexesRequest
output: GetPartitionIndexesResponse
errors: [
ConflictException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves information about the partitions in a table.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetPartitions {
input: GetPartitionsRequest
output: GetPartitionsResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
InvalidStateException
OperationTimeoutException
ResourceNotReadyException
]
}
/// Gets code to perform a specified mapping.
operation GetPlan {
input: GetPlanRequest
output: GetPlanResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Describes the specified registry in detail.
operation GetRegistry {
input := {
/// This is a wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
@required
RegistryId: RegistryId
}
output: GetRegistryResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
/// Retrieves the resource policies set on individual resources by Resource Access Manager
/// during cross-account permission grants. Also retrieves the Data Catalog resource
/// policy.
/// If you enabled metadata encryption in Data Catalog settings, and you do not have
/// permission on the KMS key, the operation can't return the Data Catalog resource
/// policy.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
items: "GetResourcePoliciesResponseList"
pageSize: "MaxResults"
)
operation GetResourcePolicies {
input: GetResourcePoliciesRequest
output: GetResourcePoliciesResponse
errors: [
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a specified resource policy.
operation GetResourcePolicy {
input: GetResourcePolicyRequest
output: GetResourcePolicyResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Describes the specified schema in detail.
operation GetSchema {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
/// - SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
///
@required
SchemaId: SchemaId
}
output: GetSchemaResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
/// Retrieves a schema by the SchemaDefinition. The schema definition is sent to the Schema Registry, canonicalized, and hashed. If the hash is matched within the scope of the SchemaName or ARN (or the default registry, if none is supplied), that schema’s metadata is returned. Otherwise, a 404 or NotFound error is returned. Schema versions in Deleted statuses will not be included in the results.
operation GetSchemaByDefinition {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.
/// - SchemaId$SchemaName: The name of the schema. One of SchemaArn or SchemaName has to be provided.
///
@required
SchemaId: SchemaId
/// The definition of the schema for which schema details are required.
@required
SchemaDefinition: SchemaDefinitionString
}
output: GetSchemaByDefinitionResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
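// Illustrative sketch (not part of the model): looking up a schema version by definition,
// where the definition is canonicalized and hashed server-side as described above. Schema
// and registry names and the definition itself are placeholder assumptions.
//
//     import boto3, json
//
//     glue = boto3.client("glue")
//     definition = json.dumps({
//         "type": "record",
//         "name": "UserEvent",
//         "fields": [{"name": "id", "type": "long"}],
//     })
//     resp = glue.get_schema_by_definition(
//         SchemaId={"SchemaName": "user-events", "RegistryName": "default-registry"},
//         SchemaDefinition=definition,
//     )
//     print(resp["SchemaVersionId"], resp["Status"])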
/// Get the specified schema by its unique ID assigned when a version of the schema is created or registered. Schema versions in Deleted status will not be included in the results.
operation GetSchemaVersion {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
/// - SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
///
SchemaId: SchemaId
/// The SchemaVersionId of the schema version. This field is required for fetching by schema ID. Either this or the SchemaId wrapper has to be provided.
SchemaVersionId: SchemaVersionIdString
/// The version number of the schema.
SchemaVersionNumber: SchemaVersionNumber
}
output: GetSchemaVersionResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
/// Fetches the schema version difference in the specified difference type between two stored schema versions in the Schema Registry.
/// This API allows you to compare two schema versions between two schema definitions under the same schema.
operation GetSchemaVersionsDiff {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.
/// - SchemaId$SchemaName: The name of the schema. One of SchemaArn or SchemaName has to be provided.
///
@required
SchemaId: SchemaId
/// The first of the two schema versions to be compared.
@required
FirstSchemaVersionNumber: SchemaVersionNumber
/// The second of the two schema versions to be compared.
@required
SecondSchemaVersionNumber: SchemaVersionNumber
/// Refers to SYNTAX_DIFF, which is the currently supported diff type.
@required
SchemaDiffType: SchemaDiffType
}
output: GetSchemaVersionsDiffResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
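// Illustrative sketch (not part of the model): diffing two stored versions of one schema
// with the SYNTAX_DIFF type mentioned above. Schema identity and version numbers are
// placeholder assumptions.
//
//     import boto3
//
//     glue = boto3.client("glue")
//     diff = glue.get_schema_versions_diff(
//         SchemaId={"SchemaName": "user-events", "RegistryName": "default-registry"},
//         FirstSchemaVersionNumber={"VersionNumber": 1},
//         SecondSchemaVersionNumber={"VersionNumber": 2},
//         SchemaDiffType="SYNTAX_DIFF",
//     )
//     print(diff["Diff"])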
/// Retrieves a specified security configuration.
operation GetSecurityConfiguration {
input: GetSecurityConfigurationRequest
output: GetSecurityConfigurationResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a list of all security configurations.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
items: "SecurityConfigurations"
pageSize: "MaxResults"
)
operation GetSecurityConfigurations {
input: GetSecurityConfigurationsRequest
output: GetSecurityConfigurationsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the session.
operation GetSession {
input: GetSessionRequest
output: GetSessionResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the statement.
operation GetStatement {
input: GetStatementRequest
output: GetStatementResponse
errors: [
AccessDeniedException
EntityNotFoundException
IllegalSessionStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the Table definition in a Data Catalog for
/// a specified table.
operation GetTable {
input: GetTableRequest
output: GetTableResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNotReadyException
]
}
/// Retrieves the definitions of some or all of the tables in a given
/// Database.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetTables {
input: GetTablesRequest
output: GetTablesResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a specified version of a table.
operation GetTableVersion {
input: GetTableVersionRequest
output: GetTableVersionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a list of strings that identify available versions of
/// a specified table.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetTableVersions {
input: GetTableVersionsRequest
output: GetTableVersionsResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a list of tags associated with a resource.
operation GetTags {
input: GetTagsRequest
output: GetTagsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the definition of a trigger.
operation GetTrigger {
input: GetTriggerRequest
output: GetTriggerResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Gets all the triggers associated with a job.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetTriggers {
input: GetTriggersRequest
output: GetTriggersResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves partition metadata from the Data Catalog that contains unfiltered
/// metadata.
/// For IAM authorization, the public IAM action associated with this API is glue:GetPartition.
operation GetUnfilteredPartitionMetadata {
input: GetUnfilteredPartitionMetadataRequest
output: GetUnfilteredPartitionMetadataResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
PermissionTypeMismatchException
]
}
/// Retrieves partition metadata from the Data Catalog that contains unfiltered
/// metadata.
/// For IAM authorization, the public IAM action associated with this API is glue:GetPartitions.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetUnfilteredPartitionsMetadata {
input: GetUnfilteredPartitionsMetadataRequest
output: GetUnfilteredPartitionsMetadataResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
PermissionTypeMismatchException
]
}
/// Retrieves table metadata from the Data Catalog that contains unfiltered
/// metadata.
/// For IAM authorization, the public IAM action associated with this API is glue:GetTable.
operation GetUnfilteredTableMetadata {
input: GetUnfilteredTableMetadataRequest
output: GetUnfilteredTableMetadataResponse
errors: [
EntityNotFoundException
FederationSourceException
FederationSourceRetryableException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
PermissionTypeMismatchException
]
}
/// Retrieves a specified function definition from the Data Catalog.
operation GetUserDefinedFunction {
input: GetUserDefinedFunctionRequest
output: GetUserDefinedFunctionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves multiple function definitions from the Data Catalog.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetUserDefinedFunctions {
input: GetUserDefinedFunctionsRequest
output: GetUserDefinedFunctionsResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves resource metadata for a workflow.
operation GetWorkflow {
input: GetWorkflowRequest
output: GetWorkflowResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the metadata for a given workflow run.
operation GetWorkflowRun {
input: GetWorkflowRunRequest
output: GetWorkflowRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the workflow run properties which were set during the run.
operation GetWorkflowRunProperties {
input: GetWorkflowRunPropertiesRequest
output: GetWorkflowRunPropertiesResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves metadata for all runs of a given workflow.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation GetWorkflowRuns {
input: GetWorkflowRunsRequest
output: GetWorkflowRunsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Imports an existing Amazon Athena Data Catalog to Glue.
operation ImportCatalogToGlue {
input: ImportCatalogToGlueRequest
output: ImportCatalogToGlueResponse
errors: [
InternalServiceException
OperationTimeoutException
]
}
/// Lists all the blueprint names in an account.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListBlueprints {
input: ListBlueprintsRequest
output: ListBlueprintsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the names of all crawler resources in this Amazon Web Services account, or the
/// resources with the specified tag. This operation allows you to see which
/// resources are available in your account, and their names.
/// This operation takes the optional Tags field, which you can use as a filter on
/// the response so that tagged resources can be retrieved as a group. If you choose to use tags
/// filtering, only resources with the tag are retrieved.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListCrawlers {
input: ListCrawlersRequest
output: ListCrawlersResponse
errors: [
OperationTimeoutException
]
}
/// Returns all the crawls of a specified crawler. Returns only the crawls that have occurred since the launch date of the crawler history feature, and only retains up to 12 months of crawls. Older crawls will not be returned.
/// You may use this API to:
///
/// - Retrieve all the crawls of a specified crawler.
///
/// - Retrieve all the crawls of a specified crawler within a limited count.
///
/// - Retrieve all the crawls of a specified crawler in a specific time range.
///
/// - Retrieve all the crawls of a specified crawler with a particular state, crawl ID, or DPU hour value.
operation ListCrawls {
input: ListCrawlsRequest
output: ListCrawlsResponse
errors: [
EntityNotFoundException
InvalidInputException
OperationTimeoutException
]
}
/// Lists all the custom patterns that have been created.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListCustomEntityTypes {
input: ListCustomEntityTypesRequest
output: ListCustomEntityTypesResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns all data quality execution results for your account.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListDataQualityResults {
input: ListDataQualityResultsRequest
output: ListDataQualityResultsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Lists the recommendation runs meeting the filter criteria.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListDataQualityRuleRecommendationRuns {
input: ListDataQualityRuleRecommendationRunsRequest
output: ListDataQualityRuleRecommendationRunsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Lists all the runs meeting the filter criteria, where a ruleset is evaluated against a data source.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListDataQualityRulesetEvaluationRuns {
input: ListDataQualityRulesetEvaluationRunsRequest
output: ListDataQualityRulesetEvaluationRunsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns a paginated list of rulesets for the specified list of Glue tables.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListDataQualityRulesets {
input: ListDataQualityRulesetsRequest
output: ListDataQualityRulesetsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the names of all DevEndpoint resources in this Amazon Web Services account, or the
/// resources with the specified tag. This operation allows you to see which resources are
/// available in your account, and their names.
/// This operation takes the optional Tags field, which you can use as a filter on
/// the response so that tagged resources can be retrieved as a group. If you choose to use tags
/// filtering, only resources with the tag are retrieved.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListDevEndpoints {
input: ListDevEndpointsRequest
output: ListDevEndpointsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the names of all job resources in this Amazon Web Services account, or the resources with the specified tag. This operation allows you to see which resources are available in your account, and their names.
/// This operation takes the optional Tags field, which you can use as a filter on
/// the response so that tagged resources can be retrieved as a group. If you choose to use tags
/// filtering, only resources with the tag are retrieved.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListJobs {
input: ListJobsRequest
output: ListJobsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves a sortable, filterable list of existing Glue machine learning transforms in this Amazon Web Services account,
/// or the resources with the specified tag. This operation takes the optional Tags field, which you can use as
/// a filter of the responses so that tagged resources can be retrieved as a group. If you choose to use tag
/// filtering, only resources with the tags are retrieved.
///
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListMLTransforms {
input: ListMLTransformsRequest
output: ListMLTransformsResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Returns a list of registries that you have created, with minimal registry information. Registries in the Deleting status will not be included in the results. Empty results will be returned if there are no registries available.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
items: "Registries"
pageSize: "MaxResults"
)
operation ListRegistries {
input := {
/// Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.
MaxResults: MaxResultsNumber
/// A continuation token, if this is a continuation call.
NextToken: SchemaRegistryTokenString
}
output: ListRegistriesResponse
errors: [
AccessDeniedException
InternalServiceException
InvalidInputException
]
}
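// Illustrative only, not part of the model: a sketch of paginating ListRegistries with the
// MaxResults and NextToken members declared above; the values are hypothetical.
// First request:      { "MaxResults": 25 }
// Follow-up request:  { "MaxResults": 25, "NextToken": "<token from the previous response>" }
// Repeat until a response no longer includes a NextToken.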
/// Returns a list of schemas with minimal details. Schemas in Deleting status will not be included in the results. Empty results will be returned if there are no schemas available.
/// When the RegistryId is not provided, all the schemas across registries will be part of the API response.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
items: "Schemas"
pageSize: "MaxResults"
)
operation ListSchemas {
input := {
/// A wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
RegistryId: RegistryId
/// Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.
MaxResults: MaxResultsNumber
/// A continuation token, if this is a continuation call.
NextToken: SchemaRegistryTokenString
}
output: ListSchemasResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
/// Returns a list of schema versions that you have created, with minimal information. Schema versions in Deleted status will not be included in the results. Empty results will be returned if there are no schema versions available.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
items: "Schemas"
pageSize: "MaxResults"
)
operation ListSchemaVersions {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
///
/// - SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
@required
SchemaId: SchemaId
/// Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.
MaxResults: MaxResultsNumber
/// A continuation token, if this is a continuation call.
NextToken: SchemaRegistryTokenString
}
output: ListSchemaVersionsResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
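// Illustrative only, not part of the model: a sketch of a ListSchemaVersions input document
// built from the members declared above; the names and page size are hypothetical.
// {
//   "SchemaId": { "SchemaName": "example-schema", "RegistryName": "example-registry" },
//   "MaxResults": 25
// }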
/// Retrieve a list of sessions.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListSessions {
input: ListSessionsRequest
output: ListSessionsResponse
errors: [
AccessDeniedException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Lists statements for the session.
operation ListStatements {
input: ListStatementsRequest
output: ListStatementsResponse
errors: [
AccessDeniedException
EntityNotFoundException
IllegalSessionStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Retrieves the names of all trigger resources in this Amazon Web Services account, or the resources with the specified tag. This operation allows you to see which resources are available in your account, and their names.
/// This operation takes the optional Tags field, which you can use as a filter on
/// the response so that tagged resources can be retrieved as a group. If you choose to use tags
/// filtering, only resources with the tag are retrieved.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListTriggers {
input: ListTriggersRequest
output: ListTriggersResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Lists names of workflows created in the account.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation ListWorkflows {
input: ListWorkflowsRequest
output: ListWorkflowsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Sets the security configuration for a specified catalog. After the configuration has been
/// set, the specified encryption is applied to every catalog write thereafter.
operation PutDataCatalogEncryptionSettings {
input: PutDataCatalogEncryptionSettingsRequest
output: PutDataCatalogEncryptionSettingsResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Sets the Data Catalog resource policy for access control.
operation PutResourcePolicy {
input: PutResourcePolicyRequest
output: PutResourcePolicyResponse
errors: [
ConditionCheckFailureException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Puts the metadata key value pair for a specified schema version ID. A maximum of 10 key value pairs will be allowed per schema version. They can be added over one or more calls.
operation PutSchemaVersionMetadata {
input := {
/// The unique ID for the schema.
SchemaId: SchemaId
/// The version number of the schema.
SchemaVersionNumber: SchemaVersionNumber
/// The unique version ID of the schema version.
SchemaVersionId: SchemaVersionIdString
/// The metadata key's corresponding value.
@required
MetadataKeyValue: MetadataKeyValuePair
}
output: PutSchemaVersionMetadataResponse
errors: [
AccessDeniedException
AlreadyExistsException
EntityNotFoundException
InvalidInputException
ResourceNumberLimitExceededException
]
}
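// Illustrative only, not part of the model: a sketch of a PutSchemaVersionMetadata input
// document. The MetadataKey/MetadataValue member names of MetadataKeyValuePair are an
// assumption (that structure is defined elsewhere), and all values are hypothetical.
// {
//   "SchemaVersionId": "11111111-2222-3333-4444-555555555555",
//   "MetadataKeyValue": { "MetadataKey": "owner", "MetadataValue": "data-platform-team" }
// }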
/// Puts the specified workflow run properties for the given workflow run. If a property already exists for the specified run, then it overrides the value; otherwise, it adds the property to the existing properties.
operation PutWorkflowRunProperties {
input: PutWorkflowRunPropertiesRequest
output: PutWorkflowRunPropertiesResponse
errors: [
AlreadyExistsException
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Queries for the schema version metadata information.
operation QuerySchemaVersionMetadata {
input := {
/// A wrapper structure that may contain the schema name and Amazon Resource Name (ARN).
SchemaId: SchemaId
/// The version number of the schema.
SchemaVersionNumber: SchemaVersionNumber
/// The unique version ID of the schema version.
SchemaVersionId: SchemaVersionIdString
/// Search key-value pairs for metadata, if they are not provided all the metadata information will be fetched.
MetadataList: MetadataList
/// Maximum number of results required per page. If the value is not supplied, this will be defaulted to 25 per page.
MaxResults: QuerySchemaVersionMetadataMaxResults = 0
/// A continuation token, if this is a continuation call.
NextToken: SchemaRegistryTokenString
}
output: QuerySchemaVersionMetadataResponse
errors: [
AccessDeniedException
EntityNotFoundException
InvalidInputException
]
}
/// Adds a new version to the existing schema. Returns an error if the new version of the schema does not meet the compatibility requirements of the schema set. This API will not create a new schema set and will return a 404 error if the schema set is not already present in the Schema Registry.
/// If this is the first schema definition to be registered in the Schema Registry, this API will store the schema version and return immediately. Otherwise, this call has the potential to run longer than other operations due to compatibility modes. You can call the GetSchemaVersion API with the SchemaVersionId to check compatibility modes.
/// If the same schema definition is already stored in Schema Registry as a version, the schema ID of the existing schema is returned to the caller.
operation RegisterSchemaVersion {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
///
/// - SchemaId$SchemaName: The name of the schema. Either SchemaArn or SchemaName and RegistryName has to be provided.
@required
SchemaId: SchemaId
/// The schema definition using the DataFormat setting for the SchemaName.
@required
SchemaDefinition: SchemaDefinitionString
}
output: RegisterSchemaVersionResponse
errors: [
AccessDeniedException
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
ResourceNumberLimitExceededException
]
}
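// Illustrative only, not part of the model: a sketch of a RegisterSchemaVersion input document
// built from the members declared above. The schema name, registry name, and schema definition
// are hypothetical.
// {
//   "SchemaId": { "SchemaName": "example-schema", "RegistryName": "example-registry" },
//   "SchemaDefinition": "{\"type\":\"record\",\"name\":\"Example\",\"fields\":[]}"
// }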
/// Removes a key value pair from the schema version metadata for the specified schema version ID.
operation RemoveSchemaVersionMetadata {
input := {
/// A wrapper structure that may contain the schema name and Amazon Resource Name (ARN).
SchemaId: SchemaId
/// The version number of the schema.
SchemaVersionNumber: SchemaVersionNumber
/// The unique version ID of the schema version.
SchemaVersionId: SchemaVersionIdString
/// The value of the metadata key.
@required
MetadataKeyValue: MetadataKeyValuePair
}
output: RemoveSchemaVersionMetadataResponse
errors: [
AccessDeniedException
EntityNotFoundException
InvalidInputException
]
}
/// Resets a bookmark entry.
/// For more information about enabling and using job bookmarks, see:
///
/// - Tracking processed data using job bookmarks
///
/// - Job parameters used by Glue
///
/// - Job structure
operation ResetJobBookmark {
input: ResetJobBookmarkRequest
output: ResetJobBookmarkResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Restarts selected nodes of a previous partially completed workflow run and resumes the workflow run. The selected nodes and all nodes that are downstream from the selected nodes are run.
operation ResumeWorkflowRun {
input: ResumeWorkflowRunRequest
output: ResumeWorkflowRunResponse
errors: [
ConcurrentRunsExceededException
EntityNotFoundException
IllegalWorkflowStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Executes the statement.
operation RunStatement {
input: RunStatementRequest
output: RunStatementResponse
errors: [
AccessDeniedException
EntityNotFoundException
IllegalSessionStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
ValidationException
]
}
/// Searches a set of tables based on properties in the table metadata as well as on the parent database. You can search against text or filter conditions.
/// You can only get tables that you have access to based on the security policies defined in Lake Formation. You need at least read-only access to the table for it to be returned. If you do not have access to all the columns in the table, these columns will not be searched against when returning the list of tables back to you. If you have access to the columns but not the data in the columns, those columns and the associated metadata for those columns will be included in the search.
@paginated(
inputToken: "NextToken"
outputToken: "NextToken"
pageSize: "MaxResults"
)
operation SearchTables {
input: SearchTablesRequest
output: SearchTablesResponse
errors: [
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Starts a new run of the specified blueprint.
operation StartBlueprintRun {
input: StartBlueprintRunRequest
output: StartBlueprintRunResponse
errors: [
EntityNotFoundException
IllegalBlueprintStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Starts a crawl using the specified crawler, regardless
/// of what is scheduled. If the crawler is already running, returns a
/// CrawlerRunningException.
operation StartCrawler {
input: StartCrawlerRequest
output: StartCrawlerResponse
errors: [
CrawlerRunningException
EntityNotFoundException
OperationTimeoutException
]
}
/// Changes the schedule state of the specified crawler to
/// SCHEDULED, unless the crawler is already running or the
/// schedule state is already SCHEDULED.
operation StartCrawlerSchedule {
input: StartCrawlerScheduleRequest
output: StartCrawlerScheduleResponse
errors: [
EntityNotFoundException
NoScheduleException
OperationTimeoutException
SchedulerRunningException
SchedulerTransitioningException
]
}
/// Starts a recommendation run that is used to generate rules when you don't know what rules to write. Glue Data Quality analyzes the data and comes up with recommendations for a potential ruleset. You can then triage the ruleset and modify the generated ruleset to your liking.
@idempotent
operation StartDataQualityRuleRecommendationRun {
input: StartDataQualityRuleRecommendationRunRequest
output: StartDataQualityRuleRecommendationRunResponse
errors: [
ConflictException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Once you have a ruleset definition (either recommended or your own), you call this operation to evaluate the ruleset against a data source (Glue table). The evaluation computes results which you can retrieve with the GetDataQualityResult API.
@idempotent
operation StartDataQualityRulesetEvaluationRun {
input: StartDataQualityRulesetEvaluationRunRequest
output: StartDataQualityRulesetEvaluationRunResponse
errors: [
ConflictException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Begins an asynchronous task to export all labeled data for a particular transform. This
/// task is the only label-related API call that is not part of the typical active learning
/// workflow. You typically use StartExportLabelsTaskRun when you want to work with
/// all of your existing labels at the same time, such as when you want to remove or change labels
/// that were previously submitted as truth. This API operation accepts the
/// TransformId whose labels you want to export and an Amazon Simple Storage
/// Service (Amazon S3) path to export the labels to. The operation returns a
/// TaskRunId. You can check on the status of your task run by calling the
/// GetMLTaskRun API.
operation StartExportLabelsTaskRun {
input: StartExportLabelsTaskRunRequest
output: StartExportLabelsTaskRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Enables you to provide additional labels (examples of truth) to be used to teach the
/// machine learning transform and improve its quality. This API operation is generally used as
/// part of the active learning workflow that starts with the
/// StartMLLabelingSetGenerationTaskRun call and that ultimately results in
/// improving the quality of your machine learning transform.
/// After the StartMLLabelingSetGenerationTaskRun finishes, Glue machine learning
/// will have generated a series of questions for humans to answer. (Answering these questions is
/// often called 'labeling' in the machine learning workflows). In the case of the
/// FindMatches transform, these questions are of the form, “What is the correct
/// way to group these rows together into groups composed entirely of matching records?” After the
/// labeling process is finished, users upload their answers/labels with a call to
/// StartImportLabelsTaskRun. After StartImportLabelsTaskRun finishes,
/// all future runs of the machine learning transform use the new and improved labels and perform
/// a higher-quality transformation.
/// By default, StartMLLabelingSetGenerationTaskRun continually learns from and
/// combines all labels that you upload unless you set Replace to true. If you set
/// Replace to true, StartImportLabelsTaskRun deletes and forgets all
/// previously uploaded labels and learns only from the exact set that you upload. Replacing
/// labels can be helpful if you realize that you previously uploaded incorrect labels, and you
/// believe that they are having a negative effect on your transform quality.
/// You can check on the status of your task run by calling the GetMLTaskRun
/// operation.
operation StartImportLabelsTaskRun {
input: StartImportLabelsTaskRunRequest
output: StartImportLabelsTaskRunResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
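// Illustrative only, not part of the model: the call order of the active learning workflow
// described in the documentation above.
// 1. StartMLLabelingSetGenerationTaskRun  - generates a labeling set (questions) for a transform
// 2. Humans answer the questions, producing labels.
// 3. StartImportLabelsTaskRun             - uploads the labels; set Replace to true to discard
//                                           previously uploaded labels.
// 4. GetMLTaskRun                         - checks the status of the returned task run.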
/// Starts a job run using a job definition.
operation StartJobRun {
input: StartJobRunRequest
output: StartJobRunResponse
errors: [
ConcurrentRunsExceededException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Starts a task to estimate the quality of the transform.
/// When you provide label sets as examples of truth, Glue machine learning uses some of
/// those examples to learn from them. The rest of the labels are used as a test to estimate
/// quality.
/// Returns a unique identifier for the run. You can call GetMLTaskRun to get more
/// information about the stats of the EvaluationTaskRun.
operation StartMLEvaluationTaskRun {
input: StartMLEvaluationTaskRunRequest
output: StartMLEvaluationTaskRunResponse
errors: [
ConcurrentRunsExceededException
EntityNotFoundException
InternalServiceException
InvalidInputException
MLTransformNotReadyException
OperationTimeoutException
]
}
/// Starts the active learning workflow for your machine learning transform to improve the
/// transform's quality by generating label sets and adding labels.
/// When the StartMLLabelingSetGenerationTaskRun finishes, Glue will have
/// generated a "labeling set" or a set of questions for humans to answer.
/// In the case of the FindMatches transform, these questions are of the form,
/// “What is the correct way to group these rows together into groups composed entirely of
/// matching records?”
/// After the labeling process is finished, you can upload your labels with a call to
/// StartImportLabelsTaskRun. After StartImportLabelsTaskRun finishes,
/// all future runs of the machine learning transform will use the new and improved labels and
/// perform a higher-quality transformation.
operation StartMLLabelingSetGenerationTaskRun {
input: StartMLLabelingSetGenerationTaskRunRequest
output: StartMLLabelingSetGenerationTaskRunResponse
errors: [
ConcurrentRunsExceededException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Starts an existing trigger. See Triggering
/// Jobs for information about how different types of trigger are
/// started.
operation StartTrigger {
input: StartTriggerRequest
output: StartTriggerResponse
errors: [
ConcurrentRunsExceededException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Starts a new run of the specified workflow.
operation StartWorkflowRun {
input: StartWorkflowRunRequest
output: StartWorkflowRunResponse
errors: [
ConcurrentRunsExceededException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// If the specified crawler is running, stops the crawl.
operation StopCrawler {
input: StopCrawlerRequest
output: StopCrawlerResponse
errors: [
CrawlerNotRunningException
CrawlerStoppingException
EntityNotFoundException
OperationTimeoutException
]
}
/// Sets the schedule state of the specified crawler to
/// NOT_SCHEDULED, but does not stop the crawler if it is
/// already running.
operation StopCrawlerSchedule {
input: StopCrawlerScheduleRequest
output: StopCrawlerScheduleResponse
errors: [
EntityNotFoundException
OperationTimeoutException
SchedulerNotRunningException
SchedulerTransitioningException
]
}
/// Stops the session.
operation StopSession {
input: StopSessionRequest
output: StopSessionResponse
errors: [
AccessDeniedException
ConcurrentModificationException
IllegalSessionStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Stops a specified trigger.
operation StopTrigger {
input: StopTriggerRequest
output: StopTriggerResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Stops the execution of the specified workflow run.
operation StopWorkflowRun {
input: StopWorkflowRunRequest
output: StopWorkflowRunResponse
errors: [
EntityNotFoundException
IllegalWorkflowStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Adds tags to a resource. A tag is a label you can assign to an Amazon Web Services resource.
/// In Glue, you can tag only certain resources. For information about what
/// resources you can tag, see Amazon Web Services Tags in Glue.
operation TagResource {
input: TagResourceRequest
output: TagResourceResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Removes tags from a resource.
operation UntagResource {
input: UntagResourceRequest
output: UntagResourceResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates a registered blueprint.
operation UpdateBlueprint {
input: UpdateBlueprintRequest
output: UpdateBlueprintResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
IllegalBlueprintStateException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Modifies an existing classifier (a GrokClassifier,
/// an XMLClassifier, a JsonClassifier, or a CsvClassifier, depending on
/// which field is present).
operation UpdateClassifier {
input: UpdateClassifierRequest
output: UpdateClassifierResponse
errors: [
EntityNotFoundException
InvalidInputException
OperationTimeoutException
VersionMismatchException
]
}
/// Creates or updates partition statistics of columns.
/// The Identity and Access Management (IAM) permission required for this operation is UpdatePartition.
operation UpdateColumnStatisticsForPartition {
input: UpdateColumnStatisticsForPartitionRequest
output: UpdateColumnStatisticsForPartitionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Creates or updates table statistics of columns.
/// The Identity and Access Management (IAM) permission required for this operation is UpdateTable.
operation UpdateColumnStatisticsForTable {
input: UpdateColumnStatisticsForTableRequest
output: UpdateColumnStatisticsForTableResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates a connection definition in the Data Catalog.
operation UpdateConnection {
input: UpdateConnectionRequest
output: UpdateConnectionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InvalidInputException
OperationTimeoutException
]
}
/// Updates a crawler. If a crawler is
/// running, you must stop it using StopCrawler before updating
/// it.
operation UpdateCrawler {
input: UpdateCrawlerRequest
output: UpdateCrawlerResponse
errors: [
CrawlerRunningException
EntityNotFoundException
InvalidInputException
OperationTimeoutException
VersionMismatchException
]
}
/// Updates the schedule of a crawler using a cron expression.
operation UpdateCrawlerSchedule {
input: UpdateCrawlerScheduleRequest
output: UpdateCrawlerScheduleResponse
errors: [
EntityNotFoundException
InvalidInputException
OperationTimeoutException
SchedulerTransitioningException
VersionMismatchException
]
}
/// Updates an existing database definition in a Data Catalog.
operation UpdateDatabase {
input: UpdateDatabaseRequest
output: UpdateDatabaseResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates the specified data quality ruleset.
operation UpdateDataQualityRuleset {
input: UpdateDataQualityRulesetRequest
output: UpdateDataQualityRulesetResponse
errors: [
AlreadyExistsException
EntityNotFoundException
IdempotentParameterMismatchException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNumberLimitExceededException
]
}
/// Updates a specified development endpoint.
operation UpdateDevEndpoint {
input: UpdateDevEndpointRequest
output: UpdateDevEndpointResponse
errors: [
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ValidationException
]
}
/// Updates an existing job definition. The previous job definition is completely overwritten by this information.
operation UpdateJob {
input: UpdateJobRequest
output: UpdateJobResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Synchronizes a job from the source control repository. This operation takes the job artifacts that are located in the remote repository and updates the Glue internal stores with these artifacts.
/// This API supports optional parameters which take in the repository information.
operation UpdateJobFromSourceControl {
input: UpdateJobFromSourceControlRequest
output: UpdateJobFromSourceControlResponse
errors: [
AccessDeniedException
AlreadyExistsException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ValidationException
]
}
/// Updates an existing machine learning transform. Call this operation to tune the algorithm parameters to achieve better results.
/// After calling this operation, you can call the StartMLEvaluationTaskRun
/// operation to assess how well your new parameters achieved your goals (such as improving the
/// quality of your machine learning transform, or making it more cost-effective).
operation UpdateMLTransform {
input: UpdateMLTransformRequest
output: UpdateMLTransformResponse
errors: [
AccessDeniedException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates a partition.
operation UpdatePartition {
input: UpdatePartitionRequest
output: UpdatePartitionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates an existing registry which is used to hold a collection of schemas. The updated properties relate to the registry, and do not modify any of the schemas within the registry.
operation UpdateRegistry {
input := {
/// This is a wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
@required
RegistryId: RegistryId
/// A description of the registry. If description is not provided, this field will not be updated.
@required
Description: DescriptionString
}
output: UpdateRegistryResponse
errors: [
AccessDeniedException
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
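// Illustrative only, not part of the model: a sketch of an UpdateRegistry input document.
// The RegistryName member of RegistryId is an assumption (that structure is defined elsewhere),
// and all values are hypothetical.
// {
//   "RegistryId": { "RegistryName": "example-registry" },
//   "Description": "Updated description for the example registry"
// }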
/// Updates the description, compatibility setting, or version checkpoint for a schema set.
/// For updating the compatibility setting, the call will not validate compatibility for the entire set of schema versions with the new compatibility setting. If the value for Compatibility is provided, the VersionNumber (a checkpoint) is also required. The API will validate the checkpoint version number for consistency.
/// If the value for the VersionNumber (checkpoint) is provided, Compatibility is optional and this can be used to set/reset a checkpoint for the schema.
/// This update will happen only if the schema is in the AVAILABLE state.
operation UpdateSchema {
input := {
/// This is a wrapper structure to contain schema identity fields. The structure contains:
///
/// - SchemaId$SchemaArn: The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.
///
/// - SchemaId$SchemaName: The name of the schema. One of SchemaArn or SchemaName has to be provided.
@required
SchemaId: SchemaId
/// Version number required for check pointing. One of VersionNumber or Compatibility has to be provided.
SchemaVersionNumber: SchemaVersionNumber
/// The new compatibility setting for the schema.
Compatibility: Compatibility
/// The new description for the schema.
Description: DescriptionString
}
output: UpdateSchemaResponse
errors: [
AccessDeniedException
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
]
}
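// Illustrative only, not part of the model: a sketch of an UpdateSchema input document that
// sets a version checkpoint, built from the members declared above. The schema name, the
// compatibility value, and the VersionNumber member assumed inside SchemaVersionNumber are
// hypothetical.
// {
//   "SchemaId": { "SchemaName": "example-schema" },
//   "SchemaVersionNumber": { "VersionNumber": 3 },
//   "Compatibility": "BACKWARD"
// }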
/// Synchronizes a job to the source control repository. This operation takes the job artifacts from the Glue internal stores and makes a commit to the remote repository that is configured on the job.
/// This API supports optional parameters which take in the repository information.
operation UpdateSourceControlFromJob {
input: UpdateSourceControlFromJobRequest
output: UpdateSourceControlFromJobResponse
errors: [
AccessDeniedException
AlreadyExistsException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
ValidationException
]
}
/// Updates a metadata table in the Data Catalog.
operation UpdateTable {
input: UpdateTableRequest
output: UpdateTableResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
ResourceNotReadyException
ResourceNumberLimitExceededException
]
}
/// Updates a trigger definition.
operation UpdateTrigger {
input: UpdateTriggerRequest
output: UpdateTriggerResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates an existing function definition in the Data Catalog.
operation UpdateUserDefinedFunction {
input: UpdateUserDefinedFunctionRequest
output: UpdateUserDefinedFunctionResponse
errors: [
EntityNotFoundException
GlueEncryptionException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Updates an existing workflow.
operation UpdateWorkflow {
input: UpdateWorkflowRequest
output: UpdateWorkflowResponse
errors: [
ConcurrentModificationException
EntityNotFoundException
InternalServiceException
InvalidInputException
OperationTimeoutException
]
}
/// Access to a resource was denied.
@error("client")
structure AccessDeniedException {
/// A message describing the problem.
Message: MessageString
}
/// Defines an action to be initiated by a trigger.
structure Action {
/// The name of a job to be run.
JobName: NameString
/// The job arguments used when this trigger fires. For this job run, they replace the default arguments set in the job definition itself.
/// You can specify arguments here that your own job-execution script
/// consumes, as well as arguments that Glue itself consumes.
/// For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.
/// For information about the key-value pairs that Glue consumes to set up your job, see the Special Parameters Used by Glue topic in the developer guide.
Arguments: GenericMap
/// The JobRun timeout in minutes. This is the maximum time that a job run can
/// consume resources before it is terminated and enters TIMEOUT status. The default
/// is 2,880 minutes (48 hours). This overrides the timeout value set in the parent job.
Timeout: Timeout
/// The name of the SecurityConfiguration structure to be used with this
/// action.
SecurityConfiguration: NameString
/// Specifies configuration properties of a job run notification.
NotificationProperty: NotificationProperty
/// The name of the crawler to be used with this action.
CrawlerName: NameString
}
/// Specifies a transform that groups rows by chosen fields and computes the aggregated value by specified function.
structure Aggregate {
/// The name of the transform node.
@required
Name: NodeName
/// Specifies the fields and rows to use as inputs for the aggregate transform.
@required
Inputs: OneInput
/// Specifies the fields to group by.
@required
Groups: GlueStudioPathList
/// Specifies the aggregate functions to be performed on specified fields.
@required
Aggs: AggregateOperations
}
/// Specifies the set of parameters needed to perform aggregation in the aggregate transform.
structure AggregateOperation {
/// Specifies the column on the data set on which the aggregation function will be applied.
@required
Column: EnclosedInStringProperties
/// Specifies the aggregation function to apply.
/// Possible aggregation functions include: avg, countDistinct, count, first, last, kurtosis, max, min, skewness,
/// stddev_samp, stddev_pop, sum, sumDistinct, var_samp, var_pop
@required
AggFunc: AggFunction
}
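// Illustrative only, not part of the model: a sketch of an AggregateOperation value using the
// members declared above; the column path is hypothetical and "sum" is one of the listed
// aggregation functions.
// { "Column": ["sales", "amount"], "AggFunc": "sum" }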
/// A resource to be created or added already exists.
@error("client")
structure AlreadyExistsException {
/// A message describing the problem.
Message: MessageString
}
/// Specifies an optional value when connecting to the Redshift cluster.
structure AmazonRedshiftAdvancedOption {
/// The key for the additional connection option.
Key: GenericString
/// The value for the additional connection option.
Value: GenericString
}
/// Specifies an Amazon Redshift node.
structure AmazonRedshiftNodeData {
/// The access type for the Redshift connection. Can be a direct connection or catalog connections.
AccessType: GenericLimitedString
/// The source type to specify whether a specific table is the source or a custom query.
SourceType: GenericLimitedString
/// The Glue connection to the Redshift cluster.
Connection: Option
/// The Redshift schema name when working with a direct connection.
Schema: Option
/// The Redshift table name when working with a direct connection.
Table: Option
/// The name of the Glue Data Catalog database when working with a data catalog.
CatalogDatabase: Option
/// The Glue Data Catalog table name when working with a data catalog.
CatalogTable: Option
/// The Redshift schema name when working with a data catalog.
CatalogRedshiftSchema: GenericString
/// The database table to read from.
CatalogRedshiftTable: GenericString
/// The Amazon S3 path where temporary data can be staged when copying out of the database.
TempDir: EnclosedInStringProperty
/// Optional. The role name used when connecting to S3. The IAM role will default to the role on the job when left blank.
IamRole: Option
/// Optional values when connecting to the Redshift cluster.
AdvancedOptions: AmazonRedshiftAdvancedOptions
/// The SQL used to fetch the data from a Redshift source when the SourceType is 'query'.
SampleQuery: GenericString
/// The SQL used before a MERGE or APPEND with upsert is run.
PreAction: GenericString
/// The SQL used before a MERGE or APPEND with upsert is run.
PostAction: GenericString
/// Specifies how writing to a Redshift cluster will occur.
Action: GenericString
/// Specifies the prefix to a table.
TablePrefix: GenericLimitedString
/// The action used on Redshift sinks when doing an APPEND.
Upsert: BooleanValue = false
/// The action used to determine how a MERGE in a Redshift sink will be handled.
MergeAction: GenericLimitedString
/// The action used to determine how a MERGE in a Redshift sink will be handled when an existing record matches a new record.
MergeWhenMatched: GenericLimitedString
/// The action used to determine how a MERGE in a Redshift sink will be handled when an existing record doesn't match a new record.
MergeWhenNotMatched: GenericLimitedString
/// The SQL used in a custom merge to deal with matching records.
MergeClause: GenericString
/// Specifies the name of the connection that is associated with the catalog table used.
CrawlerConnection: GenericString
/// The array of schema output for a given node.
TableSchema: OptionList
/// The name of the temporary staging table that is used when doing a MERGE or APPEND with upsert.
StagingTable: GenericString
/// The list of column names used to determine a matching record when doing a MERGE or APPEND with upsert.
SelectedColumns: OptionList
}
/// Specifies an Amazon Redshift source.
structure AmazonRedshiftSource {
/// The name of the Amazon Redshift source.
Name: NodeName
/// Specifies the data of the Amazon Redshift source node.
Data: AmazonRedshiftNodeData
}
/// Specifies an Amazon Redshift target.
structure AmazonRedshiftTarget {
/// The name of the Amazon Redshift target.
Name: NodeName
/// Specifies the data of the Amazon Redshift target node.
Data: AmazonRedshiftNodeData
/// The nodes that are inputs to the data target.
Inputs: OneInput
}
/// Specifies a transform that maps data property keys in the data source to data property keys in the data target. You can rename keys, modify the data types for keys, and choose which keys to drop from the dataset.
structure ApplyMapping {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// Specifies the mapping of data property keys in the data source to data property keys in the data target.
@required
Mapping: Mappings
}
/// Specifies a connector to an Amazon Athena data source.
structure AthenaConnectorSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the connection that is associated with the connector.
@required
ConnectionName: EnclosedInStringProperty
/// The name of a connector that assists with accessing the data store in Glue Studio.
@required
ConnectorName: EnclosedInStringProperty
/// The type of connection, such as marketplace.athena or custom.athena, designating a connection to an Amazon Athena data store.
@required
ConnectionType: EnclosedInStringProperty
/// The name of the table in the data source.
ConnectionTable: EnclosedInStringPropertyWithQuote
/// The name of the Cloudwatch log group to read from. For example, /aws-glue/jobs/output.
@required
SchemaName: EnclosedInStringProperty
/// Specifies the data schema for the custom Athena source.
OutputSchemas: GlueSchemas
}
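// Illustrative only, not part of the model: a sketch of an AthenaConnectorSource value using
// the members declared above; the connection, connector, and node names are hypothetical.
// {
//   "Name": "AthenaSource",
//   "ConnectionName": "example-athena-connection",
//   "ConnectorName": "example-athena-connector",
//   "ConnectionType": "custom.athena",
//   "SchemaName": "/aws-glue/jobs/output"
// }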
/// A structure containing the Lake Formation audit context.
structure AuditContext {
/// A string containing the additional audit context information.
AdditionalAuditContext: AuditContextString
/// The requested columns for audit.
RequestedColumns: AuditColumnNamesList
/// All columns request for audit.
AllColumnsRequested: NullableBoolean
}
/// A list of errors that can occur when registering partition indexes for an existing table.
/// These errors give the details about why an index registration failed and provide a limited number of partitions in the response, so that you can fix the partitions at fault and try registering the index again. The most common set of errors that can occur are categorized as follows:
///
/// - EncryptedPartitionError: The partitions are encrypted.
///
/// - InvalidPartitionTypeDataError: The partition value doesn't match the data type for that partition column.
///
/// - MissingPartitionValueError: The partitions are encrypted.
///
/// - UnsupportedPartitionCharacterError: Characters inside the partition value are not supported. For example: U+0000, U+0001, U+0002.
///
/// - InternalError: Any error which does not belong to other error codes.
structure BackfillError {
/// The error code for an error that occurred when registering partition indexes for an existing table.
Code: BackfillErrorCode
/// A list of a limited number of partitions in the response.
Partitions: BackfillErroredPartitionsList
}
/// Specifies a target that uses a Glue Data Catalog table.
structure BasicCatalogTarget {
/// The name of your data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The database that contains the table you want to use as the target. This database must already exist in the Data Catalog.
@required
Database: EnclosedInStringProperty
/// The table that defines the schema of your output data. This table must already exist in the Data Catalog.
@required
Table: EnclosedInStringProperty
}
@input
structure BatchCreatePartitionRequest {
/// The ID of the catalog in which the partition is to be created. Currently, this should be
/// the Amazon Web Services account ID.
CatalogId: CatalogIdString
/// The name of the metadata database in which the partition is
/// to be created.
@required
DatabaseName: NameString
/// The name of the metadata table in which the partition is to be created.
@required
TableName: NameString
/// A list of PartitionInput structures that define
/// the partitions to be created.
@required
PartitionInputList: PartitionInputList
}
@output
structure BatchCreatePartitionResponse {
/// The errors encountered when trying to create the requested partitions.
Errors: PartitionErrors
}
@input
structure BatchDeleteConnectionRequest {
/// The ID of the Data Catalog in which the connections reside. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// A list of names of the connections to delete.
@required
ConnectionNameList: DeleteConnectionNameList
}
@output
structure BatchDeleteConnectionResponse {
/// A list of names of the connection definitions that were
/// successfully deleted.
Succeeded: NameStringList
/// A map of the names of connections that were not successfully
/// deleted to error details.
Errors: ErrorByName
}
@input
structure BatchDeletePartitionRequest {
/// The ID of the Data Catalog where the partition to be deleted resides. If none is provided,
/// the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which the table in question
/// resides.
@required
DatabaseName: NameString
/// The name of the table that contains the partitions to be deleted.
@required
TableName: NameString
/// A list of PartitionInput structures that define
/// the partitions to be deleted.
@required
PartitionsToDelete: BatchDeletePartitionValueList
}
@output
structure BatchDeletePartitionResponse {
/// The errors encountered when trying to delete the requested partitions.
Errors: PartitionErrors
}
@input
structure BatchDeleteTableRequest {
/// The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which the tables to delete reside. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// A list of the tables to delete.
@required
TablesToDelete: BatchDeleteTableNameList
/// The transaction ID at which to delete the table contents.
TransactionId: TransactionIdString
}
@output
structure BatchDeleteTableResponse {
/// A list of errors encountered in attempting to delete the specified tables.
Errors: TableErrors
}
@input
structure BatchDeleteTableVersionRequest {
/// The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The database in the catalog in which the table resides. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The name of the table. For Hive compatibility,
/// this name is entirely lowercase.
@required
TableName: NameString
/// A list of the IDs of versions to be deleted. A VersionId is a string representation of an integer. Each version is incremented by 1.
@required
VersionIds: BatchDeleteTableVersionList
}
@output
structure BatchDeleteTableVersionResponse {
/// A list of errors encountered while trying to delete
/// the specified table versions.
Errors: TableVersionErrors
}
@input
structure BatchGetBlueprintsRequest {
/// A list of blueprint names.
@required
Names: BatchGetBlueprintNames
/// Specifies whether or not to include the blueprint in the response.
IncludeBlueprint: NullableBoolean
/// Specifies whether or not to include the parameters, as a JSON string, for the blueprint in the response.
IncludeParameterSpec: NullableBoolean
}
@output
structure BatchGetBlueprintsResponse {
/// Returns a list of blueprints as a Blueprints object.
Blueprints: Blueprints
/// Returns a list of BlueprintNames that were not found.
MissingBlueprints: BlueprintNames
}
@input
structure BatchGetCrawlersRequest {
/// A list of crawler names, which might be the names returned from the
/// ListCrawlers operation.
@required
CrawlerNames: CrawlerNameList
}
@output
structure BatchGetCrawlersResponse {
/// A list of crawler definitions.
Crawlers: CrawlerList
/// A list of names of crawlers that were not found.
CrawlersNotFound: CrawlerNameList
}
@input
structure BatchGetCustomEntityTypesRequest {
/// A list of names of the custom patterns that you want to retrieve.
@required
Names: CustomEntityTypeNames
}
@output
structure BatchGetCustomEntityTypesResponse {
/// A list of CustomEntityType objects representing the custom patterns that have been created.
CustomEntityTypes: CustomEntityTypes
/// A list of the names of custom patterns that were not found.
CustomEntityTypesNotFound: CustomEntityTypeNames
}
@input
structure BatchGetDataQualityResultRequest {
/// A list of unique result IDs for the data quality results.
@required
ResultIds: DataQualityResultIds
}
@output
structure BatchGetDataQualityResultResponse {
/// A list of DataQualityResult objects representing the data quality results.
@required
Results: DataQualityResultsList
/// A list of result IDs for which results were not found.
ResultsNotFound: DataQualityResultIds
}
@input
structure BatchGetDevEndpointsRequest {
/// The list of DevEndpoint names, which might be the names returned from the
/// ListDevEndpoint operation.
@required
DevEndpointNames: DevEndpointNames
}
@output
structure BatchGetDevEndpointsResponse {
/// A list of DevEndpoint definitions.
DevEndpoints: DevEndpointList
/// A list of DevEndpoints not found.
DevEndpointsNotFound: DevEndpointNames
}
@input
structure BatchGetJobsRequest {
/// A list of job names, which might be the names returned from the ListJobs
/// operation.
@required
JobNames: JobNameList
}
@output
structure BatchGetJobsResponse {
/// A list of job definitions.
Jobs: JobList
/// A list of names of jobs not found.
JobsNotFound: JobNameList
}
@input
structure BatchGetPartitionRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// A list of partition values identifying the partitions to retrieve.
@required
PartitionsToGet: BatchGetPartitionValueList
}
@output
structure BatchGetPartitionResponse {
/// A list of the requested partitions.
Partitions: PartitionList
/// A list of the partition values in the request for which partitions were not
/// returned.
UnprocessedKeys: BatchGetPartitionValueList
}
@input
structure BatchGetTriggersRequest {
/// A list of trigger names, which may be the names returned from the ListTriggers
/// operation.
@required
TriggerNames: TriggerNameList
}
@output
structure BatchGetTriggersResponse {
/// A list of trigger definitions.
Triggers: TriggerList
/// A list of names of triggers not found.
TriggersNotFound: TriggerNameList
}
@input
structure BatchGetWorkflowsRequest {
/// A list of workflow names, which may be the names returned from the ListWorkflows
/// operation.
@required
Names: WorkflowNames
/// Specifies whether to include a graph when returning the workflow resource metadata.
IncludeGraph: NullableBoolean
}
@output
structure BatchGetWorkflowsResponse {
/// A list of workflow resource metadata.
Workflows: Workflows
/// A list of names of workflows not found.
MissingWorkflows: WorkflowNames
}
/// Records an error that occurred when attempting to stop a
/// specified job run.
structure BatchStopJobRunError {
/// The name of the job definition that is used in the job run in question.
JobName: NameString
/// The JobRunId of the job run in question.
JobRunId: IdString
/// Specifies details about the error that was encountered.
ErrorDetail: ErrorDetail
}
@input
structure BatchStopJobRunRequest {
/// The name of the job definition for which to stop job runs.
@required
JobName: NameString
/// A list of the JobRunIds that should be stopped for that job
/// definition.
@required
JobRunIds: BatchStopJobRunJobRunIdList
}
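// As an illustration (job name and run IDs are hypothetical), a BatchStopJobRun request body
// in the awsJson1_1 protocol might look like:
// {
//   "JobName": "nightly-etl",
//   "JobRunIds": ["jr_0123456789abcdef0", "jr_fedcba98765432100"]
// }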
@output
structure BatchStopJobRunResponse {
/// A list of the JobRuns that were successfully submitted for stopping.
SuccessfulSubmissions: BatchStopJobRunSuccessfulSubmissionList
/// A list of the errors that were encountered in trying to stop JobRuns,
/// including the JobRunId for which each error was encountered and details about the
/// error.
Errors: BatchStopJobRunErrorList
}
/// Records a successful request to stop a specified JobRun.
structure BatchStopJobRunSuccessfulSubmission {
/// The name of the job definition used in the job run that was stopped.
JobName: NameString
/// The JobRunId of the job run that was stopped.
JobRunId: IdString
}
/// Contains information about a batch update partition error.
structure BatchUpdatePartitionFailureEntry {
/// A list of values defining the partitions.
PartitionValueList: BoundedPartitionValueList
/// The details about the batch update partition error.
ErrorDetail: ErrorDetail
}
@input
structure BatchUpdatePartitionRequest {
/// The ID of the catalog in which the partition is to be updated. Currently, this should be
/// the Amazon Web Services account ID.
CatalogId: CatalogIdString
/// The name of the metadata database in which the partition is
/// to be updated.
@required
DatabaseName: NameString
/// The name of the metadata table in which the partition is to be updated.
@required
TableName: NameString
/// A list of up to 100 BatchUpdatePartitionRequestEntry objects to update.
@required
Entries: BatchUpdatePartitionRequestEntryList
}
/// A structure that contains the values and structure used to update a partition.
structure BatchUpdatePartitionRequestEntry {
/// A list of values defining the partitions.
@required
PartitionValueList: BoundedPartitionValueList
/// The structure used to update a partition.
@required
PartitionInput: PartitionInput
}
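// As an illustration (partition values are hypothetical), a single entry in the Entries list
// might look like the following; up to 100 such entries can be sent per request, and the
// PartitionInput shown assumes the usual Values member of that structure:
// {
//   "PartitionValueList": ["2023", "07"],
//   "PartitionInput": { "Values": ["2023", "07"] }
// }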
@output
structure BatchUpdatePartitionResponse {
/// The errors encountered when trying to update the requested partitions. A list of BatchUpdatePartitionFailureEntry objects.
Errors: BatchUpdatePartitionFailureList
}
/// Defines column statistics supported for bit sequence data values.
structure BinaryColumnStatisticsData {
/// The size of the longest bit sequence in the column.
@required
MaximumLength: NonNegativeLong = 0
/// The average bit sequence length in the column.
@required
AverageLength: NonNegativeDouble = 0
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
}
/// The details of a blueprint.
structure Blueprint {
/// The name of the blueprint.
Name: OrchestrationNameString
/// The description of the blueprint.
Description: Generic512CharString
/// The date and time the blueprint was registered.
CreatedOn: TimestampValue
/// The date and time the blueprint was last modified.
LastModifiedOn: TimestampValue
/// A JSON string that indicates the list of parameter specifications for the blueprint.
ParameterSpec: BlueprintParameterSpec
/// Specifies the path in Amazon S3 where the blueprint is published.
BlueprintLocation: GenericString
/// Specifies a path in Amazon S3 where the blueprint is copied when you call CreateBlueprint/UpdateBlueprint to register the blueprint in Glue.
BlueprintServiceLocation: GenericString
/// The status of the blueprint registration.
///
/// - Creating — The blueprint registration is in progress.
/// - Active — The blueprint has been successfully registered.
/// - Updating — An update to the blueprint registration is in progress.
/// - Failed — The blueprint registration failed.
Status: BlueprintStatus
/// An error message.
ErrorMessage: ErrorString
/// When there are multiple versions of a blueprint and the latest version has some errors, this attribute indicates the last successful blueprint definition that is available with the service.
LastActiveDefinition: LastActiveDefinition
}
/// The details of a blueprint.
structure BlueprintDetails {
/// The name of the blueprint.
BlueprintName: OrchestrationNameString
/// The run ID for this blueprint.
RunId: IdString
}
/// The details of a blueprint run.
structure BlueprintRun {
/// The name of the blueprint.
BlueprintName: OrchestrationNameString
/// The run ID for this blueprint run.
RunId: IdString
/// The name of a workflow that is created as a result of a successful blueprint run. If a blueprint run has an error, there will not be a workflow created.
WorkflowName: NameString
/// The state of the blueprint run. Possible values are:
///
/// - Running — The blueprint run is in progress.
/// - Succeeded — The blueprint run completed successfully.
/// - Failed — The blueprint run failed and rollback is complete.
/// - Rolling Back — The blueprint run failed and rollback is in progress.
State: BlueprintRunState
/// The date and time that the blueprint run started.
StartedOn: TimestampValue
/// The date and time that the blueprint run completed.
CompletedOn: TimestampValue
/// Indicates any errors that are seen while running the blueprint.
ErrorMessage: MessageString
/// If there are any errors while creating the entities of a workflow, we try to roll back the created entities until that point and delete them. This attribute indicates the errors seen while trying to delete the entities that are created.
RollbackErrorMessage: MessageString
/// The blueprint parameters as a string. You will have to provide a value for each key that is required from the parameter spec that is defined in the Blueprint$ParameterSpec.
Parameters: BlueprintParameters
/// The role ARN. This role will be assumed by the Glue service and will be used to create the workflow and other entities of a workflow.
RoleArn: OrchestrationIAMRoleArn
}
/// Defines column statistics supported for Boolean data columns.
structure BooleanColumnStatisticsData {
/// The number of true values in the column.
@required
NumberOfTrues: NonNegativeLong = 0
/// The number of false values in the column.
@required
NumberOfFalses: NonNegativeLong = 0
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
}
@input
structure CancelDataQualityRuleRecommendationRunRequest {
/// The unique run identifier associated with this run.
@required
RunId: HashString
}
@output
structure CancelDataQualityRuleRecommendationRunResponse {}
@input
structure CancelDataQualityRulesetEvaluationRunRequest {
/// The unique run identifier associated with this run.
@required
RunId: HashString
}
@output
structure CancelDataQualityRulesetEvaluationRunResponse {}
@input
structure CancelMLTaskRunRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
/// A unique identifier for the task run.
@required
TaskRunId: HashString
}
@output
structure CancelMLTaskRunResponse {
/// The unique identifier of the machine learning transform.
TransformId: HashString
/// The unique identifier for the task run.
TaskRunId: HashString
/// The status for this run.
Status: TaskStatusType
}
@input
structure CancelStatementRequest {
/// The Session ID of the statement to be cancelled.
@required
SessionId: NameString
/// The ID of the statement to be cancelled.
@required
Id: IntegerValue = 0
/// The origin of the request to cancel the statement.
RequestOrigin: OrchestrationNameString
}
@output
structure CancelStatementResponse {}
/// Specifies a Delta Lake data source that is registered in the Glue Data Catalog.
structure CatalogDeltaSource {
/// The name of the Delta Lake data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
/// Specifies additional connection options.
AdditionalDeltaOptions: AdditionalOptions
/// Specifies the data schema for the Delta Lake source.
OutputSchemas: GlueSchemas
}
/// Specifies a table definition in the Glue Data Catalog.
structure CatalogEntry {
/// The database in which the table metadata resides.
@required
DatabaseName: NameString
/// The name of the table in question.
@required
TableName: NameString
}
/// Specifies a Hudi data source that is registered in the Glue Data Catalog.
structure CatalogHudiSource {
/// The name of the Hudi data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
/// Specifies additional connection options.
AdditionalHudiOptions: AdditionalOptions
/// Specifies the data schema for the Hudi source.
OutputSchemas: GlueSchemas
}
/// A structure containing migration status information.
structure CatalogImportStatus {
/// True if the migration has completed, or False otherwise.
ImportCompleted: Boolean = false
/// The time that the migration was started.
ImportTime: Timestamp
/// The name of the person who initiated the migration.
ImportedBy: NameString
}
/// Specifies an Apache Kafka data store in the Data Catalog.
structure CatalogKafkaSource {
/// The name of the data store.
@required
Name: NodeName
/// The amount of time to spend processing each micro batch.
WindowSize: BoxedPositiveInt
/// Whether to automatically determine the schema from the incoming data.
DetectSchema: BoxedBoolean
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// Specifies the streaming options.
StreamingOptions: KafkaStreamingSourceOptions
/// Specifies options related to data preview for viewing a sample of your data.
DataPreviewOptions: StreamingDataPreviewOptions
}
/// Specifies a Kinesis data source in the Glue Data Catalog.
structure CatalogKinesisSource {
/// The name of the data source.
@required
Name: NodeName
/// The amount of time to spend processing each micro batch.
WindowSize: BoxedPositiveInt
/// Whether to automatically determine the schema from the incoming data.
DetectSchema: BoxedBoolean
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// Additional options for the Kinesis streaming data source.
StreamingOptions: KinesisStreamingSourceOptions
/// Additional options for data preview.
DataPreviewOptions: StreamingDataPreviewOptions
}
/// A policy that specifies update behavior for the crawler.
structure CatalogSchemaChangePolicy {
/// Whether to use the specified update behavior when the crawler finds a changed schema.
EnableUpdateCatalog: BoxedBoolean
/// The update behavior when the crawler finds a changed schema.
UpdateBehavior: UpdateCatalogBehavior
}
/// Specifies a data store in the Glue Data Catalog.
structure CatalogSource {
/// The name of the data store.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
/// Specifies a Glue Data Catalog target.
structure CatalogTarget {
/// The name of the database to be synchronized.
@required
DatabaseName: NameString
/// A list of the tables to be synchronized.
@required
Tables: CatalogTablesList
/// The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog
/// connection type paired with a NETWORK connection type.
ConnectionName: ConnectionName
/// A valid Amazon SQS ARN. For example, arn:aws:sqs:region:account:sqs.
EventQueueArn: EventQueueArn
/// A valid Amazon dead-letter SQS ARN. For example, arn:aws:sqs:region:account:deadLetterQueue.
DlqEventQueueArn: EventQueueArn
}
@output
structure CheckSchemaVersionValidityResponse {
/// Returns true if the schema is valid, or false otherwise.
Valid: IsVersionValid = false
/// A validation failure error message.
Error: SchemaValidationError
}
/// Classifiers are triggered during a crawl task. A classifier checks whether a given file is
/// in a format it can handle. If it is, the classifier creates a schema in the form of a
/// StructType object that matches that data format.
/// You can use the standard classifiers that Glue provides, or you can write your own
/// classifiers to best categorize your data sources and specify the appropriate schemas to use
/// for them. A classifier can be a grok classifier, an XML classifier,
/// a JSON classifier, or a custom CSV classifier, as specified in one
/// of the fields in the Classifier object.
structure Classifier {
/// A classifier that uses grok.
GrokClassifier: GrokClassifier
/// A classifier for XML content.
XMLClassifier: XMLClassifier
/// A classifier for JSON content.
JsonClassifier: JsonClassifier
/// A classifier for comma-separated values (CSV).
CsvClassifier: CsvClassifier
}
/// Specifies how Amazon CloudWatch data should be encrypted.
structure CloudWatchEncryption {
/// The encryption mode to use for CloudWatch data.
CloudWatchEncryptionMode: CloudWatchEncryptionMode
/// The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.
KmsKeyArn: KmsKeyArn
}
/// CodeGenConfigurationNode enumerates all valid Node types. One and only one of its member variables can be populated.
structure CodeGenConfigurationNode {
/// Specifies a connector to an Amazon Athena data source.
AthenaConnectorSource: AthenaConnectorSource
/// Specifies a connector to a JDBC data source.
JDBCConnectorSource: JDBCConnectorSource
/// Specifies a connector to an Apache Spark data source.
SparkConnectorSource: SparkConnectorSource
/// Specifies a data store in the Glue Data Catalog.
CatalogSource: CatalogSource
/// Specifies an Amazon Redshift data store.
RedshiftSource: RedshiftSource
/// Specifies an Amazon S3 data store in the Glue Data Catalog.
S3CatalogSource: S3CatalogSource
/// Specifies a comma-separated value (CSV) data store stored in Amazon S3.
S3CsvSource: S3CsvSource
/// Specifies a JSON data store stored in Amazon S3.
S3JsonSource: S3JsonSource
/// Specifies an Apache Parquet data store stored in Amazon S3.
S3ParquetSource: S3ParquetSource
/// Specifies a relational catalog data store in the Glue Data Catalog.
RelationalCatalogSource: RelationalCatalogSource
/// Specifies a DynamoDB Catalog data store in the Glue Data Catalog.
DynamoDBCatalogSource: DynamoDBCatalogSource
/// Specifies a data target that writes to Amazon S3 in Apache Parquet columnar storage.
JDBCConnectorTarget: JDBCConnectorTarget
/// Specifies a target that uses an Apache Spark connector.
SparkConnectorTarget: SparkConnectorTarget
/// Specifies a target that uses a Glue Data Catalog table.
CatalogTarget: BasicCatalogTarget
/// Specifies a target that uses Amazon Redshift.
RedshiftTarget: RedshiftTarget
/// Specifies a data target that writes to Amazon S3 using the Glue Data Catalog.
S3CatalogTarget: S3CatalogTarget
/// Specifies a data target that writes to Amazon S3 in Apache Parquet columnar storage.
S3GlueParquetTarget: S3GlueParquetTarget
/// Specifies a data target that writes to Amazon S3.
S3DirectTarget: S3DirectTarget
/// Specifies a transform that maps data property keys in the data source to data property keys in the data target. You can rename keys, modify the data types for keys, and choose which keys to drop from the dataset.
ApplyMapping: ApplyMapping
/// Specifies a transform that chooses the data property keys that you want to keep.
SelectFields: SelectFields
/// Specifies a transform that chooses the data property keys that you want to drop.
DropFields: DropFields
/// Specifies a transform that renames a single data property key.
RenameField: RenameField
/// Specifies a transform that writes samples of the data to an Amazon S3 bucket.
Spigot: Spigot
/// Specifies a transform that joins two datasets into one dataset using a comparison phrase on the specified data property keys. You can use inner, outer, left, right, left semi, and left anti joins.
Join: Join
/// Specifies a transform that splits data property keys into two DynamicFrames. The output is a collection of
/// DynamicFrames: one with selected data property keys, and one with the remaining data property keys.
SplitFields: SplitFields
/// Specifies a transform that chooses one DynamicFrame from a collection of DynamicFrames. The output is the selected DynamicFrame.
SelectFromCollection: SelectFromCollection
/// Specifies a transform that locates records in the dataset that have missing values and adds a new field with a value determined by imputation. The input data set is used to train the machine learning model that determines what the missing value should be.
FillMissingValues: FillMissingValues
/// Specifies a transform that splits a dataset into two, based on a filter condition.
Filter: Filter
/// Specifies a transform that uses custom code you provide to perform the data transformation. The output is a collection of DynamicFrames.
CustomCode: CustomCode
/// Specifies a transform where you enter a SQL query using Spark SQL syntax to transform the data. The output is a single DynamicFrame.
SparkSQL: SparkSQL
/// Specifies a direct Amazon Kinesis data source.
DirectKinesisSource: DirectKinesisSource
/// Specifies an Apache Kafka data store.
DirectKafkaSource: DirectKafkaSource
/// Specifies a Kinesis data source in the Glue Data Catalog.
CatalogKinesisSource: CatalogKinesisSource
/// Specifies an Apache Kafka data store in the Data Catalog.
CatalogKafkaSource: CatalogKafkaSource
/// Specifies a transform that removes columns from the dataset if all values in the column are 'null'. By default, Glue Studio will recognize null objects, but some values such as empty strings, strings that are "null", -1 integers or other placeholders such as zeros, are not automatically recognized as nulls.
DropNullFields: DropNullFields
/// Specifies a transform that merges a DynamicFrame with a staging DynamicFrame
/// based on the specified primary keys to identify records. Duplicate records (records with the same primary keys) are not de-duplicated.
Merge: Merge
/// Specifies a transform that combines the rows from two or more datasets into a single result.
Union: Union
/// Specifies a transform that identifies, removes or masks PII data.
PIIDetection: PIIDetection
/// Specifies a transform that groups rows by chosen fields and computes the aggregated value by specified function.
Aggregate: Aggregate
/// Specifies a transform that removes rows of repeating data from a data set.
DropDuplicates: DropDuplicates
/// Specifies a data target that writes to a governed catalog.
GovernedCatalogTarget: GovernedCatalogTarget
/// Specifies a data source in a governed Data Catalog.
GovernedCatalogSource: GovernedCatalogSource
/// Specifies a Microsoft SQL server data source in the Glue Data Catalog.
MicrosoftSQLServerCatalogSource: MicrosoftSQLServerCatalogSource
/// Specifies a MySQL data source in the Glue Data Catalog.
MySQLCatalogSource: MySQLCatalogSource
/// Specifies an Oracle data source in the Glue Data Catalog.
OracleSQLCatalogSource: OracleSQLCatalogSource
/// Specifies a PostgreSQL data source in the Glue Data Catalog.
PostgreSQLCatalogSource: PostgreSQLCatalogSource
/// Specifies a target that uses Microsoft SQL.
MicrosoftSQLServerCatalogTarget: MicrosoftSQLServerCatalogTarget
/// Specifies a target that uses MySQL.
MySQLCatalogTarget: MySQLCatalogTarget
/// Specifies a target that uses Oracle SQL.
OracleSQLCatalogTarget: OracleSQLCatalogTarget
/// Specifies a target that uses PostgreSQL.
PostgreSQLCatalogTarget: PostgreSQLCatalogTarget
/// Specifies a custom visual transform created by a user.
DynamicTransform: DynamicTransform
/// Specifies your data quality evaluation criteria.
EvaluateDataQuality: EvaluateDataQuality
/// Specifies a Hudi data source that is registered in the Glue Data Catalog. The data source must be stored in Amazon S3.
S3CatalogHudiSource: S3CatalogHudiSource
/// Specifies a Hudi data source that is registered in the Glue Data Catalog.
CatalogHudiSource: CatalogHudiSource
/// Specifies a Hudi data source stored in Amazon S3.
S3HudiSource: S3HudiSource
/// Specifies a target that writes to a Hudi data source in the Glue Data Catalog.
S3HudiCatalogTarget: S3HudiCatalogTarget
/// Specifies a target that writes to a Hudi data source in Amazon S3.
S3HudiDirectTarget: S3HudiDirectTarget
DirectJDBCSource: DirectJDBCSource
/// Specifies a Delta Lake data source that is registered in the Glue Data Catalog. The data source must be stored in Amazon S3.
S3CatalogDeltaSource: S3CatalogDeltaSource
/// Specifies a Delta Lake data source that is registered in the Glue Data Catalog.
CatalogDeltaSource: CatalogDeltaSource
/// Specifies a Delta Lake data source stored in Amazon S3.
S3DeltaSource: S3DeltaSource
/// Specifies a target that writes to a Delta Lake data source in the Glue Data Catalog.
S3DeltaCatalogTarget: S3DeltaCatalogTarget
/// Specifies a target that writes to a Delta Lake data source in Amazon S3.
S3DeltaDirectTarget: S3DeltaDirectTarget
/// Specifies a target that writes to a data source in Amazon Redshift.
AmazonRedshiftSource: AmazonRedshiftSource
/// Specifies a target that writes to a data target in Amazon Redshift.
AmazonRedshiftTarget: AmazonRedshiftTarget
/// Specifies your data quality evaluation criteria. Allows multiple input data and returns a collection of Dynamic Frames.
EvaluateDataQualityMultiFrame: EvaluateDataQualityMultiFrame
/// Specifies a Glue DataBrew recipe node.
Recipe: Recipe
/// Specifies a Snowflake data source.
SnowflakeSource: SnowflakeSource
/// Specifies a target that writes to a Snowflake data source.
SnowflakeTarget: SnowflakeTarget
}
/// Represents a directional edge in a directed acyclic graph (DAG).
structure CodeGenEdge {
/// The ID of the node at which the edge starts.
@required
Source: CodeGenIdentifier
/// The ID of the node at which the edge ends.
@required
Target: CodeGenIdentifier
/// The target of the edge.
TargetParameter: CodeGenArgName
}
/// Represents a node in a directed acyclic graph (DAG)
structure CodeGenNode {
/// A node identifier that is unique within the node's graph.
@required
Id: CodeGenIdentifier
/// The type of node that this is.
@required
NodeType: CodeGenNodeType
/// Properties of the node, in the form of name-value pairs.
@required
Args: CodeGenNodeArgs
/// The line number of the node.
LineNumber: Integer = 0
}
/// An argument or property of a node.
structure CodeGenNodeArg {
/// The name of the argument or property.
@required
Name: CodeGenArgName
/// The value of the argument or property.
@required
Value: CodeGenArgValue
/// True if the value is used as a parameter.
Param: Boolean = false
}
/// A column in a Table.
structure Column {
/// The name of the Column.
@required
Name: NameString
/// The data type of the Column.
Type: ColumnTypeString
/// A free-form text comment.
Comment: CommentString
/// These key-value pairs define properties associated with the column.
Parameters: ParametersMap
}
/// Encapsulates a column name that failed and the reason for failure.
structure ColumnError {
/// The name of the column that failed.
ColumnName: NameString
/// An error message with the reason for the failure of an operation.
Error: ErrorDetail
}
/// A structure containing the column name and column importance score for a column.
/// Column importance helps you understand how columns contribute to your model, by identifying which columns in your records are more important than others.
structure ColumnImportance {
/// The name of a column.
ColumnName: NameString
/// The column importance score for the column, as a decimal.
Importance: GenericBoundedDouble
}
/// A filter that uses both column-level and row-level filtering.
structure ColumnRowFilter {
/// A string containing the name of the column.
ColumnName: NameString
/// A string containing the row-level filter expression.
RowFilterExpression: PredicateString
}
/// Represents the generated column-level statistics for a table or partition.
structure ColumnStatistics {
/// The name of the column that the statistics belong to.
@required
ColumnName: NameString
/// The data type of the column.
@required
ColumnType: TypeString
/// The timestamp of when column statistics were generated.
@required
AnalyzedTime: Timestamp
/// A ColumnStatisticsData object that contains the statistics data values.
@required
StatisticsData: ColumnStatisticsData
}
/// Contains the individual types of column statistics data. Only one data object should be set and indicated by the Type attribute.
structure ColumnStatisticsData {
/// The type of column statistics data.
@required
Type: ColumnStatisticsType
/// Boolean column statistics data.
BooleanColumnStatisticsData: BooleanColumnStatisticsData
/// Date column statistics data.
DateColumnStatisticsData: DateColumnStatisticsData
///
/// Decimal column statistics data. UnscaledValues within are Base64-encoded
/// binary objects storing big-endian, two's complement representations of
/// the decimal's unscaled value.
///
DecimalColumnStatisticsData: DecimalColumnStatisticsData
/// Double column statistics data.
DoubleColumnStatisticsData: DoubleColumnStatisticsData
/// Long column statistics data.
LongColumnStatisticsData: LongColumnStatisticsData
/// String column statistics data.
StringColumnStatisticsData: StringColumnStatisticsData
/// Binary column statistics data.
BinaryColumnStatisticsData: BinaryColumnStatisticsData
}
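// As an illustration of the decimal encoding described above: the decimal 123.45 with scale 2
// has the unscaled value 12345, whose minimal big-endian two's complement byte sequence is
// 0x30 0x39, which Base64-encodes to "MDk=".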
/// Encapsulates a ColumnStatistics object that failed and the reason for failure.
structure ColumnStatisticsError {
/// The ColumnStatistics of the column.
ColumnStatistics: ColumnStatistics
/// An error message with the reason for the failure of an operation.
Error: ErrorDetail
}
/// Two processes are trying to modify a resource simultaneously.
@error("client")
structure ConcurrentModificationException {
/// A message describing the problem.
Message: MessageString
}
/// Too many jobs are being run concurrently.
@error("client")
structure ConcurrentRunsExceededException {
/// A message describing the problem.
Message: MessageString
}
/// Defines a condition under which a trigger fires.
structure Condition {
/// A logical operator.
LogicalOperator: LogicalOperator
/// The name of the job whose JobRuns this condition applies to, and on which
/// this trigger waits.
JobName: NameString
/// The condition state. Currently, the only job states that a trigger can listen for are SUCCEEDED,
/// STOPPED, FAILED, and TIMEOUT. The only crawler states that a trigger can listen for are
/// SUCCEEDED, FAILED, and CANCELLED.
State: JobRunState
/// The name of the crawler to which this condition applies.
CrawlerName: NameString
/// The state of the crawler to which this condition applies.
CrawlState: CrawlState
}
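// As an illustration (job name is hypothetical), a condition that waits for a job run to
// succeed could be expressed in the awsJson1_1 protocol as:
// { "LogicalOperator": "EQUALS", "JobName": "nightly-etl", "State": "SUCCEEDED" }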
/// A specified condition was not satisfied.
@error("client")
structure ConditionCheckFailureException {
/// A message describing the problem.
Message: MessageString
}
/// The CreatePartitions API was called on a table that has indexes enabled.
@error("client")
structure ConflictException {
/// A message describing the problem.
Message: MessageString
}
/// The confusion matrix shows you what your transform is predicting accurately and what types of errors it is making.
/// For more information, see Confusion matrix in Wikipedia.
structure ConfusionMatrix {
/// The number of matches in the data that the transform correctly found, in the confusion matrix for your transform.
NumTruePositives: RecordsCount
/// The number of nonmatches in the data that the transform incorrectly classified as a match,
/// in the confusion matrix for your transform.
NumFalsePositives: RecordsCount
/// The number of nonmatches in the data that the transform correctly rejected, in the
/// confusion matrix for your transform.
NumTrueNegatives: RecordsCount
/// The number of matches in the data that the transform didn't find, in the confusion matrix for your transform.
NumFalseNegatives: RecordsCount
}
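// From these four counts the usual quality measures can be derived, for example:
//   precision = NumTruePositives / (NumTruePositives + NumFalsePositives)
//   recall    = NumTruePositives / (NumTruePositives + NumFalseNegatives)
// e.g. 80 true positives, 20 false positives, and 10 false negatives give a precision of 0.80
// and a recall of roughly 0.89.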
/// Defines a connection to a data source.
structure Connection {
/// The name of the connection definition.
Name: NameString
/// The description of the connection.
Description: DescriptionString
/// The type of the connection. Currently, SFTP is not supported.
ConnectionType: ConnectionType
/// A list of criteria that can be used in selecting this connection.
MatchCriteria: MatchCriteria
/// These key-value pairs define parameters for the connection:
///
/// - HOST - The host URI: either the fully qualified domain name (FQDN) or the IPv4 address of the database host.
/// - PORT - The port number, between 1024 and 65535, of the port on which the database host is listening for database connections.
/// - USER_NAME - The name under which to log in to the database. The value string for USER_NAME is "USERNAME".
/// - PASSWORD - A password, if one is used, for the user name.
/// - ENCRYPTED_PASSWORD - When you enable connection password protection by setting ConnectionPasswordEncryption in the Data Catalog encryption settings, this field stores the encrypted password.
/// - JDBC_DRIVER_JAR_URI - The Amazon Simple Storage Service (Amazon S3) path of the JAR file that contains the JDBC driver to use.
/// - JDBC_DRIVER_CLASS_NAME - The class name of the JDBC driver to use.
/// - JDBC_ENGINE - The name of the JDBC engine to use.
/// - JDBC_ENGINE_VERSION - The version of the JDBC engine to use.
/// - CONFIG_FILES - (Reserved for future use.)
/// - INSTANCE_ID - The instance ID to use.
/// - JDBC_CONNECTION_URL - The URL for connecting to a JDBC data source.
/// - JDBC_ENFORCE_SSL - A Boolean string (true, false) specifying whether Secure Sockets Layer (SSL) with hostname matching is enforced for the JDBC connection on the client. The default is false.
/// - CUSTOM_JDBC_CERT - An Amazon S3 location specifying the customer's root certificate. Glue uses this root certificate to validate the customer's certificate when connecting to the customer database. Glue only handles X.509 certificates. The certificate provided must be DER-encoded and supplied in Base64 encoding PEM format.
/// - SKIP_CUSTOM_JDBC_CERT_VALIDATION - By default, this is false. Glue validates the Signature algorithm and Subject Public Key Algorithm for the customer certificate. The only permitted algorithms for the Signature algorithm are SHA256withRSA, SHA384withRSA or SHA512withRSA. For the Subject Public Key Algorithm, the key length must be at least 2048. You can set the value of this property to true to skip Glue's validation of the customer certificate.
/// - CUSTOM_JDBC_CERT_STRING - A custom JDBC certificate string which is used for domain match or distinguished name match to prevent a man-in-the-middle attack. In Oracle database, this is used as the SSL_SERVER_CERT_DN; in Microsoft SQL Server, this is used as the hostNameInCertificate.
/// - CONNECTION_URL - The URL for connecting to a general (non-JDBC) data source.
/// - SECRET_ID - The secret ID used for the secret manager of credentials.
/// - CONNECTOR_URL - The connector URL for a MARKETPLACE or CUSTOM connection.
/// - CONNECTOR_TYPE - The connector type for a MARKETPLACE or CUSTOM connection.
/// - CONNECTOR_CLASS_NAME - The connector class name for a MARKETPLACE or CUSTOM connection.
/// - KAFKA_BOOTSTRAP_SERVERS - A comma-separated list of host and port pairs that are the addresses of the Apache Kafka brokers in a Kafka cluster to which a Kafka client will connect to and bootstrap itself.
/// - KAFKA_SSL_ENABLED - Whether to enable or disable SSL on an Apache Kafka connection. Default value is "true".
/// - KAFKA_CUSTOM_CERT - The Amazon S3 URL for the private CA cert file (.pem format). The default is an empty string.
/// - KAFKA_SKIP_CUSTOM_CERT_VALIDATION - Whether to skip the validation of the CA cert file or not. Glue validates for three algorithms: SHA256withRSA, SHA384withRSA and SHA512withRSA. Default value is "false".
/// - KAFKA_CLIENT_KEYSTORE - The Amazon S3 location of the client keystore file for Kafka client side authentication (Optional).
/// - KAFKA_CLIENT_KEYSTORE_PASSWORD - The password to access the provided keystore (Optional).
/// - KAFKA_CLIENT_KEY_PASSWORD - A keystore can consist of multiple keys, so this is the password to access the client key to be used with the Kafka server side key (Optional).
/// - ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD - The encrypted version of the Kafka client keystore password (if the user has the Glue encrypt passwords setting selected).
/// - ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD - The encrypted version of the Kafka client key password (if the user has the Glue encrypt passwords setting selected).
/// - KAFKA_SASL_MECHANISM - "SCRAM-SHA-512", "GSSAPI", or "AWS_MSK_IAM". These are the supported SASL Mechanisms.
/// - KAFKA_SASL_SCRAM_USERNAME - A plaintext username used to authenticate with the "SCRAM-SHA-512" mechanism.
/// - KAFKA_SASL_SCRAM_PASSWORD - A plaintext password used to authenticate with the "SCRAM-SHA-512" mechanism.
/// - ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD - The encrypted version of the Kafka SASL SCRAM password (if the user has the Glue encrypt passwords setting selected).
/// - KAFKA_SASL_SCRAM_SECRETS_ARN - The Amazon Resource Name of a secret in Amazon Web Services Secrets Manager.
/// - KAFKA_SASL_GSSAPI_KEYTAB - The S3 location of a Kerberos keytab file. A keytab stores long-term keys for one or more principals. For more information, see MIT Kerberos Documentation: Keytab.
/// - KAFKA_SASL_GSSAPI_KRB5_CONF - The S3 location of a Kerberos krb5.conf file. A krb5.conf stores Kerberos configuration information, such as the location of the KDC server. For more information, see MIT Kerberos Documentation: krb5.conf.
/// - KAFKA_SASL_GSSAPI_SERVICE - The Kerberos service name, as set with sasl.kerberos.service.name in your Kafka Configuration.
/// - KAFKA_SASL_GSSAPI_PRINCIPAL - The name of the Kerberos principal used by Glue. For more information, see Kafka Documentation: Configuring Kafka Brokers.
ConnectionProperties: ConnectionProperties
/// A map of physical connection requirements, such as virtual private cloud (VPC) and
/// SecurityGroup, that are needed to make this connection successfully.
PhysicalConnectionRequirements: PhysicalConnectionRequirements
/// The time that this connection definition was created.
CreationTime: Timestamp
/// The last time that this connection definition was updated.
LastUpdatedTime: Timestamp
/// The user, group, or role that last updated this connection definition.
LastUpdatedBy: NameString
}
/// A structure that is used to specify a connection to create or update.
structure ConnectionInput {
/// The name of the connection. Connection will not function as expected without a name.
@required
Name: NameString
/// The description of the connection.
Description: DescriptionString
/// The type of the connection. Currently, these types are supported:
///
/// - JDBC - Designates a connection to a database through Java Database Connectivity (JDBC).
///   JDBC Connections use the following ConnectionParameters.
///   - Required: All of (HOST, PORT, JDBC_ENGINE) or JDBC_CONNECTION_URL.
///   - Required: All of (USERNAME, PASSWORD) or SECRET_ID.
///   - Optional: JDBC_ENFORCE_SSL, CUSTOM_JDBC_CERT, CUSTOM_JDBC_CERT_STRING, SKIP_CUSTOM_JDBC_CERT_VALIDATION. These parameters are used to configure SSL with JDBC.
/// - KAFKA - Designates a connection to an Apache Kafka streaming platform.
///   KAFKA Connections use the following ConnectionParameters.
///   - Required: KAFKA_BOOTSTRAP_SERVERS.
///   - Optional: KAFKA_SSL_ENABLED, KAFKA_CUSTOM_CERT, KAFKA_SKIP_CUSTOM_CERT_VALIDATION. These parameters are used to configure SSL with KAFKA.
///   - Optional: KAFKA_CLIENT_KEYSTORE, KAFKA_CLIENT_KEYSTORE_PASSWORD, KAFKA_CLIENT_KEY_PASSWORD, ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD, ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD. These parameters are used to configure TLS client configuration with SSL in KAFKA.
///   - Optional: KAFKA_SASL_MECHANISM. Can be specified as SCRAM-SHA-512, GSSAPI, or AWS_MSK_IAM.
///   - Optional: KAFKA_SASL_SCRAM_USERNAME, KAFKA_SASL_SCRAM_PASSWORD, ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD. These parameters are used to configure SASL/SCRAM-SHA-512 authentication with KAFKA.
///   - Optional: KAFKA_SASL_GSSAPI_KEYTAB, KAFKA_SASL_GSSAPI_KRB5_CONF, KAFKA_SASL_GSSAPI_SERVICE, KAFKA_SASL_GSSAPI_PRINCIPAL. These parameters are used to configure SASL/GSSAPI authentication with KAFKA.
/// - MONGODB - Designates a connection to a MongoDB document database.
///   MONGODB Connections use the following ConnectionParameters.
///   - Required: CONNECTION_URL.
///   - Required: All of (USERNAME, PASSWORD) or SECRET_ID.
/// - NETWORK - Designates a network connection to a data source within an Amazon Virtual Private Cloud environment (Amazon VPC).
///   NETWORK Connections do not require ConnectionParameters. Instead, provide a PhysicalConnectionRequirements.
/// - MARKETPLACE - Uses configuration settings contained in a connector purchased from Amazon Web Services Marketplace to read from and write to data stores that are not natively supported by Glue.
///   MARKETPLACE Connections use the following ConnectionParameters.
///   - Required: CONNECTOR_TYPE, CONNECTOR_URL, CONNECTOR_CLASS_NAME, CONNECTION_URL.
///   - Required for JDBC CONNECTOR_TYPE connections: All of (USERNAME, PASSWORD) or SECRET_ID.
/// - CUSTOM - Uses configuration settings contained in a custom connector to read from and write to data stores that are not natively supported by Glue.
///
/// SFTP is not supported.
/// For more information about how optional ConnectionProperties are used to configure features in Glue, consult Glue connection properties.
/// For more information about how optional ConnectionProperties are used to configure features in Glue Studio, consult Using connectors and connections.
@required
ConnectionType: ConnectionType
/// A list of criteria that can be used in selecting this connection.
MatchCriteria: MatchCriteria
/// These key-value pairs define parameters for the connection.
@required
ConnectionProperties: ConnectionProperties
/// A map of physical connection requirements, such as virtual private cloud (VPC) and
/// SecurityGroup, that are needed to successfully make this connection.
PhysicalConnectionRequirements: PhysicalConnectionRequirements
}
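// As an illustration (name, URL, and credentials are hypothetical), a minimal JDBC
// ConnectionInput following the required parameters listed above might look like:
// {
//   "Name": "my-jdbc-connection",
//   "ConnectionType": "JDBC",
//   "ConnectionProperties": {
//     "JDBC_CONNECTION_URL": "jdbc:mysql://host.example.com:3306/mydb",
//     "USERNAME": "admin",
//     "PASSWORD": "example-password"
//   }
// }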
/// The data structure used by the Data Catalog to encrypt the password as part of
/// CreateConnection or UpdateConnection and store it in the
/// ENCRYPTED_PASSWORD field in the connection properties. You can enable catalog
/// encryption or only password encryption.
/// When a CreateConnection request arrives containing a password, the Data
/// Catalog first encrypts the password using your KMS key. It then encrypts the whole
/// connection object again if catalog encryption is also enabled.
/// This encryption requires that you set KMS key permissions to enable or restrict access
/// on the password key according to your security requirements. For example, you might want only
/// administrators to have decrypt permission on the password key.
structure ConnectionPasswordEncryption {
/// When the ReturnConnectionPasswordEncrypted flag is set to "true", passwords remain encrypted in the responses of
/// GetConnection and GetConnections. This encryption takes effect independently from catalog encryption.
@required
ReturnConnectionPasswordEncrypted: Boolean = false
/// A KMS key that is used to encrypt the connection password.
/// If connection password protection is enabled, the caller of CreateConnection
/// and UpdateConnection needs at least kms:Encrypt permission on the
/// specified KMS key, to encrypt passwords before storing them in the Data Catalog.
/// You can set the decrypt permission to enable or restrict access on the password key according to your security requirements.
AwsKmsKeyId: NameString
}
/// Specifies the connections used by a job.
structure ConnectionsList {
/// A list of connections used by the job.
Connections: OrchestrationStringList
}
/// The details of a crawl in the workflow.
structure Crawl {
/// The state of the crawler.
State: CrawlState
/// The date and time on which the crawl started.
StartedOn: TimestampValue
/// The date and time on which the crawl completed.
CompletedOn: TimestampValue
/// The error message associated with the crawl.
ErrorMessage: DescriptionString
/// The log group associated with the crawl.
LogGroup: LogGroup
/// The log stream associated with the crawl.
LogStream: LogStream
}
/// Specifies a crawler program that examines a data source and uses classifiers to try to
/// determine its schema. If successful, the crawler records metadata concerning the data source
/// in the Glue Data Catalog.
structure Crawler {
/// The name of the crawler.
Name: NameString
/// The Amazon Resource Name (ARN) of an IAM role that's used to access customer resources,
/// such as Amazon Simple Storage Service (Amazon S3) data.
Role: Role
/// A collection of targets to crawl.
Targets: CrawlerTargets
/// The name of the database in which the crawler's output is stored.
DatabaseName: DatabaseName
/// A description of the crawler.
Description: DescriptionString
/// A list of UTF-8 strings that specify the custom classifiers that are associated
/// with the crawler.
Classifiers: ClassifierNameList
/// A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.
RecrawlPolicy: RecrawlPolicy
/// The policy that specifies update and delete behaviors for the crawler.
SchemaChangePolicy: SchemaChangePolicy
/// A configuration that specifies whether data lineage is enabled for the crawler.
LineageConfiguration: LineageConfiguration
/// Indicates whether the crawler is running, or whether a run is pending.
State: CrawlerState
/// The prefix added to the names of tables that are created.
TablePrefix: TablePrefix
/// For scheduled crawlers, the schedule when the crawler runs.
Schedule: Schedule
/// If the crawler is running, contains the total time elapsed since the last crawl
/// began.
CrawlElapsedTime: MillisecondsCount = 0
/// The time that the crawler was created.
CreationTime: Timestamp
/// The time that the crawler was last updated.
LastUpdated: Timestamp
/// The status of the last crawl, and potentially error information if
/// an error occurred.
LastCrawl: LastCrawlInfo
/// The version of the crawler.
Version: VersionId = 0
/// Crawler configuration information. This versioned JSON string allows users to specify
/// aspects of a crawler's behavior. For more information, see Setting crawler configuration options.
Configuration: CrawlerConfiguration
/// The name of the SecurityConfiguration structure to be used by this
/// crawler.
CrawlerSecurityConfiguration: CrawlerSecurityConfiguration
/// Specifies whether the crawler should use Lake Formation credentials for the crawler instead of the IAM role credentials.
LakeFormationConfiguration: LakeFormationConfiguration
}
/// Contains the information for a run of a crawler.
structure CrawlerHistory {
/// A UUID identifier for each crawl.
CrawlId: CrawlId
/// The state of the crawl.
State: CrawlerHistoryState
/// The date and time on which the crawl started.
StartTime: Timestamp
/// The date and time on which the crawl ended.
EndTime: Timestamp
/// A run summary for the specific crawl in JSON. Contains the catalog tables and partitions that were added, updated, or deleted.
Summary: NameString
/// If an error occurred, the error message associated with the crawl.
ErrorMessage: DescriptionString
/// The log group associated with the crawl.
LogGroup: LogGroup
/// The log stream associated with the crawl.
LogStream: LogStream
/// The prefix for a CloudWatch message about this crawl.
MessagePrefix: MessagePrefix
/// The number of data processing units (DPU) used in hours for the crawl.
DPUHour: NonNegativeDouble = 0
}
/// Metrics for a specified crawler.
structure CrawlerMetrics {
/// The name of the crawler.
CrawlerName: NameString
/// The estimated time left to complete a running crawl.
TimeLeftSeconds: NonNegativeDouble = 0
/// True if the crawler is still estimating how long it will take to complete this run.
StillEstimating: Boolean = false
/// The duration of the crawler's most recent run, in seconds.
LastRuntimeSeconds: NonNegativeDouble = 0
/// The median duration of this crawler's runs, in seconds.
MedianRuntimeSeconds: NonNegativeDouble = 0
/// The number of tables created by this crawler.
TablesCreated: NonNegativeInteger = 0
/// The number of tables updated by this crawler.
TablesUpdated: NonNegativeInteger = 0
/// The number of tables deleted by this crawler.
TablesDeleted: NonNegativeInteger = 0
}
/// The details of a Crawler node present in the workflow.
structure CrawlerNodeDetails {
/// A list of crawls represented by the crawl node.
Crawls: CrawlList
}
/// The specified crawler is not running.
@error("client")
structure CrawlerNotRunningException {
/// A message describing the problem.
Message: MessageString
}
/// The operation cannot be performed because the crawler is already running.
@error("client")
structure CrawlerRunningException {
/// A message describing the problem.
Message: MessageString
}
/// The specified crawler is stopping.
@error("client")
structure CrawlerStoppingException {
/// A message describing the problem.
Message: MessageString
}
/// Specifies data stores to crawl.
structure CrawlerTargets {
/// Specifies Amazon Simple Storage Service (Amazon S3) targets.
S3Targets: S3TargetList
/// Specifies JDBC targets.
JdbcTargets: JdbcTargetList
/// Specifies Amazon DocumentDB or MongoDB targets.
MongoDBTargets: MongoDBTargetList
/// Specifies Amazon DynamoDB targets.
DynamoDBTargets: DynamoDBTargetList
/// Specifies Glue Data Catalog targets.
CatalogTargets: CatalogTargetList
/// Specifies Delta data store targets.
DeltaTargets: DeltaTargetList
/// Specifies Apache Iceberg data store targets.
IcebergTargets: IcebergTargetList
/// Specifies Apache Hudi data store targets.
HudiTargets: HudiTargetList
}
/// A list of fields, comparators, and values that you can use to filter the crawler runs for a specified crawler.
structure CrawlsFilter {
/// A key used to filter the crawler runs for a specified crawler. Valid values for each of the field names are:
///
/// - CRAWL_ID: A string representing the UUID identifier for a crawl.
/// - STATE: A string representing the state of the crawl.
/// - START_TIME and END_TIME: The epoch timestamp in milliseconds.
/// - DPU_HOUR: The number of data processing unit (DPU) hours used for the crawl.
FieldName: FieldName
/// A defined comparator that operates on the value. The available operators are:
///
/// - GT: Greater than.
/// - GE: Greater than or equal to.
/// - LT: Less than.
/// - LE: Less than or equal to.
/// - EQ: Equal to.
/// - NE: Not equal to.
FilterOperator: FilterOperator
/// The value provided for comparison on the crawl field.
FieldValue: GenericString
}
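// As an illustration, a filter that selects crawls that consumed more than 2 DPU-hours could
// be expressed as:
// { "FieldName": "DPU_HOUR", "FilterOperator": "GT", "FieldValue": "2.0" }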
@input
structure CreateBlueprintRequest {
/// The name of the blueprint.
@required
Name: OrchestrationNameString
/// A description of the blueprint.
Description: Generic512CharString
/// Specifies a path in Amazon S3 where the blueprint is published.
@required
BlueprintLocation: OrchestrationS3Location
/// The tags to be applied to this blueprint.
Tags: TagsMap
}
@output
structure CreateBlueprintResponse {
/// Returns the name of the blueprint that was registered.
Name: NameString
}
@input
structure CreateClassifierRequest {
/// A GrokClassifier object specifying the classifier
/// to create.
GrokClassifier: CreateGrokClassifierRequest
/// An XMLClassifier object specifying the classifier
/// to create.
XMLClassifier: CreateXMLClassifierRequest
/// A JsonClassifier object specifying the classifier
/// to create.
JsonClassifier: CreateJsonClassifierRequest
/// A CsvClassifier object specifying the classifier
/// to create.
CsvClassifier: CreateCsvClassifierRequest
}
@output
structure CreateClassifierResponse {}
@input
structure CreateConnectionRequest {
/// The ID of the Data Catalog in which to create the connection. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// A ConnectionInput object defining the connection
/// to create.
@required
ConnectionInput: ConnectionInput
/// The tags you assign to the connection.
Tags: TagsMap
}
@output
structure CreateConnectionResponse {}
@input
structure CreateCrawlerRequest {
/// Name of the new crawler.
@required
Name: NameString
/// The IAM role or Amazon Resource Name (ARN) of an IAM role used by the new crawler to
/// access customer resources.
@required
Role: Role
/// The Glue database where results are written, such as:
/// arn:aws:daylight:us-east-1::database/sometable/*.
DatabaseName: DatabaseName
/// A description of the new crawler.
Description: DescriptionString
/// A list of collections of targets to crawl.
@required
Targets: CrawlerTargets
/// A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify:
/// cron(15 12 * * ? *).
Schedule: CronExpression
/// A list of custom classifiers that the user has registered. By default, all built-in
/// classifiers are included in a crawl, but these custom classifiers always override the default
/// classifiers for a given classification.
Classifiers: ClassifierNameList
/// The table prefix used for catalog tables that are created.
TablePrefix: TablePrefix
/// The policy for the crawler's update and deletion behavior.
SchemaChangePolicy: SchemaChangePolicy
/// A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.
RecrawlPolicy: RecrawlPolicy
/// Specifies data lineage configuration settings for the crawler.
LineageConfiguration: LineageConfiguration
/// Specifies Lake Formation configuration settings for the crawler.
LakeFormationConfiguration: LakeFormationConfiguration
/// Crawler configuration information. This versioned JSON
/// string allows users to specify aspects of a crawler's behavior.
/// For more information, see Setting crawler configuration options.
Configuration: CrawlerConfiguration
/// The name of the SecurityConfiguration structure to be used by this
/// crawler.
CrawlerSecurityConfiguration: CrawlerSecurityConfiguration
/// The tags to use with this crawler request. You may use tags to limit access to the
/// crawler. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer
/// guide.
Tags: TagsMap
}
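// As an illustration (name, role ARN, and S3 path are hypothetical; the S3Targets entry assumes
// the Path member of the S3Target structure), a minimal CreateCrawler request that runs daily
// at 12:15 UTC might look like:
// {
//   "Name": "sales-crawler",
//   "Role": "arn:aws:iam::123456789012:role/GlueCrawlerRole",
//   "DatabaseName": "sales_db",
//   "Targets": { "S3Targets": [ { "Path": "s3://example-bucket/sales/" } ] },
//   "Schedule": "cron(15 12 * * ? *)"
// }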
@output
structure CreateCrawlerResponse {}
/// Specifies a custom CSV classifier for CreateClassifier to create.
structure CreateCsvClassifierRequest {
/// The name of the classifier.
@required
Name: NameString
/// A custom symbol to denote what separates each column entry in the row.
Delimiter: CsvColumnDelimiter
/// A custom symbol to denote what combines content into a single column value. Must be different from the column delimiter.
QuoteSymbol: CsvQuoteSymbol
/// Indicates whether the CSV file contains a header.
ContainsHeader: CsvHeaderOption
/// A list of strings representing column names.
Header: CsvHeader
/// Specifies not to trim values before identifying the type of column values. The default value is true.
DisableValueTrimming: NullableBoolean
/// Enables the processing of files that contain only one column.
AllowSingleColumn: NullableBoolean
/// Enables the configuration of custom datatypes.
CustomDatatypeConfigured: NullableBoolean
/// Creates a list of supported custom datatypes.
CustomDatatypes: CustomDatatypes
/// Sets the SerDe for processing CSV in the classifier, which will be applied in the Data Catalog. Valid values are
/// OpenCSVSerDe, LazySimpleSerDe, and None. You can specify the None value when you want the crawler to do the detection.
Serde: CsvSerdeOption
}
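// As an illustration (name and header are hypothetical), a CSV classifier definition for
// pipe-delimited files with a known header row might look like:
// {
//   "Name": "pipe-delimited",
//   "Delimiter": "|",
//   "QuoteSymbol": "\"",
//   "ContainsHeader": "PRESENT",
//   "Header": ["id", "name", "amount"]
// }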
@input
structure CreateCustomEntityTypeRequest {
/// A name for the custom pattern that allows it to be retrieved or deleted later. This name must be unique per Amazon Web Services account.
@required
Name: NameString
/// A regular expression string that is used for detecting sensitive data in a custom pattern.
@required
RegexString: NameString
/// A list of context words. If none of these context words are found within the vicinity of the regular expression the data will not be detected as sensitive data.
/// If no context words are passed only a regular expression is checked.
ContextWords: ContextWords
/// A list of tags applied to the custom entity type.
Tags: TagsMap
}
@output
structure CreateCustomEntityTypeResponse {
/// The name of the custom pattern you created.
Name: NameString
}
@input
structure CreateDatabaseRequest {
/// The ID of the Data Catalog in which to create the database. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The metadata for the database.
@required
DatabaseInput: DatabaseInput
/// The tags you assign to the database.
Tags: TagsMap
}
@output
structure CreateDatabaseResponse {}
@input
structure CreateDataQualityRulesetRequest {
/// A unique name for the data quality ruleset.
@required
Name: NameString
/// A description of the data quality ruleset.
Description: DescriptionString
/// A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.
@required
Ruleset: DataQualityRulesetString
/// A list of tags applied to the data quality ruleset.
Tags: TagsMap
/// A target table associated with the data quality ruleset.
TargetTable: DataQualityTargetTable
/// Used for idempotency and is recommended to be set to a random ID (such as a UUID) to avoid creating or starting multiple instances of the same resource.
ClientToken: HashString
}
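// Illustrative only, not part of the model: a minimal CreateDataQualityRuleset request
// body (awsJson1_1). The ruleset name, DQDL rule, target table, and client token are
// placeholders; the ClientToken is simply a random UUID used for idempotency, as
// described above.
// {
//   "Name": "orders-completeness",
//   "Ruleset": "Rules = [ IsComplete \"order_id\" ]",
//   "TargetTable": { "DatabaseName": "sales", "TableName": "orders" },
//   "ClientToken": "4f2c1d0e-7a7b-4c1e-9a2d-5b6c7d8e9f00"
// }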
@output
structure CreateDataQualityRulesetResponse {
/// A unique name for the data quality ruleset.
Name: NameString
}
@input
structure CreateDevEndpointRequest {
/// The name to be assigned to the new DevEndpoint.
@required
EndpointName: GenericString
/// The IAM role for the DevEndpoint.
@required
RoleArn: RoleArn
/// Security group IDs for the security groups to be used by the new DevEndpoint.
SecurityGroupIds: StringList
/// The subnet ID for the new DevEndpoint to use.
SubnetId: GenericString
/// The public key to be used by this DevEndpoint for authentication. This
/// attribute is provided for backward compatibility because the recommended attribute to use is
/// public keys.
PublicKey: GenericString
/// A list of public keys to be used by the development endpoints for authentication. The use
/// of this attribute is preferred over a single public key because the public keys allow you to
/// have a different private key per client.
///
/// If you previously created an endpoint with a public key, you must remove that key to be able
/// to set a list of public keys. Call the UpdateDevEndpoint API with the public
/// key content in the deletePublicKeys attribute, and the list of new keys in the
/// addPublicKeys attribute (see the illustrative request after this structure).
///
PublicKeys: PublicKeysList
/// The number of Glue Data Processing Units (DPUs) to allocate to this DevEndpoint.
NumberOfNodes: IntegerValue = 0
/// The type of predefined worker that is allocated to the development endpoint. Accepts a value of Standard, G.1X, or G.2X.
///
/// - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.
///
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.
///
/// Known issue: when a development endpoint is created with the G.2X
/// WorkerType configuration, the Spark drivers for the development endpoint will run on 4 vCPU, 16 GB of memory, and a 64 GB disk.
WorkerType: WorkerType
/// Glue version determines the versions of Apache Spark and Python that Glue supports. The Python version indicates the version supported for running your ETL scripts on development endpoints.
/// For more information about the available Glue versions and corresponding Spark and Python versions, see Glue version in the developer guide.
/// Development endpoints that are created without specifying a Glue version default to Glue 0.9.
/// You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.
GlueVersion: GlueVersionString
/// The number of workers of a defined workerType that are allocated to the development endpoint.
/// The maximum number of workers you can define is 299 for G.1X, and 149 for G.2X.
NumberOfWorkers: NullableInteger
/// The paths to one or more Python libraries in an Amazon S3 bucket that should be loaded in
/// your DevEndpoint. Multiple values must be complete paths separated by a comma.
///
/// You can only use pure Python libraries with a DevEndpoint. Libraries that rely on
/// C extensions, such as the pandas Python data analysis library, are not yet supported.
///
ExtraPythonLibsS3Path: GenericString
/// The path to one or more Java .jar files in an S3 bucket that should be loaded in your DevEndpoint.
ExtraJarsS3Path: GenericString
/// The name of the SecurityConfiguration structure to be used with this DevEndpoint.
SecurityConfiguration: NameString
/// The tags to use with this DevEndpoint. You may use tags to limit access to the DevEndpoint. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer guide.
Tags: TagsMap
/// A map of arguments used to configure the DevEndpoint.
Arguments: MapValue
}
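// Illustrative only, not part of the model: the key-rotation call described for the
// PublicKeys member above, sketched as an awsJson1_1 UpdateDevEndpoint payload. The
// endpoint name and key material are placeholders; member casing follows the
// UpdateDevEndpoint request shape defined elsewhere in this model.
// {
//   "EndpointName": "my-dev-endpoint",
//   "DeletePublicKeys": ["ssh-rsa AAAA...old-key"],
//   "AddPublicKeys": ["ssh-rsa AAAA...key-client-a", "ssh-rsa AAAA...key-client-b"]
// }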
@output
structure CreateDevEndpointResponse {
/// The name assigned to the new DevEndpoint.
EndpointName: GenericString
/// The current status of the new DevEndpoint.
Status: GenericString
/// The security groups assigned to the new DevEndpoint.
SecurityGroupIds: StringList
/// The subnet ID assigned to the new DevEndpoint.
SubnetId: GenericString
/// The Amazon Resource Name (ARN) of the role assigned to the new DevEndpoint.
RoleArn: RoleArn
/// The address of the YARN endpoint used by this DevEndpoint.
YarnEndpointAddress: GenericString
/// The Apache Zeppelin port for the remote Apache Spark interpreter.
ZeppelinRemoteSparkInterpreterPort: IntegerValue = 0
/// The number of Glue Data Processing Units (DPUs) allocated to this DevEndpoint.
NumberOfNodes: IntegerValue = 0
/// The type of predefined worker that is allocated to the development endpoint. May be a value of Standard, G.1X, or G.2X.
WorkerType: WorkerType
/// Glue version determines the versions of Apache Spark and Python that Glue supports. The Python version indicates the version supported for running your ETL scripts on development endpoints.
/// For more information about the available Glue versions and corresponding Spark and Python versions, see Glue version in the developer guide.
GlueVersion: GlueVersionString
/// The number of workers of a defined workerType that are allocated to the development endpoint.
NumberOfWorkers: NullableInteger
/// The Amazon Web Services Availability Zone where this DevEndpoint is located.
AvailabilityZone: GenericString
/// The ID of the virtual private cloud (VPC) used by this DevEndpoint.
VpcId: GenericString
/// The paths to one or more Python libraries in an S3 bucket that will be loaded in your DevEndpoint.
ExtraPythonLibsS3Path: GenericString
/// Path to one or more Java .jar files in an S3 bucket that will be loaded in your DevEndpoint.
ExtraJarsS3Path: GenericString
/// The reason for a current failure in this DevEndpoint.
FailureReason: GenericString
/// The name of the SecurityConfiguration structure being used with this DevEndpoint.
SecurityConfiguration: NameString
/// The point in time at which this DevEndpoint was created.
CreatedTimestamp: TimestampValue
/// The map of arguments used to configure this DevEndpoint.
/// Valid arguments are:
///
/// - "--enable-glue-datacatalog": ""
///
/// You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.
Arguments: MapValue
}
/// Specifies a grok classifier for CreateClassifier to create.
structure CreateGrokClassifierRequest {
/// An identifier of the data format that the classifier matches,
/// such as Twitter, JSON, Omniture logs, Amazon CloudWatch Logs, and so on.
@required
Classification: Classification
/// The name of the new classifier.
@required
Name: NameString
/// The grok pattern used by this classifier.
@required
GrokPattern: GrokPattern
/// Optional custom grok patterns used by this classifier.
CustomPatterns: CustomPatterns
}
@input
structure CreateJobRequest {
/// The name you assign to this job definition. It must be unique in your account.
@required
Name: NameString
/// Description of the job being defined.
Description: DescriptionString
/// This field is reserved for future use.
LogUri: UriString
/// The name or Amazon Resource Name (ARN) of the IAM role associated with this job.
@required
Role: RoleString
/// An ExecutionProperty specifying the maximum number of concurrent runs allowed for this job.
ExecutionProperty: ExecutionProperty
/// The JobCommand that runs this job.
@required
Command: JobCommand
/// The default arguments for every run of this job, specified as name-value pairs.
/// You can specify arguments here that your own job-execution script
/// consumes, as well as arguments that Glue itself consumes.
/// Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets
/// from a Glue Connection, Secrets Manager or other secret management
/// mechanism if you intend to keep them within the Job.
/// For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Spark jobs,
/// see the Special Parameters Used by Glue topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Ray
/// jobs, see Using
/// job parameters in Ray jobs in the developer guide.
DefaultArguments: GenericMap
/// Arguments for this job that are not overridden when providing job arguments
/// in a job run, specified as name-value pairs.
NonOverridableArguments: GenericMap
/// The connections used for this job.
Connections: ConnectionsList
/// The maximum number of times to retry this job if it fails.
MaxRetries: MaxRetries = 0
/// This parameter is deprecated. Use MaxCapacity instead.
/// The number of Glue data processing units (DPUs) to allocate to this Job. You can
/// allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing
/// power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more information,
/// see the Glue pricing
/// page.
@deprecated(
message: "This property is deprecated, use MaxCapacity instead."
)
AllocatedCapacity: IntegerValue = 0
/// The job timeout in minutes. This is the maximum time that a job run
/// can consume resources before it is terminated and enters TIMEOUT
/// status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// For Glue version 1.0 or earlier jobs, using the standard worker type, the number of
/// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is
/// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB
/// of memory. For more information, see the
/// Glue pricing page.
/// For Glue version 2.0+ jobs, you cannot specify a Maximum capacity.
/// Instead, you should specify a Worker type and the Number of workers.
/// Do not set MaxCapacity if using WorkerType and NumberOfWorkers.
/// The value that can be allocated for MaxCapacity depends on whether you are
/// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL job:
///
/// - When you specify a Python shell job (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
///
/// - When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a fractional DPU allocation.
MaxCapacity: NullableDouble
/// The name of the SecurityConfiguration structure to be used with this job.
SecurityConfiguration: NameString
/// The tags to use with this job. You may use tags to limit access to the job. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer guide.
Tags: TagsMap
/// Specifies configuration properties of a job notification.
NotificationProperty: NotificationProperty
/// In Spark jobs, GlueVersion determines the versions of Apache Spark and Python
/// that Glue makes available in a job. The Python version indicates the version
/// supported for jobs of type Spark.
/// Ray jobs should set GlueVersion to 4.0 or greater. However,
/// the versions of Ray, Python and additional libraries available in your Ray job are determined
/// by the Runtime parameter of the Job command.
/// For more information about the available Glue versions and corresponding
/// Spark and Python versions, see Glue version in the developer guide.
/// Jobs that are created without specifying a Glue version default to Glue 0.9.
GlueVersion: GlueVersionString
/// The number of workers of a defined workerType that are allocated when a job runs.
NumberOfWorkers: NullableInteger
/// The type of predefined worker that is allocated when a job runs. Accepts a value of
/// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.
///
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.
///
/// - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
///
/// - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
///
/// - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
///
/// - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType
/// The representation of a directed acyclic graph on which both the Glue Studio visual component and Glue Studio code generation are based.
CodeGenConfigurationNodes: CodeGenConfigurationNodes
/// Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.
/// The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.
/// Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.
ExecutionClass: ExecutionClass
/// The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.
SourceControlDetails: SourceControlDetails
}
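// Illustrative only, not part of the model: a minimal CreateJob request body
// (awsJson1_1) using the fields documented above. The job name, script location, and
// role are placeholders. WorkerType/NumberOfWorkers are used instead of MaxCapacity,
// since the two options are mutually exclusive.
// {
//   "Name": "nightly-etl",
//   "Role": "arn:aws:iam::123456789012:role/GlueJobRole",
//   "Command": { "Name": "glueetl", "ScriptLocation": "s3://my-bucket/scripts/nightly.py", "PythonVersion": "3" },
//   "GlueVersion": "4.0",
//   "WorkerType": "G.1X",
//   "NumberOfWorkers": 10,
//   "DefaultArguments": { "--TempDir": "s3://my-bucket/temp/" },
//   "MaxRetries": 1,
//   "Timeout": 120
// }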
@output
structure CreateJobResponse {
/// The unique name that was provided for this job definition.
Name: NameString
}
/// Specifies a JSON classifier for CreateClassifier to create.
structure CreateJsonClassifierRequest {
/// The name of the classifier.
@required
Name: NameString
/// A JsonPath string defining the JSON data for the classifier to classify.
/// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom Classifiers.
@required
JsonPath: JsonPath
}
@input
structure CreateMLTransformRequest {
/// The unique name that you give the transform when you create it.
@required
Name: NameString
/// A description of the machine learning transform that is being defined. The default is an
/// empty string.
Description: DescriptionString
/// A list of Glue table definitions used by the transform.
@required
InputRecordTables: GlueTables
/// The algorithmic parameters that are specific to the transform type used. Conditionally
/// dependent on the transform type.
@required
Parameters: TransformParameters
/// The name or Amazon Resource Name (ARN) of the IAM role with the required permissions. The required permissions include both Glue service role permissions to Glue resources, and Amazon S3 permissions required by the transform.
///
/// - This role needs Glue service role permissions to allow access to resources in Glue. See Attach a Policy to IAM Users That Access Glue.
///
/// - This role needs permission to your Amazon Simple Storage Service (Amazon S3) sources, targets, temporary directory, scripts, and any libraries used by the task run for this transform.
@required
Role: RoleString
/// This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.
GlueVersion: GlueVersionString
/// The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of
/// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more
/// information, see the Glue pricing page.
///
/// MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.
///
/// - If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
///
/// - If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set.
///
/// - If WorkerType is set, then NumberOfWorkers is required (and vice versa).
///
/// - MaxCapacity and NumberOfWorkers must both be at least 1.
///
/// When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when this task runs. Accepts a value of Standard, G.1X, or G.2X.
///
/// - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
///
/// - For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
///
/// - For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.
///
/// MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.
///
/// - If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
///
/// - If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set.
///
/// - If WorkerType is set, then NumberOfWorkers is required (and vice versa).
///
/// - MaxCapacity and NumberOfWorkers must both be at least 1.
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when this task runs.
/// If WorkerType is set, then NumberOfWorkers is required (and vice versa).
NumberOfWorkers: NullableInteger
/// The timeout of the task run for this transform in minutes. This is the maximum time that a task run for this transform can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// The maximum number of times to retry a task for this transform after a task run fails.
MaxRetries: NullableInteger
/// The tags to use with this machine learning transform. You may use tags to limit access to the machine learning transform. For more information about tags in Glue, see Amazon Web Services Tags in Glue in the developer guide.
Tags: TagsMap
/// The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.
TransformEncryption: TransformEncryption
}
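// Illustrative only, not part of the model: a skeletal CreateMLTransform request
// (awsJson1_1) that follows the MaxCapacity/WorkerType exclusivity rules above by
// setting WorkerType and NumberOfWorkers and omitting MaxCapacity. The table, role,
// and parameter values are placeholders.
// {
//   "Name": "dedupe-customers",
//   "Role": "arn:aws:iam::123456789012:role/GlueMLRole",
//   "InputRecordTables": [{ "DatabaseName": "crm", "TableName": "customers" }],
//   "Parameters": { "TransformType": "FIND_MATCHES", "FindMatchesParameters": { "PrimaryKeyColumnName": "customer_id" } },
//   "GlueVersion": "1.0",
//   "WorkerType": "G.1X",
//   "NumberOfWorkers": 5
// }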
@output
structure CreateMLTransformResponse {
/// A unique identifier that is generated for the transform.
TransformId: HashString
}
@input
structure CreatePartitionIndexRequest {
/// The catalog ID where the table resides.
CatalogId: CatalogIdString
/// Specifies the name of a database in which you want to create a partition index.
@required
DatabaseName: NameString
/// Specifies the name of a table in which you want to create a partition index.
@required
TableName: NameString
/// Specifies a PartitionIndex structure to create a partition index in an existing table.
@required
PartitionIndex: PartitionIndex
}
@output
structure CreatePartitionIndexResponse {}
@input
structure CreatePartitionRequest {
/// The Amazon Web Services account ID of the catalog in which the partition is to be created.
CatalogId: CatalogIdString
/// The name of the metadata database in which the partition is
/// to be created.
@required
DatabaseName: NameString
/// The name of the metadata table in which the partition is to be created.
@required
TableName: NameString
/// A PartitionInput structure defining the partition to be created.
@required
PartitionInput: PartitionInput
}
@output
structure CreatePartitionResponse {}
@output
structure CreateRegistryResponse {
/// The Amazon Resource Name (ARN) of the newly created registry.
RegistryArn: GlueResourceArn
/// The name of the registry.
RegistryName: SchemaRegistryNameString
/// A description of the registry.
Description: DescriptionString
/// The tags for the registry.
Tags: TagsMap
}
@output
structure CreateSchemaResponse {
/// The name of the registry.
RegistryName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the registry.
RegistryArn: GlueResourceArn
/// The name of the schema.
SchemaName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// A description of the schema if specified when created.
Description: DescriptionString
/// The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.
DataFormat: DataFormat
/// The schema compatibility mode.
Compatibility: Compatibility
/// The version number of the checkpoint (the last time the compatibility mode was changed).
SchemaCheckpoint: SchemaCheckpointNumber = 0
/// The latest version of the schema associated with the returned schema definition.
LatestSchemaVersion: VersionLongNumber = 0
/// The next version of the schema associated with the returned schema definition.
NextSchemaVersion: VersionLongNumber = 0
/// The status of the schema.
SchemaStatus: SchemaStatus
/// The tags for the schema.
Tags: TagsMap
/// The unique identifier of the first schema version.
SchemaVersionId: SchemaVersionIdString
/// The status of the first schema version created.
SchemaVersionStatus: SchemaVersionStatus
}
@input
structure CreateScriptRequest {
/// A list of the nodes in the DAG.
DagNodes: DagNodes
/// A list of the edges in the DAG.
DagEdges: DagEdges
/// The programming language of the resulting code from the DAG.
Language: Language
}
@output
structure CreateScriptResponse {
/// The Python script generated from the DAG.
PythonScript: PythonScript
/// The Scala code generated from the DAG.
ScalaCode: ScalaCode
}
@input
structure CreateSecurityConfigurationRequest {
/// The name for the new security configuration.
@required
Name: NameString
/// The encryption configuration for the new security configuration.
@required
EncryptionConfiguration: EncryptionConfiguration
}
@output
structure CreateSecurityConfigurationResponse {
/// The name assigned to the new security configuration.
Name: NameString
/// The time at which the new security configuration was created.
CreatedTimestamp: TimestampValue
}
/// Request to create a new session.
@input
structure CreateSessionRequest {
/// The ID of the session request.
@required
Id: NameString
/// The description of the session.
Description: DescriptionString
/// The IAM Role ARN.
@required
Role: OrchestrationRoleArn
/// The SessionCommand that runs the job.
@required
Command: SessionCommand
///
/// The number of minutes before session times out. Default for Spark ETL
/// jobs is 48 hours (2880 minutes), the maximum session lifetime for this job type.
/// Consult the documentation for other job types.
///
Timeout: Timeout
///
/// The number of minutes when idle before session times out. Default for
/// Spark ETL jobs is value of Timeout. Consult the documentation
/// for other job types.
///
IdleTimeout: Timeout
/// A map of key-value pairs. The maximum is 75 pairs.
DefaultArguments: OrchestrationArgumentsMap
/// The connections to use for the session.
Connections: ConnectionsList
/// The number of Glue data processing units (DPUs) that can be allocated when the job runs.
/// A DPU is a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB memory.
MaxCapacity: NullableDouble
/// The number of workers of a defined WorkerType to use for the session.
NumberOfWorkers: NullableInteger
/// The type of predefined worker that is allocated when a job runs. Accepts a value of
/// G.1X, G.2X, G.4X, or G.8X for Spark jobs. Accepts the value Z.2X for Ray notebooks.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.
///
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, offering a scalable and cost-effective way to run most jobs.
///
/// - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
///
/// - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
///
/// - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType
/// The name of the SecurityConfiguration structure to be used with the session
SecurityConfiguration: NameString
/// The Glue version determines the versions of Apache Spark and Python that Glue supports.
/// The GlueVersion must be greater than 2.0.
GlueVersion: GlueVersionString
/// The map of key value pairs (tags) belonging to the session.
Tags: TagsMap
/// The origin of the request.
RequestOrigin: OrchestrationNameString
}
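// Illustrative only, not part of the model: a minimal CreateSession request body
// (awsJson1_1). The session ID, role, and sizing values are placeholders; the
// GlueVersion must be greater than 2.0, as noted above.
// {
//   "Id": "analysis-session-1",
//   "Role": "arn:aws:iam::123456789012:role/GlueSessionRole",
//   "Command": { "Name": "glueetl", "PythonVersion": "3" },
//   "GlueVersion": "4.0",
//   "WorkerType": "G.1X",
//   "NumberOfWorkers": 2,
//   "IdleTimeout": 60
// }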
@output
structure CreateSessionResponse {
/// Returns the session object in the response.
Session: Session
}
@input
structure CreateTableRequest {
/// The ID of the Data Catalog in which to create the Table.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The catalog database in which to create the new table. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The TableInput object that defines the metadata table to create in the catalog.
@required
TableInput: TableInput
/// A list of partition indexes, PartitionIndex structures, to create in the table.
PartitionIndexes: PartitionIndexList
/// The ID of the transaction.
TransactionId: TransactionIdString
/// Specifies an OpenTableFormatInput structure when creating an open format table.
OpenTableFormatInput: OpenTableFormatInput
}
@output
structure CreateTableResponse {}
@input
structure CreateTriggerRequest {
/// The name of the trigger.
@required
Name: NameString
/// The name of the workflow associated with the trigger.
WorkflowName: NameString
/// The type of the new trigger.
@required
Type: TriggerType
/// A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
/// This field is required when the trigger type is SCHEDULED.
Schedule: GenericString
/// A predicate to specify when the new trigger should fire.
/// This field is required when the trigger type is CONDITIONAL.
Predicate: Predicate
/// The actions initiated by this trigger when it fires.
@required
Actions: ActionList
/// A description of the new trigger.
Description: DescriptionString
/// Set to true to start SCHEDULED and CONDITIONAL
/// triggers when created. True is not supported for ON_DEMAND triggers.
StartOnCreation: BooleanValue = false
/// The tags to use with this trigger. You may use tags to limit access to the trigger.
/// For more information about tags in Glue, see
/// Amazon Web Services Tags in Glue in the developer guide.
Tags: TagsMap
/// Batch condition that must be met (specified number of events received or batch time window expired)
/// before EventBridge event trigger fires.
EventBatchingCondition: EventBatchingCondition
}
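// Illustrative only, not part of the model: a minimal scheduled CreateTrigger request
// body (awsJson1_1) using the cron example documented above. The trigger and job
// names are placeholders.
// {
//   "Name": "daily-1215-utc",
//   "Type": "SCHEDULED",
//   "Schedule": "cron(15 12 * * ? *)",
//   "Actions": [{ "JobName": "nightly-etl" }],
//   "StartOnCreation": true
// }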
@output
structure CreateTriggerResponse {
/// The name of the trigger.
Name: NameString
}
@input
structure CreateUserDefinedFunctionRequest {
/// The ID of the Data Catalog in which to create the function. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which to create the function.
@required
DatabaseName: NameString
/// A FunctionInput object that defines the function to create in the Data Catalog.
@required
FunctionInput: UserDefinedFunctionInput
}
@output
structure CreateUserDefinedFunctionResponse {}
@input
structure CreateWorkflowRequest {
/// The name to be assigned to the workflow. It should be unique within your account.
@required
Name: NameString
/// A description of the workflow.
Description: GenericString
/// A collection of properties to be used as part of each execution of the workflow.
DefaultRunProperties: WorkflowRunProperties
/// The tags to be used with this workflow.
Tags: TagsMap
/// You can use this parameter to prevent unwanted multiple updates to data, to control costs, or in some cases, to prevent exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs.
MaxConcurrentRuns: NullableInteger
}
@output
structure CreateWorkflowResponse {
/// The name of the workflow which was provided as part of the request.
Name: NameString
}
/// Specifies an XML classifier for CreateClassifier to create.
structure CreateXMLClassifierRequest {
/// An identifier of the data format that the classifier matches.
@required
Classification: Classification
/// The name of the classifier.
@required
Name: NameString
/// The XML tag designating the element that contains each record in an XML document being
/// parsed. This can't identify a self-closing element (closed by />). An empty
/// row element that contains only attributes can be parsed as long as it ends with a closing tag
/// (see the illustrative example after this structure).
RowTag: RowTag
}
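// Illustrative only, not part of the model: with RowTag set to "row", an empty record
// written with an explicit closing tag such as <row item_a="A" item_b="B"></row> can be
// parsed, while the self-closing form <row item_a="A" item_b="B"/> cannot. The element
// and attribute names are placeholders.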
/// A classifier for custom CSV content.
structure CsvClassifier {
/// The name of the classifier.
@required
Name: NameString
/// The time that this classifier was registered.
CreationTime: Timestamp
/// The time that this classifier was last updated.
LastUpdated: Timestamp
/// The version of this classifier.
Version: VersionId = 0
/// A custom symbol to denote what separates each column entry in the row.
Delimiter: CsvColumnDelimiter
/// A custom symbol to denote what combines content into a single column value. It must be
/// different from the column delimiter.
QuoteSymbol: CsvQuoteSymbol
/// Indicates whether the CSV file contains a header.
ContainsHeader: CsvHeaderOption
/// A list of strings representing column names.
Header: CsvHeader
/// Specifies not to trim values before identifying the type of column values. The default value is true.
DisableValueTrimming: NullableBoolean
/// Enables the processing of files that contain only one column.
AllowSingleColumn: NullableBoolean
/// Enables the custom datatype to be configured.
CustomDatatypeConfigured: NullableBoolean
/// A list of custom datatypes including "BINARY", "BOOLEAN", "DATE", "DECIMAL", "DOUBLE", "FLOAT", "INT", "LONG", "SHORT", "STRING", "TIMESTAMP".
CustomDatatypes: CustomDatatypes
/// Sets the SerDe for processing CSV in the classifier, which will be applied in the Data Catalog. Valid values are OpenCSVSerDe, LazySimpleSerDe, and None. You can specify the None value when you want the crawler to do the detection.
Serde: CsvSerdeOption
}
/// Specifies a transform that uses custom code you provide to perform the data transformation. The output is a collection of DynamicFrames.
structure CustomCode {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: ManyInputs
/// The custom code that is used to perform the data transformation.
@required
Code: ExtendedString
/// The name defined for the custom code node class.
@required
ClassName: EnclosedInStringProperty
/// Specifies the data schema for the custom code transform.
OutputSchemas: GlueSchemas
}
/// An object representing a custom pattern for detecting sensitive data across the columns and rows of your structured data.
structure CustomEntityType {
/// A name for the custom pattern that allows it to be retrieved or deleted later. This name must be unique per Amazon Web Services account.
@required
Name: NameString
/// A regular expression string that is used for detecting sensitive data in a custom pattern.
@required
RegexString: NameString
/// A list of context words. If none of these context words are found within the vicinity of the regular expression the data will not be detected as sensitive data.
/// If no context words are passed only a regular expression is checked.
ContextWords: ContextWords
}
/// The Database object represents a logical grouping of tables that might reside
/// in a Hive metastore or an RDBMS.
structure Database {
/// The name of the database. For Hive compatibility, this is folded to lowercase when it is
/// stored.
@required
Name: NameString
/// A description of the database.
Description: DescriptionString
/// The location of the database (for example, an HDFS path).
LocationUri: URI
/// These key-value pairs define parameters and properties
/// of the database.
Parameters: ParametersMap
/// The time at which the metadata database was created in the catalog.
CreateTime: Timestamp
/// Creates a set of default permissions on the table for principals. Used by Lake Formation. Not used in the normal course of Glue operations.
CreateTableDefaultPermissions: PrincipalPermissionsList
/// A DatabaseIdentifier structure that describes a target database for resource linking.
TargetDatabase: DatabaseIdentifier
/// The ID of the Data Catalog in which the database resides.
CatalogId: CatalogIdString
/// A FederatedDatabase structure that references an entity outside the Glue Data Catalog.
FederatedDatabase: FederatedDatabase
}
/// A structure that describes a target database for resource linking.
structure DatabaseIdentifier {
/// The ID of the Data Catalog in which the database resides.
CatalogId: CatalogIdString
/// The name of the catalog database.
DatabaseName: NameString
/// Region of the target database.
Region: NameString
}
/// The structure used to create or update a database.
structure DatabaseInput {
/// The name of the database. For Hive compatibility, this is folded to lowercase when it is
/// stored.
@required
Name: NameString
/// A description of the database.
Description: DescriptionString
/// The location of the database (for example, an HDFS path).
LocationUri: URI
/// These key-value pairs define parameters and properties of the database.
Parameters: ParametersMap
/// Creates a set of default permissions on the table for principals. Used by Lake Formation. Not used in the normal course of Glue operations.
CreateTableDefaultPermissions: PrincipalPermissionsList
/// A DatabaseIdentifier structure that describes a target database for resource linking.
TargetDatabase: DatabaseIdentifier
/// A FederatedDatabase structure that references an entity outside the Glue Data Catalog.
FederatedDatabase: FederatedDatabase
}
/// Contains configuration information for maintaining Data Catalog security.
structure DataCatalogEncryptionSettings {
/// Specifies the encryption-at-rest configuration for the Data Catalog.
EncryptionAtRest: EncryptionAtRest
/// When connection password protection is enabled, the Data Catalog uses a customer-provided
/// key to encrypt the password as part of CreateConnection or
/// UpdateConnection and store it in the ENCRYPTED_PASSWORD field in
/// the connection properties. You can enable catalog encryption or only password encryption.
ConnectionPasswordEncryption: ConnectionPasswordEncryption
}
/// The Lake Formation principal.
structure DataLakePrincipal {
/// An identifier for the Lake Formation principal.
DataLakePrincipalIdentifier: DataLakePrincipalString
}
/// Additional run options you can specify for an evaluation run.
structure DataQualityEvaluationRunAdditionalRunOptions {
/// Whether or not to enable CloudWatch metrics.
CloudWatchMetricsEnabled: NullableBoolean
/// Prefix for Amazon S3 to store results.
ResultsS3Prefix: UriString
}
/// Describes a data quality result.
structure DataQualityResult {
/// A unique result ID for the data quality result.
ResultId: HashString
/// An aggregate data quality score. Represents the ratio of rules that passed to the total number of rules.
Score: GenericBoundedDouble
/// The table associated with the data quality result, if any.
DataSource: DataSource
/// The name of the ruleset associated with the data quality result.
RulesetName: NameString
/// In the context of a job in Glue Studio, each node in the canvas is typically assigned some sort of name and data quality nodes will have names. In the case of multiple nodes, the evaluationContext can differentiate the nodes.
EvaluationContext: GenericString
/// The date and time when this data quality run started.
StartedOn: Timestamp
/// The date and time when this data quality run completed.
CompletedOn: Timestamp
/// The job name associated with the data quality result, if any.
JobName: NameString
/// The job run ID associated with the data quality result, if any.
JobRunId: HashString
/// The unique run ID for the ruleset evaluation for this data quality result.
RulesetEvaluationRunId: HashString
/// A list of DataQualityRuleResult objects representing the results for each rule.
RuleResults: DataQualityRuleResults
}
/// Describes a data quality result.
structure DataQualityResultDescription {
/// The unique result ID for this data quality result.
ResultId: HashString
/// The table name associated with the data quality result.
DataSource: DataSource
/// The job name associated with the data quality result.
JobName: NameString
/// The job run ID associated with the data quality result.
JobRunId: HashString
/// The time that the run started for this data quality result.
StartedOn: Timestamp
}
/// Criteria used to return data quality results.
structure DataQualityResultFilterCriteria {
/// Filter results by the specified data source. For example, retrieving all results for a Glue table.
DataSource: DataSource
/// Filter results by the specified job name.
JobName: NameString
/// Filter results by the specified job run ID.
JobRunId: HashString
/// Filter results by runs that started after this time.
StartedAfter: Timestamp
/// Filter results by runs that started before this time.
StartedBefore: Timestamp
}
/// Describes the result of a data quality rule recommendation run.
structure DataQualityRuleRecommendationRunDescription {
/// The unique run identifier associated with this run.
RunId: HashString
/// The status for this run.
Status: TaskStatusType
/// The date and time when this run started.
StartedOn: Timestamp
/// The data source (Glue table) associated with the recommendation run.
DataSource: DataSource
}
/// A filter for listing data quality recommendation runs.
structure DataQualityRuleRecommendationRunFilter {
/// Filter based on a specified data source (Glue table).
@required
DataSource: DataSource
/// Filter based on time for results started before provided time.
StartedBefore: Timestamp
/// Filter based on time for results started after provided time.
StartedAfter: Timestamp
}
/// Describes the result of the evaluation of a data quality rule.
structure DataQualityRuleResult {
/// The name of the data quality rule.
Name: NameString
/// A description of the data quality rule.
Description: DescriptionString
/// An evaluation message.
EvaluationMessage: DescriptionString
/// A pass or fail status for the rule.
Result: DataQualityRuleResultStatus
/// A map of metrics associated with the evaluation of the rule.
EvaluatedMetrics: EvaluatedMetricsMap
}
/// Describes the result of a data quality ruleset evaluation run.
structure DataQualityRulesetEvaluationRunDescription {
/// The unique run identifier associated with this run.
RunId: HashString
/// The status for this run.
Status: TaskStatusType
/// The date and time when the run started.
StartedOn: Timestamp
/// The data source (a Glue table) associated with the run.
DataSource: DataSource
}
/// The filter criteria.
structure DataQualityRulesetEvaluationRunFilter {
/// Filter based on a data source (a Glue table) associated with the run.
@required
DataSource: DataSource
/// Filter results by runs that started before this time.
StartedBefore: Timestamp
/// Filter results by runs that started after this time.
StartedAfter: Timestamp
}
/// The criteria used to filter data quality rulesets.
structure DataQualityRulesetFilterCriteria {
/// The name of the ruleset filter criteria.
Name: NameString
/// The description of the ruleset filter criteria.
Description: DescriptionString
/// Filter on rulesets created before this date.
CreatedBefore: Timestamp
/// Filter on rulesets created after this date.
CreatedAfter: Timestamp
/// Filter on rulesets last modified before this date.
LastModifiedBefore: Timestamp
/// Filter on rulesets last modified after this date.
LastModifiedAfter: Timestamp
/// The name and database name of the target table.
TargetTable: DataQualityTargetTable
}
/// Describes a data quality ruleset returned by GetDataQualityRuleset.
structure DataQualityRulesetListDetails {
/// The name of the data quality ruleset.
Name: NameString
/// A description of the data quality ruleset.
Description: DescriptionString
/// The date and time the data quality ruleset was created.
CreatedOn: Timestamp
/// The date and time the data quality ruleset was last modified.
LastModifiedOn: Timestamp
/// An object representing a Glue table.
TargetTable: DataQualityTargetTable
/// When a ruleset was created from a recommendation run, this run ID is generated to link the two together.
RecommendationRunId: HashString
/// The number of rules in the ruleset.
RuleCount: NullableInteger
}
/// An object representing a Glue table.
structure DataQualityTargetTable {
/// The name of the Glue table.
@required
TableName: NameString
/// The name of the database where the Glue table exists.
@required
DatabaseName: NameString
/// The catalog id where the Glue table exists.
CatalogId: NameString
}
/// A data source (a Glue table) for which you want data quality results.
structure DataSource {
/// A Glue table.
@required
GlueTable: GlueTable
}
/// A structure representing the datatype of the value.
structure Datatype {
/// The datatype of the value.
@required
Id: GenericLimitedString
/// A label assigned to the datatype.
@required
Label: GenericLimitedString
}
/// Defines column statistics supported for timestamp data columns.
structure DateColumnStatisticsData {
/// The lowest value in the column.
MinimumValue: Timestamp
/// The highest value in the column.
MaximumValue: Timestamp
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
/// The number of distinct values in a column.
@required
NumberOfDistinctValues: NonNegativeLong = 0
}
/// Defines column statistics supported for fixed-point number data columns.
structure DecimalColumnStatisticsData {
/// The lowest value in the column.
MinimumValue: DecimalNumber
/// The highest value in the column.
MaximumValue: DecimalNumber
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
/// The number of distinct values in a column.
@required
NumberOfDistinctValues: NonNegativeLong = 0
}
/// Contains a numeric value in decimal format.
structure DecimalNumber {
/// The unscaled numeric value.
@required
UnscaledValue: Blob
/// The scale that determines where the decimal point falls in the
/// unscaled value.
@required
Scale: Integer = 0
}
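// Worked example (illustrative): a DecimalNumber encodes a value as
// UnscaledValue x 10^(-Scale). For instance, an unscaled value of 12345 with
// Scale = 2 represents 123.45, while Scale = 0 leaves the unscaled value as-is.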
@input
structure DeleteBlueprintRequest {
/// The name of the blueprint to delete.
@required
Name: NameString
}
@output
structure DeleteBlueprintResponse {
/// Returns the name of the blueprint that was deleted.
Name: NameString
}
@input
structure DeleteClassifierRequest {
/// Name of the classifier to remove.
@required
Name: NameString
}
@output
structure DeleteClassifierResponse {}
@input
structure DeleteColumnStatisticsForPartitionRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// A list of partition values identifying the partition.
@required
PartitionValues: ValueStringList
/// Name of the column.
@required
ColumnName: NameString
}
@output
structure DeleteColumnStatisticsForPartitionResponse {}
@input
structure DeleteColumnStatisticsForTableRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// The name of the column.
@required
ColumnName: NameString
}
@output
structure DeleteColumnStatisticsForTableResponse {}
@input
structure DeleteConnectionRequest {
/// The ID of the Data Catalog in which the connection resides. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The name of the connection to delete.
@required
ConnectionName: NameString
}
@output
structure DeleteConnectionResponse {}
@input
structure DeleteCrawlerRequest {
/// The name of the crawler to remove.
@required
Name: NameString
}
@output
structure DeleteCrawlerResponse {}
@input
structure DeleteCustomEntityTypeRequest {
/// The name of the custom pattern that you want to delete.
@required
Name: NameString
}
@output
structure DeleteCustomEntityTypeResponse {
/// The name of the custom pattern you deleted.
Name: NameString
}
@input
structure DeleteDatabaseRequest {
/// The ID of the Data Catalog in which the database resides. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The name of the database to delete. For Hive compatibility, this must be all
/// lowercase.
@required
Name: NameString
}
@output
structure DeleteDatabaseResponse {}
@input
structure DeleteDataQualityRulesetRequest {
/// A name for the data quality ruleset.
@required
Name: NameString
}
@output
structure DeleteDataQualityRulesetResponse {}
@input
structure DeleteDevEndpointRequest {
/// The name of the DevEndpoint.
@required
EndpointName: GenericString
}
@output
structure DeleteDevEndpointResponse {}
@input
structure DeleteJobRequest {
/// The name of the job definition to delete.
@required
JobName: NameString
}
@output
structure DeleteJobResponse {
/// The name of the job definition that was deleted.
JobName: NameString
}
@input
structure DeleteMLTransformRequest {
/// The unique identifier of the transform to delete.
@required
TransformId: HashString
}
@output
structure DeleteMLTransformResponse {
/// The unique identifier of the transform that was deleted.
TransformId: HashString
}
@input
structure DeletePartitionIndexRequest {
/// The catalog ID where the table resides.
CatalogId: CatalogIdString
/// Specifies the name of a database from which you want to delete a partition index.
@required
DatabaseName: NameString
/// Specifies the name of a table from which you want to delete a partition index.
@required
TableName: NameString
/// The name of the partition index to be deleted.
@required
IndexName: NameString
}
@output
structure DeletePartitionIndexResponse {}
@input
structure DeletePartitionRequest {
/// The ID of the Data Catalog where the partition to be deleted resides. If none is provided,
/// the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which the table in question
/// resides.
@required
DatabaseName: NameString
/// The name of the table that contains the partition to be deleted.
@required
TableName: NameString
/// The values that define the partition.
@required
PartitionValues: ValueStringList
}
@output
structure DeletePartitionResponse {}
@output
structure DeleteRegistryResponse {
/// The name of the registry being deleted.
RegistryName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the registry being deleted.
RegistryArn: GlueResourceArn
/// The status of the registry. A successful operation will return the Deleting status.
Status: RegistryStatus
}
@input
structure DeleteResourcePolicyRequest {
/// The hash value returned when this policy was set.
PolicyHashCondition: HashString
/// The ARN of the Glue resource for the resource policy to be deleted.
ResourceArn: GlueResourceArn
}
@output
structure DeleteResourcePolicyResponse {}
@output
structure DeleteSchemaResponse {
/// The Amazon Resource Name (ARN) of the schema being deleted.
SchemaArn: GlueResourceArn
/// The name of the schema being deleted.
SchemaName: SchemaRegistryNameString
/// The status of the schema.
Status: SchemaStatus
}
@output
structure DeleteSchemaVersionsResponse {
/// A list of SchemaVersionErrorItem objects, each containing an error and schema version.
SchemaVersionErrors: SchemaVersionErrorList
}
@input
structure DeleteSecurityConfigurationRequest {
/// The name of the security configuration to delete.
@required
Name: NameString
}
@output
structure DeleteSecurityConfigurationResponse {}
@input
structure DeleteSessionRequest {
/// The ID of the session to be deleted.
@required
Id: NameString
/// The name of the origin of the delete session request.
RequestOrigin: OrchestrationNameString
}
@output
structure DeleteSessionResponse {
/// Returns the ID of the deleted session.
Id: NameString
}
@input
structure DeleteTableRequest {
/// The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which the table resides. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The name of the table to be deleted. For Hive
/// compatibility, this name is entirely lowercase.
@required
Name: NameString
/// The transaction ID at which to delete the table contents.
TransactionId: TransactionIdString
}
@output
structure DeleteTableResponse {}
@input
structure DeleteTableVersionRequest {
/// The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The database in the catalog in which the table resides. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The name of the table. For Hive compatibility,
/// this name is entirely lowercase.
@required
TableName: NameString
/// The ID of the table version to be deleted. A VersionID is a string representation of an integer. Each version is incremented by 1.
@required
VersionId: VersionString
}
@output
structure DeleteTableVersionResponse {}
@input
structure DeleteTriggerRequest {
/// The name of the trigger to delete.
@required
Name: NameString
}
@output
structure DeleteTriggerResponse {
/// The name of the trigger that was deleted.
Name: NameString
}
@input
structure DeleteUserDefinedFunctionRequest {
/// The ID of the Data Catalog where the function to be deleted is
/// located. If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the function is located.
@required
DatabaseName: NameString
/// The name of the function definition to be deleted.
@required
FunctionName: NameString
}
@output
structure DeleteUserDefinedFunctionResponse {}
@input
structure DeleteWorkflowRequest {
/// Name of the workflow to be deleted.
@required
Name: NameString
}
@output
structure DeleteWorkflowResponse {
/// Name of the workflow specified in input.
Name: NameString
}
/// Specifies a Delta data store to crawl one or more Delta tables.
structure DeltaTarget {
/// A list of the Amazon S3 paths to the Delta tables.
DeltaTables: PathList
/// The name of the connection to use to connect to the Delta table target.
ConnectionName: ConnectionName
/// Specifies whether to write the manifest files to the Delta table path.
WriteManifest: NullableBoolean
/// Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
CreateNativeDeltaTable: NullableBoolean
}
/// A development endpoint where a developer can remotely debug extract, transform, and load
/// (ETL) scripts.
structure DevEndpoint {
/// The name of the DevEndpoint.
EndpointName: GenericString
/// The Amazon Resource Name (ARN) of the IAM role used in this DevEndpoint.
RoleArn: RoleArn
/// A list of security group identifiers used in this DevEndpoint.
SecurityGroupIds: StringList
/// The subnet ID for this DevEndpoint.
SubnetId: GenericString
/// The YARN endpoint address used by this DevEndpoint.
YarnEndpointAddress: GenericString
/// A private IP address to access the DevEndpoint within a VPC if the
/// DevEndpoint is created within one. The PrivateAddress field is
/// present only when you create the DevEndpoint within your VPC.
PrivateAddress: GenericString
/// The Apache Zeppelin port for the remote Apache Spark interpreter.
ZeppelinRemoteSparkInterpreterPort: IntegerValue = 0
/// The public IP address used by this DevEndpoint. The
/// PublicAddress field is present only when you create a non-virtual private cloud
/// (VPC) DevEndpoint.
PublicAddress: GenericString
/// The current status of this DevEndpoint.
Status: GenericString
/// The type of predefined worker that is allocated to the development endpoint. Accepts a value of Standard, G.1X, or G.2X.
///
/// - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We recommend this worker type for memory-intensive jobs.
///
/// Known issue: when a development endpoint is created with the G.2X
/// WorkerType configuration, the Spark drivers for the development endpoint will run on 4 vCPU, 16 GB of memory, and a 64 GB disk.
WorkerType: WorkerType
/// Glue version determines the versions of Apache Spark and Python that Glue supports. The Python version indicates the version supported for running your ETL scripts on development endpoints.
/// For more information about the available Glue versions and corresponding Spark and Python versions, see Glue version in the developer guide.
/// Development endpoints that are created without specifying a Glue version default to Glue 0.9.
/// You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.
GlueVersion: GlueVersionString
/// The number of workers of a defined workerType that are allocated to the development endpoint.
/// The maximum number of workers you can define is 299 for G.1X, and 149 for G.2X.
NumberOfWorkers: NullableInteger
/// The number of Glue Data Processing Units (DPUs) allocated to this DevEndpoint.
NumberOfNodes: IntegerValue = 0
/// The Amazon Web Services Availability Zone where this DevEndpoint is located.
AvailabilityZone: GenericString
/// The ID of the virtual private cloud (VPC) used by this DevEndpoint.
VpcId: GenericString
/// The paths to one or more Python libraries in an Amazon S3 bucket that should be loaded in
/// your DevEndpoint. Multiple values must be complete paths separated by a comma.
///
/// You can only use pure Python libraries with a DevEndpoint. Libraries that rely on
/// C extensions, such as the pandas Python data analysis library, are not currently supported.
ExtraPythonLibsS3Path: GenericString
/// The path to one or more Java .jar files in an S3 bucket that should be loaded
/// in your DevEndpoint.
///
/// You can only use pure Java/Scala libraries with a DevEndpoint.
ExtraJarsS3Path: GenericString
/// The reason for a current failure in this DevEndpoint.
FailureReason: GenericString
/// The status of the last update.
LastUpdateStatus: GenericString
/// The point in time at which this DevEndpoint was created.
CreatedTimestamp: TimestampValue
/// The point in time at which this DevEndpoint was last modified.
LastModifiedTimestamp: TimestampValue
/// The public key to be used by this DevEndpoint for authentication. This
/// attribute is provided for backward compatibility because the recommended attribute to use is
/// public keys.
PublicKey: GenericString
/// A list of public keys to be used by the DevEndpoints for authentication.
/// Using this attribute is preferred over a single public key because the public keys allow you
/// to have a different private key per client.
///
/// If you previously created an endpoint with a public key, you must remove that key to be
/// able to set a list of public keys. Call the UpdateDevEndpoint API operation
/// with the public key content in the deletePublicKeys attribute, and the list of
/// new keys in the addPublicKeys attribute.
///
PublicKeys: PublicKeysList
/// The name of the SecurityConfiguration structure to be used with this
/// DevEndpoint.
SecurityConfiguration: NameString
/// A map of arguments used to configure the DevEndpoint.
/// Valid arguments are:
///
/// - "--enable-glue-datacatalog": ""
///
/// You can specify a version of Python support for development endpoints by using the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.
Arguments: MapValue
}
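// Illustrative usage only: assuming the boto3 SDK, the Arguments map documented above could be
// supplied when creating a development endpoint, for example to enable Data Catalog integration.
// The endpoint name and role ARN are hypothetical, and the Python-version argument key shown is an
// assumption about supported keys.
//
//     import boto3
//     glue = boto3.client("glue")
//     glue.create_dev_endpoint(
//         EndpointName="example-endpoint",
//         RoleArn="arn:aws:iam::123456789012:role/ExampleGlueDevEndpointRole",
//         Arguments={"--enable-glue-datacatalog": "", "GLUE_PYTHON_VERSION": "3"},
//     )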
/// Custom libraries to be loaded into a development endpoint.
structure DevEndpointCustomLibraries {
/// The paths to one or more Python libraries in an Amazon Simple Storage Service (Amazon S3)
/// bucket that should be loaded in your DevEndpoint. Multiple values must be
/// complete paths separated by a comma.
///
/// You can only use pure Python libraries with a DevEndpoint. Libraries that rely on
/// C extensions, such as the pandas Python data analysis library, are not currently supported.
ExtraPythonLibsS3Path: GenericString
/// The path to one or more Java .jar files in an S3 bucket that should be loaded
/// in your DevEndpoint.
///
/// You can only use pure Java/Scala libraries with a DevEndpoint.
ExtraJarsS3Path: GenericString
}
/// Specifies the direct JDBC source connection.
structure DirectJDBCSource {
/// The name of the JDBC source connection.
@required
Name: NodeName
/// The database of the JDBC source connection.
@required
Database: EnclosedInStringProperty
/// The table of the JDBC source connection.
@required
Table: EnclosedInStringProperty
/// The connection name of the JDBC source.
@required
ConnectionName: EnclosedInStringProperty
/// The connection type of the JDBC source.
@required
ConnectionType: JDBCConnectionType
/// The temp directory of the JDBC Redshift source.
RedshiftTmpDir: EnclosedInStringProperty
}
/// Specifies an Apache Kafka data store.
structure DirectKafkaSource {
/// The name of the data store.
@required
Name: NodeName
/// Specifies the streaming options.
StreamingOptions: KafkaStreamingSourceOptions
/// The amount of time to spend processing each micro batch.
WindowSize: BoxedPositiveInt
/// Whether to automatically determine the schema from the incoming data.
DetectSchema: BoxedBoolean
/// Specifies options related to data preview for viewing a sample of your data.
DataPreviewOptions: StreamingDataPreviewOptions
}
/// Specifies a direct Amazon Kinesis data source.
structure DirectKinesisSource {
/// The name of the data source.
@required
Name: NodeName
/// The amount of time to spend processing each micro batch.
WindowSize: BoxedPositiveInt
/// Whether to automatically determine the schema from the incoming data.
DetectSchema: BoxedBoolean
/// Additional options for the Kinesis streaming data source.
StreamingOptions: KinesisStreamingSourceOptions
/// Additional options for data preview.
DataPreviewOptions: StreamingDataPreviewOptions
}
/// A policy that specifies update behavior for the crawler.
structure DirectSchemaChangePolicy {
/// Whether to use the specified update behavior when the crawler finds a changed schema.
EnableUpdateCatalog: BoxedBoolean
/// The update behavior when the crawler finds a changed schema.
UpdateBehavior: UpdateCatalogBehavior
/// Specifies the table in the database that the schema change policy applies to.
Table: EnclosedInStringProperty
/// Specifies the database that the schema change policy applies to.
Database: EnclosedInStringProperty
}
/// Defines column statistics supported for floating-point number data columns.
structure DoubleColumnStatisticsData {
/// The lowest value in the column.
MinimumValue: Double = 0
/// The highest value in the column.
MaximumValue: Double = 0
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
/// The number of distinct values in a column.
@required
NumberOfDistinctValues: NonNegativeLong = 0
}
/// Options to configure how your data quality evaluation results are published.
structure DQResultsPublishingOptions {
/// The context of the evaluation.
EvaluationContext: GenericLimitedString
/// The Amazon S3 prefix prepended to the results.
ResultsS3Prefix: EnclosedInStringProperty
/// Enable metrics for your data quality results.
CloudWatchMetricsEnabled: BoxedBoolean
/// Enable publishing for your data quality results.
ResultsPublishingEnabled: BoxedBoolean
}
/// Options to configure how your job will stop if your data quality evaluation fails.
structure DQStopJobOnFailureOptions {
/// When to stop the job if your data quality evaluation fails. Options are Immediate or AfterDataLoad.
StopJobOnFailureTiming: DQStopJobOnFailureTiming
}
/// Specifies a transform that removes rows of repeating data from a data set.
structure DropDuplicates {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// The name of the columns to be merged or removed if repeating.
Columns: LimitedPathList
}
/// Specifies a transform that chooses the data property keys that you want to drop.
structure DropFields {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A JSON path to a variable in the data structure.
@required
Paths: GlueStudioPathList
}
/// Specifies a transform that removes columns from the dataset if all values in the column are 'null'. By default, Glue Studio will recognize null objects, but some values such as empty strings, strings that are "null", -1 integers or other placeholders such as zeros, are not automatically recognized as nulls.
structure DropNullFields {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A structure that represents whether certain values are recognized as null values for removal.
NullCheckBoxList: NullCheckBoxList
/// A structure that specifies a list of NullValueField structures that represent a custom null value such as zero or other value being used as a null placeholder unique to the dataset.
/// The DropNullFields transform removes custom null values only if both the value of the null placeholder and the datatype match the data.
NullTextList: NullValueFields
}
/// Specifies the set of parameters needed to perform the dynamic transform.
structure DynamicTransform {
/// Specifies the name of the dynamic transform.
@required
Name: EnclosedInStringProperty
/// Specifies the name of the dynamic transform as it appears in the Glue Studio visual editor.
@required
TransformName: EnclosedInStringProperty
/// Specifies the inputs for the dynamic transform that are required.
@required
Inputs: OneInput
/// Specifies the parameters of the dynamic transform.
Parameters: TransformConfigParameterList
/// Specifies the name of the function of the dynamic transform.
@required
FunctionName: EnclosedInStringProperty
/// Specifies the path of the dynamic transform source and config files.
@required
Path: EnclosedInStringProperty
/// This field is not used and will be deprecated in a future release.
Version: EnclosedInStringProperty
/// Specifies the data schema for the dynamic transform.
OutputSchemas: GlueSchemas
}
/// Specifies a DynamoDB data source in the Glue Data Catalog.
structure DynamoDBCatalogSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
/// Specifies an Amazon DynamoDB table to crawl.
structure DynamoDBTarget {
/// The name of the DynamoDB table to crawl.
Path: Path
/// Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table.
/// A value of true means to scan all records, while a value of false means to sample the records. If no value is specified, the value defaults to true.
scanAll: NullableBoolean
/// The percentage of the configured read capacity units to use by the Glue crawler. Read capacity units is a term defined by DynamoDB, and is a numeric value that acts as rate limiter for the number of reads that can be performed on that table per second.
/// The valid values are null or a value between 0.1 and 1.5. A null value is used when the user does not provide a value, and defaults to 0.5 of the configured Read Capacity Unit (for provisioned tables), or 0.25 of the max configured Read Capacity Unit (for tables using on-demand mode).
scanRate: NullableDouble
}
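// Illustrative usage only: assuming the boto3 SDK, a crawler with a DynamoDB target that samples
// records at half of the configured read capacity might be created as sketched below; the crawler
// name, role, database, and table name are hypothetical.
//
//     import boto3
//     glue = boto3.client("glue")
//     glue.create_crawler(
//         Name="example-dynamodb-crawler",
//         Role="ExampleGlueCrawlerRole",
//         DatabaseName="example_db",
//         Targets={"DynamoDBTargets": [{"Path": "example-table", "scanAll": False, "scanRate": 0.5}]},
//     )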
/// An edge represents a directed connection between two Glue components that are part of the workflow the
/// edge belongs to.
structure Edge {
/// The unique ID of the node within the workflow where the edge starts.
SourceId: NameString
/// The unique ID of the node within the workflow where the edge ends.
DestinationId: NameString
}
/// Specifies the encryption-at-rest configuration for the Data Catalog.
structure EncryptionAtRest {
/// The encryption-at-rest mode for encrypting Data Catalog data.
@required
CatalogEncryptionMode: CatalogEncryptionMode
/// The ID of the KMS key to use for encryption at rest.
SseAwsKmsKeyId: NameString
}
/// Specifies an encryption configuration.
structure EncryptionConfiguration {
/// The encryption configuration for Amazon Simple Storage Service (Amazon S3) data.
S3Encryption: S3EncryptionList
/// The encryption configuration for Amazon CloudWatch.
CloudWatchEncryption: CloudWatchEncryption
/// The encryption configuration for job bookmarks.
JobBookmarksEncryption: JobBookmarksEncryption
}
/// A specified entity does not exist
@error("client")
structure EntityNotFoundException {
/// A message describing the problem.
Message: MessageString
/// Indicates whether or not the exception relates to a federated source.
FromFederationSource: NullableBoolean
}
/// Contains details about an error.
structure ErrorDetail {
/// The code associated with this error.
ErrorCode: NameString
/// A message describing the error.
ErrorMessage: DescriptionString
}
/// An object containing error details.
structure ErrorDetails {
/// The error code for an error.
ErrorCode: ErrorCodeString
/// The error message for an error.
ErrorMessage: ErrorMessageString
}
/// Specifies your data quality evaluation criteria.
structure EvaluateDataQuality {
/// The name of the data quality evaluation.
@required
Name: NodeName
/// The inputs of your data quality evaluation.
@required
Inputs: OneInput
/// The ruleset for your data quality evaluation.
@required
Ruleset: DQDLString
/// The output of your data quality evaluation.
Output: DQTransformOutput
/// Options to configure how your results are published.
PublishingOptions: DQResultsPublishingOptions
/// Options to configure how your job will stop if your data quality evaluation fails.
StopJobOnFailureOptions: DQStopJobOnFailureOptions
}
/// Specifies your data quality evaluation criteria.
structure EvaluateDataQualityMultiFrame {
/// The name of the data quality evaluation.
@required
Name: NodeName
/// The inputs of your data quality evaluation. The first input in this list is the primary data source.
@required
Inputs: ManyInputs
/// The aliases of all data sources except primary.
AdditionalDataSources: DQDLAliases
/// The ruleset for your data quality evaluation.
@required
Ruleset: DQDLString
/// Options to configure how your results are published.
PublishingOptions: DQResultsPublishingOptions
/// Options to configure runtime behavior of the transform.
AdditionalOptions: DQAdditionalOptions
/// Options to configure how your job will stop if your data quality evaluation fails.
StopJobOnFailureOptions: DQStopJobOnFailureOptions
}
/// Evaluation metrics provide an estimate of the quality of your machine learning transform.
structure EvaluationMetrics {
/// The type of machine learning transform.
@required
TransformType: TransformType
/// The evaluation metrics for the find matches algorithm.
FindMatchesMetrics: FindMatchesMetrics
}
/// Batch condition that must be met (specified number of events received or batch time window expired)
/// before EventBridge event trigger fires.
structure EventBatchingCondition {
/// Number of events that must be received from Amazon EventBridge before EventBridge event trigger fires.
@required
BatchSize: BatchSize = 0
/// Window of time in seconds after which EventBridge event trigger fires. Window starts when first event is received.
BatchWindow: BatchWindow
}
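// Illustrative usage only: assuming the boto3 SDK, an EVENT trigger that fires after 10 events or
// a 900-second window (whichever is reached first) might be defined as sketched below; the trigger,
// workflow, and job names are hypothetical.
//
//     import boto3
//     glue = boto3.client("glue")
//     glue.create_trigger(
//         Name="example-event-trigger",
//         WorkflowName="example-workflow",
//         Type="EVENT",
//         Actions=[{"JobName": "example-job"}],
//         EventBatchingCondition={"BatchSize": 10, "BatchWindow": 900},
//     )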
/// An execution property of a job.
structure ExecutionProperty {
/// The maximum number of concurrent runs allowed for the job.
/// The default is 1. An error is returned when this threshold is reached.
/// The maximum value you can specify is controlled by a service limit.
MaxConcurrentRuns: MaxConcurrentRuns = 0
}
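// Illustrative usage only: assuming the boto3 SDK, MaxConcurrentRuns is set through a job's
// ExecutionProperty, for example to allow three concurrent runs; the job name, role, and script
// location are hypothetical.
//
//     import boto3
//     glue = boto3.client("glue")
//     glue.create_job(
//         Name="example-job",
//         Role="ExampleGlueJobRole",
//         Command={"Name": "glueetl", "ScriptLocation": "s3://example-bucket/scripts/job.py"},
//         ExecutionProperty={"MaxConcurrentRuns": 3},
//     )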
/// Specifies configuration properties for an exporting labels task run.
structure ExportLabelsTaskRunProperties {
/// The Amazon Simple Storage Service (Amazon S3) path where you will export the
/// labels.
OutputS3Path: UriString
}
/// A database that points to an entity outside the Glue Data Catalog.
structure FederatedDatabase {
/// A unique identifier for the federated database.
Identifier: FederationIdentifier
/// The name of the connection to the external metastore.
ConnectionName: NameString
}
/// A federated resource already exists.
@error("client")
structure FederatedResourceAlreadyExistsException {
/// The message describing the problem.
Message: MessageString
/// The associated Glue resource already exists.
AssociatedGlueResource: GlueResourceArn
}
/// A table that points to an entity outside the Glue Data Catalog.
structure FederatedTable {
/// A unique identifier for the federated table.
Identifier: FederationIdentifier
/// A unique identifier for the federated database.
DatabaseIdentifier: FederationIdentifier
/// The name of the connection to the external metastore.
ConnectionName: NameString
}
/// A federation source failed.
@error("client")
structure FederationSourceException {
/// The error code of the problem.
FederationSourceErrorCode: FederationSourceErrorCode
/// The message describing the problem.
Message: MessageString
}
@error("client")
structure FederationSourceRetryableException {
Message: MessageString
}
/// Specifies a transform that locates records in the dataset that have missing values and adds a new field with a value determined by imputation. The input data set is used to train the machine learning model that determines what the missing value should be.
structure FillMissingValues {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A JSON path to a variable in the data structure for the dataset that is imputed.
@required
ImputedPath: EnclosedInStringProperty
/// A JSON path to a variable in the data structure for the dataset that is filled.
FilledPath: EnclosedInStringProperty
}
/// Specifies a transform that splits a dataset into two, based on a filter condition.
structure Filter {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// The operator used to filter rows by comparing the key value to a specified value.
@required
LogicalOperator: FilterLogicalOperator
/// Specifies a filter expression.
@required
Filters: FilterExpressions
}
/// Specifies a filter expression.
structure FilterExpression {
/// The type of operation to perform in the expression.
@required
Operation: FilterOperation
/// Whether the expression is to be negated.
Negated: BoxedBoolean
/// A list of filter values.
@required
Values: FilterValues
}
/// Represents a single entry in the list of values for a FilterExpression.
structure FilterValue {
/// The type of filter value.
@required
Type: FilterValueType
/// The value to be associated.
@required
Value: EnclosedInStringProperties
}
/// The evaluation metrics for the find matches algorithm. The quality of your machine
/// learning transform is measured by getting your transform to predict some matches and comparing
/// the results to known matches from the same dataset. The quality metrics are based on a subset
/// of your data, so they are not precise.
structure FindMatchesMetrics {
/// The area under the precision/recall curve (AUPRC) is a single number measuring the overall
/// quality of the transform, that is independent of the choice made for precision vs. recall.
/// Higher values indicate that you have a more attractive precision vs. recall tradeoff.
/// For more information, see Precision and recall in Wikipedia.
AreaUnderPRCurve: GenericBoundedDouble
/// The precision metric indicates how often your transform is correct when it predicts a match. Specifically, it measures how well the transform finds true positives from the total true positives possible.
/// For more information, see Precision and recall in Wikipedia.
Precision: GenericBoundedDouble
/// The recall metric indicates, for an actual match, how often your transform predicts
/// the match. Specifically, it measures how well the transform finds true positives from the
/// total records in the source data.
/// For more information, see Precision and recall in Wikipedia.
Recall: GenericBoundedDouble
/// The maximum F1 metric indicates the transform's accuracy between 0 and 1, where 1 is the best accuracy.
/// For more information, see F1 score in Wikipedia.
F1: GenericBoundedDouble
/// The confusion matrix shows you what your transform is predicting accurately and what types of errors it is making.
/// For more information, see Confusion matrix in Wikipedia.
ConfusionMatrix: ConfusionMatrix
/// A list of ColumnImportance structures containing column importance metrics, sorted in order of descending importance.
ColumnImportances: ColumnImportanceList
}
/// The parameters to configure the find matches transform.
structure FindMatchesParameters {
/// The name of a column that uniquely identifies rows in the source table. Used to help identify matching records.
PrimaryKeyColumnName: ColumnNameString
/// The value selected when tuning your transform for a balance between precision and recall.
/// A value of 0.5 means no preference; a value of 1.0 means a bias purely for precision, and a
/// value of 0.0 means a bias for recall. Because this is a tradeoff, choosing values close to 1.0
/// means very low recall, and choosing values close to 0.0 results in very low precision.
/// The precision metric indicates how often your model is correct when it predicts a match.
/// The recall metric indicates, for an actual match, how often your model predicts the
/// match.
PrecisionRecallTradeoff: GenericBoundedDouble
/// The value that is selected when tuning your transform for a balance between accuracy and
/// cost. A value of 0.5 means that the system balances accuracy and cost concerns. A value of 1.0
/// means a bias purely for accuracy, which typically results in a higher cost, sometimes
/// substantially higher. A value of 0.0 means a bias purely for cost, which results in a less
/// accurate FindMatches transform, sometimes with unacceptable accuracy.
/// Accuracy measures how well the transform finds true positives and true negatives. Increasing accuracy requires more machine resources and cost. But it also results in increased recall.
/// Cost measures how many compute resources, and thus money, are consumed to run the
/// transform.
AccuracyCostTradeoff: GenericBoundedDouble
/// The value to switch on or off to force the output to match the provided labels from users. If the value is True, the find matches transform forces the output to match the provided labels. The results override the normal conflation results. If the value is False, the find matches transform does not ensure all the labels provided are respected, and the results rely on the trained model.
/// Note that setting this value to true may increase the conflation execution time.
EnforceProvidedLabels: NullableBoolean
}
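// Illustrative usage only: assuming the boto3 SDK, the tradeoff values described above are passed
// when creating a FindMatches transform; the transform name, role, database, table, and key column
// are hypothetical.
//
//     import boto3
//     glue = boto3.client("glue")
//     glue.create_ml_transform(
//         Name="example-find-matches",
//         Role="ExampleGlueMLRole",
//         InputRecordTables=[{"DatabaseName": "example_db", "TableName": "customers"}],
//         Parameters={
//             "TransformType": "FIND_MATCHES",
//             "FindMatchesParameters": {
//                 "PrimaryKeyColumnName": "customer_id",
//                 "PrecisionRecallTradeoff": 0.9,
//                 "AccuracyCostTradeoff": 0.5,
//             },
//         },
//     )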
/// Specifies configuration properties for a Find Matches task run.
structure FindMatchesTaskRunProperties {
/// The job ID for the Find Matches task run.
JobId: HashString
/// The name assigned to the job for the Find Matches task run.
JobName: NameString
/// The job run ID for the Find Matches task run.
JobRunId: HashString
}
@input
structure GetBlueprintRequest {
/// The name of the blueprint.
@required
Name: NameString
/// Specifies whether or not to include the blueprint in the response.
IncludeBlueprint: NullableBoolean
/// Specifies whether or not to include the parameter specification.
IncludeParameterSpec: NullableBoolean
}
@output
structure GetBlueprintResponse {
/// Returns a Blueprint object.
Blueprint: Blueprint
}
@input
structure GetBlueprintRunRequest {
/// The name of the blueprint.
@required
BlueprintName: OrchestrationNameString
/// The run ID for the blueprint run you want to retrieve.
@required
RunId: IdString
}
@output
structure GetBlueprintRunResponse {
/// Returns a BlueprintRun object.
BlueprintRun: BlueprintRun
}
@input
structure GetBlueprintRunsRequest {
/// The name of the blueprint.
@required
BlueprintName: NameString
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The maximum size of a list to return.
MaxResults: PageSize
}
@output
structure GetBlueprintRunsResponse {
/// Returns a list of BlueprintRun objects.
BlueprintRuns: BlueprintRuns
/// A continuation token, if not all blueprint runs have been returned.
NextToken: GenericString
}
@input
structure GetCatalogImportStatusRequest {
/// The ID of the catalog to migrate. Currently, this should be the Amazon Web Services account ID.
CatalogId: CatalogIdString
}
@output
structure GetCatalogImportStatusResponse {
/// The status of the specified catalog migration.
ImportStatus: CatalogImportStatus
}
@input
structure GetClassifierRequest {
/// Name of the classifier to retrieve.
@required
Name: NameString
}
@output
structure GetClassifierResponse {
/// The requested classifier.
Classifier: Classifier
}
@input
structure GetClassifiersRequest {
/// The size of the list to return (optional).
MaxResults: PageSize
/// An optional continuation token.
NextToken: Token
}
@output
structure GetClassifiersResponse {
/// The requested list of classifier
/// objects.
Classifiers: ClassifierList
/// A continuation token.
NextToken: Token
}
@input
structure GetColumnStatisticsForPartitionRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// A list of partition values identifying the partition.
@required
PartitionValues: ValueStringList
/// A list of the column names.
@required
ColumnNames: GetColumnNamesList
}
@output
structure GetColumnStatisticsForPartitionResponse {
/// List of ColumnStatistics.
ColumnStatisticsList: ColumnStatisticsList
/// List of ColumnStatistics that failed to be retrieved.
Errors: ColumnErrors
}
@input
structure GetColumnStatisticsForTableRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// A list of the column names.
@required
ColumnNames: GetColumnNamesList
}
@output
structure GetColumnStatisticsForTableResponse {
/// List of ColumnStatistics.
ColumnStatisticsList: ColumnStatisticsList
/// List of ColumnStatistics that failed to be retrieved.
Errors: ColumnErrors
}
@input
structure GetConnectionRequest {
/// The ID of the Data Catalog in which the connection resides. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The name of the connection definition to retrieve.
@required
Name: NameString
/// Allows you to retrieve the connection metadata without returning the password. For
/// instance, the Glue console uses this flag to retrieve the connection, and does not display
/// the password. Set this parameter when the caller might not have permission to use the KMS
/// key to decrypt the password, but it does have permission to access the rest of the connection
/// properties.
HidePassword: Boolean = false
}
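// Illustrative usage only: assuming the boto3 SDK, a caller that cannot use the KMS key can still
// read connection metadata by hiding the password, as sketched below with a hypothetical
// connection name.
//
//     import boto3
//     glue = boto3.client("glue")
//     connection = glue.get_connection(Name="example-jdbc-connection", HidePassword=True)["Connection"]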
@output
structure GetConnectionResponse {
/// The requested connection definition.
Connection: Connection
}
/// Filters the connection definitions that are returned by the GetConnections
/// API operation.
structure GetConnectionsFilter {
/// A criteria string that must match the criteria recorded in the
/// connection definition for that connection definition to be returned.
MatchCriteria: MatchCriteria
/// The type of connections to return. Currently, SFTP is not supported.
ConnectionType: ConnectionType
}
@input
structure GetConnectionsRequest {
/// The ID of the Data Catalog in which the connections reside. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// A filter that controls which connections are returned.
Filter: GetConnectionsFilter
/// Allows you to retrieve the connection metadata without returning the password. For
/// instance, the Glue console uses this flag to retrieve the connection, and does not display
/// the password. Set this parameter when the caller might not have permission to use the KMS
/// key to decrypt the password, but it does have permission to access the rest of the connection
/// properties.
HidePassword: Boolean = false
/// A continuation token, if this is a continuation call.
NextToken: Token
/// The maximum number of connections to return in one response.
MaxResults: PageSize
}
@output
structure GetConnectionsResponse {
/// A list of requested connection definitions.
ConnectionList: ConnectionList
/// A continuation token, if the list of connections returned does not
/// include the last of the filtered connections.
NextToken: Token
}
@input
structure GetCrawlerMetricsRequest {
/// A list of the names of crawlers about which to retrieve metrics.
CrawlerNameList: CrawlerNameList
/// The maximum size of a list to return.
MaxResults: PageSize
/// A continuation token, if this is a continuation call.
NextToken: Token
}
@output
structure GetCrawlerMetricsResponse {
/// A list of metrics for the specified crawler.
CrawlerMetricsList: CrawlerMetricsList
/// A continuation token, if the returned list does not contain the
/// last metric available.
NextToken: Token
}
@input
structure GetCrawlerRequest {
/// The name of the crawler to retrieve metadata for.
@required
Name: NameString
}
@output
structure GetCrawlerResponse {
/// The metadata for the specified crawler.
Crawler: Crawler
}
@input
structure GetCrawlersRequest {
/// The number of crawlers to return on each call.
MaxResults: PageSize
/// A continuation token, if this is a continuation request.
NextToken: Token
}
@output
structure GetCrawlersResponse {
/// A list of crawler metadata.
Crawlers: CrawlerList
/// A continuation token, if the returned list has not reached the end
/// of those defined in this customer account.
NextToken: Token
}
@input
structure GetCustomEntityTypeRequest {
/// The name of the custom pattern that you want to retrieve.
@required
Name: NameString
}
@output
structure GetCustomEntityTypeResponse {
/// The name of the custom pattern that you retrieved.
Name: NameString
/// A regular expression string that is used for detecting sensitive data in a custom pattern.
RegexString: NameString
/// A list of context words if specified when you created the custom pattern. If none of these context words are found within the vicinity of the regular expression the data will not be detected as sensitive data.
ContextWords: ContextWords
}
@input
structure GetDatabaseRequest {
/// The ID of the Data Catalog in which the database resides. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The name of the database to retrieve. For Hive compatibility, this
/// should be all lowercase.
@required
Name: NameString
}
@output
structure GetDatabaseResponse {
/// The definition of the specified database in the Data Catalog.
Database: Database
}
@input
structure GetDatabasesRequest {
/// The ID of the Data Catalog from which to retrieve Databases. If none is
/// provided, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// A continuation token, if this is a continuation call.
NextToken: Token
/// The maximum number of databases to return in one response.
MaxResults: CatalogGetterPageSize
/// Allows you to specify that you want to list the databases shared with your account. The allowable values are FEDERATED, FOREIGN or ALL.
///
/// - If set to FEDERATED, will list the federated databases (referencing an external entity) shared with your account.
/// - If set to FOREIGN, will list the databases shared with your account.
/// - If set to ALL, will list the databases shared with your account, as well as the databases in your local account.
ResourceShareType: ResourceShareType
}
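// Illustrative usage only: assuming the boto3 SDK, ResourceShareType controls whether shared
// databases are included; for example, listing local as well as shared databases:
//
//     import boto3
//     glue = boto3.client("glue")
//     databases = glue.get_databases(ResourceShareType="ALL")["DatabaseList"]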
@output
structure GetDatabasesResponse {
/// A list of Database objects from the specified catalog.
@required
DatabaseList: DatabaseList
/// A continuation token for paginating the returned list of tokens,
/// returned if the current segment of the list is not the last.
NextToken: Token
}
@input
structure GetDataCatalogEncryptionSettingsRequest {
/// The ID of the Data Catalog to retrieve the security configuration for. If none is
/// provided, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
}
@output
structure GetDataCatalogEncryptionSettingsResponse {
/// The requested security configuration.
DataCatalogEncryptionSettings: DataCatalogEncryptionSettings
}
@input
structure GetDataflowGraphRequest {
/// The Python script to transform.
PythonScript: PythonScript
}
@output
structure GetDataflowGraphResponse {
/// A list of the nodes in the resulting DAG.
DagNodes: DagNodes
/// A list of the edges in the resulting DAG.
DagEdges: DagEdges
}
@input
structure GetDataQualityResultRequest {
/// A unique result ID for the data quality result.
@required
ResultId: HashString
}
@output
structure GetDataQualityResultResponse {
/// A unique result ID for the data quality result.
ResultId: HashString
/// An aggregate data quality score. Represents the ratio of rules that passed to the total number of rules.
Score: GenericBoundedDouble
/// The table associated with the data quality result, if any.
DataSource: DataSource
/// The name of the ruleset associated with the data quality result.
RulesetName: NameString
/// In the context of a job in Glue Studio, each node in the canvas is typically assigned some sort of name and data quality nodes will have names. In the case of multiple nodes, the evaluationContext can differentiate the nodes.
EvaluationContext: GenericString
/// The date and time when the run for this data quality result started.
StartedOn: Timestamp
/// The date and time when the run for this data quality result was completed.
CompletedOn: Timestamp
/// The job name associated with the data quality result, if any.
JobName: NameString
/// The job run ID associated with the data quality result, if any.
JobRunId: HashString
/// The unique run ID associated with the ruleset evaluation.
RulesetEvaluationRunId: HashString
/// A list of DataQualityRuleResult objects representing the results for each rule.
RuleResults: DataQualityRuleResults
}
@input
structure GetDataQualityRuleRecommendationRunRequest {
/// The unique run identifier associated with this run.
@required
RunId: HashString
}
@output
structure GetDataQualityRuleRecommendationRunResponse {
/// The unique run identifier associated with this run.
RunId: HashString
/// The data source (a Glue table) associated with this run.
DataSource: DataSource
/// An IAM role supplied to encrypt the results of the run.
Role: RoleString
/// The number of G.1X workers to be used in the run. The default is 5.
NumberOfWorkers: NullableInteger
/// The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// The status for this run.
Status: TaskStatusType
/// The error strings that are associated with the run.
ErrorString: GenericString
/// The date and time when this run started.
StartedOn: Timestamp
/// A timestamp. The last point in time when this data quality rule recommendation run was modified.
LastModifiedOn: Timestamp
/// The date and time when this run was completed.
CompletedOn: Timestamp
/// The amount of time (in seconds) that the run consumed resources.
ExecutionTime: ExecutionTime = 0
/// When a start rule recommendation run completes, it creates a recommended ruleset (a set of rules). This member has those rules in Data Quality Definition Language (DQDL) format.
RecommendedRuleset: DataQualityRulesetString
/// The name of the ruleset that was created by the run.
CreatedRulesetName: NameString
}
@input
structure GetDataQualityRulesetEvaluationRunRequest {
/// The unique run identifier associated with this run.
@required
RunId: HashString
}
@output
structure GetDataQualityRulesetEvaluationRunResponse {
/// The unique run identifier associated with this run.
RunId: HashString
/// The data source (a Glue table) associated with this evaluation run.
DataSource: DataSource
/// An IAM role supplied to encrypt the results of the run.
Role: RoleString
/// The number of G.1X workers to be used in the run. The default is 5.
NumberOfWorkers: NullableInteger
/// The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// Additional run options you can specify for an evaluation run.
AdditionalRunOptions: DataQualityEvaluationRunAdditionalRunOptions
/// The status for this run.
Status: TaskStatusType
/// The error strings that are associated with the run.
ErrorString: GenericString
/// The date and time when this run started.
StartedOn: Timestamp
/// A timestamp. The last point in time when this data quality ruleset evaluation run was modified.
LastModifiedOn: Timestamp
/// The date and time when this run was completed.
CompletedOn: Timestamp
/// The amount of time (in seconds) that the run consumed resources.
ExecutionTime: ExecutionTime = 0
/// A list of ruleset names for the run.
RulesetNames: RulesetNames
/// A list of result IDs for the data quality results for the run.
ResultIds: DataQualityResultIdList
/// A map of reference strings to additional data sources you can specify for an evaluation run.
AdditionalDataSources: DataSourceMap
}
@input
structure GetDataQualityRulesetRequest {
/// The name of the ruleset.
@required
Name: NameString
}
@output
structure GetDataQualityRulesetResponse {
/// The name of the ruleset.
Name: NameString
/// A description of the ruleset.
Description: DescriptionString
/// A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.
Ruleset: DataQualityRulesetString
/// The name and database name of the target table.
TargetTable: DataQualityTargetTable
/// A timestamp. The time and date that this data quality ruleset was created.
CreatedOn: Timestamp
/// A timestamp. The last point in time when this data quality ruleset was modified.
LastModifiedOn: Timestamp
/// When a ruleset was created from a recommendation run, this run ID is generated to link the two together.
RecommendationRunId: HashString
}
@input
structure GetDevEndpointRequest {
/// Name of the DevEndpoint to retrieve information for.
@required
EndpointName: GenericString
}
@output
structure GetDevEndpointResponse {
/// A DevEndpoint definition.
DevEndpoint: DevEndpoint
}
@input
structure GetDevEndpointsRequest {
/// The maximum size of information to return.
MaxResults: PageSize
/// A continuation token, if this is a continuation call.
NextToken: GenericString
}
@output
structure GetDevEndpointsResponse {
/// A list of DevEndpoint definitions.
DevEndpoints: DevEndpointList
/// A continuation token, if not all DevEndpoint definitions have yet been
/// returned.
NextToken: GenericString
}
@input
structure GetJobBookmarkRequest {
/// The name of the job in question.
@required
JobName: JobName
/// The unique run identifier associated with this job run.
RunId: RunId
}
@output
structure GetJobBookmarkResponse {
/// A structure that defines a point that a job can resume processing.
JobBookmarkEntry: JobBookmarkEntry
}
@input
structure GetJobRequest {
/// The name of the job definition to retrieve.
@required
JobName: NameString
}
@output
structure GetJobResponse {
/// The requested job definition.
Job: Job
}
@input
structure GetJobRunRequest {
/// Name of the job definition being run.
@required
JobName: NameString
/// The ID of the job run.
@required
RunId: IdString
/// True if a list of predecessor runs should be returned.
PredecessorsIncluded: BooleanValue = false
}
@output
structure GetJobRunResponse {
/// The requested job-run metadata.
JobRun: JobRun
}
@input
structure GetJobRunsRequest {
/// The name of the job definition for which to retrieve all job runs.
@required
JobName: NameString
/// A continuation token, if this is a continuation call.
NextToken: GenericString
/// The maximum size of the response.
MaxResults: PageSize
}
@output
structure GetJobRunsResponse {
/// A list of job-run metadata objects.
JobRuns: JobRunList
/// A continuation token, if not all requested job runs have been returned.
NextToken: GenericString
}
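// Illustrative usage only: assuming the boto3 SDK, the NextToken returned above is passed back in
// until it is absent, which is the usual way to page through job runs for a hypothetical job name.
//
//     import boto3
//     glue = boto3.client("glue")
//     runs, token = [], None
//     while True:
//         kwargs = {"JobName": "example-job", "MaxResults": 100}
//         if token:
//             kwargs["NextToken"] = token
//         page = glue.get_job_runs(**kwargs)
//         runs.extend(page.get("JobRuns", []))
//         token = page.get("NextToken")
//         if not token:
//             break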
@input
structure GetJobsRequest {
/// A continuation token, if this is a continuation call.
NextToken: GenericString
/// The maximum size of the response.
MaxResults: PageSize
}
@output
structure GetJobsResponse {
/// A list of job definitions.
Jobs: JobList
/// A continuation token, if not all job definitions have yet been returned.
NextToken: GenericString
}
@input
structure GetMappingRequest {
/// Specifies the source table.
@required
Source: CatalogEntry
/// A list of target tables.
Sinks: CatalogEntries
/// Parameters for the mapping.
Location: Location
}
@output
structure GetMappingResponse {
/// A list of mappings to the specified targets.
@required
Mapping: MappingList
}
@input
structure GetMLTaskRunRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
/// The unique identifier of the task run.
@required
TaskRunId: HashString
}
@output
structure GetMLTaskRunResponse {
/// The unique identifier of the machine learning transform.
TransformId: HashString
/// The unique run identifier associated with this run.
TaskRunId: HashString
/// The status for this task run.
Status: TaskStatusType
/// The names of the log groups that are associated with the task run.
LogGroupName: GenericString
/// The list of properties that are associated with the task run.
Properties: TaskRunProperties
/// The error strings that are associated with the task run.
ErrorString: GenericString
/// The date and time when this task run started.
StartedOn: Timestamp
/// The date and time when this task run was last modified.
LastModifiedOn: Timestamp
/// The date and time when this task run was completed.
CompletedOn: Timestamp
/// The amount of time (in seconds) that the task run consumed resources.
ExecutionTime: ExecutionTime = 0
}
@input
structure GetMLTaskRunsRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
/// A token for pagination of the results. The default is empty.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
/// The filter criteria, in the TaskRunFilterCriteria structure, for the task run.
Filter: TaskRunFilterCriteria
/// The sorting criteria, in the TaskRunSortCriteria structure, for the task run.
Sort: TaskRunSortCriteria
}
@output
structure GetMLTaskRunsResponse {
/// A list of task runs that are associated with the transform.
TaskRuns: TaskRunList
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure GetMLTransformRequest {
/// The unique identifier of the transform, generated at the time that the transform was
/// created.
@required
TransformId: HashString
}
@output
structure GetMLTransformResponse {
/// The unique identifier of the transform, generated at the time that the transform was
/// created.
TransformId: HashString
/// The unique name given to the transform when it was created.
Name: NameString
/// A description of the transform.
Description: DescriptionString
/// The last known status of the transform (to indicate whether it can be used or not). One of "NOT_READY", "READY", or "DELETING".
Status: TransformStatusType
/// The date and time when the transform was created.
CreatedOn: Timestamp
/// The date and time when the transform was last modified.
LastModifiedOn: Timestamp
/// A list of Glue table definitions used by the transform.
InputRecordTables: GlueTables
/// The configuration parameters that are specific to the algorithm used.
Parameters: TransformParameters
/// The latest evaluation metrics.
EvaluationMetrics: EvaluationMetrics
/// The number of labels available for this transform.
LabelCount: LabelCount = 0
/// The Map object that represents the schema that this
/// transform accepts. Has an upper bound of 100 columns.
Schema: TransformSchema
/// The name or Amazon Resource Name (ARN) of the IAM role with the required
/// permissions.
Role: RoleString
/// This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.
GlueVersion: GlueVersionString
/// The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of
/// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more
/// information, see the Glue pricing
/// page.
/// When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when this task runs. Accepts a value of Standard, G.1X, or G.2X.
///
/// - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
/// - For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
/// - For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when this task runs.
NumberOfWorkers: NullableInteger
/// The timeout for a task run for this transform in minutes. This is the maximum time that a task run for this transform can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// The maximum number of times to retry a task for this transform after a task run fails.
MaxRetries: NullableInteger
/// The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.
TransformEncryption: TransformEncryption
}
@input
structure GetMLTransformsRequest {
/// A paginated token to offset the results.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
/// The filter transformation criteria.
Filter: TransformFilterCriteria
/// The sorting criteria.
Sort: TransformSortCriteria
}
@output
structure GetMLTransformsResponse {
/// A list of machine learning transforms.
@required
Transforms: TransformList
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure GetPartitionIndexesRequest {
/// The catalog ID where the table resides.
CatalogId: CatalogIdString
/// Specifies the name of a database from which you want to retrieve partition indexes.
@required
DatabaseName: NameString
/// Specifies the name of a table for which you want to retrieve the partition indexes.
@required
TableName: NameString
/// A continuation token, included if this is a continuation call.
NextToken: Token
}
@output
structure GetPartitionIndexesResponse {
/// A list of index descriptors.
PartitionIndexDescriptorList: PartitionIndexDescriptorList
/// A continuation token, present if the current list segment is not the last.
NextToken: Token
}
@input
structure GetPartitionRequest {
/// The ID of the Data Catalog where the partition in question resides. If none is provided,
/// the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partition resides.
@required
DatabaseName: NameString
/// The name of the partition's table.
@required
TableName: NameString
/// The values that define the partition.
@required
PartitionValues: ValueStringList
}
@output
structure GetPartitionResponse {
/// The requested information, in the form of a Partition
/// object.
Partition: Partition
}
@input
structure GetPartitionsRequest {
/// The ID of the Data Catalog where the partitions in question reside. If none is provided,
/// the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// An expression that filters the partitions to be returned.
/// The expression uses SQL syntax similar to the SQL WHERE filter clause. The
/// SQL statement parser JSQLParser parses the expression.
///
/// Operators: The following are the operators that you can use in the
/// Expression API call:
///
/// - = : Checks whether the values of the two operands are equal; if yes, then the condition becomes
///   true.
///   Example: Assume 'variable a' holds 10 and 'variable b' holds 20.
///   (a = b) is not true.
/// - < > : Checks whether the values of two operands are equal; if the values are not equal,
///   then the condition becomes true.
///   Example: (a < > b) is true.
/// - > : Checks whether the value of the left operand is greater than the value of the right
///   operand; if yes, then the condition becomes true.
///   Example: (a > b) is not true.
/// - < : Checks whether the value of the left operand is less than the value of the right
///   operand; if yes, then the condition becomes true.
///   Example: (a < b) is true.
/// - >= : Checks whether the value of the left operand is greater than or equal to the value
///   of the right operand; if yes, then the condition becomes true.
///   Example: (a >= b) is not true.
/// - <= : Checks whether the value of the left operand is less than or equal to the value of
///   the right operand; if yes, then the condition becomes true.
///   Example: (a <= b) is true.
/// - AND, OR, IN, BETWEEN, LIKE, NOT, IS NULL : Logical operators.
///
/// Supported Partition Key Types: The following are the supported partition keys.
///
/// - string
/// - date
/// - timestamp
/// - int
/// - bigint
/// - long
/// - tinyint
/// - smallint
/// - decimal
///
/// If a type is encountered that is not valid, an exception is thrown.
/// The following list shows the valid operators on each type. When you define a crawler, the
/// partitionKey type is created as a STRING, to be compatible with the catalog partitions.
///
/// Sample API Call:
Expression: PredicateString
/// A continuation token, if this is not the first call to retrieve
/// these partitions.
NextToken: Token
/// The segment of the table's partitions to scan in this request.
Segment: Segment
/// The maximum number of partitions to return in a single response.
MaxResults: PageSize
/// When true, specifies not returning the partition column schema. Useful when you are interested only in other partition attributes such as partition values or location. This approach avoids the problem of a large response by not returning duplicate data.
ExcludeColumnSchema: BooleanNullable
/// The transaction ID at which to read the partition contents.
TransactionId: TransactionIdString
/// The time as of when to read the partition contents. If not set, the most recent transaction commit time will be used. Cannot be specified along with TransactionId.
QueryAsOfTime: Timestamp
}
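// Illustrative usage only: assuming the boto3 SDK and a table partitioned by string keys named
// year and month (both hypothetical), an Expression using the operators listed above might look
// like the following sketch.
//
//     import boto3
//     glue = boto3.client("glue")
//     partitions = glue.get_partitions(
//         DatabaseName="example_db",
//         TableName="events",
//         Expression="year = '2023' AND month IN ('01', '02')",
//     )["Partitions"]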
@output
structure GetPartitionsResponse {
/// A list of requested partitions.
Partitions: PartitionList
/// A continuation token, if the returned list of partitions does not include the last
/// one.
NextToken: Token
}
@input
structure GetPlanRequest {
/// The list of mappings from a source table to target tables.
@required
Mapping: MappingList
/// The source table.
@required
Source: CatalogEntry
/// The target tables.
Sinks: CatalogEntries
/// The parameters for the mapping.
Location: Location
/// The programming language of the code to perform the mapping.
Language: Language
/// A map to hold additional optional key-value parameters.
/// Currently, these key-value pairs are supported:
///
/// - inferSchema: Specifies whether to set inferSchema to true or false for the default script generated by a Glue job. For example, to set inferSchema to true, pass the following key value pair:
///
///   --additional-plan-options-map '{"inferSchema":"true"}'
///
AdditionalPlanOptionsMap: AdditionalPlanOptionsMap
}
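// Illustrative usage only: assuming the boto3 SDK, the inferSchema option described above is passed
// through AdditionalPlanOptionsMap when requesting a generated script; the source and sink table
// references are hypothetical and the mapping entries are omitted for brevity.
//
//     import boto3
//     glue = boto3.client("glue")
//     plan = glue.get_plan(
//         Mapping=[],  # mapping entries omitted in this sketch
//         Source={"DatabaseName": "example_db", "TableName": "source_table"},
//         Sinks=[{"DatabaseName": "example_db", "TableName": "target_table"}],
//         Language="PYTHON",
//         AdditionalPlanOptionsMap={"inferSchema": "true"},
//     )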
@output
structure GetPlanResponse {
/// A Python script to perform the mapping.
PythonScript: PythonScript
/// The Scala code to perform the mapping.
ScalaCode: ScalaCode
}
@output
structure GetRegistryResponse {
/// The name of the registry.
RegistryName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the registry.
RegistryArn: GlueResourceArn
/// A description of the registry.
Description: DescriptionString
/// The status of the registry.
Status: RegistryStatus
/// The date and time the registry was created.
CreatedTime: CreatedTimestamp
/// The date and time the registry was updated.
UpdatedTime: UpdatedTimestamp
}
@input
structure GetResourcePoliciesRequest {
/// A continuation token, if this is a continuation request.
NextToken: Token
/// The maximum size of a list to return.
MaxResults: PageSize
}
@output
structure GetResourcePoliciesResponse {
/// A list of the individual resource policies and the account-level resource policy.
GetResourcePoliciesResponseList: GetResourcePoliciesResponseList
/// A continuation token, if the returned list does not contain the last resource policy available.
NextToken: Token
}
@input
structure GetResourcePolicyRequest {
/// The ARN of the Glue resource for which to retrieve the resource policy. If not
/// supplied, the Data Catalog resource policy is returned. Use GetResourcePolicies
/// to view all existing resource policies. For more information see Specifying Glue Resource ARNs.
///
ResourceArn: GlueResourceArn
}
@output
structure GetResourcePolicyResponse {
/// Contains the requested policy document, in JSON format.
PolicyInJson: PolicyJsonString
/// Contains the hash value associated with this policy.
PolicyHash: HashString
/// The date and time at which the policy was created.
CreateTime: Timestamp
/// The date and time at which the policy was last updated.
UpdateTime: Timestamp
}
@output
structure GetSchemaByDefinitionResponse {
/// The schema ID of the schema version.
SchemaVersionId: SchemaVersionIdString
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.
DataFormat: DataFormat
/// The status of the schema version.
Status: SchemaVersionStatus
/// The date and time the schema was created.
CreatedTime: CreatedTimestamp
}
@output
structure GetSchemaResponse {
/// The name of the registry.
RegistryName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the registry.
RegistryArn: GlueResourceArn
/// The name of the schema.
SchemaName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// A description of the schema, if specified when it was created.
Description: DescriptionString
/// The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.
DataFormat: DataFormat
/// The compatibility mode of the schema.
Compatibility: Compatibility
/// The version number of the checkpoint (the last time the compatibility mode was changed).
SchemaCheckpoint: SchemaCheckpointNumber = 0
/// The latest version of the schema associated with the returned schema definition.
LatestSchemaVersion: VersionLongNumber = 0
/// The next version of the schema associated with the returned schema definition.
NextSchemaVersion: VersionLongNumber = 0
/// The status of the schema.
SchemaStatus: SchemaStatus
/// The date and time the schema was created.
CreatedTime: CreatedTimestamp
/// The date and time the schema was updated.
UpdatedTime: UpdatedTimestamp
}
@output
structure GetSchemaVersionResponse {
/// The SchemaVersionId of the schema version.
SchemaVersionId: SchemaVersionIdString
/// The schema definition for the schema ID.
SchemaDefinition: SchemaDefinitionString
/// The data format of the schema definition. Currently AVRO, JSON and PROTOBUF are supported.
DataFormat: DataFormat
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// The version number of the schema.
VersionNumber: VersionLongNumber = 0
/// The status of the schema version.
Status: SchemaVersionStatus
/// The date and time the schema version was created.
CreatedTime: CreatedTimestamp
}
@output
structure GetSchemaVersionsDiffResponse {
/// The difference between schemas as a string in JsonPatch format.
Diff: SchemaDefinitionDiff
}
@input
structure GetSecurityConfigurationRequest {
/// The name of the security configuration to retrieve.
@required
Name: NameString
}
@output
structure GetSecurityConfigurationResponse {
/// The requested security configuration.
SecurityConfiguration: SecurityConfiguration
}
@input
structure GetSecurityConfigurationsRequest {
/// The maximum number of results to return.
MaxResults: PageSize
/// A continuation token, if this is a continuation call.
NextToken: GenericString
}
@output
structure GetSecurityConfigurationsResponse {
/// A list of security configurations.
SecurityConfigurations: SecurityConfigurationList
/// A continuation token, if there are more security
/// configurations to return.
NextToken: GenericString
}
@input
structure GetSessionRequest {
/// The ID of the session.
@required
Id: NameString
/// The origin of the request.
RequestOrigin: OrchestrationNameString
}
@output
structure GetSessionResponse {
/// The session object is returned in the response.
Session: Session
}
@input
structure GetStatementRequest {
/// The Session ID of the statement.
@required
SessionId: NameString
/// The Id of the statement.
@required
Id: IntegerValue = 0
/// The origin of the request.
RequestOrigin: OrchestrationNameString
}
@output
structure GetStatementResponse {
/// Returns the statement.
Statement: Statement
}
@input
structure GetTableRequest {
/// The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The name of the database in the catalog in which the table resides.
/// For Hive compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The name of the table for which to retrieve the definition. For Hive
/// compatibility, this name is entirely lowercase.
@required
Name: NameString
/// The transaction ID at which to read the table contents.
TransactionId: TransactionIdString
/// The time as of when to read the table contents. If not set, the most recent transaction commit time will be used. Cannot be specified along with TransactionId.
QueryAsOfTime: Timestamp
}
@output
structure GetTableResponse {
/// The Table object that defines the specified table.
Table: Table
}
@input
structure GetTablesRequest {
/// The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The database in the catalog whose tables to list. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// A regular expression pattern. If present, only those tables
/// whose names match the pattern are returned.
Expression: FilterString
/// A continuation token, included if this is a continuation call.
NextToken: Token
/// The maximum number of tables to return in a single response.
MaxResults: CatalogGetterPageSize
/// The transaction ID at which to read the table contents.
TransactionId: TransactionIdString
/// The time as of when to read the table contents. If not set, the most recent transaction commit time will be used. Cannot be specified along with TransactionId.
QueryAsOfTime: Timestamp
}
@output
structure GetTablesResponse {
/// A list of the requested Table objects.
TableList: TableList
/// A continuation token, present if the current list segment is
/// not the last.
NextToken: Token
}
@input
structure GetTableVersionRequest {
/// The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The database in the catalog in which the table resides. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The name of the table. For Hive compatibility,
/// this name is entirely lowercase.
@required
TableName: NameString
/// The ID value of the table version to be retrieved. A VersionID is a string representation of an integer. Each version is incremented by 1.
VersionId: VersionString
}
@output
structure GetTableVersionResponse {
/// The requested table version.
TableVersion: TableVersion
}
@input
structure GetTableVersionsRequest {
/// The ID of the Data Catalog where the tables reside. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The database in the catalog in which the table resides. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// The name of the table. For Hive
/// compatibility, this name is entirely lowercase.
@required
TableName: NameString
/// A continuation token, if this is not the first call.
NextToken: Token
/// The maximum number of table versions to return in one response.
MaxResults: CatalogGetterPageSize
}
@output
structure GetTableVersionsResponse {
/// A list of strings identifying available versions of the
/// specified table.
TableVersions: GetTableVersionsList
/// A continuation token, if the list of available versions does
/// not include the last one.
NextToken: Token
}
@input
structure GetTagsRequest {
/// The Amazon Resource Name (ARN) of the resource for which to retrieve tags.
@required
ResourceArn: GlueResourceArn
}
@output
structure GetTagsResponse {
/// The requested tags.
Tags: TagsMap
}
@input
structure GetTriggerRequest {
/// The name of the trigger to retrieve.
@required
Name: NameString
}
@output
structure GetTriggerResponse {
/// The requested trigger definition.
Trigger: Trigger
}
@input
structure GetTriggersRequest {
/// A continuation token, if this is a continuation call.
NextToken: GenericString
/// The name of the job to retrieve triggers for. The trigger that can start this job is
/// returned, and if there is no such trigger, all triggers are returned.
DependentJobName: NameString
/// The maximum size of the response.
MaxResults: PageSize
}
@output
structure GetTriggersResponse {
/// A list of triggers for the specified job.
Triggers: TriggerList
/// A continuation token, if not all the requested triggers
/// have yet been returned.
NextToken: GenericString
}
@input
structure GetUnfilteredPartitionMetadataRequest {
/// The catalog ID where the partition resides.
@required
CatalogId: CatalogIdString
/// (Required) Specifies the name of a database that contains the partition.
@required
DatabaseName: NameString
/// (Required) Specifies the name of a table that contains the partition.
@required
TableName: NameString
/// (Required) A list of partition key values.
@required
PartitionValues: ValueStringList
/// A structure containing Lake Formation audit context information.
AuditContext: AuditContext
/// (Required) A list of supported permission types.
@required
SupportedPermissionTypes: PermissionTypeList
}
@output
structure GetUnfilteredPartitionMetadataResponse {
/// A Partition object containing the partition metadata.
Partition: Partition
/// A list of column names that the user has been granted access to.
AuthorizedColumns: NameStringList
/// A Boolean value that indicates whether the partition location is registered
/// with Lake Formation.
IsRegisteredWithLakeFormation: Boolean = false
}
@input
structure GetUnfilteredPartitionsMetadataRequest {
/// The ID of the Data Catalog where the partitions in question reside. If none is provided,
/// the AWS account ID is used by default.
@required
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the table that contains the partition.
@required
TableName: NameString
/// An expression that filters the partitions to be returned.
/// The expression uses SQL syntax similar to the SQL WHERE filter clause. The
/// SQL statement parser JSQLParser parses the expression.
///
/// Operators: The following are the operators that you can use in the Expression API call:
///
/// - = : Checks whether the values of the two operands are equal; if yes, then the condition becomes true.
///   Example: Assume 'variable a' holds 10 and 'variable b' holds 20.
///   (a = b) is not true.
/// - < > : Checks whether the values of two operands are equal; if the values are not equal,
///   then the condition becomes true.
///   Example: (a < > b) is true.
/// - > : Checks whether the value of the left operand is greater than the value of the right
///   operand; if yes, then the condition becomes true.
///   Example: (a > b) is not true.
/// - < : Checks whether the value of the left operand is less than the value of the right
///   operand; if yes, then the condition becomes true.
///   Example: (a < b) is true.
/// - >= : Checks whether the value of the left operand is greater than or equal to the value
///   of the right operand; if yes, then the condition becomes true.
///   Example: (a >= b) is not true.
/// - <= : Checks whether the value of the left operand is less than or equal to the value of
///   the right operand; if yes, then the condition becomes true.
///   Example: (a <= b) is true.
/// - AND, OR, IN, BETWEEN, LIKE, NOT, IS NULL : Logical operators.
///
/// Supported Partition Key Types: The following are the supported partition keys:
/// string, date, timestamp, int, bigint, long, tinyint, smallint, decimal.
///
/// If a type is encountered that is not valid, an exception is thrown.
/// An illustrative filter expression follows this structure as a comment.
Expression: PredicateString
/// A structure containing Lake Formation audit context information.
AuditContext: AuditContext
/// A list of supported permission types.
@required
SupportedPermissionTypes: PermissionTypeList
/// A continuation token, if this is not the first call to retrieve
/// these partitions.
NextToken: Token
/// The segment of the table's partitions to scan in this request.
Segment: Segment
/// The maximum number of partitions to return in a single response.
MaxResults: PageSize
}
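// Illustrative sketch, not part of the model: a filter that uses the Expression grammar
// documented above. The partition keys year (string) and month (int) are hypothetical;
// only the operators and supported key types come from the doc comment.
//
//   Expression: "year = '2023' AND month BETWEEN 1 AND 6"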
@output
structure GetUnfilteredPartitionsMetadataResponse {
/// A list of requested partitions.
UnfilteredPartitions: UnfilteredPartitionList
/// A continuation token, if the returned list of partitions does not include the last
/// one.
NextToken: Token
}
@input
structure GetUnfilteredTableMetadataRequest {
/// The catalog ID where the table resides.
@required
CatalogId: CatalogIdString
/// (Required) Specifies the name of a database that contains the table.
@required
DatabaseName: NameString
/// (Required) Specifies the name of a table for which you are requesting metadata.
@required
Name: NameString
/// A structure containing Lake Formation audit context information.
AuditContext: AuditContext
/// (Required) A list of supported permission types.
@required
SupportedPermissionTypes: PermissionTypeList
}
@output
structure GetUnfilteredTableMetadataResponse {
/// A Table object containing the table metadata.
Table: Table
/// A list of column names that the user has been granted access to.
AuthorizedColumns: NameStringList
/// A Boolean value that indicates whether the partition location is registered
/// with Lake Formation.
IsRegisteredWithLakeFormation: Boolean = false
/// A list of column row filters.
CellFilters: ColumnRowFilterList
}
@input
structure GetUserDefinedFunctionRequest {
/// The ID of the Data Catalog where the function to be retrieved is located. If none is
/// provided, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the function is located.
@required
DatabaseName: NameString
/// The name of the function.
@required
FunctionName: NameString
}
@output
structure GetUserDefinedFunctionResponse {
/// The requested function definition.
UserDefinedFunction: UserDefinedFunction
}
@input
structure GetUserDefinedFunctionsRequest {
/// The ID of the Data Catalog where the functions to be retrieved are located. If none is
/// provided, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the functions are located. If none is provided, functions from all the
/// databases across the catalog will be returned.
DatabaseName: NameString
/// A function-name pattern string that filters the function
/// definitions returned.
@required
Pattern: NameString
/// A continuation token, if this is a continuation call.
NextToken: Token
/// The maximum number of functions to return in one response.
MaxResults: CatalogGetterPageSize
}
@output
structure GetUserDefinedFunctionsResponse {
/// A list of requested function definitions.
UserDefinedFunctions: UserDefinedFunctionList
/// A continuation token, if the list of functions returned does
/// not include the last requested function.
NextToken: Token
}
@input
structure GetWorkflowRequest {
/// The name of the workflow to retrieve.
@required
Name: NameString
/// Specifies whether to include a graph when returning the workflow resource metadata.
IncludeGraph: NullableBoolean
}
@output
structure GetWorkflowResponse {
/// The resource metadata for the workflow.
Workflow: Workflow
}
@input
structure GetWorkflowRunPropertiesRequest {
/// Name of the workflow which was run.
@required
Name: NameString
/// The ID of the workflow run whose run properties should be returned.
@required
RunId: IdString
}
@output
structure GetWorkflowRunPropertiesResponse {
/// The workflow run properties which were set during the specified run.
RunProperties: WorkflowRunProperties
}
@input
structure GetWorkflowRunRequest {
/// Name of the workflow being run.
@required
Name: NameString
/// The ID of the workflow run.
@required
RunId: IdString
/// Specifies whether to include the workflow graph in response or not.
IncludeGraph: NullableBoolean
}
@output
structure GetWorkflowRunResponse {
/// The requested workflow run metadata.
Run: WorkflowRun
}
@input
structure GetWorkflowRunsRequest {
/// Name of the workflow whose metadata of runs should be returned.
@required
Name: NameString
/// Specifies whether to include the workflow graph in response or not.
IncludeGraph: NullableBoolean
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The maximum number of workflow runs to be included in the response.
MaxResults: PageSize
}
@output
structure GetWorkflowRunsResponse {
/// A list of workflow run metadata objects.
Runs: WorkflowRuns
/// A continuation token, if not all requested workflow runs have been returned.
NextToken: GenericString
}
/// An encryption operation failed.
@error("client")
structure GlueEncryptionException {
/// The message describing the problem.
Message: MessageString
}
/// A structure for returning a resource policy.
structure GluePolicy {
/// Contains the requested policy document, in JSON format.
PolicyInJson: PolicyJsonString
/// Contains the hash value associated with this policy.
PolicyHash: HashString
/// The date and time at which the policy was created.
CreateTime: Timestamp
/// The date and time at which the policy was last updated.
UpdateTime: Timestamp
}
/// Specifies a user-defined schema when a schema cannot be determined by Glue.
structure GlueSchema {
/// Specifies the column definitions that make up a Glue schema.
Columns: GlueStudioSchemaColumnList
}
/// Specifies a single column in a Glue schema definition.
structure GlueStudioSchemaColumn {
/// The name of the column in the Glue Studio schema.
@required
Name: GlueStudioColumnNameString
/// The hive type for this column in the Glue Studio schema.
Type: ColumnTypeString
}
/// The database and table in the Glue Data Catalog that is used for input or output data.
structure GlueTable {
/// A database name in the Glue Data Catalog.
@required
DatabaseName: NameString
/// A table name in the Glue Data Catalog.
@required
TableName: NameString
/// A unique identifier for the Glue Data Catalog.
CatalogId: NameString
/// The name of the connection to the Glue Data Catalog.
ConnectionName: NameString
/// Additional options for the table. Currently there are two keys supported:
///
/// - pushDownPredicate: to filter on partitions without having to list and read all the files in your dataset.
/// - catalogPartitionPredicate: to use server-side partition pruning using partition indexes in the Glue Data Catalog.
AdditionalOptions: GlueTableAdditionalOptions
}
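// Illustrative sketch, not part of the model: an AdditionalOptions map using the two
// supported keys documented above. The predicate strings are hypothetical examples.
//
//   AdditionalOptions: {
//       "pushDownPredicate":         "year == '2023' and month == '06'",
//       "catalogPartitionPredicate": "year = '2023' AND month = '06'"
//   }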
/// Specifies the data store in the governed Glue Data Catalog.
structure GovernedCatalogSource {
/// The name of the data store.
@required
Name: NodeName
/// The database to read from.
@required
Database: EnclosedInStringProperty
/// The database table to read from.
@required
Table: EnclosedInStringProperty
/// Partitions satisfying this predicate are deleted. Files within the retention period in these partitions are not deleted. Set to "" – empty by default.
PartitionPredicate: EnclosedInStringProperty
/// Specifies additional connection options.
AdditionalOptions: S3SourceAdditionalOptions
}
/// Specifies a data target that writes to Amazon S3 using the Glue Data Catalog.
structure GovernedCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// A policy that specifies update behavior for the governed catalog.
SchemaChangePolicy: CatalogSchemaChangePolicy
}
/// A classifier that uses grok patterns.
structure GrokClassifier {
/// The name of the classifier.
@required
Name: NameString
/// An identifier of the data format that the classifier matches, such as Twitter, JSON, Omniture logs, and
/// so on.
@required
Classification: Classification
/// The time that this classifier was registered.
CreationTime: Timestamp
/// The time that this classifier was last updated.
LastUpdated: Timestamp
/// The version of this classifier.
Version: VersionId = 0
/// The grok pattern applied to a data store by this classifier.
/// For more information, see built-in patterns in Writing Custom Classifiers.
@required
GrokPattern: GrokPattern
/// Optional custom grok patterns defined by this classifier.
/// For more information, see custom patterns in Writing Custom Classifiers.
CustomPatterns: CustomPatterns
}
/// Specifies an Apache Hudi data source.
structure HudiTarget {
/// An array of Amazon S3 location strings for Hudi, each indicating the root folder in which the metadata files for a Hudi table reside. The Hudi folder may be located in a child folder of the root folder.
/// The crawler will scan all folders underneath a path for a Hudi folder.
Paths: PathList
/// The name of the connection to use to connect to the Hudi target. If your Hudi files are stored in buckets that require VPC authorization, you can set their connection properties here.
ConnectionName: ConnectionName
/// A list of glob patterns used to exclude from the crawl.
/// For more information, see Catalog Tables with a Crawler.
Exclusions: PathList
/// The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time.
MaximumTraversalDepth: NullableInteger
}
/// A structure that defines an Apache Iceberg metadata table to create in the catalog.
structure IcebergInput {
/// A required metadata operation. Can only be set to CREATE.
@required
MetadataOperation: MetadataOperation
/// The table version for the Iceberg table. Defaults to 2.
Version: VersionString
}
/// Specifies an Apache Iceberg data source where Iceberg tables are stored in Amazon S3.
structure IcebergTarget {
/// One or more Amazon S3 paths that contain Iceberg metadata folders, as s3://bucket/prefix.
Paths: PathList
/// The name of the connection to use to connect to the Iceberg target.
ConnectionName: ConnectionName
/// A list of glob patterns used to exclude from the crawl.
/// For more information, see Catalog Tables with a Crawler.
Exclusions: PathList
/// The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time.
MaximumTraversalDepth: NullableInteger
}
/// The same unique identifier was associated with two different records.
@error("client")
structure IdempotentParameterMismatchException {
/// A message describing the problem.
Message: MessageString
}
/// The blueprint is in an invalid state to perform a requested operation.
@error("client")
structure IllegalBlueprintStateException {
/// A message describing the problem.
Message: MessageString
}
/// The session is in an invalid state to perform a requested operation.
@error("client")
structure IllegalSessionStateException {
/// A message describing the problem.
Message: MessageString
}
/// The workflow is in an invalid state to perform a requested operation.
@error("client")
structure IllegalWorkflowStateException {
/// A message describing the problem.
Message: MessageString
}
@input
structure ImportCatalogToGlueRequest {
/// The ID of the catalog to import. Currently, this should be the Amazon Web Services account ID.
CatalogId: CatalogIdString
}
@output
structure ImportCatalogToGlueResponse {}
/// Specifies configuration properties for an importing labels task run.
structure ImportLabelsTaskRunProperties {
/// The Amazon Simple Storage Service (Amazon S3) path from where you will import the
/// labels.
InputS3Path: UriString
/// Indicates whether to overwrite your existing labels.
Replace: ReplaceBoolean = false
}
/// An internal service error occurred.
@error("server")
structure InternalServiceException {
/// A message describing the problem.
Message: MessageString
}
/// The input provided was not valid.
@error("client")
structure InvalidInputException {
/// A message describing the problem.
Message: MessageString
/// Indicates whether or not the exception relates to a federated source.
FromFederationSource: NullableBoolean
}
/// An error that indicates your data is in an invalid state.
@error("client")
structure InvalidStateException {
/// A message describing the problem.
Message: MessageString
}
/// Additional connection options for the connector.
structure JDBCConnectorOptions {
/// Extra condition clause to filter data from source. For example:
///
/// BillingCity='Mountain View'
///
/// When using a query instead of a table name, you should validate that the query works with the specified filterPredicate.
FilterPredicate: EnclosedInStringProperty
/// The name of an integer column that is used for partitioning. This option works only when it's included with lowerBound, upperBound, and numPartitions. This option works the same way as in the Spark SQL JDBC reader.
PartitionColumn: EnclosedInStringProperty
/// The minimum value of partitionColumn that is used to decide partition stride.
LowerBound: BoxedNonNegativeLong
/// The maximum value of partitionColumn that is used to decide partition stride.
UpperBound: BoxedNonNegativeLong
/// The number of partitions. This value, along with lowerBound (inclusive) and upperBound (exclusive), forms partition strides for generated WHERE clause expressions that are used to split the partitionColumn. An illustrative set of values follows this structure as a comment.
NumPartitions: BoxedNonNegativeLong
/// The name of the job bookmark keys on which to sort.
JobBookmarkKeys: EnclosedInStringProperties
/// Specifies an ascending or descending sort order.
JobBookmarkKeysSortOrder: EnclosedInStringProperty
/// Custom data type mapping that builds a mapping from a JDBC data type to a Glue data type. For example, the option "dataTypeMapping":{"FLOAT":"STRING"} maps data fields of JDBC type FLOAT into the Java String type by calling the ResultSet.getString() method of the driver, and uses it to build the Glue record. The ResultSet object is implemented by each driver, so the behavior is specific to the driver you use. Refer to the documentation for your JDBC driver to understand how the driver performs the conversions.
DataTypeMapping: JDBCDataTypeMapping
}
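// Illustrative sketch, not part of the model: a partitioned JDBC read using the options
// documented above. With these hypothetical values the reader splits the scan into
// numPartitions ranges over partitionColumn, with a stride of (1000 - 0) / 4 = 250,
// the same way the Spark SQL JDBC reader does.
//
//   PartitionColumn: "id"
//   LowerBound:      0
//   UpperBound:      1000
//   NumPartitions:   4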
/// Specifies a connector to a JDBC data source.
structure JDBCConnectorSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the connection that is associated with the connector.
@required
ConnectionName: EnclosedInStringProperty
/// The name of a connector that assists with accessing the data store in Glue Studio.
@required
ConnectorName: EnclosedInStringProperty
/// The type of connection, such as marketplace.jdbc or custom.jdbc, designating a connection to a JDBC data store.
@required
ConnectionType: EnclosedInStringProperty
/// Additional connection options for the connector.
AdditionalOptions: JDBCConnectorOptions
/// The name of the table in the data source.
ConnectionTable: EnclosedInStringPropertyWithQuote
/// The table or SQL query to get the data from. You can specify either ConnectionTable or query, but not both.
Query: SqlQuery
/// Specifies the data schema for the custom JDBC source.
OutputSchemas: GlueSchemas
}
/// Specifies a data target that writes to a JDBC data store using a connector.
structure JDBCConnectorTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of the connection that is associated with the connector.
@required
ConnectionName: EnclosedInStringProperty
/// The name of the table in the data target.
@required
ConnectionTable: EnclosedInStringPropertyWithQuote
/// The name of a connector that will be used.
@required
ConnectorName: EnclosedInStringProperty
/// The type of connection, such as marketplace.jdbc or custom.jdbc, designating a connection to a JDBC data target.
@required
ConnectionType: EnclosedInStringProperty
/// Additional connection options for the connector.
AdditionalOptions: AdditionalOptions
/// Specifies the data schema for the JDBC target.
OutputSchemas: GlueSchemas
}
/// Specifies a JDBC data store to crawl.
structure JdbcTarget {
/// The name of the connection to use to connect to the JDBC target.
ConnectionName: ConnectionName
/// The path of the JDBC target.
Path: Path
/// A list of glob patterns used to exclude from the crawl.
/// For more information, see Catalog Tables with a Crawler.
Exclusions: PathList
/// Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
/// If you do not need additional metadata, keep the field empty.
EnableAdditionalMetadata: EnableAdditionalMetadata
}
/// Specifies a job definition.
structure Job {
/// The name you assign to this job definition.
Name: NameString
/// A description of the job.
Description: DescriptionString
/// This field is reserved for future use.
LogUri: UriString
/// The name or Amazon Resource Name (ARN) of the IAM role associated with this job.
Role: RoleString
/// The time and date that this job definition was created.
CreatedOn: TimestampValue
/// The last point in time when this job definition was modified.
LastModifiedOn: TimestampValue
/// An ExecutionProperty specifying the maximum number of concurrent runs allowed
/// for this job.
ExecutionProperty: ExecutionProperty
/// The JobCommand that runs this job.
Command: JobCommand
/// The default arguments for every run of this job, specified as name-value pairs.
/// You can specify arguments here that your own job-execution script
/// consumes, as well as arguments that Glue itself consumes.
/// Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets
/// from a Glue Connection, Secrets Manager or other secret management
/// mechanism if you intend to keep them within the Job.
/// For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Spark jobs,
/// see the Special Parameters Used by Glue topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Ray
/// jobs, see Using
/// job parameters in Ray jobs in the developer guide.
DefaultArguments: GenericMap
/// Arguments for this job that are not overridden when providing job arguments
/// in a job run, specified as name-value pairs.
NonOverridableArguments: GenericMap
/// The connections used for this job.
Connections: ConnectionsList
/// The maximum number of times to retry this job after a JobRun fails.
MaxRetries: MaxRetries = 0
/// This field is deprecated. Use MaxCapacity instead.
/// The number of Glue data processing units (DPUs) allocated to runs of this job. You can
/// allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing
/// power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more information,
/// see the Glue pricing
/// page.
///
@deprecated(
message: "This property is deprecated, use MaxCapacity instead."
)
AllocatedCapacity: IntegerValue = 0
/// The job timeout in minutes. This is the maximum time that a job run
/// can consume resources before it is terminated and enters TIMEOUT
/// status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// For Glue version 1.0 or earlier jobs, using the standard worker type, the number of
/// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is
/// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB
/// of memory. For more information, see the
/// Glue pricing page.
/// For Glue version 2.0 or later jobs, you cannot specify a Maximum capacity.
/// Instead, you should specify a Worker type and the Number of workers.
/// Do not set MaxCapacity if using WorkerType and NumberOfWorkers.
/// The value that can be allocated for MaxCapacity depends on whether you are
/// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL
/// job:
///
/// - When you specify a Python shell job (JobCommand.Name="pythonshell"), you can
///   allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
/// - When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache
///   Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs.
///   The default is 10 DPUs. This job type cannot have a fractional DPU allocation.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when a job runs. Accepts a value of
/// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
/// - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
/// - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
/// - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
/// - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when a job runs.
NumberOfWorkers: NullableInteger
/// The name of the SecurityConfiguration structure to be used with this
/// job.
SecurityConfiguration: NameString
/// Specifies configuration properties of a job notification.
NotificationProperty: NotificationProperty
/// In Spark jobs, GlueVersion determines the versions of Apache Spark and Python
/// that Glue makes available in a job. The Python version indicates the version
/// supported for jobs of type Spark.
/// Ray jobs should set GlueVersion to 4.0 or greater. However,
/// the versions of Ray, Python and additional libraries available in your Ray job are determined
/// by the Runtime parameter of the Job command.
/// For more information about the available Glue versions and corresponding
/// Spark and Python versions, see Glue version in the developer
/// guide.
/// Jobs that are created without specifying a Glue version default to Glue 0.9.
GlueVersion: GlueVersionString
/// The representation of a directed acyclic graph on which both the Glue Studio visual component and Glue Studio code generation are based.
CodeGenConfigurationNodes: CodeGenConfigurationNodes
/// Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.
/// The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.
/// Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.
ExecutionClass: ExecutionClass
/// The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.
SourceControlDetails: SourceControlDetails
}
/// Defines a point that a job can resume processing.
structure JobBookmarkEntry {
/// The name of the job in question.
JobName: JobName
/// The version of the job.
Version: IntegerValue = 0
/// The run ID number.
Run: IntegerValue = 0
/// The attempt ID number.
Attempt: IntegerValue = 0
/// The unique run identifier associated with the previous job run.
PreviousRunId: RunId
/// The run ID number.
RunId: RunId
/// The bookmark itself.
JobBookmark: JsonValue
}
/// Specifies how job bookmark data should be encrypted.
structure JobBookmarksEncryption {
/// The encryption mode to use for job bookmarks data.
JobBookmarksEncryptionMode: JobBookmarksEncryptionMode
/// The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.
KmsKeyArn: KmsKeyArn
}
/// Specifies code that runs when a job is run.
structure JobCommand {
/// The name of the job command. For an Apache Spark ETL job, this must be
/// glueetl. For a Python shell job, it must be pythonshell.
/// For an Apache Spark streaming ETL job, this must be gluestreaming. For a Ray job,
/// this must be glueray.
Name: GenericString
/// Specifies the Amazon Simple Storage Service (Amazon S3) path to a script that runs a
/// job.
ScriptLocation: ScriptLocationString
/// The Python version being used to run a Python shell job. Allowed values are 2 or 3.
PythonVersion: PythonVersionString
/// In Ray jobs, Runtime is used to specify the versions of Ray, Python and additional
/// libraries available in your environment. This field is not used in other job types. For
/// supported runtime environment values, see Working with Ray jobs
/// in the Glue Developer Guide.
Runtime: RuntimeNameString
}
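// Illustrative sketch, not part of the model: JobCommand values for an Apache Spark ETL
// job, using the command names documented above. The script path is a hypothetical placeholder.
//
//   Name:           "glueetl"
//   ScriptLocation: "s3://my-bucket/scripts/transform.py"
//   PythonVersion:  "3"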
/// The details of a Job node present in the workflow.
structure JobNodeDetails {
/// The information for the job runs represented by the job node.
JobRuns: JobRunList
}
/// Contains information about a job run.
structure JobRun {
/// The ID of this job run.
Id: IdString
/// The number of the attempt to run this job.
Attempt: AttemptCount = 0
/// The ID of the previous run of this job. For example, the JobRunId specified
/// in the StartJobRun action.
PreviousRunId: IdString
/// The name of the trigger that started this job run.
TriggerName: NameString
/// The name of the job definition being used in this run.
JobName: NameString
/// The date and time at which this job run was started.
StartedOn: TimestampValue
/// The last time that this job run was modified.
LastModifiedOn: TimestampValue
/// The date and time that this job run completed.
CompletedOn: TimestampValue
/// The current state of the job run. For more information about the statuses of jobs that have terminated abnormally, see Glue Job Run Statuses.
JobRunState: JobRunState
/// The job arguments associated with this run. For this job run, they replace the default
/// arguments set in the job definition itself.
/// You can specify arguments here that your own job-execution script
/// consumes, as well as arguments that Glue itself consumes.
/// Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets
/// from a Glue Connection, Secrets Manager or other secret management
/// mechanism if you intend to keep them within the Job.
/// For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Spark jobs,
/// see the Special Parameters Used by Glue topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Ray
/// jobs, see Using
/// job parameters in Ray jobs in the developer guide.
Arguments: GenericMap
/// An error message associated with this job run.
ErrorMessage: ErrorString
/// A list of predecessors to this job run.
PredecessorRuns: PredecessorList
/// This field is deprecated. Use MaxCapacity instead.
/// The number of Glue data processing units (DPUs) allocated to this JobRun.
/// From 2 to 100 DPUs can be allocated; the default is 10. A DPU is a relative measure
/// of processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory.
/// For more information, see the Glue
/// pricing page.
@deprecated(
message: "This property is deprecated, use MaxCapacity instead."
)
AllocatedCapacity: IntegerValue = 0
/// The amount of time (in seconds) that the job run consumed resources.
ExecutionTime: ExecutionTime = 0
/// The JobRun timeout in minutes. This is the maximum time that a job run can
/// consume resources before it is terminated and enters TIMEOUT status. This value overrides the timeout value set in the parent job.
/// Streaming jobs do not have a timeout. The default for non-streaming jobs is 2,880 minutes (48 hours).
Timeout: Timeout
/// For Glue version 1.0 or earlier jobs, using the standard worker type, the number of
/// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is
/// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB
/// of memory. For more information, see the
/// Glue pricing page.
/// For Glue version 2.0+ jobs, you cannot specify a Maximum capacity.
/// Instead, you should specify a Worker type and the Number of workers.
/// Do not set MaxCapacity if using WorkerType and NumberOfWorkers.
/// The value that can be allocated for MaxCapacity depends on whether you are
/// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL
/// job:
///
/// - When you specify a Python shell job (JobCommand.Name="pythonshell"), you can
///   allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
/// - When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache
///   Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs.
///   The default is 10 DPUs. This job type cannot have a fractional DPU allocation.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when a job runs. Accepts a value of
/// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
/// - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
/// - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
/// - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
/// - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when a job runs.
NumberOfWorkers: NullableInteger
/// The name of the SecurityConfiguration structure to be used with this job
/// run.
SecurityConfiguration: NameString
/// The name of the log group for secure logging that can be server-side encrypted in Amazon
/// CloudWatch using KMS. This name can be /aws-glue/jobs/, in which case the
/// default encryption is NONE. If you add a role name and
/// SecurityConfiguration name (in other words,
/// /aws-glue/jobs-yourRoleName-yourSecurityConfigurationName/), then that security
/// configuration is used to encrypt the log group.
LogGroupName: GenericString
/// Specifies configuration properties of a job run notification.
NotificationProperty: NotificationProperty
/// In Spark jobs, GlueVersion determines the versions of Apache Spark and Python
/// that Glue makes available in a job. The Python version indicates the version
/// supported for jobs of type Spark.
/// Ray jobs should set GlueVersion to 4.0 or greater. However,
/// the versions of Ray, Python and additional libraries available in your Ray job are determined
/// by the Runtime parameter of the Job command.
/// For more information about the available Glue versions and corresponding
/// Spark and Python versions, see Glue version in the developer
/// guide.
/// Jobs that are created without specifying a Glue version default to Glue 0.9.
GlueVersion: GlueVersionString
/// This field populates only for Auto Scaling job runs, and represents the total time each executor ran during the lifecycle of a job run in seconds, multiplied by a DPU factor (1 for G.1X, 2 for G.2X, or 0.25 for G.025X workers). This value may be different than the executionEngineRuntime * MaxCapacity as in the case of Auto Scaling jobs, as the number of executors running at a given time may be less than the MaxCapacity. Therefore, it is possible that the value of DPUSeconds is less than executionEngineRuntime * MaxCapacity.
DPUSeconds: NullableDouble
/// Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.
/// The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.
/// Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.
ExecutionClass: ExecutionClass
}
/// Specifies information used to update an existing job definition. The previous job
/// definition is completely overwritten by this information.
structure JobUpdate {
/// Description of the job being defined.
Description: DescriptionString
/// This field is reserved for future use.
LogUri: UriString
/// The name or Amazon Resource Name (ARN) of the IAM role associated with this job
/// (required).
Role: RoleString
/// An ExecutionProperty specifying the maximum number of concurrent runs allowed
/// for this job.
ExecutionProperty: ExecutionProperty
/// The JobCommand that runs this job (required).
Command: JobCommand
/// The default arguments for every run of this job, specified as name-value pairs.
/// You can specify arguments here that your own job-execution script
/// consumes, as well as arguments that Glue itself consumes.
/// Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets
/// from a Glue Connection, Secrets Manager or other secret management
/// mechanism if you intend to keep them within the Job.
/// For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Spark jobs,
/// see the Special Parameters Used by Glue topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Ray
/// jobs, see Using
/// job parameters in Ray jobs in the developer guide.
DefaultArguments: GenericMap
/// Arguments for this job that are not overridden when providing job arguments
/// in a job run, specified as name-value pairs.
NonOverridableArguments: GenericMap
/// The connections used for this job.
Connections: ConnectionsList
/// The maximum number of times to retry this job if it fails.
MaxRetries: MaxRetries = 0
/// This field is deprecated. Use MaxCapacity instead.
/// The number of Glue data processing units (DPUs) to allocate to this job. You can
/// allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing
/// power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more information,
/// see the Glue pricing
/// page.
@deprecated(
message: "This property is deprecated, use MaxCapacity instead."
)
AllocatedCapacity: IntegerValue = 0
/// The job timeout in minutes. This is the maximum time that a job run
/// can consume resources before it is terminated and enters TIMEOUT
/// status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// For Glue version 1.0 or earlier jobs, using the standard worker type, the number of
/// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is
/// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB
/// of memory. For more information, see the
/// Glue pricing page.
/// For Glue version 2.0+ jobs, you cannot specify a Maximum capacity.
/// Instead, you should specify a Worker type and the Number of workers.
/// Do not set MaxCapacity if using WorkerType and NumberOfWorkers.
/// The value that can be allocated for MaxCapacity depends on whether you are
/// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL
/// job:
///
/// - When you specify a Python shell job (JobCommand.Name="pythonshell"), you can
///   allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
/// - When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache
///   Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs.
///   The default is 10 DPUs. This job type cannot have a fractional DPU allocation.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when a job runs. Accepts a value of
/// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
/// - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
/// - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
/// - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
/// - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when a job runs.
NumberOfWorkers: NullableInteger
/// The name of the SecurityConfiguration structure to be used with this
/// job.
SecurityConfiguration: NameString
/// Specifies the configuration properties of a job notification.
NotificationProperty: NotificationProperty
/// In Spark jobs, GlueVersion determines the versions of Apache Spark and Python
/// that Glue makes available in a job. The Python version indicates the version
/// supported for jobs of type Spark.
/// Ray jobs should set GlueVersion to 4.0 or greater. However,
/// the versions of Ray, Python and additional libraries available in your Ray job are determined
/// by the Runtime parameter of the Job command.
/// For more information about the available Glue versions and corresponding
/// Spark and Python versions, see Glue version in the developer
/// guide.
/// Jobs that are created without specifying a Glue version default to Glue 0.9.
GlueVersion: GlueVersionString
/// The representation of a directed acyclic graph on which both the Glue Studio visual component and Glue Studio code generation are based.
CodeGenConfigurationNodes: CodeGenConfigurationNodes
/// Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.
/// The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.
/// Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.
ExecutionClass: ExecutionClass
/// The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.
SourceControlDetails: SourceControlDetails
}
/// Specifies a transform that joins two datasets into one dataset using a comparison phrase on the specified data property keys. You can use inner, outer, left, right, left semi, and left anti joins.
structure Join {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: TwoInputs
/// Specifies the type of join to be performed on the datasets.
@required
JoinType: JoinType
/// A list of the two columns to be joined.
@required
Columns: JoinColumns
}
/// Specifies a column to be joined.
structure JoinColumn {
/// The column to be joined.
@required
From: EnclosedInStringProperty
/// The key of the column to be joined.
@required
Keys: GlueStudioPathList
}
/// A classifier for JSON content.
structure JsonClassifier {
/// The name of the classifier.
@required
Name: NameString
/// The time that this classifier was registered.
CreationTime: Timestamp
/// The time that this classifier was last updated.
LastUpdated: Timestamp
/// The version of this classifier.
Version: VersionId = 0
/// A JsonPath string defining the JSON data for the classifier to classify.
/// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom Classifiers.
@required
JsonPath: JsonPath
}
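// A minimal sketch, assuming the boto3 CreateClassifier API, of registering a JSON classifier
// whose JsonPath selects each element of a top-level array; the classifier name is a placeholder.
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.create_classifier(
//       JsonClassifier={"Name": "example-json-classifier", "JsonPath": "$[*]"}
//   )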
/// Additional options for streaming.
structure KafkaStreamingSourceOptions {
/// A list of bootstrap server URLs, for example, as b-1.vpc-test-2.o4q88o.c6.kafka.us-east-1.amazonaws.com:9094. This option must be specified in the API call or defined in the table metadata in the Data Catalog.
BootstrapServers: EnclosedInStringProperty
/// The protocol used to communicate with brokers. The possible values are "SSL" or "PLAINTEXT".
SecurityProtocol: EnclosedInStringProperty
/// The name of the connection.
ConnectionName: EnclosedInStringProperty
/// The topic name as specified in Apache Kafka. You must specify at least one of "topicName", "assign" or "subscribePattern".
TopicName: EnclosedInStringProperty
/// The specific TopicPartitions to consume. You must specify at least one of "topicName", "assign" or "subscribePattern".
Assign: EnclosedInStringProperty
/// A Java regex string that identifies the topic list to subscribe to. You must specify at least one of "topicName", "assign" or "subscribePattern".
SubscribePattern: EnclosedInStringProperty
/// An optional classification.
Classification: EnclosedInStringProperty
/// Specifies the delimiter character.
Delimiter: EnclosedInStringProperty
/// The starting position in the Kafka topic to read data from. The possible values are "earliest" or "latest". The default value is "latest".
StartingOffsets: EnclosedInStringProperty
/// The end point when a batch query is ended. Possible values are either "latest" or a JSON string that specifies an ending offset for each TopicPartition.
EndingOffsets: EnclosedInStringProperty
/// The timeout in milliseconds to poll data from Kafka in Spark job executors. The default value is 512.
PollTimeoutMs: BoxedNonNegativeLong
/// The number of times to retry before failing to fetch Kafka offsets. The default value is 3.
NumRetries: BoxedNonNegativeInt
/// The time in milliseconds to wait before retrying to fetch Kafka offsets. The default value is 10.
RetryIntervalMs: BoxedNonNegativeLong
/// The rate limit on the maximum number of offsets that are processed per trigger interval. The specified total number of offsets is proportionally split across topicPartitions of different volumes. The default value is null, which means that the consumer reads all offsets until the known latest offset.
MaxOffsetsPerTrigger: BoxedNonNegativeLong
/// The desired minimum number of partitions to read from Kafka. The default value is null, which means that the number of spark partitions is equal to the number of Kafka partitions.
MinPartitions: BoxedNonNegativeInt
/// Whether to include the Kafka headers. When the option is set to "true", the data output will contain an additional column named "glue_streaming_kafka_headers"
/// with type Array[Struct(key: String, value: String)]. The default value is "false".
/// This option is available in Glue version 3.0 or later only.
IncludeHeaders: BoxedBoolean
/// When this option is set to 'true', the data output will contain an additional column named "__src_timestamp" that indicates the time when the corresponding record was received by the topic. The default value is 'false'. This option is supported in Glue version 4.0 or later.
AddRecordTimestamp: EnclosedInStringProperty
/// When this option is set to 'true', for each batch, it will emit to CloudWatch the metrics for the duration between the oldest record received by the topic and the time it arrives in Glue. The metric's name is "glue.driver.streaming.maxConsumerLagInMs". The default value is 'false'. This option is supported in Glue version 4.0 or later.
EmitConsumerLagMetrics: EnclosedInStringProperty
/// The timestamp of the record in the Kafka topic to start reading data from. The possible values are a timestamp string in UTC format of the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00+08:00").
/// Only one of StartingTimestamp or StartingOffsets must be set.
StartingTimestamp: Iso8601DateTime
}
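// A hypothetical sketch of the options above expressed as a Python dict keyed by the member
// names in this structure (the connection and topic names are placeholders); it only illustrates
// how the members combine and is not an exact wire payload.
//
//   kafka_options = {
//       "ConnectionName": "example-kafka-connection",
//       "TopicName": "example-topic",     # at least one of TopicName, Assign, SubscribePattern
//       "StartingOffsets": "earliest",    # default is "latest"
//       "PollTimeoutMs": 512,
//       "NumRetries": 3,
//       "IncludeHeaders": True,           # Glue 3.0 or later only
//   }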
/// A partition key pair consisting of a name and a type.
structure KeySchemaElement {
/// The name of a partition key.
@required
Name: NameString
/// The type of a partition key.
@required
Type: ColumnTypeString
}
/// Additional options for the Amazon Kinesis streaming data source.
structure KinesisStreamingSourceOptions {
/// The URL of the Kinesis endpoint.
EndpointUrl: EnclosedInStringProperty
/// The name of the Kinesis data stream.
StreamName: EnclosedInStringProperty
/// An optional classification.
Classification: EnclosedInStringProperty
/// Specifies the delimiter character.
Delimiter: EnclosedInStringProperty
/// The starting position in the Kinesis data stream to read data from. The possible values are "latest", "trim_horizon", "earliest", or a timestamp string in UTC format in the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00-04:00"). The default value is "latest".
/// Note: Using a value that is a timestamp string in UTC format for "startingPosition" is supported only for Glue version 4.0 or later.
StartingPosition: StartingPosition
/// The maximum time spent in the job executor to fetch a record from the Kinesis data stream per shard, specified in milliseconds (ms). The default value is 1000.
MaxFetchTimeInMs: BoxedNonNegativeLong
/// The maximum number of records to fetch per shard in the Kinesis data stream. The default value is 100000.
MaxFetchRecordsPerShard: BoxedNonNegativeLong
/// The maximum number of records to fetch from the Kinesis data stream in each getRecords operation. The default value is 10000.
MaxRecordPerRead: BoxedNonNegativeLong
/// Adds a time delay between two consecutive getRecords operations. The default value is "False". This option is only configurable for Glue version 2.0 and above.
AddIdleTimeBetweenReads: BoxedBoolean
/// The minimum time delay between two consecutive getRecords operations, specified in ms. The default value is 1000. This option is only configurable for Glue version 2.0 and above.
IdleTimeBetweenReadsInMs: BoxedNonNegativeLong
/// The minimum time interval between two ListShards API calls for your script to consider resharding. The default value is 1s.
DescribeShardInterval: BoxedNonNegativeLong
/// The maximum number of retries for Kinesis Data Streams API requests. The default value is 3.
NumRetries: BoxedNonNegativeInt
/// The cool-off time period (specified in ms) before retrying the Kinesis Data Streams API call. The default value is 1000.
RetryIntervalMs: BoxedNonNegativeLong
/// The maximum cool-off time period (specified in ms) between two retries of a Kinesis Data Streams API call. The default value is 10000.
MaxRetryIntervalMs: BoxedNonNegativeLong
/// Avoids creating an empty microbatch job by checking for unread data in the Kinesis data stream before the batch is started. The default value is "False".
AvoidEmptyBatches: BoxedBoolean
/// The Amazon Resource Name (ARN) of the Kinesis data stream.
StreamArn: EnclosedInStringProperty
/// The Amazon Resource Name (ARN) of the role to assume using AWS Security Token Service (AWS STS). This role must have permissions for describe or read record operations for the Kinesis data stream. You must use this parameter when accessing a data stream in a different account. Used in conjunction with "awsSTSSessionName".
RoleArn: EnclosedInStringProperty
/// An identifier for the session assuming the role using AWS STS. You must use this parameter when accessing a data stream in a different account. Used in conjunction with "awsSTSRoleARN".
RoleSessionName: EnclosedInStringProperty
/// When this option is set to 'true', the data output will contain an additional column named "__src_timestamp" that indicates the time when the corresponding record was received by the stream. The default value is 'false'. This option is supported in Glue version 4.0 or later.
AddRecordTimestamp: EnclosedInStringProperty
/// When this option is set to 'true', for each batch, it will emit to CloudWatch the metrics for the duration between the oldest record received by the stream and the time it arrives in Glue. The metric's name is "glue.driver.streaming.maxConsumerLagInMs". The default value is 'false'. This option is supported in Glue version 4.0 or later.
EmitConsumerLagMetrics: EnclosedInStringProperty
/// The timestamp of the record in the Kinesis data stream to start reading data from. The possible values are a timestamp string in UTC format of the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00+08:00").
StartingTimestamp: Iso8601DateTime
}
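// A hypothetical sketch of the options above as a Python dict keyed by the member names in this
// structure (the stream ARN is a placeholder); again illustrative only, not an exact wire payload.
//
//   kinesis_options = {
//       "StreamArn": "arn:aws:kinesis:us-east-1:123456789012:stream/example-stream",  # placeholder
//       "StartingPosition": "trim_horizon",   # default is "latest"
//       "MaxFetchTimeInMs": 1000,
//       "MaxFetchRecordsPerShard": 100000,
//       "AddIdleTimeBetweenReads": True,      # Glue 2.0 and above
//       "IdleTimeBetweenReadsInMs": 1000,
//   }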
/// Specifies configuration properties for a labeling set generation task run.
structure LabelingSetGenerationTaskRunProperties {
/// The Amazon Simple Storage Service (Amazon S3) path where you will generate the labeling
/// set.
OutputS3Path: UriString
}
/// Specifies Lake Formation configuration settings for the crawler.
structure LakeFormationConfiguration {
/// Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
UseLakeFormationCredentials: NullableBoolean
/// Required for cross-account crawls. For crawls in the same account as the target data, this can be left as null.
AccountId: AccountId
}
/// When there are multiple versions of a blueprint and the latest version has some errors, this attribute indicates the last successful blueprint definition that is available with the service.
structure LastActiveDefinition {
/// The description of the blueprint.
Description: Generic512CharString
/// The date and time the blueprint was last modified.
LastModifiedOn: TimestampValue
/// A JSON string specifying the parameters for the blueprint.
ParameterSpec: BlueprintParameterSpec
/// Specifies a path in Amazon S3 where the blueprint is published by the Glue developer.
BlueprintLocation: GenericString
/// Specifies a path in Amazon S3 where the blueprint is copied when you create or update the blueprint.
BlueprintServiceLocation: GenericString
}
/// Status and error information about the most recent crawl.
structure LastCrawlInfo {
/// Status of the last crawl.
Status: LastCrawlStatus
/// If an error occurred, the error information about the last crawl.
ErrorMessage: DescriptionString
/// The log group for the last crawl.
LogGroup: LogGroup
/// The log stream for the last crawl.
LogStream: LogStream
/// The prefix for a message about this crawl.
MessagePrefix: MessagePrefix
/// The time at which the crawl started.
StartTime: Timestamp
}
/// Specifies data lineage configuration settings for the crawler.
structure LineageConfiguration {
/// Specifies whether data lineage is enabled for the crawler. Valid values are:
///
/// -
///
/// ENABLE: enables data lineage for the crawler
///
/// -
///
/// DISABLE: disables data lineage for the crawler
///
///
CrawlerLineageSettings: CrawlerLineageSettings
}
@input
structure ListBlueprintsRequest {
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The maximum size of a list to return.
MaxResults: PageSize
/// Filters the list by an Amazon Web Services resource tag.
Tags: TagsMap
}
@output
structure ListBlueprintsResponse {
/// List of names of blueprints in the account.
Blueprints: BlueprintNames
/// A continuation token, if not all blueprint names have been returned.
NextToken: GenericString
}
@input
structure ListCrawlersRequest {
/// The maximum size of a list to return.
MaxResults: PageSize
/// A continuation token, if this is a continuation request.
NextToken: Token
/// Specifies to return only these tagged resources.
Tags: TagsMap
}
@output
structure ListCrawlersResponse {
/// The names of all crawlers in the account, or the crawlers with the specified tags.
CrawlerNames: CrawlerNameList
/// A continuation token, if the returned list does not contain the
/// last metric available.
NextToken: Token
}
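// The NextToken fields above follow the usual continuation-token pattern. A small sketch,
// assuming a boto3 Glue client, of draining the paginated crawler list:
//
//   import boto3
//
//   glue = boto3.client("glue")
//   crawler_names, token = [], None
//   while True:
//       kwargs = {"MaxResults": 100}
//       if token:
//           kwargs["NextToken"] = token
//       page = glue.list_crawlers(**kwargs)
//       crawler_names.extend(page.get("CrawlerNames", []))
//       token = page.get("NextToken")
//       if not token:
//           break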
@input
structure ListCrawlsRequest {
/// The name of the crawler whose runs you want to retrieve.
@required
CrawlerName: NameString
/// The maximum number of results to return. The default is 20, and maximum is 100.
MaxResults: PageSize
/// Filters the crawls by the criteria you specify in a list of CrawlsFilter objects.
Filters: CrawlsFilterList
/// A continuation token, if this is a continuation call.
NextToken: Token
}
@output
structure ListCrawlsResponse {
/// A list of CrawlerHistory objects representing the crawl runs that meet your criteria.
Crawls: CrawlerHistoryList
/// A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.
NextToken: Token
}
@input
structure ListCustomEntityTypesRequest {
/// A paginated token to offset the results.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
/// A list of key-value pair tags.
Tags: TagsMap
}
@output
structure ListCustomEntityTypesResponse {
/// A list of CustomEntityType objects representing custom patterns.
CustomEntityTypes: CustomEntityTypes
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure ListDataQualityResultsRequest {
/// The filter criteria.
Filter: DataQualityResultFilterCriteria
/// A paginated token to offset the results.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
}
@output
structure ListDataQualityResultsResponse {
/// A list of DataQualityResultDescription objects.
@required
Results: DataQualityResultDescriptionList
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure ListDataQualityRuleRecommendationRunsRequest {
/// The filter criteria.
Filter: DataQualityRuleRecommendationRunFilter
/// A paginated token to offset the results.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
}
@output
structure ListDataQualityRuleRecommendationRunsResponse {
/// A list of DataQualityRuleRecommendationRunDescription objects.
Runs: DataQualityRuleRecommendationRunList
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure ListDataQualityRulesetEvaluationRunsRequest {
/// The filter criteria.
Filter: DataQualityRulesetEvaluationRunFilter
/// A paginated token to offset the results.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
}
@output
structure ListDataQualityRulesetEvaluationRunsResponse {
/// A list of DataQualityRulesetEvaluationRunDescription objects representing data quality ruleset runs.
Runs: DataQualityRulesetEvaluationRunList
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure ListDataQualityRulesetsRequest {
/// A paginated token to offset the results.
NextToken: PaginationToken
/// The maximum number of results to return.
MaxResults: PageSize
/// The filter criteria.
Filter: DataQualityRulesetFilterCriteria
/// A list of key-value pair tags.
Tags: TagsMap
}
@output
structure ListDataQualityRulesetsResponse {
/// A paginated list of rulesets for the specified list of Glue tables.
Rulesets: DataQualityRulesetList
/// A pagination token, if more results are available.
NextToken: PaginationToken
}
@input
structure ListDevEndpointsRequest {
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The maximum size of a list to return.
MaxResults: PageSize
/// Specifies to return only these tagged resources.
Tags: TagsMap
}
@output
structure ListDevEndpointsResponse {
/// The names of all the DevEndpoints in the account, or the
/// DevEndpoints with the specified tags.
DevEndpointNames: DevEndpointNameList
/// A continuation token, if the returned list does not contain the
/// last metric available.
NextToken: GenericString
}
@input
structure ListJobsRequest {
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The maximum size of a list to return.
MaxResults: PageSize
/// Specifies to return only these tagged resources.
Tags: TagsMap
}
@output
structure ListJobsResponse {
/// The names of all jobs in the account, or the jobs with the specified tags.
JobNames: JobNameList
/// A continuation token, if the returned list does not contain the
/// last metric available.
NextToken: GenericString
}
@input
structure ListMLTransformsRequest {
/// A continuation token, if this is a continuation request.
NextToken: PaginationToken
/// The maximum size of a list to return.
MaxResults: PageSize
/// A TransformFilterCriteria used to filter the machine learning transforms.
Filter: TransformFilterCriteria
/// A TransformSortCriteria used to sort the machine learning transforms.
Sort: TransformSortCriteria
/// Specifies to return only these tagged resources.
Tags: TagsMap
}
@output
structure ListMLTransformsResponse {
/// The identifiers of all the machine learning transforms in the account, or the
/// machine learning transforms with the specified tags.
@required
TransformIds: TransformIdList
/// A continuation token, if the returned list does not contain the
/// last metric available.
NextToken: PaginationToken
}
@output
structure ListRegistriesResponse {
/// An array of RegistryDetailedListItem objects containing minimal details of each registry.
Registries: RegistryListDefinition
/// A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.
NextToken: SchemaRegistryTokenString
}
@output
structure ListSchemasResponse {
/// An array of SchemaListItem objects containing details of each schema.
Schemas: SchemaListDefinition
/// A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.
NextToken: SchemaRegistryTokenString
}
@output
structure ListSchemaVersionsResponse {
/// An array of SchemaVersionList objects containing details of each schema version.
Schemas: SchemaVersionList
/// A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.
NextToken: SchemaRegistryTokenString
}
@input
structure ListSessionsRequest {
/// The token for the next set of results, or null if there are no more results.
NextToken: OrchestrationToken
/// The maximum number of results.
MaxResults: PageSize
/// Tags belonging to the session.
Tags: TagsMap
/// The origin of the request.
RequestOrigin: OrchestrationNameString
}
@output
structure ListSessionsResponse {
/// Returns the ID of the session.
Ids: SessionIdList
/// Returns the session object.
Sessions: SessionList
/// The token for the next set of results, or null if there are no more results.
NextToken: OrchestrationToken
}
@input
structure ListStatementsRequest {
/// The Session ID of the statements.
@required
SessionId: NameString
/// The origin of the request to list statements.
RequestOrigin: OrchestrationNameString
/// A continuation token, if this is a continuation call.
NextToken: OrchestrationToken
}
@output
structure ListStatementsResponse {
/// Returns the list of statements.
Statements: StatementList
/// A continuation token, if not all statements have yet been returned.
NextToken: OrchestrationToken
}
@input
structure ListTriggersRequest {
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The name of the job for which to retrieve triggers. The trigger that can start this job
/// is returned. If there is no such trigger, all triggers are returned.
DependentJobName: NameString
/// The maximum size of a list to return.
MaxResults: PageSize
/// Specifies to return only these tagged resources.
Tags: TagsMap
}
@output
structure ListTriggersResponse {
/// The names of all triggers in the account, or the triggers with the specified tags.
TriggerNames: TriggerNameList
/// A continuation token, if the returned list does not contain the
/// last metric available.
NextToken: GenericString
}
@input
structure ListWorkflowsRequest {
/// A continuation token, if this is a continuation request.
NextToken: GenericString
/// The maximum size of a list to return.
MaxResults: PageSize
}
@output
structure ListWorkflowsResponse {
/// List of names of workflows in the account.
Workflows: WorkflowNames
/// A continuation token, if not all workflow names have been returned.
NextToken: GenericString
}
/// The location of resources.
structure Location {
/// A JDBC location.
Jdbc: CodeGenNodeArgs
/// An Amazon Simple Storage Service (Amazon S3) location.
S3: CodeGenNodeArgs
/// An Amazon DynamoDB table location.
DynamoDB: CodeGenNodeArgs
}
/// Defines column statistics supported for integer data columns.
structure LongColumnStatisticsData {
/// The lowest value in the column.
MinimumValue: Long = 0
/// The highest value in the column.
MaximumValue: Long = 0
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
/// The number of distinct values in a column.
@required
NumberOfDistinctValues: NonNegativeLong = 0
}
/// Specifies the mapping of data property keys.
structure Mapping {
/// After the apply mapping, what the name of the column should be. Can be the same as FromPath.
ToKey: EnclosedInStringProperty
/// The table or column to be modified.
FromPath: EnclosedInStringProperties
/// The type of the data to be modified.
FromType: EnclosedInStringProperty
/// The data type that the data is to be modified to.
ToType: EnclosedInStringProperty
/// If true, then the column is removed.
Dropped: BoxedBoolean
/// Only applicable to nested data structures. If you want to change the parent structure, but also one of its children, you can fill out this data structure. It is also Mapping, but its FromPath will be the parent's FromPath plus the FromPath from this structure.
/// For the children part, suppose you have the structure:
///
/// {
/// "FromPath": "OuterStructure",
/// "ToKey": "OuterStructure",
/// "ToType": "Struct",
/// "Dropped": false,
/// "Chidlren": [{
/// "FromPath": "inner",
/// "ToKey": "inner",
/// "ToType": "Double",
/// "Dropped": false,
/// }]
/// }
///
/// You can specify a Mapping that looks like:
///
/// {
/// "FromPath": "OuterStructure",
/// "ToKey": "OuterStructure",
/// "ToType": "Struct",
/// "Dropped": false,
/// "Chidlren": [{
/// "FromPath": "inner",
/// "ToKey": "inner",
/// "ToType": "Double",
/// "Dropped": false,
/// }]
/// }
///
Children: Mappings
}
/// Defines a mapping.
structure MappingEntry {
/// The name of the source table.
SourceTable: TableName
/// The source path.
SourcePath: SchemaPathString
/// The source type.
SourceType: FieldType
/// The target table.
TargetTable: TableName
/// The target path.
TargetPath: SchemaPathString
/// The target type.
TargetType: FieldType
}
/// Specifies a transform that merges a DynamicFrame with a staging DynamicFrame based on the specified primary keys to identify records. Duplicate records (records with the same primary keys) are not de-duplicated.
structure Merge {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: TwoInputs
/// The source DynamicFrame that will be merged with a staging DynamicFrame.
@required
Source: NodeId
/// The list of primary key fields to match records from the source and staging dynamic frames.
@required
PrimaryKeys: GlueStudioPathList
}
/// A structure containing metadata information for a schema version.
structure MetadataInfo {
/// The metadata key’s corresponding value.
MetadataValue: MetadataValueString
/// The time at which the entry was created.
CreatedTime: CreatedTimestamp
/// Other metadata belonging to the same metadata key.
OtherMetadataValueList: OtherMetadataValueList
}
/// A structure containing a key value pair for metadata.
structure MetadataKeyValuePair {
/// A metadata key.
MetadataKey: MetadataKeyString
/// A metadata key’s corresponding value.
MetadataValue: MetadataValueString
}
/// Specifies a Microsoft SQL server data source in the Glue Data Catalog.
structure MicrosoftSQLServerCatalogSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
/// Specifies a target that uses Microsoft SQL.
structure MicrosoftSQLServerCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
}
/// A structure for a machine learning transform.
structure MLTransform {
/// The unique transform ID that is generated for the machine learning transform. The ID is
/// guaranteed to be unique and does not change.
TransformId: HashString
/// A user-defined name for the machine learning transform. Names are not guaranteed unique
/// and can be changed at any time.
Name: NameString
/// A user-defined, long-form description text for the machine learning transform.
/// Descriptions are not guaranteed to be unique and can be changed at any time.
Description: DescriptionString
/// The current status of the machine learning transform.
Status: TransformStatusType
/// A timestamp. The time and date that this machine learning transform was created.
CreatedOn: Timestamp
/// A timestamp. The last point in time when this machine learning transform was modified.
LastModifiedOn: Timestamp
/// A list of Glue table definitions used by the transform.
InputRecordTables: GlueTables
/// A TransformParameters object. You can use parameters to tune (customize) the
/// behavior of the machine learning transform by specifying what data it learns from and your
/// preference on various tradeoffs (such as precision vs. recall, or accuracy vs. cost).
Parameters: TransformParameters
/// An EvaluationMetrics object. Evaluation metrics provide an estimate of the quality of your machine learning transform.
EvaluationMetrics: EvaluationMetrics
/// A count identifier for the labeling files generated by Glue for this transform. As you create a better transform, you can iteratively download, label, and upload the labeling file.
LabelCount: LabelCount = 0
/// A map of key-value pairs representing the columns and data types that this transform can
/// run against. Has an upper bound of 100 columns.
Schema: TransformSchema
/// The name or Amazon Resource Name (ARN) of the IAM role with the required permissions. The required permissions include both Glue service role permissions to Glue resources, and Amazon S3 permissions required by the transform.
///
/// -
///
/// This role needs Glue service role permissions to allow access to resources in Glue. See Attach a Policy to IAM Users That Access Glue.
///
/// -
///
/// This role needs permission to your Amazon Simple Storage Service (Amazon S3) sources, targets, temporary directory, scripts, and any libraries used by the task run for this transform.
///
///
Role: RoleString
/// This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.
GlueVersion: GlueVersionString
/// The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of
/// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more
/// information, see the Glue pricing
/// page.
///
/// MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.
///
/// -
///
/// If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
///
/// -
///
/// If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set.
///
/// -
///
/// If WorkerType is set, then NumberOfWorkers is required (and vice versa).
///
/// -
///
/// MaxCapacity and NumberOfWorkers must both be at least 1.
///
///
/// When the WorkerType field is set to a value other than Standard, the MaxCapacity field is set automatically and becomes read-only.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when a task of this transform runs. Accepts a value of Standard, G.1X, or G.2X.
///
/// -
///
/// For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
///
/// -
///
/// For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
///
/// -
///
/// For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.
///
///
///
/// MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType.
///
/// -
///
/// If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set.
///
/// -
///
/// If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set.
///
/// -
///
/// If WorkerType is set, then NumberOfWorkers is required (and vice versa).
///
/// -
///
/// MaxCapacity and NumberOfWorkers must both be at least 1.
///
///
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when a task of the transform runs.
/// If WorkerType is set, then NumberOfWorkers is required (and vice versa).
NumberOfWorkers: NullableInteger
/// The timeout in minutes of the machine learning transform.
Timeout: Timeout
/// The maximum number of times to retry after an MLTaskRun of the machine
/// learning transform fails.
MaxRetries: NullableInteger
/// The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.
TransformEncryption: TransformEncryption
}
/// The machine learning transform is not ready to run.
@error("client")
structure MLTransformNotReadyException {
/// A message describing the problem.
Message: MessageString
}
/// The encryption-at-rest settings of the transform that apply to accessing user data.
structure MLUserDataEncryption {
/// The encryption mode applied to user data. Valid values are:
///
/// -
///
/// DISABLED: encryption is disabled
///
/// -
///
/// SSEKMS: use of server-side encryption with Key Management Service (SSE-KMS) for user data stored in Amazon S3.
///
///
@required
MlUserDataEncryptionMode: MLUserDataEncryptionModeString
/// The ID for the customer-provided KMS key.
KmsKeyId: NameString
}
/// Specifies an Amazon DocumentDB or MongoDB data store to crawl.
structure MongoDBTarget {
/// The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
ConnectionName: ConnectionName
/// The path of the Amazon DocumentDB or MongoDB target (database/collection).
Path: Path
/// Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table.
/// A value of true means to scan all records, while a value of false means to sample the records. If no value is specified, the value defaults to true.
ScanAll: NullableBoolean
}
/// Specifies a MySQL data source in the Glue Data Catalog.
structure MySQLCatalogSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
/// Specifies a target that uses MySQL.
structure MySQLCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
}
/// A node represents a Glue component (trigger, crawler, or job) on a workflow graph.
structure Node {
/// The type of Glue component represented by the node.
Type: NodeType
/// The name of the Glue component represented by the node.
Name: NameString
/// The unique Id assigned to the node within the workflow.
UniqueId: NameString
/// Details of the Trigger when the node represents a Trigger.
TriggerDetails: TriggerNodeDetails
/// Details of the Job when the node represents a Job.
JobDetails: JobNodeDetails
/// Details of the crawler when the node represents a crawler.
CrawlerDetails: CrawlerNodeDetails
}
/// There is no applicable schedule.
@error("client")
structure NoScheduleException {
/// A message describing the problem.
Message: MessageString
}
/// Specifies configuration properties of a notification.
structure NotificationProperty {
/// After a job run starts, the number of minutes to wait before
/// sending a job run delay notification.
NotifyDelayAfter: NotifyDelayAfter
}
/// Represents whether certain values are recognized as null values for removal.
structure NullCheckBoxList {
/// Specifies that an empty string is considered as a null value.
IsEmpty: BoxedBoolean
/// Specifies that a value spelling out the word 'null' is considered as a null value.
IsNullString: BoxedBoolean
/// Specifies that an integer value of -1 is considered as a null value.
IsNegOne: BoxedBoolean
}
/// Represents a custom null value such as zeros or another value being used as a null placeholder unique to the dataset.
structure NullValueField {
/// The value of the null placeholder.
@required
Value: EnclosedInStringProperty
/// The datatype of the value.
@required
Datatype: Datatype
}
/// A structure representing an open format table.
structure OpenTableFormatInput {
/// Specifies an IcebergInput structure that defines an Apache Iceberg metadata table.
IcebergInput: IcebergInput
}
/// The operation timed out.
@error("client")
structure OperationTimeoutException {
/// A message describing the problem.
Message: MessageString
}
/// Specifies an option value.
structure Option {
/// Specifies the value of the option.
Value: EnclosedInStringProperty
/// Specifies the label of the option.
Label: EnclosedInStringProperty
/// Specifies the description of the option.
Description: EnclosedInStringProperty
}
/// Specifies an Oracle data source in the Glue Data Catalog.
structure OracleSQLCatalogSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
/// Specifies a target that uses Oracle SQL.
structure OracleSQLCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
}
/// Specifies the sort order of a sorted column.
structure Order {
/// The name of the column.
@required
Column: NameString
/// Indicates that the column is sorted in ascending order
/// (== 1), or in descending order (== 0).
@required
SortOrder: IntegerFlag = 0
}
/// A structure containing other metadata for a schema version belonging to the same metadata key.
structure OtherMetadataValueListItem {
/// The metadata key’s corresponding value for the other metadata belonging to the same metadata key.
MetadataValue: MetadataValueString
/// The time at which the entry was created.
CreatedTime: CreatedTimestamp
}
/// Represents a slice of table data.
structure Partition {
/// The values of the partition.
Values: ValueStringList
/// The name of the catalog database in which to create the partition.
DatabaseName: NameString
/// The name of the database table in which to create the partition.
TableName: NameString
/// The time at which the partition was created.
CreationTime: Timestamp
/// The last time at which the partition was accessed.
LastAccessTime: Timestamp
/// Provides information about the physical
/// location where the partition is stored.
StorageDescriptor: StorageDescriptor
/// These key-value pairs define partition parameters.
Parameters: ParametersMap
/// The last time at which column statistics were computed for this
/// partition.
LastAnalyzedTime: Timestamp
/// The ID of the Data Catalog in which the partition resides.
CatalogId: CatalogIdString
}
/// Contains information about a partition error.
structure PartitionError {
/// The values that define the partition.
PartitionValues: ValueStringList
/// The details about the partition error.
ErrorDetail: ErrorDetail
}
/// A structure for a partition index.
structure PartitionIndex {
/// The keys for the partition index.
@required
Keys: KeyList
/// The name of the partition index.
@required
IndexName: NameString
}
/// A descriptor for a partition index in a table.
structure PartitionIndexDescriptor {
/// The name of the partition index.
@required
IndexName: NameString
/// A list of one or more keys, as KeySchemaElement structures, for the partition index.
@required
Keys: KeySchemaElementList
/// The status of the partition index.
/// The possible statuses are:
///
/// -
///
/// CREATING: The index is being created. When an index is in a CREATING state, the index or its table cannot be deleted.
///
/// -
///
/// ACTIVE: The index creation succeeds.
///
/// -
///
/// FAILED: The index creation fails.
///
/// -
///
/// DELETING: The index is deleted from the list of indexes.
///
///
@required
IndexStatus: PartitionIndexStatus
/// A list of errors that can occur when registering partition indexes for an existing table.
BackfillErrors: BackfillErrors
}
/// The structure used to create and update a partition.
structure PartitionInput {
/// The values of the partition. Although this parameter is not required by the SDK, you must specify this parameter for a valid input.
/// The values for the keys for the new partition must be passed as an array of String objects that must be ordered in the same order as the partition keys appearing in the Amazon S3 prefix. Otherwise Glue will add the values to the wrong keys.
Values: ValueStringList
/// The last time at which the partition was accessed.
LastAccessTime: Timestamp
/// Provides information about the physical
/// location where the partition is stored.
StorageDescriptor: StorageDescriptor
/// These key-value pairs define partition parameters.
Parameters: ParametersMap
/// The last time at which column statistics were computed for this partition.
LastAnalyzedTime: Timestamp
}
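// A hypothetical sketch of a PartitionInput payload as a Python dict; the values must be ordered
// the same way as the table's partition keys (assumed here to be year, month, day), and the
// bucket path is a placeholder.
//
//   partition_input = {
//       "Values": ["2023", "04", "04"],
//       "StorageDescriptor": {
//           "Location": "s3://example-bucket/table/year=2023/month=04/day=04/",
//       },
//       "Parameters": {"source": "example"},
//   }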
/// Contains a list of values defining partitions.
structure PartitionValueList {
/// The list of values.
@required
Values: ValueStringList
}
/// The operation failed because there is a mismatch between the SupportedPermissionType used in the query request and the permissions defined on the target table.
@error("client")
structure PermissionTypeMismatchException {
/// There is a mismatch between the SupportedPermissionType used in the query request
/// and the permissions defined on the target table.
Message: MessageString
}
/// Specifies the physical requirements for a connection.
structure PhysicalConnectionRequirements {
/// The subnet ID used by the connection.
SubnetId: NameString
/// The security group ID list used by the connection.
SecurityGroupIdList: SecurityGroupIdList
/// The connection's Availability Zone. This field is redundant because the specified subnet
/// implies the Availability Zone to be used. Currently the field must be populated, but it will
/// be deprecated in the future.
AvailabilityZone: NameString
}
/// Specifies a transform that identifies, removes or masks PII data.
structure PIIDetection {
/// The name of the transform node.
@required
Name: NodeName
/// The node ID inputs to the transform.
@required
Inputs: OneInput
/// Indicates the type of PIIDetection transform.
@required
PiiType: PiiType
/// Indicates the types of entities the PIIDetection transform will identify as PII data.
///
/// PII type entities include: PERSON_NAME, DATE, USA_SNN, EMAIL, USA_ITIN, USA_PASSPORT_NUMBER, PHONE_NUMBER, BANK_ACCOUNT,
/// IP_ADDRESS, MAC_ADDRESS, USA_CPT_CODE, USA_HCPCS_CODE, USA_NATIONAL_DRUG_CODE, USA_MEDICARE_BENEFICIARY_IDENTIFIER,
/// USA_HEALTH_INSURANCE_CLAIM_NUMBER, CREDIT_CARD, USA_NATIONAL_PROVIDER_IDENTIFIER, USA_DEA_NUMBER, USA_DRIVING_LICENSE
///
@required
EntityTypesToDetect: EnclosedInStringProperties
/// Indicates the output column name that will contain any entity type detected in that row.
OutputColumnName: EnclosedInStringProperty
/// Indicates the fraction of the data to sample when scanning for PII entities.
SampleFraction: BoxedDoubleFraction
/// Indicates the fraction of the data that must be met in order for a column to be identified as PII data.
ThresholdFraction: BoxedDoubleFraction
/// Indicates the value that will replace the detected entity.
MaskValue: MaskValue
}
/// Specifies a PostgreSQL data source in the Glue Data Catalog.
structure PostgreSQLCatalogSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
/// Specifies a target that uses PostgreSQL.
structure PostgreSQLCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
}
/// A job run that was used in the predicate of a conditional trigger
/// that triggered this job run.
structure Predecessor {
/// The name of the job definition used by the predecessor job run.
JobName: NameString
/// The job-run ID of the predecessor job run.
RunId: IdString
}
/// Defines the predicate of the trigger, which determines when it fires.
structure Predicate {
/// An optional field if only one condition is listed. If multiple conditions are listed, then
/// this field is required.
Logical: Logical
/// A list of the conditions that determine when the trigger will fire.
Conditions: ConditionList
}
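// A minimal sketch of a conditional-trigger predicate as it might be passed to CreateTrigger,
// assuming the Condition shape's JobName, State, and LogicalOperator members; the job names are
// placeholders. Logical is required here because more than one condition is listed.
//
//   predicate = {
//       "Logical": "AND",
//       "Conditions": [
//           {"LogicalOperator": "EQUALS", "JobName": "job-a", "State": "SUCCEEDED"},
//           {"LogicalOperator": "EQUALS", "JobName": "job-b", "State": "SUCCEEDED"},
//       ],
//   }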
/// Permissions granted to a principal.
structure PrincipalPermissions {
/// The principal who is granted permissions.
Principal: DataLakePrincipal
/// The permissions that are granted to the principal.
Permissions: PermissionList
}
/// Defines a property predicate.
structure PropertyPredicate {
/// The key of the property.
Key: ValueString
/// The value of the property.
Value: ValueString
/// The comparator used to compare this property to others.
Comparator: Comparator
}
@input
structure PutDataCatalogEncryptionSettingsRequest {
/// The ID of the Data Catalog to set the security configuration for. If none is provided, the
/// Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The security configuration to set.
@required
DataCatalogEncryptionSettings: DataCatalogEncryptionSettings
}
@output
structure PutDataCatalogEncryptionSettingsResponse {}
@input
structure PutResourcePolicyRequest {
/// Contains the policy document to set, in JSON format.
@required
PolicyInJson: PolicyJsonString
/// Do not use. For internal use only.
ResourceArn: GlueResourceArn
/// The hash value returned when the previous policy was set using
/// PutResourcePolicy. Its purpose is to prevent concurrent modifications of a
/// policy. Do not use this parameter if no previous policy has been set.
PolicyHashCondition: HashString
/// A value of MUST_EXIST is used to update a policy. A value of
/// NOT_EXIST is used to create a new policy. If a value of NONE or a
/// null value is used, the call does not depend on the existence of a policy.
PolicyExistsCondition: ExistCondition
/// If 'TRUE', indicates that you are using both methods to grant cross-account
/// access to Data Catalog resources:
///
/// -
///
/// By directly updating the resource policy with PutResourcePolicy
///
///
/// -
///
/// By using the Grant permissions command on the Amazon Web Services Management Console.
///
///
/// Must be set to 'TRUE' if you have already used the Management Console to
/// grant cross-account access, otherwise the call fails. Default is 'FALSE'.
EnableHybrid: EnableHybridValues
}
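// A minimal sketch, assuming a boto3 Glue client, of updating an existing resource policy while
// guarding against concurrent modification with the hash returned by the previous call; the
// policy statement, account values, and hash are placeholders.
//
//   import json
//   import boto3
//
//   glue = boto3.client("glue")
//   policy_document = {
//       "Version": "2012-10-17",
//       "Statement": [{
//           "Effect": "Allow",
//           "Principal": {"AWS": "arn:aws:iam::123456789012:root"},   # placeholder
//           "Action": "glue:GetTable",
//           "Resource": "arn:aws:glue:us-east-1:123456789012:*",      # placeholder
//       }],
//   }
//   previous_policy_hash = "example-policy-hash"   # value returned when the policy was last set (placeholder)
//   glue.put_resource_policy(
//       PolicyInJson=json.dumps(policy_document),
//       PolicyHashCondition=previous_policy_hash,
//       PolicyExistsCondition="MUST_EXIST",        # update only; NOT_EXIST would create a new policy
//   )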
@output
structure PutResourcePolicyResponse {
/// A hash of the policy that has just been set. This must
/// be included in a subsequent call that overwrites or updates
/// this policy.
PolicyHash: HashString
}
@output
structure PutSchemaVersionMetadataResponse {
/// The Amazon Resource Name (ARN) for the schema.
SchemaArn: GlueResourceArn
/// The name for the schema.
SchemaName: SchemaRegistryNameString
/// The name for the registry.
RegistryName: SchemaRegistryNameString
/// The latest version of the schema.
LatestVersion: LatestSchemaVersionBoolean = false
/// The version number of the schema.
VersionNumber: VersionLongNumber = 0
/// The unique version ID of the schema version.
SchemaVersionId: SchemaVersionIdString
/// The metadata key.
MetadataKey: MetadataKeyString
/// The value of the metadata key.
MetadataValue: MetadataValueString
}
@input
structure PutWorkflowRunPropertiesRequest {
/// Name of the workflow which was run.
@required
Name: NameString
/// The ID of the workflow run for which the run properties should be updated.
@required
RunId: IdString
/// The properties to put for the specified run.
@required
RunProperties: WorkflowRunProperties
}
@output
structure PutWorkflowRunPropertiesResponse {}
@output
structure QuerySchemaVersionMetadataResponse {
/// A map of a metadata key and associated values.
MetadataInfoMap: MetadataInfoMap
/// The unique version ID of the schema version.
SchemaVersionId: SchemaVersionIdString
/// A continuation token for paginating the returned list of tokens, returned if the current segment of the list is not the last.
NextToken: SchemaRegistryTokenString
}
/// A Glue Studio node that uses a Glue DataBrew recipe in Glue jobs.
structure Recipe {
/// The name of the Glue Studio node.
@required
Name: NodeName
/// The nodes that are inputs to the recipe node, identified by id.
@required
Inputs: OneInput
/// A reference to the DataBrew recipe used by the node.
@required
RecipeReference: RecipeReference
}
/// A reference to a Glue DataBrew recipe.
structure RecipeReference {
/// The ARN of the DataBrew recipe.
@required
RecipeArn: EnclosedInStringProperty
/// The RecipeVersion of the DataBrew recipe.
@required
RecipeVersion: RecipeVersion
}
/// When crawling an Amazon S3 data source after the first crawl is complete, specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run. For more information, see Incremental Crawls in Glue in the developer guide.
structure RecrawlPolicy {
/// Specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run.
/// A value of CRAWL_EVERYTHING specifies crawling the entire dataset again.
/// A value of CRAWL_NEW_FOLDERS_ONLY specifies crawling only folders that were added since the last crawler run.
/// A value of CRAWL_EVENT_MODE specifies crawling only the changes identified by Amazon S3 events.
RecrawlBehavior: RecrawlBehavior
}
/// Specifies an Amazon Redshift data store.
structure RedshiftSource {
/// The name of the Amazon Redshift data store.
@required
Name: NodeName
/// The database to read from.
@required
Database: EnclosedInStringProperty
/// The database table to read from.
@required
Table: EnclosedInStringProperty
/// The Amazon S3 path where temporary data can be staged when copying out of the database.
RedshiftTmpDir: EnclosedInStringProperty
/// The IAM role with permissions.
TmpDirIAMRole: EnclosedInStringProperty
}
/// Specifies a target that uses Amazon Redshift.
structure RedshiftTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
/// The Amazon S3 path where temporary data can be staged when copying out of the database.
RedshiftTmpDir: EnclosedInStringProperty
/// The IAM role with permissions.
TmpDirIAMRole: EnclosedInStringProperty
/// The set of options to configure an upsert operation when writing to a Redshift target.
UpsertRedshiftOptions: UpsertRedshiftTargetOptions
}
@output
structure RegisterSchemaVersionResponse {
/// The unique ID that represents the version of this schema.
SchemaVersionId: SchemaVersionIdString
/// The version of this schema (for sync flow only, in case this is the first version).
VersionNumber: VersionLongNumber = 0
/// The status of the schema version.
Status: SchemaVersionStatus
}
/// A wrapper structure that may contain the registry name and Amazon Resource Name (ARN).
structure RegistryId {
/// Name of the registry. Used only for lookup. One of RegistryArn or RegistryName has to be provided.
RegistryName: SchemaRegistryNameString
/// Arn of the registry to be updated. One of RegistryArn or RegistryName has to be provided.
RegistryArn: GlueResourceArn
}
/// A structure containing the details for a registry.
structure RegistryListItem {
/// The name of the registry.
RegistryName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) of the registry.
RegistryArn: GlueResourceArn
/// A description of the registry.
Description: DescriptionString
/// The status of the registry.
Status: RegistryStatus
/// The date the registry was created.
CreatedTime: CreatedTimestamp
/// The date the registry was updated.
UpdatedTime: UpdatedTimestamp
}
/// Specifies a Relational database data source in the Glue Data Catalog.
structure RelationalCatalogSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
}
@output
structure RemoveSchemaVersionMetadataResponse {
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// The name of the schema.
SchemaName: SchemaRegistryNameString
/// The name of the registry.
RegistryName: SchemaRegistryNameString
/// The latest version of the schema.
LatestVersion: LatestSchemaVersionBoolean = false
/// The version number of the schema.
VersionNumber: VersionLongNumber = 0
/// The version ID for the schema version.
SchemaVersionId: SchemaVersionIdString
/// The metadata key.
MetadataKey: MetadataKeyString
/// The value of the metadata key.
MetadataValue: MetadataValueString
}
/// Specifies a transform that renames a single data property key.
structure RenameField {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A JSON path to a variable in the data structure for the source data.
@required
SourcePath: EnclosedInStringProperties
/// A JSON path to a variable in the data structure for the target data.
@required
TargetPath: EnclosedInStringProperties
}
@input
structure ResetJobBookmarkRequest {
/// The name of the job in question.
@required
JobName: JobName
/// The unique run identifier associated with this job run.
RunId: RunId
}
@output
structure ResetJobBookmarkResponse {
/// The reset bookmark entry.
JobBookmarkEntry: JobBookmarkEntry
}
/// A resource was not ready for a transaction.
@error("client")
structure ResourceNotReadyException {
/// A message describing the problem.
Message: MessageString
}
/// A resource numerical limit was exceeded.
@error("client")
structure ResourceNumberLimitExceededException {
/// A message describing the problem.
Message: MessageString
}
/// The URIs for function resources.
structure ResourceUri {
/// The type of the resource.
ResourceType: ResourceType
/// The URI for accessing the resource.
Uri: URI
}
@input
structure ResumeWorkflowRunRequest {
/// The name of the workflow to resume.
@required
Name: NameString
/// The ID of the workflow run to resume.
@required
RunId: IdString
/// A list of the node IDs for the nodes you want to restart. The nodes that are to be restarted must have a run attempt in the original run.
@required
NodeIds: NodeIdList
}
@output
structure ResumeWorkflowRunResponse {
/// The new ID assigned to the resumed workflow run. Each resume of a workflow run will have a new run ID.
RunId: IdString
/// A list of the node IDs for the nodes that were actually restarted.
NodeIds: NodeIdList
}
@input
structure RunStatementRequest {
/// The Session Id of the statement to be run.
@required
SessionId: NameString
/// The statement code to be run.
@required
Code: OrchestrationStatementCodeString
/// The origin of the request.
RequestOrigin: OrchestrationNameString
}
@output
structure RunStatementResponse {
/// Returns the Id of the statement that was run.
Id: IntegerValue = 0
}
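// A small sketch, assuming a boto3 Glue client and an already-created interactive session, of
// running a statement and then fetching it back by the returned Id; the session ID and code are
// placeholders.
//
//   import boto3
//
//   glue = boto3.client("glue")
//   run = glue.run_statement(SessionId="example-session-id", Code="df.count()")
//   statement = glue.get_statement(SessionId="example-session-id", Id=run["Id"])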
/// Specifies a Delta Lake data source that is registered in the Glue Data Catalog. The data source must be stored in Amazon S3.
structure S3CatalogDeltaSource {
/// The name of the Delta Lake data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
/// Specifies additional connection options.
AdditionalDeltaOptions: AdditionalOptions
/// Specifies the data schema for the Delta Lake source.
OutputSchemas: GlueSchemas
}
/// Specifies a Hudi data source that is registered in the Glue Data Catalog. The Hudi data source must be stored in Amazon S3.
structure S3CatalogHudiSource {
/// The name of the Hudi data source.
@required
Name: NodeName
/// The name of the database to read from.
@required
Database: EnclosedInStringProperty
/// The name of the table in the database to read from.
@required
Table: EnclosedInStringProperty
/// Specifies additional connection options.
AdditionalHudiOptions: AdditionalOptions
/// Specifies the data schema for the Hudi source.
OutputSchemas: GlueSchemas
}
/// Specifies an Amazon S3 data store in the Glue Data Catalog.
structure S3CatalogSource {
/// The name of the data store.
@required
Name: NodeName
/// The database to read from.
@required
Database: EnclosedInStringProperty
/// The database table to read from.
@required
Table: EnclosedInStringProperty
/// Partitions satisfying this predicate are deleted. Files within the retention period in these partitions are not deleted. Set to "" – empty by default.
PartitionPredicate: EnclosedInStringProperty
/// Specifies additional connection options.
AdditionalOptions: S3SourceAdditionalOptions
}
/// Specifies a data target that writes to Amazon S3 using the Glue Data Catalog.
structure S3CatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: CatalogSchemaChangePolicy
}
/// Specifies a comma-separated value (CSV) data store stored in Amazon S3.
structure S3CsvSource {
/// The name of the data store.
@required
Name: NodeName
/// A list of the Amazon S3 paths to read from.
@required
Paths: EnclosedInStringProperties
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
CompressionType: CompressionType
/// A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.
Exclusions: EnclosedInStringProperties
/// The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, "groupFiles" must be set to "inPartition" for this to take effect.
GroupSize: EnclosedInStringProperty
/// Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to "none".
GroupFiles: EnclosedInStringProperty
/// If set to true, recursively reads files in all subdirectories under the specified paths.
Recurse: BoxedBoolean
/// This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
MaxBand: BoxedNonNegativeInt
/// This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.
MaxFilesInBand: BoxedNonNegativeInt
/// Specifies additional connection options.
AdditionalOptions: S3DirectSourceAdditionalOptions
/// Specifies the delimiter character. The default is a comma: ",", but any other character can be specified.
@required
Separator: Separator
/// Specifies a character to use for escaping. This option is used only when reading CSV files. The default value is none. If enabled, the character which immediately follows is used as-is, except for a small set of well-known escapes (\n, \r, \t, and \0).
Escaper: EnclosedInStringPropertyWithQuote
/// Specifies the character to use for quoting. The default is a double quote: '"'. Set this to -1 to turn off quoting entirely.
@required
QuoteChar: QuoteChar
/// A Boolean value that specifies whether a single record can span multiple lines. This can occur when a field contains a quoted new-line character. You must set this option to True if any record spans multiple lines. The default value is False, which allows for more aggressive file-splitting during parsing.
Multiline: BoxedBoolean
/// A Boolean value that specifies whether to treat the first line as a header. The default value is False.
WithHeader: BoxedBoolean
/// A Boolean value that specifies whether to write the header to output. The default value is True.
WriteHeader: BoxedBoolean
/// A Boolean value that specifies whether to skip the first data line. The default value is False.
SkipFirst: BoxedBoolean
/// A Boolean value that specifies whether to use the advanced SIMD CSV reader along with Apache Arrow based columnar memory formats. Only available in Glue version 3.0.
OptimizePerformance: BooleanValue = false
/// Specifies the data schema for the S3 CSV source.
OutputSchemas: GlueSchemas
}
/// Specifies a target that writes to a Delta Lake data source in the Glue Data Catalog.
structure S3DeltaCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// Specifies additional connection options for the connector.
AdditionalOptions: AdditionalOptions
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: CatalogSchemaChangePolicy
}
/// Specifies a target that writes to a Delta Lake data source in Amazon S3.
structure S3DeltaDirectTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// The Amazon S3 path of your Delta Lake data source to write to.
@required
Path: EnclosedInStringProperty
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
@required
Compression: DeltaTargetCompressionType
/// Specifies the data output format for the target.
@required
Format: TargetFormat
/// Specifies additional connection options for the connector.
AdditionalOptions: AdditionalOptions
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: DirectSchemaChangePolicy
}
/// Specifies a Delta Lake data source stored in Amazon S3.
structure S3DeltaSource {
/// The name of the Delta Lake source.
@required
Name: NodeName
/// A list of the Amazon S3 paths to read from.
@required
Paths: EnclosedInStringProperties
/// Specifies additional connection options.
AdditionalDeltaOptions: AdditionalOptions
/// Specifies additional options for the connector.
AdditionalOptions: S3DirectSourceAdditionalOptions
/// Specifies the data schema for the Delta Lake source.
OutputSchemas: GlueSchemas
}
/// Specifies additional connection options for the Amazon S3 data store.
structure S3DirectSourceAdditionalOptions {
/// Sets the upper limit for the target size of the dataset in bytes that will be processed.
BoundedSize: BoxedLong
/// Sets the upper limit for the target number of files that will be processed.
BoundedFiles: BoxedLong
/// Sets option to enable a sample path.
EnableSamplePath: BoxedBoolean
/// If enabled, specifies the sample path.
SamplePath: EnclosedInStringProperty
}
/// Specifies a data target that writes to Amazon S3.
structure S3DirectTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// A single Amazon S3 path to write to.
@required
Path: EnclosedInStringProperty
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
Compression: EnclosedInStringProperty
/// Specifies the data output format for the target.
@required
Format: TargetFormat
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: DirectSchemaChangePolicy
}
/// Specifies how Amazon Simple Storage Service (Amazon S3) data should be encrypted.
structure S3Encryption {
/// The encryption mode to use for Amazon S3 data.
S3EncryptionMode: S3EncryptionMode
/// The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.
KmsKeyArn: KmsKeyArn
}
/// Specifies a data target that writes to Amazon S3 in Apache Parquet columnar storage.
structure S3GlueParquetTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// A single Amazon S3 path to write to.
@required
Path: EnclosedInStringProperty
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
Compression: ParquetCompressionType
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: DirectSchemaChangePolicy
}
/// Specifies a target that writes to a Hudi data source in the Glue Data Catalog.
structure S3HudiCatalogTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// The name of the table in the database to write to.
@required
Table: EnclosedInStringProperty
/// The name of the database to write to.
@required
Database: EnclosedInStringProperty
/// Specifies additional connection options for the connector.
@required
AdditionalOptions: AdditionalOptions
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: CatalogSchemaChangePolicy
}
/// Specifies a target that writes to a Hudi data source in Amazon S3.
structure S3HudiDirectTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The Amazon S3 path of your Hudi data source to write to.
@required
Path: EnclosedInStringProperty
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
@required
Compression: HudiTargetCompressionType
/// Specifies native partitioning using a sequence of keys.
PartitionKeys: GlueStudioPathList
/// Specifies the data output format for the target.
@required
Format: TargetFormat
/// Specifies additional connection options for the connector.
@required
AdditionalOptions: AdditionalOptions
/// A policy that specifies update behavior for the crawler.
SchemaChangePolicy: DirectSchemaChangePolicy
}
/// Specifies a Hudi data source stored in Amazon S3.
structure S3HudiSource {
/// The name of the Hudi source.
@required
Name: NodeName
/// A list of the Amazon S3 paths to read from.
@required
Paths: EnclosedInStringProperties
/// Specifies additional connection options.
AdditionalHudiOptions: AdditionalOptions
/// Specifies additional options for the connector.
AdditionalOptions: S3DirectSourceAdditionalOptions
/// Specifies the data schema for the Hudi source.
OutputSchemas: GlueSchemas
}
/// Specifies a JSON data store stored in Amazon S3.
structure S3JsonSource {
/// The name of the data store.
@required
Name: NodeName
/// A list of the Amazon S3 paths to read from.
@required
Paths: EnclosedInStringProperties
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
CompressionType: CompressionType
/// A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.
Exclusions: EnclosedInStringProperties
/// The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, "groupFiles" must be set to "inPartition" for this to take effect.
GroupSize: EnclosedInStringProperty
/// Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to "none".
GroupFiles: EnclosedInStringProperty
/// If set to true, recursively reads files in all subdirectories under the specified paths.
Recurse: BoxedBoolean
/// This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
MaxBand: BoxedNonNegativeInt
/// This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.
MaxFilesInBand: BoxedNonNegativeInt
/// Specifies additional connection options.
AdditionalOptions: S3DirectSourceAdditionalOptions
/// A JsonPath string defining the JSON data.
JsonPath: EnclosedInStringProperty
/// A Boolean value that specifies whether a single record can span multiple lines. This can occur when a field contains a quoted new-line character. You must set this option to True if any record spans multiple lines. The default value is False, which allows for more aggressive file-splitting during parsing.
Multiline: BoxedBoolean
/// Specifies the data schema for the S3 JSON source.
OutputSchemas: GlueSchemas
}
/// Specifies an Apache Parquet data store stored in Amazon S3.
structure S3ParquetSource {
/// The name of the data store.
@required
Name: NodeName
/// A list of the Amazon S3 paths to read from.
@required
Paths: EnclosedInStringProperties
/// Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are "gzip" and "bzip".
CompressionType: ParquetCompressionType
/// A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.
Exclusions: EnclosedInStringProperties
/// The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, "groupFiles" must be set to "inPartition" for this to take effect.
GroupSize: EnclosedInStringProperty
/// Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to "none".
GroupFiles: EnclosedInStringProperty
/// If set to true, recursively reads files in all subdirectories under the specified paths.
Recurse: BoxedBoolean
/// This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
MaxBand: BoxedNonNegativeInt
/// This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.
MaxFilesInBand: BoxedNonNegativeInt
/// Specifies additional connection options.
AdditionalOptions: S3DirectSourceAdditionalOptions
/// Specifies the data schema for the S3 Parquet source.
OutputSchemas: GlueSchemas
}
/// Specifies additional connection options for the Amazon S3 data store.
structure S3SourceAdditionalOptions {
/// Sets the upper limit for the target size of the dataset in bytes that will be processed.
BoundedSize: BoxedLong
/// Sets the upper limit for the target number of files that will be processed.
BoundedFiles: BoxedLong
}
/// Specifies a data store in Amazon Simple Storage Service (Amazon S3).
structure S3Target {
/// The path to the Amazon S3 target.
Path: Path
/// A list of glob patterns used to exclude from the crawl.
/// For more information, see Catalog Tables with a Crawler.
Exclusions: PathList
/// The name of a connection which allows a job or crawler to access data in Amazon S3 within an Amazon Virtual Private Cloud environment (Amazon VPC).
ConnectionName: ConnectionName
/// Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
SampleSize: NullableInteger
/// A valid Amazon SQS ARN. For example, arn:aws:sqs:region:account:sqs.
EventQueueArn: EventQueueArn
/// A valid Amazon dead-letter SQS ARN. For example, arn:aws:sqs:region:account:deadLetterQueue.
DlqEventQueueArn: EventQueueArn
}
/// A scheduling object using a cron statement to schedule an event.
structure Schedule {
/// A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify:
/// cron(15 12 * * ? *).
ScheduleExpression: CronExpression
/// The state of the schedule.
State: ScheduleState
}
/// The specified scheduler is not running.
@error("client")
structure SchedulerNotRunningException {
/// A message describing the problem.
Message: MessageString
}
/// The specified scheduler is already running.
@error("client")
structure SchedulerRunningException {
/// A message describing the problem.
Message: MessageString
}
/// The specified scheduler is transitioning.
@error("client")
structure SchedulerTransitioningException {
/// A message describing the problem.
Message: MessageString
}
/// A policy that specifies update and deletion behaviors for the crawler.
structure SchemaChangePolicy {
/// The update behavior when the crawler finds a changed schema.
UpdateBehavior: UpdateBehavior
/// The deletion behavior when the crawler finds a deleted object.
DeleteBehavior: DeleteBehavior
}
/// A key-value pair representing a column and data type that this transform can
/// run against. The Schema parameter of the MLTransform may contain up to 100 of these structures.
structure SchemaColumn {
/// The name of the column.
Name: ColumnNameString
/// The type of data in the column.
DataType: ColumnTypeString
}
/// The unique ID of the schema in the Glue schema registry.
structure SchemaId {
/// The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has to be provided.
SchemaArn: GlueResourceArn
/// The name of the schema. One of SchemaArn or SchemaName has to be provided.
SchemaName: SchemaRegistryNameString
/// The name of the schema registry that contains the schema.
RegistryName: SchemaRegistryNameString
}
/// An object that contains minimal details for a schema.
structure SchemaListItem {
/// The name of the registry where the schema resides.
RegistryName: SchemaRegistryNameString
/// The name of the schema.
SchemaName: SchemaRegistryNameString
/// The Amazon Resource Name (ARN) for the schema.
SchemaArn: GlueResourceArn
/// A description for the schema.
Description: DescriptionString
/// The status of the schema.
SchemaStatus: SchemaStatus
/// The date and time that a schema was created.
CreatedTime: CreatedTimestamp
/// The date and time that a schema was updated.
UpdatedTime: UpdatedTimestamp
}
/// An object that references a schema stored in the Glue Schema Registry.
structure SchemaReference {
/// A structure that contains schema identity fields. Either this or the SchemaVersionId has to be provided.
SchemaId: SchemaId
/// The unique ID assigned to a version of the schema. Either this or the SchemaId has to be provided.
SchemaVersionId: SchemaVersionIdString
/// The version number of the schema.
SchemaVersionNumber: VersionLongNumber = null
}
/// An object that contains the error details for an operation on a schema version.
structure SchemaVersionErrorItem {
/// The version number of the schema.
VersionNumber: VersionLongNumber = 0
/// The details of the error for the schema version.
ErrorDetails: ErrorDetails
}
/// An object containing the details about a schema version.
structure SchemaVersionListItem {
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// The unique identifier of the schema version.
SchemaVersionId: SchemaVersionIdString
/// The version number of the schema.
VersionNumber: VersionLongNumber = 0
/// The status of the schema version.
Status: SchemaVersionStatus
/// The date and time the schema version was created.
CreatedTime: CreatedTimestamp
}
/// A structure containing the schema version information.
structure SchemaVersionNumber {
/// The latest version available for the schema.
LatestVersion: LatestSchemaVersionBoolean = false
/// The version number of the schema.
VersionNumber: VersionLongNumber = 0
}
@input
structure SearchTablesRequest {
/// A unique identifier, consisting of account_id.
CatalogId: CatalogIdString
/// A continuation token, included if this is a continuation call.
NextToken: Token
/// A list of key-value pairs, and a comparator used to filter the search results. Returns all entities matching the predicate.
/// The Comparator member of the PropertyPredicate struct is used only for time fields, and can be omitted for other field types. Also, when comparing string values, such as when Key=Name, a fuzzy match algorithm is used. The Key field (for example, the value of the Name field) is split on certain punctuation characters, for example, -, :, #, etc. into tokens. Then each token is exact-match compared with the Value member of PropertyPredicate. For example, if Key=Name and Value=link, tables named customer-link and xx-link-yy are returned, but xxlinkyy is not returned.
Filters: SearchPropertyPredicates
/// A string used for a text search.
/// Specifying a value in quotes filters based on an exact match to the value.
SearchText: ValueString
/// A list of criteria for sorting the results by a field name, in an ascending or descending order.
SortCriteria: SortCriteria
/// The maximum number of tables to return in a single response.
MaxResults: PageSize
/// Allows you to specify that you want to search the tables shared with your account. The allowable values are FOREIGN or ALL.
///
/// - If set to FOREIGN, will search the tables shared with your account.
///
/// - If set to ALL, will search the tables shared with your account, as well as the tables in your local account.
ResourceShareType: ResourceShareType
}
@output
structure SearchTablesResponse {
/// A continuation token, present if the current list segment is not the last.
NextToken: Token
/// A list of the requested Table objects. The SearchTables response returns only the tables that you have access to.
TableList: TableList
}
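// Illustrative usage sketch (not part of the upstream model): a hypothetical SearchTables
// example showing the fuzzy Name filter described above. The filter, sort field, and the
// DESC sort value are placeholders chosen for illustration.
apply SearchTables @examples([
    {
        title: "Find shared and local tables whose name contains 'link'"
        input: {
            Filters: [
                {
                    Key: "Name"
                    Value: "link"
                }
            ]
            SortCriteria: [
                {
                    FieldName: "UpdateTime"
                    Sort: "DESC"
                }
            ]
            MaxResults: 25
            ResourceShareType: "ALL"
        }
        output: {
            TableList: []
        }
    }
])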
/// Specifies a security configuration.
structure SecurityConfiguration {
/// The name of the security configuration.
Name: NameString
/// The time at which this security configuration was created.
CreatedTimeStamp: TimestampValue
/// The encryption configuration associated with this security configuration.
EncryptionConfiguration: EncryptionConfiguration
}
/// Defines a non-overlapping region of a table's partitions, allowing
/// multiple requests to be run in parallel.
structure Segment {
/// The zero-based index number of the segment. For example, if the total number of segments
/// is 4, SegmentNumber values range from 0 through 3.
@required
SegmentNumber: NonNegativeInteger = 0
/// The total number of segments.
@required
TotalSegments: TotalSegmentsInteger = 0
}
/// Specifies a transform that chooses the data property keys that you want to keep.
structure SelectFields {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A JSON path to a variable in the data structure.
@required
Paths: GlueStudioPathList
}
/// Specifies a transform that chooses one DynamicFrame from a collection of DynamicFrames. The output is the selected DynamicFrame.
structure SelectFromCollection {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// The index for the DynamicFrame to be selected.
@required
Index: NonNegativeInt = 0
}
/// Information about a serialization/deserialization program (SerDe) that serves as an
/// extractor and loader.
structure SerDeInfo {
/// Name of the SerDe.
Name: NameString
/// Usually the class that implements the SerDe. An example is
/// org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.
SerializationLibrary: NameString
/// These key-value pairs define initialization parameters for the SerDe.
Parameters: ParametersMap
}
/// The period in which a remote Spark runtime environment is running.
structure Session {
/// The ID of the session.
Id: NameString
/// The time and date when the session was created.
CreatedOn: TimestampValue
/// The session status.
Status: SessionStatus
/// The error message displayed during the session.
ErrorMessage: DescriptionString
/// The description of the session.
Description: DescriptionString
/// The name or Amazon Resource Name (ARN) of the IAM role associated with the Session.
Role: OrchestrationRoleArn
/// The command object. See SessionCommand.
Command: SessionCommand
/// A map array of key-value pairs. Max is 75 pairs.
DefaultArguments: OrchestrationArgumentsMap
/// The number of connections used for the session.
Connections: ConnectionsList
/// The code execution progress of the session.
Progress: DoubleValue = 0
/// The number of Glue data processing units (DPUs) that can be allocated when the job runs.
/// A DPU is a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB memory.
MaxCapacity: NullableDouble
/// The name of the SecurityConfiguration structure to be used with the session.
SecurityConfiguration: NameString
/// The Glue version determines the versions of Apache Spark and Python that Glue supports.
/// The GlueVersion must be greater than 2.0.
GlueVersion: GlueVersionString
/// The number of workers of a defined WorkerType to use for the session.
NumberOfWorkers: NullableInteger
/// The type of predefined worker that is allocated when a session runs. Accepts a value of G.1X, G.2X, G.4X, or G.8X for Spark sessions. Accepts the value Z.2X for Ray sessions.
WorkerType: WorkerType
/// The date and time that this session is completed.
CompletedOn: TimestampValue
/// The total time the session ran for.
ExecutionTime: NullableDouble
/// The DPUs consumed by the session (formula: ExecutionTime * MaxCapacity).
DPUSeconds: NullableDouble
/// The number of minutes when idle before the session times out.
IdleTimeout: IdleTimeout
}
/// The SessionCommand that runs the job.
structure SessionCommand {
/// Specifies the name of the SessionCommand. Can be 'glueetl' or 'gluestreaming'.
Name: NameString
/// Specifies the Python version. The Python version indicates the version supported for jobs of type Spark.
PythonVersion: PythonVersionString
}
/// Specifies skewed values in a table. Skewed values are those that occur with very high
/// frequency.
structure SkewedInfo {
/// A list of names of columns that contain skewed values.
SkewedColumnNames: NameStringList
/// A list of values that appear so frequently as to be considered
/// skewed.
SkewedColumnValues: ColumnValueStringList
/// A mapping of skewed values to the columns that contain them.
SkewedColumnValueLocationMaps: LocationMap
}
/// Specifies configuration for Snowflake nodes in Glue Studio.
structure SnowflakeNodeData {
/// Specifies how retrieved data is specified. Valid values: "table", "query".
SourceType: GenericLimitedString
/// Specifies a Glue Data Catalog Connection to a Snowflake endpoint.
Connection: Option
/// Specifies a Snowflake database schema for your node to use.
Schema: GenericString
/// Specifies a Snowflake table for your node to use.
Table: GenericString
/// Specifies a Snowflake database for your node to use.
Database: GenericString
/// Not currently used.
TempDir: EnclosedInStringProperty
/// Not currently used.
IamRole: Option
/// Specifies additional options passed to the Snowflake connector. If options are specified
/// elsewhere in this node, this will take precedence.
AdditionalOptions: AdditionalOptions
/// A SQL string used to retrieve data with the query source type.
SampleQuery: GenericString
/// A SQL string run before the Snowflake connector performs its standard actions.
PreAction: GenericString
/// A SQL string run after the Snowflake connector performs its standard actions.
PostAction: GenericString
/// Specifies what action to take when writing to a table with preexisting data. Valid values:
/// append, merge, truncate, drop.
Action: GenericString
/// Used when Action is append. Specifies the resolution behavior when a row
/// already exists. If true, preexisting rows will be updated. If false, those rows will be inserted.
Upsert: BooleanValue = false
/// Specifies a merge action. Valid values: simple, custom. If
/// simple, merge behavior is defined by MergeWhenMatched and
/// MergeWhenNotMatched. If custom, defined by MergeClause.
MergeAction: GenericLimitedString
/// Specifies how to resolve records that match preexisting data when merging. Valid values:
/// update, delete.
MergeWhenMatched: GenericLimitedString
/// Specifies how to process records that do not match preexisting data when merging. Valid
/// values: insert, none.
MergeWhenNotMatched: GenericLimitedString
/// A SQL statement that specifies a custom merge behavior.
MergeClause: GenericString
/// The name of a staging table used when performing merge or upsert append
/// actions. Data is written to this table, then moved to table by a generated
/// postaction.
StagingTable: GenericString
/// Specifies the columns combined to identify a record when detecting matches for merges and
/// upserts. A list of structures with value, label and
/// description keys. Each structure describes a column.
SelectedColumns: OptionList
/// Specifies whether automatic query pushdown is enabled. If pushdown
/// is enabled, then when a query is run on Spark, if part of the query can be "pushed down" to
/// the Snowflake server, it is pushed down. This improves performance of some queries.
AutoPushdown: BooleanValue = false
/// Manually defines the target schema for the node. A list of structures with value, label
/// and description keys. Each structure defines a column.
TableSchema: OptionList
}
/// Specifies a Snowflake data source.
structure SnowflakeSource {
/// The name of the Snowflake data source.
@required
Name: NodeName
/// Configuration for the Snowflake data source.
@required
Data: SnowflakeNodeData
/// Specifies user-defined schemas for your output data.
OutputSchemas: GlueSchemas
}
/// Specifies a Snowflake target.
structure SnowflakeTarget {
/// The name of the Snowflake target.
@required
Name: NodeName
/// Specifies the data of the Snowflake target node.
@required
Data: SnowflakeNodeData
/// The nodes that are inputs to the data target.
Inputs: OneInput
}
/// Specifies a field to sort by and a sort order.
structure SortCriterion {
/// The name of the field on which to sort.
FieldName: ValueString
/// An ascending or descending sort.
Sort: Sort
}
/// The details for a source control configuration for a job, allowing synchronization of job artifacts to or from a remote repository.
structure SourceControlDetails {
/// The provider for the remote repository.
Provider: SourceControlProvider
/// The name of the remote repository that contains the job artifacts.
Repository: Generic512CharString
/// The owner of the remote repository that contains the job artifacts.
Owner: Generic512CharString
/// An optional branch in the remote repository.
Branch: Generic512CharString
/// An optional folder in the remote repository.
Folder: Generic512CharString
/// The last commit ID for a commit in the remote repository.
LastCommitId: Generic512CharString
/// The type of authentication, which can be an authentication token stored in Amazon Web Services Secrets Manager, or a personal access token.
AuthStrategy: SourceControlAuthStrategy
/// The value of an authorization token.
AuthToken: Generic512CharString
}
/// Specifies a connector to an Apache Spark data source.
structure SparkConnectorSource {
/// The name of the data source.
@required
Name: NodeName
/// The name of the connection that is associated with the connector.
@required
ConnectionName: EnclosedInStringProperty
/// The name of a connector that assists with accessing the data store in Glue Studio.
@required
ConnectorName: EnclosedInStringProperty
/// The type of connection, such as marketplace.spark or custom.spark, designating a connection to an Apache Spark data store.
@required
ConnectionType: EnclosedInStringProperty
/// Additional connection options for the connector.
AdditionalOptions: AdditionalOptions
/// Specifies data schema for the custom spark source.
OutputSchemas: GlueSchemas
}
/// Specifies a target that uses an Apache Spark connector.
structure SparkConnectorTarget {
/// The name of the data target.
@required
Name: NodeName
/// The nodes that are inputs to the data target.
@required
Inputs: OneInput
/// The name of a connection for an Apache Spark connector.
@required
ConnectionName: EnclosedInStringProperty
/// The name of an Apache Spark connector.
@required
ConnectorName: EnclosedInStringProperty
/// The type of connection, such as marketplace.spark or custom.spark, designating a connection to an Apache Spark data store.
@required
ConnectionType: EnclosedInStringProperty
/// Additional connection options for the connector.
AdditionalOptions: AdditionalOptions
/// Specifies the data schema for the custom spark target.
OutputSchemas: GlueSchemas
}
/// Specifies a transform where you enter a SQL query using Spark SQL syntax to transform the data. The output is a single DynamicFrame.
structure SparkSQL {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names. You can associate a table name with each input node to use in the SQL query. The name you choose must meet the Spark SQL naming restrictions.
@required
Inputs: ManyInputs
/// A SQL query that must use Spark SQL syntax and return a single data set.
@required
SqlQuery: SqlQuery
/// A list of aliases. An alias allows you to specify what name to use in the SQL for a given input. For example, you have a datasource named "MyDataSource". If you specify From as MyDataSource, and Alias as SqlName, then in your SQL you can do:
///
/// select *
/// from SqlName
///
/// and that gets data from MyDataSource.
@required
SqlAliases: SqlAliases
/// Specifies the data schema for the SparkSQL transform.
OutputSchemas: GlueSchemas
}
/// Specifies a transform that writes samples of the data to an Amazon S3 bucket.
structure Spigot {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A path in Amazon S3 where the transform will write a subset of records from the dataset to a JSON file in an Amazon S3 bucket.
@required
Path: EnclosedInStringProperty
/// Specifies a number of records to write starting from the beginning of the dataset.
Topk: Topk
/// The probability (a decimal value with a maximum value of 1) of picking any given record. A value of 1 indicates that each row read from the dataset should be included in the sample output.
Prob: Prob
}
/// Specifies a transform that splits data property keys into two DynamicFrames. The output is a collection of DynamicFrames: one with selected data property keys, and one with the remaining data property keys.
structure SplitFields {
/// The name of the transform node.
@required
Name: NodeName
/// The data inputs identified by their node names.
@required
Inputs: OneInput
/// A JSON path to a variable in the data structure.
@required
Paths: GlueStudioPathList
}
/// Represents a single entry in the list of values for SqlAliases.
structure SqlAlias {
/// A table, or a column in a table.
@required
From: NodeId
/// A temporary name given to a table, or a column in a table.
@required
Alias: EnclosedInStringPropertyWithQuote
}
@input
structure StartBlueprintRunRequest {
/// The name of the blueprint.
@required
BlueprintName: OrchestrationNameString
/// Specifies the parameters as a BlueprintParameters object.
Parameters: BlueprintParameters
/// Specifies the IAM role used to create the workflow.
@required
RoleArn: OrchestrationIAMRoleArn
}
@output
structure StartBlueprintRunResponse {
/// The run ID for this blueprint run.
RunId: IdString
}
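// Illustrative usage sketch (not part of the upstream model): a hypothetical StartBlueprintRun
// example. The blueprint name and role ARN are placeholders, and Parameters is assumed to be a
// JSON-encoded string of blueprint parameters.
apply StartBlueprintRun @examples([
    {
        title: "Run a blueprint to generate a workflow"
        input: {
            BlueprintName: "example-conversion-blueprint"
            Parameters: "{\"WorkflowName\": \"example-workflow\"}"
            RoleArn: "arn:aws:iam::123456789012:role/ExampleGlueBlueprintRole"
        }
        output: {
            RunId: "br_0123456789abcdef0123456789abcdef"
        }
    }
])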
@input
structure StartCrawlerRequest {
/// Name of the crawler to start.
@required
Name: NameString
}
@output
structure StartCrawlerResponse {}
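// Illustrative usage sketch (not part of the upstream model): a minimal hypothetical
// StartCrawler example; the crawler name is a placeholder.
apply StartCrawler @examples([
    {
        title: "Start a crawler by name"
        input: {
            Name: "example-nightly-crawler"
        }
        output: {}
    }
])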
@input
structure StartCrawlerScheduleRequest {
/// Name of the crawler to schedule.
@required
CrawlerName: NameString
}
@output
structure StartCrawlerScheduleResponse {}
@input
structure StartDataQualityRuleRecommendationRunRequest {
/// The data source (Glue table) associated with this run.
@required
DataSource: DataSource
/// An IAM role supplied to encrypt the results of the run.
@required
Role: RoleString
/// The number of G.1X workers to be used in the run. The default is 5.
NumberOfWorkers: NullableInteger
/// The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// A name for the ruleset.
CreatedRulesetName: NameString
/// Used for idempotency and is recommended to be set to a random ID (such as a UUID) to avoid creating or starting multiple instances of the same resource.
ClientToken: HashString
}
@output
structure StartDataQualityRuleRecommendationRunResponse {
/// The unique run identifier associated with this run.
RunId: HashString
}
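// Illustrative usage sketch (not part of the upstream model): a hypothetical
// StartDataQualityRuleRecommendationRun example. It assumes the DataSource shape defined
// elsewhere in this model exposes a GlueTable member with DatabaseName and TableName; the
// names, role ARN, and client token are placeholders.
apply StartDataQualityRuleRecommendationRun @examples([
    {
        title: "Recommend data quality rules for a catalog table"
        input: {
            DataSource: {
                GlueTable: {
                    DatabaseName: "example_db"
                    TableName: "example_orders"
                }
            }
            Role: "arn:aws:iam::123456789012:role/ExampleGlueDataQualityRole"
            NumberOfWorkers: 5
            CreatedRulesetName: "example-orders-recommended-rules"
            ClientToken: "5f2d1c3a-example-client-token"
        }
        output: {
            RunId: "dqrun-0123456789abcdef0123456789abcdef"
        }
    }
])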
@input
structure StartDataQualityRulesetEvaluationRunRequest {
/// The data source (Glue table) associated with this run.
@required
DataSource: DataSource
/// An IAM role supplied to encrypt the results of the run.
@required
Role: RoleString
/// The number of G.1X workers to be used in the run. The default is 5.
NumberOfWorkers: NullableInteger
/// The timeout for a run in minutes. This is the maximum time that a run can consume resources before it is terminated and enters TIMEOUT status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// Used for idempotency and is recommended to be set to a random ID (such as a UUID) to avoid creating or starting multiple instances of the same resource.
ClientToken: HashString
/// Additional run options you can specify for an evaluation run.
AdditionalRunOptions: DataQualityEvaluationRunAdditionalRunOptions
/// A list of ruleset names.
@required
RulesetNames: RulesetNames
/// A map of reference strings to additional data sources you can specify for an evaluation run.
AdditionalDataSources: DataSourceMap
}
@output
structure StartDataQualityRulesetEvaluationRunResponse {
/// The unique run identifier associated with this run.
RunId: HashString
}
@input
structure StartExportLabelsTaskRunRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
/// The Amazon S3 path where you export the labels.
@required
OutputS3Path: UriString
}
@output
structure StartExportLabelsTaskRunResponse {
/// The unique identifier for the task run.
TaskRunId: HashString
}
@input
structure StartImportLabelsTaskRunRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
/// The Amazon Simple Storage Service (Amazon S3) path from where you import the
/// labels.
@required
InputS3Path: UriString
/// Indicates whether to overwrite your existing labels.
ReplaceAllLabels: ReplaceBoolean = false
}
@output
structure StartImportLabelsTaskRunResponse {
/// The unique identifier for the task run.
TaskRunId: HashString
}
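// Illustrative usage sketch (not part of the upstream model): a hypothetical
// StartImportLabelsTaskRun example; the transform ID and S3 path are placeholders.
apply StartImportLabelsTaskRun @examples([
    {
        title: "Import labels for an ML transform"
        input: {
            TransformId: "tfm-0123456789abcdef0123456789abcdef"
            InputS3Path: "s3://example-bucket/ml-labels/labels.csv"
            ReplaceAllLabels: false
        }
        output: {
            TaskRunId: "tsk-0123456789abcdef0123456789abcdef"
        }
    }
])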
/// The batch condition that started the workflow run. Either the number of events in the batch size arrived,
/// in which case the BatchSize member is non-zero, or the batch window expired, in which case the BatchWindow
/// member is non-zero.
structure StartingEventBatchCondition {
/// Number of events in the batch.
BatchSize: NullableInteger
/// Duration of the batch window in seconds.
BatchWindow: NullableInteger
}
@input
structure StartJobRunRequest {
/// The name of the job definition to use.
@required
JobName: NameString
/// The ID of a previous JobRun to retry.
JobRunId: IdString
/// The job arguments associated with this run. For this job run, they replace the default
/// arguments set in the job definition itself.
/// You can specify arguments here that your own job-execution script
/// consumes, as well as arguments that Glue itself consumes.
/// Job arguments may be logged. Do not pass plaintext secrets as arguments. Retrieve secrets
/// from a Glue Connection, Secrets Manager or other secret management
/// mechanism if you intend to keep them within the Job.
/// For information about how to specify and consume your own Job arguments, see the Calling Glue APIs in Python topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Spark jobs,
/// see the Special Parameters Used by Glue topic in the developer guide.
/// For information about the arguments you can provide to this field when configuring Ray
/// jobs, see Using
/// job parameters in Ray jobs in the developer guide.
Arguments: GenericMap
/// This field is deprecated. Use MaxCapacity instead.
/// The number of Glue data processing units (DPUs) to allocate to this JobRun.
/// You can allocate a minimum of 2 DPUs; the default is 10. A DPU is a relative measure
/// of processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory.
/// For more information, see the Glue
/// pricing page.
@deprecated(
message: "This property is deprecated, use MaxCapacity instead."
)
AllocatedCapacity: IntegerValue = 0
/// The JobRun timeout in minutes. This is the maximum time that a job run can
/// consume resources before it is terminated and enters TIMEOUT status. This value overrides the timeout value set in the parent job.
/// Streaming jobs do not have a timeout. The default for non-streaming jobs is 2,880 minutes (48 hours).
Timeout: Timeout
/// For Glue version 1.0 or earlier jobs, using the standard worker type, the number of
/// Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is
/// a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB
/// of memory. For more information, see the
/// Glue pricing page.
/// For Glue version 2.0+ jobs, you cannot specify a Maximum capacity.
/// Instead, you should specify a Worker type and the Number of workers.
/// Do not set MaxCapacity if using WorkerType and NumberOfWorkers.
/// The value that can be allocated for MaxCapacity depends on whether you are
/// running a Python shell job, an Apache Spark ETL job, or an Apache Spark streaming ETL
/// job:
///
/// - When you specify a Python shell job (JobCommand.Name="pythonshell"), you can
/// allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU.
///
/// - When you specify an Apache Spark ETL job (JobCommand.Name="glueetl") or Apache
/// Spark streaming ETL job (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs.
/// The default is 10 DPUs. This job type cannot have a fractional DPU allocation.
MaxCapacity: NullableDouble
/// The name of the SecurityConfiguration structure to be used with this job
/// run.
SecurityConfiguration: NameString
/// Specifies configuration properties of a job run notification.
NotificationProperty: NotificationProperty
/// The type of predefined worker that is allocated when a job runs. Accepts a value of
/// G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.
///
/// - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
///
/// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of memory) with 128GB disk (approximately 77GB free), and provides 1 executor per worker. We recommend this worker type for workloads such as data transforms, joins, and queries, as it offers a scalable and cost-effective way to run most jobs.
///
/// - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of memory) with 256GB disk (approximately 235GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs in the following Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm).
///
/// - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of memory) with 512GB disk (approximately 487GB free), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon Web Services Regions as supported for the G.4X worker type.
///
/// - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of memory) with 84GB disk (approximately 34GB free), and provides 1 executor per worker. We recommend this worker type for low volume streaming jobs. This worker type is only available for Glue version 3.0 streaming jobs.
///
/// - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray workers based on the autoscaler.
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when a job runs.
NumberOfWorkers: NullableInteger
/// Indicates whether the job is run with a standard or flexible execution class. The standard execution-class is ideal for time-sensitive workloads that require fast job startup and dedicated resources.
/// The flexible execution class is appropriate for time-insensitive jobs whose start and completion times may vary.
/// Only jobs with Glue version 3.0 and above and command type glueetl will be allowed to set ExecutionClass to FLEX. The flexible execution class is available for Spark jobs.
ExecutionClass: ExecutionClass
}
@output
structure StartJobRunResponse {
/// The ID assigned to this job run.
JobRunId: IdString
}
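// Illustrative usage sketch (not part of the upstream model): a hypothetical StartJobRun
// example using WorkerType and NumberOfWorkers rather than MaxCapacity, as described above.
// The job name and argument values are placeholders.
apply StartJobRun @examples([
    {
        title: "Start a Spark job run on ten G.1X workers"
        input: {
            JobName: "example-sales-etl"
            Arguments: {
                "--input_path": "s3://example-bucket/raw/"
                "--output_path": "s3://example-bucket/curated/"
            }
            WorkerType: "G.1X"
            NumberOfWorkers: 10
            Timeout: 120
        }
        output: {
            JobRunId: "jr_0123456789abcdef0123456789abcdef"
        }
    }
])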
@input
structure StartMLEvaluationTaskRunRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
}
@output
structure StartMLEvaluationTaskRunResponse {
/// The unique identifier associated with this run.
TaskRunId: HashString
}
@input
structure StartMLLabelingSetGenerationTaskRunRequest {
/// The unique identifier of the machine learning transform.
@required
TransformId: HashString
/// The Amazon Simple Storage Service (Amazon S3) path where you generate the labeling
/// set.
@required
OutputS3Path: UriString
}
@output
structure StartMLLabelingSetGenerationTaskRunResponse {
/// The unique run identifier that is associated with this task run.
TaskRunId: HashString
}
@input
structure StartTriggerRequest {
/// The name of the trigger to start.
@required
Name: NameString
}
@output
structure StartTriggerResponse {
/// The name of the trigger that was started.
Name: NameString
}
@input
structure StartWorkflowRunRequest {
/// The name of the workflow to start.
@required
Name: NameString
/// The workflow run properties for the new workflow run.
RunProperties: WorkflowRunProperties
}
@output
structure StartWorkflowRunResponse {
/// An Id for the new run.
RunId: IdString
}
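// Illustrative usage sketch (not part of the upstream model): a hypothetical StartWorkflowRun
// example; the workflow name and run property values are placeholders.
apply StartWorkflowRun @examples([
    {
        title: "Start a workflow run with a run property"
        input: {
            Name: "example-etl-workflow"
            RunProperties: {
                "run_date": "2023-09-01"
            }
        }
        output: {
            RunId: "wr_0123456789abcdef0123456789abcdef"
        }
    }
])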
/// The statement or request for a particular action to occur in a session.
structure Statement {
/// The ID of the statement.
Id: IntegerValue = 0
/// The execution code of the statement.
Code: GenericString
/// The state while request is actioned.
State: StatementState
/// The output in JSON.
Output: StatementOutput
/// The code execution progress.
Progress: DoubleValue = 0
/// The unix time and date that the statement was started.
StartedOn: LongValue = 0
/// The unix time and date that the statement was completed.
CompletedOn: LongValue = 0
}
/// The code execution output in JSON format.
structure StatementOutput {
/// The code execution output.
Data: StatementOutputData
/// The execution count of the output.
ExecutionCount: IntegerValue = 0
/// The status of the code execution output.
Status: StatementState
/// The name of the error in the output.
ErrorName: GenericString
/// The error value of the output.
ErrorValue: GenericString
/// The traceback of the output.
Traceback: OrchestrationStringList
}
/// The code execution output in JSON format.
structure StatementOutputData {
/// The code execution output in text format.
TextPlain: GenericString
}
@input
structure StopCrawlerRequest {
/// Name of the crawler to stop.
@required
Name: NameString
}
@output
structure StopCrawlerResponse {}
@input
structure StopCrawlerScheduleRequest {
/// Name of the crawler whose schedule state to set.
@required
CrawlerName: NameString
}
@output
structure StopCrawlerScheduleResponse {}
@input
structure StopSessionRequest {
/// The ID of the session to be stopped.
@required
Id: NameString
/// The origin of the request.
RequestOrigin: OrchestrationNameString
}
@output
structure StopSessionResponse {
/// Returns the Id of the stopped session.
Id: NameString
}
@input
structure StopTriggerRequest {
/// The name of the trigger to stop.
@required
Name: NameString
}
@output
structure StopTriggerResponse {
/// The name of the trigger that was stopped.
Name: NameString
}
@input
structure StopWorkflowRunRequest {
/// The name of the workflow to stop.
@required
Name: NameString
/// The ID of the workflow run to stop.
@required
RunId: IdString
}
@output
structure StopWorkflowRunResponse {}
/// Describes the physical storage of table data.
structure StorageDescriptor {
/// A list of the Columns in the table.
Columns: ColumnList
/// The physical location of the table. By default, this takes the form of the warehouse
/// location, followed by the database location in the warehouse, followed by the table
/// name.
Location: LocationString
/// A list of locations that point to the path where a Delta table is located.
AdditionalLocations: LocationStringList
/// The input format: SequenceFileInputFormat (binary),
/// or TextInputFormat, or a custom format.
InputFormat: FormatString
/// The output format: SequenceFileOutputFormat (binary),
/// or IgnoreKeyTextOutputFormat, or a custom format.
OutputFormat: FormatString
/// True if the data in the table is compressed, or False if not.
Compressed: Boolean = false
/// Must be specified if the table contains any dimension columns.
NumberOfBuckets: Integer = 0
/// The serialization/deserialization (SerDe) information.
SerdeInfo: SerDeInfo
/// A list of reducer grouping columns, clustering columns, and
/// bucketing columns in the table.
BucketColumns: NameStringList
/// A list specifying the sort order of each bucket in the table.
SortColumns: OrderList
/// The user-supplied properties in key-value form.
Parameters: ParametersMap
/// The information about values that appear frequently in a column (skewed values).
SkewedInfo: SkewedInfo
/// True if the table data is stored in subdirectories, or False if not.
StoredAsSubDirectories: Boolean = false
/// An object that references a schema stored in the Glue Schema Registry.
/// When creating a table, you can pass an empty list of columns for the schema, and instead use a schema reference.
SchemaReference: SchemaReference
}
/// Specifies options related to data preview for viewing a sample of your data.
structure StreamingDataPreviewOptions {
/// The polling time in milliseconds.
PollingTime: PollingTime
/// The limit to the number of records polled.
RecordPollingLimit: PositiveLong
}
/// Defines column statistics supported for character sequence data values.
structure StringColumnStatisticsData {
/// The size of the longest string in the column.
@required
MaximumLength: NonNegativeLong = 0
/// The average string length in the column.
@required
AverageLength: NonNegativeDouble = 0
/// The number of null values in the column.
@required
NumberOfNulls: NonNegativeLong = 0
/// The number of distinct values in a column.
@required
NumberOfDistinctValues: NonNegativeLong = 0
}
/// Represents a collection of related data organized in columns and rows.
structure Table {
/// The table name. For Hive compatibility, this must be entirely
/// lowercase.
@required
Name: NameString
/// The name of the database where the table metadata resides.
/// For Hive compatibility, this must be all lowercase.
DatabaseName: NameString
/// A description of the table.
Description: DescriptionString
/// The owner of the table.
Owner: NameString
/// The time when the table definition was created in the Data Catalog.
CreateTime: Timestamp
/// The last time that the table was updated.
UpdateTime: Timestamp
/// The last time that the table was accessed. This is usually taken from HDFS, and might not
/// be reliable.
LastAccessTime: Timestamp
/// The last time that column statistics were computed for this table.
LastAnalyzedTime: Timestamp
/// The retention time for this table.
Retention: NonNegativeInteger = 0
/// A storage descriptor containing information about the physical storage
/// of this table.
StorageDescriptor: StorageDescriptor
/// A list of columns by which the table is partitioned. Only primitive
/// types are supported as partition keys.
/// When you create a table used by Amazon Athena, and you do not specify any
/// partitionKeys, you must at least set the value of partitionKeys to
/// an empty list. For example:
///
/// "PartitionKeys": []
///
PartitionKeys: ColumnList
/// Included for Apache Hive compatibility. Not used in the normal course of Glue operations.
/// If the table is a VIRTUAL_VIEW, certain Athena configuration encoded in base64.
ViewOriginalText: ViewTextString
/// Included for Apache Hive compatibility. Not used in the normal course of Glue operations.
ViewExpandedText: ViewTextString
/// The type of this table.
/// Glue will create tables with the EXTERNAL_TABLE type.
/// Other services, such as Athena, may create tables with additional table types.
///
/// Glue related table types:
///
/// - EXTERNAL_TABLE: Hive compatible attribute - indicates a non-Hive managed table.
///
/// - GOVERNED: Used by Lake Formation.
/// The Glue Data Catalog understands GOVERNED.
TableType: TableTypeString
/// These key-value pairs define properties associated with the table.
Parameters: ParametersMap
/// The person or entity who created the table.
CreatedBy: NameString
/// Indicates whether the table has been registered with Lake Formation.
IsRegisteredWithLakeFormation: Boolean = false
/// A TableIdentifier structure that describes a target table for resource linking.
TargetTable: TableIdentifier
/// The ID of the Data Catalog in which the table resides.
CatalogId: CatalogIdString
/// The ID of the table version.
VersionId: VersionString
/// A FederatedTable structure that references an entity outside the Glue Data Catalog.
FederatedTable: FederatedTable
}
/// An error record for table operations.
structure TableError {
/// The name of the table. For Hive compatibility, this must be entirely lowercase.
TableName: NameString
/// The details about the error.
ErrorDetail: ErrorDetail
}
/// A structure that describes a target table for resource linking.
structure TableIdentifier {
/// The ID of the Data Catalog in which the table resides.
CatalogId: CatalogIdString
/// The name of the catalog database that contains the target table.
DatabaseName: NameString
/// The name of the target table.
Name: NameString
/// Region of the target table.
Region: NameString
}
/// A structure used to define a table.
structure TableInput {
/// The table name. For Hive compatibility, this is folded to
/// lowercase when it is stored.
@required
Name: NameString
/// A description of the table.
Description: DescriptionString
/// The table owner. Included for Apache Hive compatibility. Not used in the normal course of Glue operations.
Owner: NameString
/// The last time that the table was accessed.
LastAccessTime: Timestamp
/// The last time that column statistics were computed for this table.
LastAnalyzedTime: Timestamp
/// The retention time for this table.
Retention: NonNegativeInteger = 0
/// A storage descriptor containing information about the physical storage
/// of this table.
StorageDescriptor: StorageDescriptor
/// A list of columns by which the table is partitioned. Only primitive
/// types are supported as partition keys.
/// When you create a table used by Amazon Athena, and you do not specify any
/// partitionKeys, you must at least set the value of partitionKeys to
/// an empty list. For example:
///
/// "PartitionKeys": []
///
PartitionKeys: ColumnList
/// Included for Apache Hive compatibility. Not used in the normal course of Glue operations.
/// If the table is a VIRTUAL_VIEW, certain Athena configuration encoded in base64.
ViewOriginalText: ViewTextString
/// Included for Apache Hive compatibility. Not used in the normal course of Glue operations.
ViewExpandedText: ViewTextString
/// The type of this table.
/// Glue will create tables with the EXTERNAL_TABLE type.
/// Other services, such as Athena, may create tables with additional table types.
///
/// Glue related table types:
///
/// - EXTERNAL_TABLE - Hive compatible attribute - indicates a non-Hive managed table.
///
/// - GOVERNED - Used by Lake Formation. The Glue Data Catalog understands GOVERNED.
///
TableType: TableTypeString
/// These key-value pairs define properties associated with the table.
Parameters: ParametersMap
/// A TableIdentifier structure that describes a target table for resource linking.
TargetTable: TableIdentifier
}
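// Note on the PartitionKeys requirement above: a table meant for Athena that has no
// partitions still needs PartitionKeys to be present as an empty list. A minimal boto3
// sketch (the database, table, bucket, and column names are hypothetical, not part of this model):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.create_table(
//       DatabaseName="example_db",
//       TableInput={
//           "Name": "example_table",
//           "TableType": "EXTERNAL_TABLE",
//           "StorageDescriptor": {
//               "Columns": [{"Name": "id", "Type": "string"}],
//               "Location": "s3://example-bucket/example_table/",
//           },
//           "PartitionKeys": [],  # explicitly empty so Athena can query the table
//       },
//   )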
/// Specifies a version of a table.
structure TableVersion {
/// The table in question.
Table: Table
/// The ID value that identifies this table version. A VersionId
/// is a string representation of an integer. Each version is incremented by 1.
VersionId: VersionString
}
/// An error record for table-version operations.
structure TableVersionError {
/// The name of the table in question.
TableName: NameString
/// The ID value of the version in question. A VersionID
/// is a string representation of an integer. Each version is incremented by 1.
VersionId: VersionString
/// The details about the error.
ErrorDetail: ErrorDetail
}
@input
structure TagResourceRequest {
/// The ARN of the Glue resource to which to add the tags. For more
/// information about Glue resource ARNs, see the Glue ARN string pattern.
@required
ResourceArn: GlueResourceArn
/// Tags to add to this resource.
@required
TagsToAdd: TagsMap
}
@output
structure TagResourceResponse {}
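// A minimal boto3 sketch of the TagResource call described above (the job ARN, account ID,
// and tag values are hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.tag_resource(
//       ResourceArn="arn:aws:glue:us-east-1:123456789012:job/example-job",
//       TagsToAdd={"team": "data-eng", "env": "dev"},  # up to 50 tags per resource
//   )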
/// The sampling parameters that are associated with the machine learning transform.
structure TaskRun {
/// The unique identifier for the transform.
TransformId: HashString
/// The unique identifier for this task run.
TaskRunId: HashString
/// The current status of the requested task run.
Status: TaskStatusType
/// The names of the log group for secure logging, associated with this task run.
LogGroupName: GenericString
/// Specifies configuration properties associated with this task run.
Properties: TaskRunProperties
/// The list of error strings associated with this task run.
ErrorString: GenericString
/// The date and time that this task run started.
StartedOn: Timestamp
/// The last point in time that the requested task run was updated.
LastModifiedOn: Timestamp
/// The last point in time that the requested task run was completed.
CompletedOn: Timestamp
/// The amount of time (in seconds) that the task run consumed resources.
ExecutionTime: ExecutionTime = 0
}
/// The criteria that are used to filter the task runs for the machine learning
/// transform.
structure TaskRunFilterCriteria {
/// The type of task run.
TaskRunType: TaskType
/// The current status of the task run.
Status: TaskStatusType
/// Filter on task runs started before this date.
StartedBefore: Timestamp
/// Filter on task runs started after this date.
StartedAfter: Timestamp
}
/// The configuration properties for the task run.
structure TaskRunProperties {
/// The type of task run.
TaskType: TaskType
/// The configuration properties for an importing labels task run.
ImportLabelsTaskRunProperties: ImportLabelsTaskRunProperties
/// The configuration properties for an exporting labels task run.
ExportLabelsTaskRunProperties: ExportLabelsTaskRunProperties
/// The configuration properties for a labeling set generation task run.
LabelingSetGenerationTaskRunProperties: LabelingSetGenerationTaskRunProperties
/// The configuration properties for a find matches task run.
FindMatchesTaskRunProperties: FindMatchesTaskRunProperties
}
/// The sorting criteria that are used to sort the list of task runs for the machine learning
/// transform.
structure TaskRunSortCriteria {
/// The column to be used to sort the list of task runs for the machine learning
/// transform.
@required
Column: TaskRunSortColumnType
/// The sort direction to be used to sort the list of task runs for the machine learning
/// transform.
@required
SortDirection: SortDirectionType
}
/// Specifies the parameters in the config file of the dynamic transform.
structure TransformConfigParameter {
/// Specifies the name of the parameter in the config file of the dynamic transform.
@required
Name: EnclosedInStringProperty
/// Specifies the parameter type in the config file of the dynamic transform.
@required
Type: ParamType
/// Specifies the validation rule in the config file of the dynamic transform.
ValidationRule: EnclosedInStringProperty
/// Specifies the validation message in the config file of the dynamic transform.
ValidationMessage: EnclosedInStringProperty
/// Specifies the value of the parameter in the config file of the dynamic transform.
Value: EnclosedInStringProperties
/// Specifies the list type of the parameter in the config file of the dynamic transform.
ListType: ParamType
/// Specifies whether the parameter is optional or not in the config file of the dynamic transform.
IsOptional: BoxedBoolean
}
/// The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.
/// Additionally, imported labels and trained transforms can now be encrypted using a customer provided KMS key.
structure TransformEncryption {
/// An MLUserDataEncryption object containing the encryption mode and customer-provided KMS key ID.
MlUserDataEncryption: MLUserDataEncryption
/// The name of the security configuration.
TaskRunSecurityConfigurationName: NameString
}
/// The criteria used to filter the machine learning transforms.
structure TransformFilterCriteria {
/// A unique transform name that is used to filter the machine learning transforms.
Name: NameString
/// The type of machine learning transform that is used to filter the machine learning
/// transforms.
TransformType: TransformType
/// Filters the list of machine learning transforms by the last known status of the transforms (to indicate whether a transform can be used or not). One of "NOT_READY", "READY", or "DELETING".
Status: TransformStatusType
/// This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.
GlueVersion: GlueVersionString
/// The time and date before which the transforms were created.
CreatedBefore: Timestamp
/// The time and date after which the transforms were created.
CreatedAfter: Timestamp
/// Filter on transforms last modified before this date.
LastModifiedBefore: Timestamp
/// Filter on transforms last modified after this date.
LastModifiedAfter: Timestamp
/// Filters on datasets with a specific schema. The Map
/// object is an array of key-value pairs representing the schema this transform accepts, where
/// Column is the name of a column, and Type is the type of the data
/// such as an integer or string. Has an upper bound of 100 columns.
Schema: TransformSchema
}
/// The algorithm-specific parameters that are associated with the machine learning
/// transform.
structure TransformParameters {
/// The type of machine learning transform.
/// For information about the types of machine learning transforms, see Creating Machine Learning Transforms.
@required
TransformType: TransformType
/// The parameters for the find matches algorithm.
FindMatchesParameters: FindMatchesParameters
}
/// The sorting criteria that are associated with the machine learning transform.
structure TransformSortCriteria {
/// The column to be used in the sorting criteria that are associated with the machine
/// learning transform.
@required
Column: TransformSortColumnType
/// The sort direction to be used in the sorting criteria that are associated with the machine
/// learning transform.
@required
SortDirection: SortDirectionType
}
/// Information about a specific trigger.
structure Trigger {
/// The name of the trigger.
Name: NameString
/// The name of the workflow associated with the trigger.
WorkflowName: NameString
/// Reserved for future use.
Id: IdString
/// The type of trigger that this is.
Type: TriggerType
/// The current state of the trigger.
State: TriggerState
/// A description of this trigger.
Description: DescriptionString
/// A cron expression used to specify the schedule (see Time-Based
/// Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify:
/// cron(15 12 * * ? *).
Schedule: GenericString
/// The actions initiated by this trigger.
Actions: ActionList
/// The predicate of this trigger, which defines when it will fire.
Predicate: Predicate
/// Batch condition that must be met (specified number of events received or batch time window expired)
/// before EventBridge event trigger fires.
EventBatchingCondition: EventBatchingCondition
}
/// The details of a Trigger node present in the workflow.
structure TriggerNodeDetails {
/// The information of the trigger represented by the trigger node.
Trigger: Trigger
}
/// A structure used to provide information used to update a trigger. This object updates the
/// previous trigger definition by overwriting it completely.
structure TriggerUpdate {
/// Reserved for future use.
Name: NameString
/// A description of this trigger.
Description: DescriptionString
/// A cron expression used to specify the schedule (see Time-Based
/// Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify:
/// cron(15 12 * * ? *).
Schedule: GenericString
/// The actions initiated by this trigger.
Actions: ActionList
/// The predicate of this trigger, which defines when it will fire.
Predicate: Predicate
/// Batch condition that must be met (specified number of events received or batch time window expired)
/// before EventBridge event trigger fires.
EventBatchingCondition: EventBatchingCondition
}
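// The Schedule field above takes the same six-field cron syntax shown in the doc comment.
// A minimal boto3 sketch of updating a scheduled trigger (the trigger and job names are hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_trigger(
//       Name="example-trigger",
//       TriggerUpdate={
//           "Schedule": "cron(15 12 * * ? *)",  # every day at 12:15 UTC
//           "Actions": [{"JobName": "example-job"}],
//       },
//   )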
/// A partition that contains unfiltered metadata.
structure UnfilteredPartition {
/// The partition object.
Partition: Partition
/// The list of columns the user has permissions to access.
AuthorizedColumns: NameStringList
/// A Boolean value indicating that the partition location is registered with Lake Formation.
IsRegisteredWithLakeFormation: Boolean = false
}
/// Specifies a transform that combines the rows from two or more datasets into a single result.
structure Union {
/// The name of the transform node.
@required
Name: NodeName
/// The node ID inputs to the transform.
@required
Inputs: TwoInputs
/// Indicates the type of Union transform.
/// Specify ALL to join all rows from data sources to the resulting DynamicFrame. The resulting union does not remove duplicate rows.
/// Specify DISTINCT to remove duplicate rows in the resulting DynamicFrame.
@required
UnionType: UnionType
}
@input
structure UntagResourceRequest {
/// The Amazon Resource Name (ARN) of the resource from which to remove the tags.
@required
ResourceArn: GlueResourceArn
/// Tags to remove from this resource.
@required
TagsToRemove: TagKeysList
}
@output
structure UntagResourceResponse {}
@input
structure UpdateBlueprintRequest {
/// The name of the blueprint.
@required
Name: OrchestrationNameString
/// A description of the blueprint.
Description: Generic512CharString
/// Specifies a path in Amazon S3 where the blueprint is published.
@required
BlueprintLocation: OrchestrationS3Location
}
@output
structure UpdateBlueprintResponse {
/// Returns the name of the blueprint that was updated.
Name: NameString
}
@input
structure UpdateClassifierRequest {
/// A GrokClassifier object with updated fields.
GrokClassifier: UpdateGrokClassifierRequest
/// An XMLClassifier object with updated fields.
XMLClassifier: UpdateXMLClassifierRequest
/// A JsonClassifier object with updated fields.
JsonClassifier: UpdateJsonClassifierRequest
/// A CsvClassifier object with updated fields.
CsvClassifier: UpdateCsvClassifierRequest
}
@output
structure UpdateClassifierResponse {}
@input
structure UpdateColumnStatisticsForPartitionRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// A list of partition values identifying the partition.
@required
PartitionValues: ValueStringList
/// A list of the column statistics.
@required
ColumnStatisticsList: UpdateColumnStatisticsList
}
@output
structure UpdateColumnStatisticsForPartitionResponse {
/// Errors that occurred while updating column statistics data.
Errors: ColumnStatisticsErrors
}
@input
structure UpdateColumnStatisticsForTableRequest {
/// The ID of the Data Catalog where the partitions in question reside.
/// If none is supplied, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the partitions reside.
@required
DatabaseName: NameString
/// The name of the partitions' table.
@required
TableName: NameString
/// A list of the column statistics.
@required
ColumnStatisticsList: UpdateColumnStatisticsList
}
@output
structure UpdateColumnStatisticsForTableResponse {
/// List of ColumnStatisticsErrors.
Errors: ColumnStatisticsErrors
}
@input
structure UpdateConnectionRequest {
/// The ID of the Data Catalog in which the connection resides. If none is provided, the Amazon Web Services
/// account ID is used by default.
CatalogId: CatalogIdString
/// The name of the connection definition to update.
@required
Name: NameString
/// A ConnectionInput object that redefines the connection
/// in question.
@required
ConnectionInput: ConnectionInput
}
@output
structure UpdateConnectionResponse {}
@input
structure UpdateCrawlerRequest {
/// Name of the new crawler.
@required
Name: NameString
/// The IAM role or Amazon Resource Name (ARN) of an IAM role that is used by the new crawler
/// to access customer resources.
Role: Role
/// The Glue database where results are stored, such as:
/// arn:aws:daylight:us-east-1::database/sometable/*.
DatabaseName: DatabaseName
/// A description of the new crawler.
Description: DescriptionStringRemovable
/// A list of targets to crawl.
Targets: CrawlerTargets
/// A cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify:
/// cron(15 12 * * ? *).
Schedule: CronExpression
/// A list of custom classifiers that the user
/// has registered. By default, all built-in classifiers are included in a crawl,
/// but these custom classifiers always override the default classifiers
/// for a given classification.
Classifiers: ClassifierNameList
/// The table prefix used for catalog tables that are created.
TablePrefix: TablePrefix
/// The policy for the crawler's update and deletion behavior.
SchemaChangePolicy: SchemaChangePolicy
/// A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.
RecrawlPolicy: RecrawlPolicy
/// Specifies data lineage configuration settings for the crawler.
LineageConfiguration: LineageConfiguration
/// Specifies Lake Formation configuration settings for the crawler.
LakeFormationConfiguration: LakeFormationConfiguration
/// Crawler configuration information. This versioned JSON string allows users
/// to specify aspects of a crawler's behavior.
/// For more information, see Setting crawler configuration options.
Configuration: CrawlerConfiguration
/// The name of the SecurityConfiguration structure to be used by this
/// crawler.
CrawlerSecurityConfiguration: CrawlerSecurityConfiguration
}
@output
structure UpdateCrawlerResponse {}
@input
structure UpdateCrawlerScheduleRequest {
/// The name of the crawler whose schedule to update.
@required
CrawlerName: NameString
/// The updated cron expression used to specify the schedule (see Time-Based Schedules for Jobs and Crawlers). For example, to run
/// something every day at 12:15 UTC, you would specify:
/// cron(15 12 * * ? *).
Schedule: CronExpression
}
@output
structure UpdateCrawlerScheduleResponse {}
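// A minimal boto3 sketch of the UpdateCrawlerSchedule call above, using the cron form from
// the doc comment (the crawler name is hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_crawler_schedule(
//       CrawlerName="example-crawler",
//       Schedule="cron(15 12 * * ? *)",  # every day at 12:15 UTC
//   )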
/// Specifies a custom CSV classifier to be updated.
structure UpdateCsvClassifierRequest {
/// The name of the classifier.
@required
Name: NameString
/// A custom symbol to denote what separates each column entry in the row.
Delimiter: CsvColumnDelimiter
/// A custom symbol to denote what combines content into a single column value. It must be
/// different from the column delimiter.
QuoteSymbol: CsvQuoteSymbol
/// Indicates whether the CSV file contains a header.
ContainsHeader: CsvHeaderOption
/// A list of strings representing column names.
Header: CsvHeader
/// Specifies not to trim values before identifying the type of column values. The default value is true.
DisableValueTrimming: NullableBoolean
/// Enables the processing of files that contain only one column.
AllowSingleColumn: NullableBoolean
/// Specifies the configuration of custom datatypes.
CustomDatatypeConfigured: NullableBoolean
/// Specifies a list of supported custom datatypes.
CustomDatatypes: CustomDatatypes
/// Sets the SerDe for processing CSV in the classifier, which will be applied in the Data Catalog. Valid values are OpenCSVSerDe,
/// LazySimpleSerDe, and None. You can specify the None
/// value when you want the crawler to do the detection.
Serde: CsvSerdeOption
}
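// A minimal boto3 sketch of updating a CSV classifier via UpdateClassifier, wrapping the
// UpdateCsvClassifierRequest fields above (the classifier name and field choices are hypothetical,
// and the Serde field assumes a recent SDK version):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_classifier(
//       CsvClassifier={
//           "Name": "example-csv-classifier",
//           "Delimiter": ";",
//           "ContainsHeader": "PRESENT",
//           "Serde": "OpenCSVSerDe",  # or LazySimpleSerDe / None, per the doc comment above
//       },
//   )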
@input
structure UpdateDatabaseRequest {
/// The ID of the Data Catalog in which the metadata database resides. If none is provided,
/// the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the database to update in the catalog. For Hive
/// compatibility, this is folded to lowercase.
@required
Name: NameString
/// A DatabaseInput object specifying the new definition
/// of the metadata database in the catalog.
@required
DatabaseInput: DatabaseInput
}
@output
structure UpdateDatabaseResponse {}
@input
structure UpdateDataQualityRulesetRequest {
/// The name of the data quality ruleset.
@required
Name: NameString
/// A description of the ruleset.
Description: DescriptionString
/// A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.
Ruleset: DataQualityRulesetString
}
@output
structure UpdateDataQualityRulesetResponse {
/// The name of the data quality ruleset.
Name: NameString
/// A description of the ruleset.
Description: DescriptionString
/// A Data Quality Definition Language (DQDL) ruleset. For more information, see the Glue developer guide.
Ruleset: DataQualityRulesetString
}
@input
structure UpdateDevEndpointRequest {
/// The name of the DevEndpoint to be updated.
@required
EndpointName: GenericString
/// The public key for the DevEndpoint to use.
PublicKey: GenericString
/// The list of public keys for the DevEndpoint to use.
AddPublicKeys: PublicKeysList
/// The list of public keys to be deleted from the DevEndpoint.
DeletePublicKeys: PublicKeysList
/// Custom Python or Java libraries to be loaded in the DevEndpoint.
CustomLibraries: DevEndpointCustomLibraries
///
/// True if the list of custom libraries to be loaded in the development endpoint
/// needs to be updated, or False if otherwise.
UpdateEtlLibraries: BooleanValue = false
/// The list of argument keys to be deleted from the map of arguments used to configure the
/// DevEndpoint.
DeleteArguments: StringList
/// The map of arguments to add to the map of arguments used to configure the
/// DevEndpoint.
/// Valid arguments are:
///
/// - "--enable-glue-datacatalog": ""
///
/// You can specify a version of Python support for development endpoints by using the Arguments
/// parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to Python 2.
AddArguments: MapValue
}
@output
structure UpdateDevEndpointResponse {}
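// A minimal boto3 sketch of the UpdateDevEndpoint call above, passing the
// "--enable-glue-datacatalog" argument mentioned in the AddArguments doc comment
// (the endpoint name is hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_dev_endpoint(
//       EndpointName="example-endpoint",
//       AddArguments={"--enable-glue-datacatalog": ""},
//       UpdateEtlLibraries=False,  # custom libraries were not changed in this call
//   )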
/// Specifies a grok classifier to update when passed to
/// UpdateClassifier.
structure UpdateGrokClassifierRequest {
/// The name of the GrokClassifier.
@required
Name: NameString
/// An identifier of the data format that the classifier matches, such as Twitter, JSON, Omniture logs,
/// Amazon CloudWatch Logs, and so on.
Classification: Classification
/// The grok pattern used by this classifier.
GrokPattern: GrokPattern
/// Optional custom grok patterns used by this classifier.
CustomPatterns: CustomPatterns
}
@input
structure UpdateJobFromSourceControlRequest {
/// The name of the Glue job to be synchronized to or from the remote repository.
JobName: NameString
/// The provider for the remote repository.
Provider: SourceControlProvider
/// The name of the remote repository that contains the job artifacts.
RepositoryName: NameString
/// The owner of the remote repository that contains the job artifacts.
RepositoryOwner: NameString
/// An optional branch in the remote repository.
BranchName: NameString
/// An optional folder in the remote repository.
Folder: NameString
/// A commit ID for a commit in the remote repository.
CommitId: CommitIdString
/// The type of authentication, which can be an authentication token stored in Amazon Web Services Secrets Manager, or a personal access token.
AuthStrategy: SourceControlAuthStrategy
/// The value of the authorization token.
AuthToken: AuthTokenString
}
@output
structure UpdateJobFromSourceControlResponse {
/// The name of the Glue job.
JobName: NameString
}
@input
structure UpdateJobRequest {
/// The name of the job definition to update.
@required
JobName: NameString
/// Specifies the values with which to update the job definition. Unspecified configuration is removed or reset to default values.
@required
JobUpdate: JobUpdate
}
@output
structure UpdateJobResponse {
/// Returns the name of the updated job definition.
JobName: NameString
}
/// Specifies a JSON classifier to be updated.
structure UpdateJsonClassifierRequest {
/// The name of the classifier.
@required
Name: NameString
/// A JsonPath string defining the JSON data for the classifier to classify.
/// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom Classifiers.
JsonPath: JsonPath
}
@input
structure UpdateMLTransformRequest {
/// A unique identifier that was generated when the transform was created.
@required
TransformId: HashString
/// The unique name that you gave the transform when you created it.
Name: NameString
/// A description of the transform. The default is an empty string.
Description: DescriptionString
/// The configuration parameters that are specific to the transform type (algorithm) used.
/// Conditionally dependent on the transform type.
Parameters: TransformParameters
/// The name or Amazon Resource Name (ARN) of the IAM role with the required
/// permissions.
Role: RoleString
/// This value determines which version of Glue this machine learning transform is compatible with. Glue 1.0 is recommended for most customers. If the value is not set, the Glue compatibility defaults to Glue 0.9. For more information, see Glue Versions in the developer guide.
GlueVersion: GlueVersionString
/// The number of Glue data processing units (DPUs) that are allocated to task runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A DPU is a relative measure of
/// processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more
/// information, see the Glue pricing
/// page.
/// When the WorkerType field is set to a value other than Standard,
/// the MaxCapacity field is set automatically and becomes read-only.
MaxCapacity: NullableDouble
/// The type of predefined worker that is allocated when this task runs. Accepts a value of Standard, G.1X, or G.2X.
///
/// - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
///
/// - For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 64GB disk, and 1 executor per worker.
///
/// - For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker.
///
WorkerType: WorkerType
/// The number of workers of a defined workerType that are allocated when this task runs.
NumberOfWorkers: NullableInteger
/// The timeout for a task run for this transform in minutes. This is the maximum time that a task run for this transform can consume resources before it is terminated and enters TIMEOUT
/// status. The default is 2,880 minutes (48 hours).
Timeout: Timeout
/// The maximum number of times to retry a task for this transform after a task run fails.
MaxRetries: NullableInteger
}
@output
structure UpdateMLTransformResponse {
/// The unique identifier for the transform that was updated.
TransformId: HashString
}
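// A minimal boto3 sketch of the UpdateMLTransform call above. Per the MaxCapacity doc
// comment, a non-Standard WorkerType means MaxCapacity is managed automatically, so only
// WorkerType and NumberOfWorkers are set here (the transform ID is hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_ml_transform(
//       TransformId="0123456789abcdef0123456789abcdef",
//       WorkerType="G.1X",
//       NumberOfWorkers=5,
//       Timeout=120,  # minutes; default is 2,880 (48 hours)
//   )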
@input
structure UpdatePartitionRequest {
/// The ID of the Data Catalog where the partition to be updated resides. If none is provided,
/// the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which the table in question
/// resides.
@required
DatabaseName: NameString
/// The name of the table in which the partition to be updated is located.
@required
TableName: NameString
/// List of partition key values that define the partition to update.
@required
PartitionValueList: BoundedPartitionValueList
/// The new partition object to update the partition to.
/// The Values property can't be changed. If you want to change the partition key values for a partition, delete and recreate the partition.
@required
PartitionInput: PartitionInput
}
@output
structure UpdatePartitionResponse {}
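// A minimal boto3 sketch of the UpdatePartition call above. PartitionInput.Values must match
// PartitionValueList, since the doc comment above states the key values cannot be changed by
// this operation (the database, table, and location names are hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_partition(
//       DatabaseName="example_db",
//       TableName="example_table",
//       PartitionValueList=["2024", "01"],
//       PartitionInput={
//           "Values": ["2024", "01"],  # unchanged; delete and recreate to change key values
//           "StorageDescriptor": {"Location": "s3://example-bucket/example_table/2024/01/"},
//       },
//   )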
@output
structure UpdateRegistryResponse {
/// The name of the updated registry.
RegistryName: SchemaRegistryNameString
/// The Amazon Resource name (ARN) of the updated registry.
RegistryArn: GlueResourceArn
}
@output
structure UpdateSchemaResponse {
/// The Amazon Resource Name (ARN) of the schema.
SchemaArn: GlueResourceArn
/// The name of the schema.
SchemaName: SchemaRegistryNameString
/// The name of the registry that contains the schema.
RegistryName: SchemaRegistryNameString
}
@input
structure UpdateSourceControlFromJobRequest {
/// The name of the Glue job to be synchronized to or from the remote repository.
JobName: NameString
/// The provider for the remote repository.
Provider: SourceControlProvider
/// The name of the remote repository that contains the job artifacts.
RepositoryName: NameString
/// The owner of the remote repository that contains the job artifacts.
RepositoryOwner: NameString
/// An optional branch in the remote repository.
BranchName: NameString
/// An optional folder in the remote repository.
Folder: NameString
/// A commit ID for a commit in the remote repository.
CommitId: CommitIdString
/// The type of authentication, which can be an authentication token stored in Amazon Web Services Secrets Manager, or a personal access token.
AuthStrategy: SourceControlAuthStrategy
/// The value of the authorization token.
AuthToken: AuthTokenString
}
@output
structure UpdateSourceControlFromJobResponse {
/// The name of the Glue job.
JobName: NameString
}
@input
structure UpdateTableRequest {
/// The ID of the Data Catalog where the table resides. If none is provided, the Amazon Web Services account
/// ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database in which the table resides. For Hive
/// compatibility, this name is entirely lowercase.
@required
DatabaseName: NameString
/// An updated TableInput object to define the metadata table
/// in the catalog.
@required
TableInput: TableInput
/// By default, UpdateTable always creates an archived version of the table
/// before updating it. However, if skipArchive is set to true,
/// UpdateTable does not create the archived version.
SkipArchive: BooleanNullable
/// The transaction ID at which to update the table contents.
TransactionId: TransactionIdString
/// The version ID at which to update the table contents.
VersionId: VersionString
}
@output
structure UpdateTableResponse {}
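// A minimal boto3 sketch of the UpdateTable call above, using SkipArchive to avoid creating
// an archived version of the previous table definition (all names are hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_table(
//       DatabaseName="example_db",
//       TableInput={
//           "Name": "example_table",
//           "StorageDescriptor": {
//               "Columns": [{"Name": "id", "Type": "string"}],
//               "Location": "s3://example-bucket/example_table/",
//           },
//           "PartitionKeys": [],
//       },
//       SkipArchive=True,
//   )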
@input
structure UpdateTriggerRequest {
/// The name of the trigger to update.
@required
Name: NameString
/// The new values with which to update the trigger.
@required
TriggerUpdate: TriggerUpdate
}
@output
structure UpdateTriggerResponse {
/// The resulting trigger definition.
Trigger: Trigger
}
@input
structure UpdateUserDefinedFunctionRequest {
/// The ID of the Data Catalog where the function to be updated is located. If none is
/// provided, the Amazon Web Services account ID is used by default.
CatalogId: CatalogIdString
/// The name of the catalog database where the function to be updated is
/// located.
@required
DatabaseName: NameString
/// The name of the function.
@required
FunctionName: NameString
/// A FunctionInput object that redefines the function in the Data
/// Catalog.
@required
FunctionInput: UserDefinedFunctionInput
}
@output
structure UpdateUserDefinedFunctionResponse {}
@input
structure UpdateWorkflowRequest {
/// Name of the workflow to be updated.
@required
Name: NameString
/// The description of the workflow.
Description: GenericString
/// A collection of properties to be used as part of each execution of the workflow.
DefaultRunProperties: WorkflowRunProperties
/// You can use this parameter to prevent unwanted multiple updates to data, to control costs, or in some cases, to prevent exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs.
MaxConcurrentRuns: NullableInteger
}
@output
structure UpdateWorkflowResponse {
/// The name of the workflow which was specified in input.
Name: NameString
}
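// A minimal boto3 sketch of the UpdateWorkflow call above, setting MaxConcurrentRuns to
// serialize runs as described in the doc comment (the workflow name is hypothetical):
//
//   import boto3
//
//   glue = boto3.client("glue")
//   glue.update_workflow(
//       Name="example-workflow",
//       MaxConcurrentRuns=1,  # at most one workflow run at a time
//   )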
/// Specifies an XML classifier to be updated.
structure UpdateXMLClassifierRequest {
/// The name of the classifier.
@required
Name: NameString
/// An identifier of the data format that the classifier matches.
Classification: Classification
/// The XML tag designating the element that contains each record in an XML document being
/// parsed. This cannot identify a self-closing element (closed by />). An empty
/// row element that contains only attributes can be parsed as long as it ends with a closing tag
/// (for example, <row item_a="A" item_b="B"></row> is okay, but
/// <row item_a="A" item_b="B" /> is not).
RowTag: RowTag
}
/// The options to configure an upsert operation when writing to a Redshift target.
structure UpsertRedshiftTargetOptions {
/// The physical location of the Redshift table.
TableLocation: EnclosedInStringProperty
/// The name of the connection to use to write to Redshift.
ConnectionName: EnclosedInStringProperty
/// The keys used to determine whether to perform an update or insert.
UpsertKeys: EnclosedInStringPropertiesMinOne
}
/// Represents the equivalent of a Hive user-defined function
/// (UDF) definition.
structure UserDefinedFunction {
/// The name of the function.
FunctionName: NameString
/// The name of the catalog database that contains the function.
DatabaseName: NameString
/// The Java class that contains the function code.
ClassName: NameString
/// The owner of the function.
OwnerName: NameString
/// The owner type.
OwnerType: PrincipalType
/// The time at which the function was created.
CreateTime: Timestamp
/// The resource URIs for the function.
ResourceUris: ResourceUriList
/// The ID of the Data Catalog in which the function resides.
CatalogId: CatalogIdString
}
/// A structure used to create or update a user-defined function.
structure UserDefinedFunctionInput {
/// The name of the function.
FunctionName: NameString
/// The Java class that contains the function code.
ClassName: NameString
/// The owner of the function.
OwnerName: NameString
/// The owner type.
OwnerType: PrincipalType
/// The resource URIs for the function.
ResourceUris: ResourceUriList
}
/// A value could not be validated.
@error("client")
structure ValidationException {
/// A message describing the problem.
Message: MessageString
}
/// There was a version conflict.
@error("client")
structure VersionMismatchException {
/// A message describing the problem.
Message: MessageString
}
/// A workflow is a collection of multiple dependent Glue
/// jobs and crawlers that are run to complete a complex ETL task. A
/// workflow manages the execution and monitoring of all its jobs and crawlers.
structure Workflow {
/// The name of the workflow.
Name: NameString
/// A description of the workflow.
Description: GenericString
/// A collection of properties to be used as part of each execution of the workflow.
/// The run properties are made available to each job in the workflow. A job can modify
/// the properties for the next jobs in the flow.
DefaultRunProperties: WorkflowRunProperties
/// The date and time when the workflow was created.
CreatedOn: TimestampValue
/// The date and time when the workflow was last modified.
LastModifiedOn: TimestampValue
/// The information about the last execution of the workflow.
LastRun: WorkflowRun
/// The graph representing all the Glue components that belong to the workflow as nodes and directed
/// connections between them as edges.
Graph: WorkflowGraph
/// You can use this parameter to prevent unwanted multiple updates to data, to control costs, or in some cases, to prevent exceeding the maximum number of concurrent runs of any of the component jobs. If you leave this parameter blank, there is no limit to the number of concurrent workflow runs.
MaxConcurrentRuns: NullableInteger
/// This structure indicates the details of the blueprint that this particular workflow is created from.
BlueprintDetails: BlueprintDetails
}
/// A workflow graph represents the complete workflow containing all the Glue components present in the
/// workflow and all the directed connections between them.
structure WorkflowGraph {
/// A list of the Glue components belonging to the workflow, represented as nodes.
Nodes: NodeList
/// A list of all the directed connections between the nodes belonging to the workflow.
Edges: EdgeList
}
/// A workflow run is an execution of a workflow providing all the runtime information.
structure WorkflowRun {
/// Name of the workflow that was run.
Name: NameString
/// The ID of this workflow run.
WorkflowRunId: IdString
/// The ID of the previous workflow run.
PreviousRunId: IdString
/// The workflow run properties which were set during the run.
WorkflowRunProperties: WorkflowRunProperties
/// The date and time when the workflow run was started.
StartedOn: TimestampValue
/// The date and time when the workflow run completed.
CompletedOn: TimestampValue
/// The status of the workflow run.
Status: WorkflowRunStatus
/// This error message describes any error that may have occurred in starting the workflow run. Currently the only error message is "Concurrent runs exceeded for workflow: foo."
ErrorMessage: ErrorString
/// The statistics of the run.
Statistics: WorkflowRunStatistics
/// The graph representing all the Glue components that belong to the workflow as nodes and directed
/// connections between them as edges.
Graph: WorkflowGraph
/// The batch condition that started the workflow run.
StartingEventBatchCondition: StartingEventBatchCondition
}
/// Workflow run statistics provides statistics about the workflow run.
structure WorkflowRunStatistics {
/// Total number of Actions in the workflow run.
TotalActions: IntegerValue = 0
/// Total number of Actions that timed out.
TimeoutActions: IntegerValue = 0
/// Total number of Actions that have failed.
FailedActions: IntegerValue = 0
/// Total number of Actions that have stopped.
StoppedActions: IntegerValue = 0
/// Total number of Actions that have succeeded.
SucceededActions: IntegerValue = 0
/// Total number of Actions in running state.
RunningActions: IntegerValue = 0
/// Indicates the count of job runs in the ERROR state in the workflow run.
ErroredActions: IntegerValue = 0
/// Indicates the count of job runs in WAITING state in the workflow run.
WaitingActions: IntegerValue = 0
}
/// A classifier for XML content.
structure XMLClassifier {
/// The name of the classifier.
@required
Name: NameString
/// An identifier of the data format that the classifier matches.
@required
Classification: Classification
/// The time that this classifier was registered.
CreationTime: Timestamp
/// The time that this classifier was last updated.
LastUpdated: Timestamp
/// The version of this classifier.
Version: VersionId = 0
/// The XML tag designating the element that contains each record in an XML document being
/// parsed. This can't identify a self-closing element (closed by />). An empty
/// row element that contains only attributes can be parsed as long as it ends with a closing tag
/// (for example, <row item_a="A" item_b="B"></row> is okay, but
/// <row item_a="A" item_b="B" /> is not).
RowTag: RowTag
}
list ActionList {
member: Action
}
@length(
min: 1
max: 30
)
list AggregateOperations {
member: AggregateOperation
}
list AmazonRedshiftAdvancedOptions {
member: AmazonRedshiftAdvancedOption
}
list AuditColumnNamesList {
member: ColumnNameString
}
list BackfillErroredPartitionsList {
member: PartitionValueList
}
list BackfillErrors {
member: BackfillError
}
@length(
min: 0
max: 25
)
list BatchDeletePartitionValueList {
member: PartitionValueList
}
@length(
min: 0
max: 100
)
list BatchDeleteTableNameList {
member: NameString
}
@length(
min: 0
max: 100
)
list BatchDeleteTableVersionList {
member: VersionString
}
@length(
min: 1
max: 25
)
list BatchGetBlueprintNames {
member: OrchestrationNameString
}
@length(
min: 0
max: 1000
)
list BatchGetPartitionValueList {
member: PartitionValueList
}
list BatchStopJobRunErrorList {
member: BatchStopJobRunError
}
@length(
min: 1
max: 25
)
list BatchStopJobRunJobRunIdList {
member: IdString
}
list BatchStopJobRunSuccessfulSubmissionList {
member: BatchStopJobRunSuccessfulSubmission
}
list BatchUpdatePartitionFailureList {
member: BatchUpdatePartitionFailureEntry
}
@length(
min: 1
max: 100
)
list BatchUpdatePartitionRequestEntryList {
member: BatchUpdatePartitionRequestEntry
}
list BlueprintNames {
member: OrchestrationNameString
}
list BlueprintRuns {
member: BlueprintRun
}
list Blueprints {
member: Blueprint
}
@length(
min: 0
max: 100
)
list BoundedPartitionValueList {
member: ValueString
}
list CatalogEntries {
member: CatalogEntry
}
@length(
min: 1
)
list CatalogTablesList {
member: NameString
}
list CatalogTargetList {
member: CatalogTarget
}
list ClassifierList {
member: Classifier
}
list ClassifierNameList {
member: NameString
}
@length(
min: 0
max: 50
)
list CodeGenNodeArgs {
member: CodeGenNodeArg
}
list ColumnErrors {
member: ColumnError
}
@length(
min: 0
max: 100
)
list ColumnImportanceList {
member: ColumnImportance
}
list ColumnList {
member: Column
}
list ColumnRowFilterList {
member: ColumnRowFilter
}
list ColumnStatisticsErrors {
member: ColumnStatisticsError
}
list ColumnStatisticsList {
member: ColumnStatistics
}
list ColumnValueStringList {
member: ColumnValuesString
}
list ConditionList {
member: Condition
}
list ConnectionList {
member: Connection
}
@length(
min: 1
max: 20
)
list ContextWords {
member: NameString
}
list CrawlerHistoryList {
member: CrawlerHistory
}
list CrawlerList {
member: Crawler
}
list CrawlerMetricsList {
member: CrawlerMetrics
}
@length(
min: 0
max: 100
)
list CrawlerNameList {
member: NameString
}
list CrawlList {
member: Crawl
}
list CrawlsFilterList {
member: CrawlsFilter
}
list CsvHeader {
member: NameString
}
list CustomDatatypes {
member: NameString
}
@length(
min: 1
max: 50
)
list CustomEntityTypeNames {
member: NameString
}
list CustomEntityTypes {
member: CustomEntityType
}
list DagEdges {
member: CodeGenEdge
}
list DagNodes {
member: CodeGenNode
}
list DatabaseList {
member: Database
}
list DataQualityResultDescriptionList {
member: DataQualityResultDescription
}
@length(
min: 1
max: 10
)
list DataQualityResultIdList {
member: HashString
}
@length(
min: 1
max: 100
)
list DataQualityResultIds {
member: HashString
}
list DataQualityResultsList {
member: DataQualityResult
}
list DataQualityRuleRecommendationRunList {
member: DataQualityRuleRecommendationRunDescription
}
@length(
min: 1
max: 2000
)
list DataQualityRuleResults {
member: DataQualityRuleResult
}
list DataQualityRulesetEvaluationRunList {
member: DataQualityRulesetEvaluationRunDescription
}
list DataQualityRulesetList {
member: DataQualityRulesetListDetails
}
@length(
min: 0
max: 25
)
list DeleteConnectionNameList {
member: NameString
}
list DeltaTargetList {
member: DeltaTarget
}
list DevEndpointList {
member: DevEndpoint
}
list DevEndpointNameList {
member: NameString
}
@length(
min: 1
max: 25
)
list DevEndpointNames {
member: GenericString
}
list DynamoDBTargetList {
member: DynamoDBTarget
}
list EdgeList {
member: Edge
}
list EnableAdditionalMetadata {
member: JdbcMetadataEntry
}
list EnclosedInStringProperties {
member: EnclosedInStringProperty
}
list EnclosedInStringPropertiesMinOne {
member: EnclosedInStringProperty
}
list FilterExpressions {
member: FilterExpression
}
list FilterValues {
member: FilterValue
}
@length(
min: 0
max: 100
)
list GetColumnNamesList {
member: NameString
}
list GetResourcePoliciesResponseList {
member: GluePolicy
}
list GetTableVersionsList {
member: TableVersion
}
list GlueSchemas {
member: GlueSchema
}
list GlueStudioPathList {
member: EnclosedInStringProperties
}
list GlueStudioSchemaColumnList {
member: GlueStudioSchemaColumn
}
@length(
min: 0
max: 10
)
list GlueTables {
member: GlueTable
}
list HudiTargetList {
member: HudiTarget
}
list IcebergTargetList {
member: IcebergTarget
}
list JdbcTargetList {
member: JdbcTarget
}
list JobList {
member: Job
}
list JobNameList {
member: NameString
}
list JobRunList {
member: JobRun
}
@length(
min: 2
max: 2
)
list JoinColumns {
member: JoinColumn
}
@length(
min: 1
)
list KeyList {
member: NameString
}
@length(
min: 1
)
list KeySchemaElementList {
member: KeySchemaElement
}
list LimitedPathList {
member: LimitedStringList
}
list LimitedStringList {
member: GenericLimitedString
}
list LocationStringList {
member: LocationString
}
@length(
min: 1
)
list ManyInputs {
member: NodeId
}
list MappingList {
member: MappingEntry
}
list Mappings {
member: Mapping
}
@length(
min: 0
max: 10
)
list MatchCriteria {
member: NameString
}
list MetadataList {
member: MetadataKeyValuePair
}
list MongoDBTargetList {
member: MongoDBTarget
}
list NameStringList {
member: NameString
}
list NodeIdList {
member: NameString
}
list NodeList {
member: Node
}
@length(
min: 0
max: 50
)
list NullValueFields {
member: NullValueField
}
@length(
min: 1
max: 1
)
list OneInput {
member: NodeId
}
list OptionList {
member: Option
}
list OrchestrationStringList {
member: GenericString
}
list OrderList {
member: Order
}
list OtherMetadataValueList {
member: OtherMetadataValueListItem
}
list PartitionErrors {
member: PartitionError
}
list PartitionIndexDescriptorList {
member: PartitionIndexDescriptor
}
@length(
min: 0
max: 3
)
list PartitionIndexList {
member: PartitionIndex
}
@length(
min: 0
max: 100
)
list PartitionInputList {
member: PartitionInput
}
list PartitionList {
member: Partition
}
list PathList {
member: Path
}
list PermissionList {
member: Permission
}
@length(
min: 1
max: 255
)
list PermissionTypeList {
member: PermissionType
}
list PredecessorList {
member: Predecessor
}
list PrincipalPermissionsList {
member: PrincipalPermissions
}
@length(
min: 0
max: 5
)
list PublicKeysList {
member: GenericString
}
list RegistryListDefinition {
member: RegistryListItem
}
@length(
min: 0
max: 1000
)
list ResourceUriList {
member: ResourceUri
}
@length(
min: 1
max: 10
)
list RulesetNames {
member: NameString
}
list S3EncryptionList {
member: S3Encryption
}
list S3TargetList {
member: S3Target
}
list SchemaListDefinition {
member: SchemaListItem
}
list SchemaVersionErrorList {
member: SchemaVersionErrorItem
}
list SchemaVersionList {
member: SchemaVersionListItem
}
list SearchPropertyPredicates {
member: PropertyPredicate
}
list SecurityConfigurationList {
member: SecurityConfiguration
}
@length(
min: 0
max: 50
)
list SecurityGroupIdList {
member: NameString
}
list SessionIdList {
member: NameString
}
list SessionList {
member: Session
}
@length(
min: 0
max: 1
)
list SortCriteria {
member: SortCriterion
}
list SqlAliases {
member: SqlAlias
}
list StatementList {
member: Statement
}
list StringList {
member: GenericString
}
list TableErrors {
member: TableError
}
list TableList {
member: Table
}
list TableVersionErrors {
member: TableVersionError
}
@length(
min: 0
max: 50
)
list TagKeysList {
member: TagKey
}
list TaskRunList {
member: TaskRun
}
list TransformConfigParameterList {
member: TransformConfigParameter
}
list TransformIdList {
member: HashString
}
list TransformList {
member: MLTransform
}
@length(
min: 0
max: 100
)
list TransformSchema {
member: SchemaColumn
}
list TriggerList {
member: Trigger
}
list TriggerNameList {
member: NameString
}
@length(
min: 2
max: 2
)
list TwoInputs {
member: NodeId
}
list UnfilteredPartitionList {
member: UnfilteredPartition
}
@length(
min: 0
max: 25
)
list UpdateColumnStatisticsList {
member: ColumnStatistics
}
list UserDefinedFunctionList {
member: UserDefinedFunction
}
list ValueStringList {
member: ValueString
}
@length(
min: 1
max: 25
)
list WorkflowNames {
member: NameString
}
@length(
min: 1
max: 1000
)
list WorkflowRuns {
member: WorkflowRun
}
@length(
min: 1
max: 25
)
list Workflows {
member: Workflow
}
map AdditionalOptions {
key: EnclosedInStringProperty
value: EnclosedInStringProperty
}
map AdditionalPlanOptionsMap {
key: GenericString
value: GenericString
}
@sensitive
map CodeGenConfigurationNodes {
key: NodeId
value: CodeGenConfigurationNode
}
@length(
min: 0
max: 100
)
map ConnectionProperties {
key: ConnectionPropertyKey
value: ValueString
}
map DataSourceMap {
key: NameString
value: DataSource
}
map DQAdditionalOptions {
key: AdditionalOptionKeys
value: GenericString
}
map DQDLAliases {
key: NodeName
value: EnclosedInStringProperty
}
map ErrorByName {
key: NameString
value: ErrorDetail
}
map EvaluatedMetricsMap {
key: NameString
value: NullableDouble
}
map GenericMap {
key: GenericString
value: GenericString
}
@length(
min: 1
max: 10
)
map GlueTableAdditionalOptions {
key: NameString
value: DescriptionString
}
map JDBCDataTypeMapping {
key: JDBCDataType
value: GlueRecordType
}
map LocationMap {
key: ColumnValuesString
value: ColumnValuesString
}
@length(
min: 0
max: 100
)
map MapValue {
key: GenericString
value: GenericString
}
map MetadataInfoMap {
key: MetadataKeyString
value: MetadataInfo
}
@length(
min: 0
max: 75
)
map OrchestrationArgumentsMap {
key: OrchestrationNameString
value: OrchestrationArgumentsValue
}
map ParametersMap {
key: KeyString
value: ParametersMapValue
}
@length(
min: 0
max: 50
)
map TagsMap {
key: TagKey
value: TagValue
}
map WorkflowRunProperties {
key: IdString
value: GenericString
}
@length(
min: 0
max: 12
)
string AccountId
enum AdditionalOptionKeys {
CacheOption = "performanceTuning.caching"
}
enum AggFunction {
avg
countDistinct
count
first
last
kurtosis
max
min
skewness
stddev_samp
stddev_pop
sum
sumDistinct
var_samp
var_pop
}
@default(0)
integer AttemptCount
@length(
min: 0
max: 2048
)
string AuditContextString
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string AuthTokenString
enum BackfillErrorCode {
ENCRYPTED_PARTITION_ERROR
INTERNAL_ERROR
INVALID_PARTITION_TYPE_DATA_ERROR
MISSING_PARTITION_VALUE_ERROR
UNSUPPORTED_PARTITION_CHARACTER_ERROR
}
@default(0)
@range(
min: 1
max: 100
)
integer BatchSize
@range(
min: 1
max: 900
)
integer BatchWindow
blob Blob
@length(
min: 1
max: 131072
)
string BlueprintParameters
@length(
min: 1
max: 131072
)
string BlueprintParameterSpec
enum BlueprintRunState {
RUNNING
SUCCEEDED
FAILED
ROLLING_BACK
}
enum BlueprintStatus {
CREATING
ACTIVE
UPDATING
FAILED
}
@default(false)
boolean Boolean
boolean BooleanNullable
@default(false)
boolean BooleanValue
boolean BoxedBoolean
@range(
min: 0
max: 1
)
double BoxedDoubleFraction
long BoxedLong
@range(
min: 0
)
integer BoxedNonNegativeInt
@range(
min: 0
)
long BoxedNonNegativeLong
@range(
min: 0
)
integer BoxedPositiveInt
enum CatalogEncryptionMode {
DISABLED
SSEKMS = "SSE-KMS"
}
@range(
min: 1
max: 100
)
integer CatalogGetterPageSize
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string CatalogIdString
string Classification
enum CloudWatchEncryptionMode {
DISABLED
SSEKMS = "SSE-KMS"
}
string CodeGenArgName
string CodeGenArgValue
@length(
min: 1
max: 255
)
@pattern("^[A-Za-z_][A-Za-z0-9_]*$")
string CodeGenIdentifier
string CodeGenNodeType
@length(
min: 1
max: 1024
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string ColumnNameString
enum ColumnStatisticsType {
BOOLEAN
DATE
DECIMAL
DOUBLE
LONG
STRING
BINARY
}
@length(
min: 0
max: 131072
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string ColumnTypeString
string ColumnValuesString
@length(
min: 0
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string CommentString
@length(
min: 1
max: 40
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string CommitIdString
enum Comparator {
EQUALS
GREATER_THAN
LESS_THAN
GREATER_THAN_EQUALS
LESS_THAN_EQUALS
}
enum Compatibility {
NONE
DISABLED
BACKWARD
BACKWARD_ALL
FORWARD
FORWARD_ALL
FULL
FULL_ALL
}
enum CompressionType {
GZIP = "gzip"
BZIP2 = "bzip2"
}
string ConnectionName
enum ConnectionPropertyKey {
HOST
PORT
USER_NAME = "USERNAME"
PASSWORD
ENCRYPTED_PASSWORD
JDBC_DRIVER_JAR_URI
JDBC_DRIVER_CLASS_NAME
JDBC_ENGINE
JDBC_ENGINE_VERSION
CONFIG_FILES
INSTANCE_ID
JDBC_CONNECTION_URL
JDBC_ENFORCE_SSL
CUSTOM_JDBC_CERT
SKIP_CUSTOM_JDBC_CERT_VALIDATION
CUSTOM_JDBC_CERT_STRING
CONNECTION_URL
KAFKA_BOOTSTRAP_SERVERS
KAFKA_SSL_ENABLED
KAFKA_CUSTOM_CERT
KAFKA_SKIP_CUSTOM_CERT_VALIDATION
KAFKA_CLIENT_KEYSTORE
KAFKA_CLIENT_KEYSTORE_PASSWORD
KAFKA_CLIENT_KEY_PASSWORD
ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD
ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD
SECRET_ID
CONNECTOR_URL
CONNECTOR_TYPE
CONNECTOR_CLASS_NAME
KAFKA_SASL_MECHANISM
KAFKA_SASL_SCRAM_USERNAME
KAFKA_SASL_SCRAM_PASSWORD
KAFKA_SASL_SCRAM_SECRETS_ARN
ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD
KAFKA_SASL_GSSAPI_KEYTAB
KAFKA_SASL_GSSAPI_KRB5_CONF
KAFKA_SASL_GSSAPI_SERVICE
KAFKA_SASL_GSSAPI_PRINCIPAL
}
enum ConnectionType {
JDBC
SFTP
MONGODB
KAFKA
NETWORK
MARKETPLACE
CUSTOM
}
string CrawlerConfiguration
enum CrawlerHistoryState {
RUNNING
COMPLETED
FAILED
STOPPED
}
enum CrawlerLineageSettings {
ENABLE
DISABLE
}
@length(
min: 0
max: 128
)
string CrawlerSecurityConfiguration
enum CrawlerState {
READY
RUNNING
STOPPING
}
string CrawlId
enum CrawlState {
RUNNING
CANCELLING
CANCELLED
SUCCEEDED
FAILED
ERROR
}
string CreatedTimestamp
string CronExpression
@length(
min: 1
max: 1
)
@pattern("^[^\\r\\n]$")
string CsvColumnDelimiter
enum CsvHeaderOption {
UNKNOWN
PRESENT
ABSENT
}
@length(
min: 1
max: 1
)
@pattern("^[^\\r\\n]$")
string CsvQuoteSymbol
enum CsvSerdeOption {
OpenCSVSerDe
LazySimpleSerDe
None
}
@length(
min: 0
max: 16000
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string CustomPatterns
string DatabaseName
enum DataFormat {
AVRO
JSON
PROTOBUF
}
@length(
min: 1
max: 255
)
string DataLakePrincipalString
enum DataQualityRuleResultStatus {
PASS
FAIL
ERROR
}
@length(
min: 1
max: 65536
)
string DataQualityRulesetString
enum DeleteBehavior {
LOG
DELETE_FROM_DATABASE
DEPRECATE_IN_DATABASE
}
enum DeltaTargetCompressionType {
UNCOMPRESSED = "uncompressed"
SNAPPY = "snappy"
}
@length(
min: 0
max: 2048
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string DescriptionString
@length(
min: 0
max: 2048
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string DescriptionStringRemovable
@default(0)
double Double
@default(0)
double DoubleValue
@length(
min: 1
max: 65536
)
@pattern("^([\\u0020-\\u007E\\r\\s\\n])*$")
string DQDLString
enum DQStopJobOnFailureTiming {
Immediate
AfterDataLoad
}
enum DQTransformOutput {
PrimaryInput
EvaluationResults
}
enum EnableHybridValues {
TRUE
FALSE
}
@pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF]|[^\\S\\r\\n\"'])*$")
string EnclosedInStringProperty
@pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF]|[^\\S\\r\\n])*$")
string EnclosedInStringPropertyWithQuote
string ErrorCodeString
string ErrorMessageString
string ErrorString
string EventQueueArn
@length(
min: 0
max: 16
)
enum ExecutionClass {
FLEX
STANDARD
}
@default(0)
integer ExecutionTime
enum ExistCondition {
MUST_EXIST
NOT_EXIST
NONE
}
@pattern("^[\\s\\S]*$")
string ExtendedString
@length(
min: 1
max: 512
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string FederationIdentifier
enum FederationSourceErrorCode {
InvalidResponseException
OperationTimeoutException
OperationNotSupportedException
InternalServiceException
ThrottlingException
}
enum FieldName {
CRAWL_ID
STATE
START_TIME
END_TIME
DPU_HOUR
}
string FieldType
enum FilterLogicalOperator {
AND
OR
}
enum FilterOperation {
EQ
LT
GT
LTE
GTE
REGEX
ISNULL
}
enum FilterOperator {
GT
GE
LT
LE
EQ
NE
}
@length(
min: 0
max: 2048
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string FilterString
enum FilterValueType {
COLUMNEXTRACTED
CONSTANT
}
@length(
min: 0
max: 128
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string FormatString
@length(
min: 1
max: 512
)
string Generic512CharString
@range(
min: 0
max: 1
)
double GenericBoundedDouble
@pattern("^[A-Za-z0-9_-]*$")
string GenericLimitedString
string GenericString
enum GlueRecordType {
DATE
STRING
TIMESTAMP
INT
FLOAT
LONG
BIGDECIMAL
BYTE
SHORT
DOUBLE
}
@length(
min: 1
max: 10240
)
@pattern("^arn:(aws|aws-us-gov|aws-cn):glue:")
string GlueResourceArn
@length(
min: 0
max: 1024
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string GlueStudioColumnNameString
@length(
min: 1
max: 255
)
@pattern("^\\w+\\.\\w+$")
string GlueVersionString
@length(
min: 1
max: 2048
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\t]*$")
string GrokPattern
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string HashString
enum HudiTargetCompressionType {
GZIP = "gzip"
LZO = "lzo"
UNCOMPRESSED = "uncompressed"
SNAPPY = "snappy"
}
integer IdleTimeout
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string IdString
@default(0)
integer Integer
@default(0)
@range(
min: 0
max: 1
)
integer IntegerFlag
@default(0)
integer IntegerValue
@timestampFormat("date-time")
timestamp Iso8601DateTime
@default(false)
boolean IsVersionValid
enum JDBCConnectionType {
sqlserver
mysql
oracle
postgresql
redshift
}
enum JDBCDataType {
ARRAY
BIGINT
BINARY
BIT
BLOB
BOOLEAN
CHAR
CLOB
DATALINK
DATE
DECIMAL
DISTINCT
DOUBLE
FLOAT
INTEGER
JAVA_OBJECT
LONGNVARCHAR
LONGVARBINARY
LONGVARCHAR
NCHAR
NCLOB
NULL
NUMERIC
NVARCHAR
OTHER
REAL
REF
REF_CURSOR
ROWID
SMALLINT
SQLXML
STRUCT
TIME
TIME_WITH_TIMEZONE
TIMESTAMP
TIMESTAMP_WITH_TIMEZONE
TINYINT
VARBINARY
VARCHAR
}
enum JdbcMetadataEntry {
COMMENTS
RAWTYPES
}
enum JobBookmarksEncryptionMode {
DISABLED
CSEKMS = "CSE-KMS"
}
string JobName
enum JobRunState {
STARTING
RUNNING
STOPPING
STOPPED
SUCCEEDED
FAILED
TIMEOUT
ERROR
WAITING
}
enum JoinType {
EQUIJOIN = "equijoin"
LEFT = "left"
RIGHT = "right"
OUTER = "outer"
LEFT_SEMI = "leftsemi"
LEFT_ANTI = "leftanti"
}
string JsonPath
string JsonValue
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string KeyString
@pattern("^arn:aws:kms:")
string KmsKeyArn
@default(0)
integer LabelCount
enum Language {
PYTHON
SCALA
}
enum LastCrawlStatus {
SUCCEEDED
CANCELLED
FAILED
}
@default(false)
boolean LatestSchemaVersionBoolean
@length(
min: 0
max: 2056
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string LocationString
@length(
min: 1
max: 512
)
@pattern("^[\\.\\-_/#A-Za-z0-9]+$")
string LogGroup
enum Logical {
AND
ANY
}
enum LogicalOperator {
EQUALS
}
@length(
min: 1
max: 512
)
@pattern("^[^:*]*$")
string LogStream
@default(0)
long Long
@default(0)
long LongValue
@length(
min: 0
max: 256
)
@pattern("^[*A-Za-z0-9_-]*$")
string MaskValue
@default(0)
integer MaxConcurrentRuns
@range(
min: 1
max: 100
)
integer MaxResultsNumber
@default(0)
integer MaxRetries
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string MessagePrefix
string MessageString
@length(
min: 1
max: 128
)
@pattern("^[a-zA-Z0-9+-=._./@]+$")
string MetadataKeyString
enum MetadataOperation {
CREATE
}
@length(
min: 1
max: 256
)
@pattern("^[a-zA-Z0-9+-=._./@]+$")
string MetadataValueString
@default(0)
long MillisecondsCount
enum MLUserDataEncryptionModeString {
DISABLED
SSEKMS = "SSE-KMS"
}
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string NameString
@pattern("^[A-Za-z0-9_-]*$")
string NodeId
@pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF]|[^\\r\\n])*$")
string NodeName
enum NodeType {
CRAWLER
JOB
TRIGGER
}
@default(0)
@range(
min: 0
)
double NonNegativeDouble
@default(0)
@range(
min: 0
)
integer NonNegativeInt
@default(0)
@range(
min: 0
)
integer NonNegativeInteger
@default(0)
@range(
min: 0
)
long NonNegativeLong
@range(
min: 1
)
integer NotifyDelayAfter
boolean NullableBoolean
double NullableDouble
integer NullableInteger
@length(
min: 0
max: 4096
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string OrchestrationArgumentsValue
@length(
min: 1
max: 1024
)
@pattern("^arn:aws[^:]*:iam::[0-9]*:role/.+$")
string OrchestrationIAMRoleArn
@length(
min: 1
max: 128
)
@pattern("^[\\.\\-_A-Za-z0-9]+$")
string OrchestrationNameString
@length(
min: 20
max: 2048
)
@pattern("^arn:aws[^:]*:iam::[0-9]*:role/.+$")
string OrchestrationRoleArn
@length(
min: 1
max: 8192
)
@pattern("^s3://([^/]+)/([^/]+/)*([^/]+)$")
string OrchestrationS3Location
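// The pattern requires a full s3:// URI with a bucket and at least one key
// segment, for example "s3://my-bucket/scripts/etl-job.py".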
@length(
min: 0
max: 68000
)
string OrchestrationStatementCodeString
@length(
min: 0
max: 400000
)
string OrchestrationToken
@range(
min: 1
max: 1000
)
integer PageSize
string PaginationToken
@length(
min: 0
max: 512000
)
string ParametersMapValue
enum ParamType {
STR = "str"
INT = "int"
FLOAT = "float"
COMPLEX = "complex"
BOOL = "bool"
LIST = "list"
NULL = "null"
}
enum ParquetCompressionType {
SNAPPY = "snappy"
LZO = "lzo"
GZIP = "gzip"
UNCOMPRESSED = "uncompressed"
NONE = "none"
}
enum PartitionIndexStatus {
CREATING
ACTIVE
DELETING
FAILED
}
string Path
enum Permission {
ALL
SELECT
ALTER
DROP
DELETE
INSERT
CREATE_DATABASE
CREATE_TABLE
DATA_LOCATION_ACCESS
}
enum PermissionType {
COLUMN_PERMISSION
CELL_FILTER_PERMISSION
NESTED_PERMISSION
NESTED_CELL_PERMISSION
}
enum PiiType {
RowAudit
RowMasking
ColumnAudit
ColumnMasking
}
@length(
min: 2
)
string PolicyJsonString
@range(
min: 10
)
long PollingTime
@range(
min: 1
)
long PositiveLong
@length(
min: 0
max: 2048
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string PredicateString
enum PrincipalType {
USER
ROLE
GROUP
}
@range(
min: 0
max: 1
)
double Prob
string PythonScript
@pattern("^([2-3]|3[.]9)$")
string PythonVersionString
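// As written, this pattern accepts exactly "2", "3", or "3.9".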
@default(0)
@range(
min: 1
max: 50
)
integer QuerySchemaVersionMetadataMaxResults
enum QuoteChar {
QUOTE = "quote"
QUILLEMET = "quillemet"
SINGLE_QUOTE = "single_quote"
DISABLED = "disabled"
}
@length(
min: 1
max: 16
)
string RecipeVersion
long RecordsCount
enum RecrawlBehavior {
CRAWL_EVERYTHING
CRAWL_NEW_FOLDERS_ONLY
CRAWL_EVENT_MODE
}
enum RegistryStatus {
AVAILABLE
DELETING
}
@default(false)
boolean ReplaceBoolean
enum ResourceShareType {
FOREIGN
ALL
FEDERATED
}
enum ResourceType {
JAR
FILE
ARCHIVE
}
string Role
@pattern("^arn:aws:iam::\\d{12}:role/")
string RoleArn
string RoleString
string RowTag
string RunId
@length(
min: 0
max: 64
)
@pattern(".*")
string RuntimeNameString
enum S3EncryptionMode {
DISABLED
SSEKMS = "SSE-KMS"
SSES3 = "SSE-S3"
}
string ScalaCode
enum ScheduleState {
SCHEDULED
NOT_SCHEDULED
TRANSITIONING
}
@default(0)
@range(
min: 1
max: 100000
)
long SchemaCheckpointNumber
@length(
min: 1
max: 340000
)
@pattern("\\S")
string SchemaDefinitionDiff
@length(
min: 1
max: 170000
)
@pattern("\\S")
string SchemaDefinitionString
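// Smithy @pattern values are not anchored, so "\S" on the two schema-definition
// shapes above only requires at least one non-whitespace character.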
enum SchemaDiffType {
SYNTAX_DIFF
}
string SchemaPathString
@length(
min: 1
max: 255
)
@pattern("^[a-zA-Z0-9-_$#.]+$")
string SchemaRegistryNameString
string SchemaRegistryTokenString
enum SchemaStatus {
AVAILABLE
PENDING
DELETING
}
@length(
min: 1
max: 5000
)
string SchemaValidationError
@length(
min: 36
max: 36
)
@pattern("^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$")
string SchemaVersionIdString
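// Constrained to a lowercase, hyphenated UUID, for example
// "12345678-90ab-cdef-1234-567890abcdef".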
enum SchemaVersionStatus {
AVAILABLE
PENDING
FAILURE
DELETING
}
@length(
min: 0
max: 400000
)
string ScriptLocationString
enum Separator {
COMMA = "comma"
CTRLA = "ctrla"
PIPE = "pipe"
SEMICOLON = "semicolon"
TAB = "tab"
}
enum SessionStatus {
PROVISIONING
READY
FAILED
TIMEOUT
STOPPING
STOPPED
}
enum Sort {
ASCENDING = "ASC"
DESCENDING = "DESC"
}
enum SortDirectionType {
DESCENDING
ASCENDING
}
enum SourceControlAuthStrategy {
PERSONAL_ACCESS_TOKEN
AWS_SECRETS_MANAGER
}
enum SourceControlProvider {
GITHUB
AWS_CODE_COMMIT
}
@pattern("^([\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\s])*$")
string SqlQuery
enum StartingPosition {
LATEST = "latest"
TRIM_HORIZON = "trim_horizon"
EARLIEST = "earliest"
TIMESTAMP = "timestamp"
}
enum StatementState {
WAITING
RUNNING
AVAILABLE
CANCELLING
CANCELLED
ERROR
}
string TableName
@length(
min: 0
max: 128
)
string TablePrefix
@length(
min: 0
max: 255
)
string TableTypeString
@length(
min: 1
max: 128
)
string TagKey
@length(
min: 0
max: 256
)
string TagValue
enum TargetFormat {
JSON = "json"
CSV = "csv"
AVRO = "avro"
ORC = "orc"
PARQUET = "parquet"
HUDI = "hudi"
DELTA = "delta"
}
enum TaskRunSortColumnType {
TASK_RUN_TYPE
STATUS
STARTED
}
enum TaskStatusType {
STARTING
RUNNING
STOPPING
STOPPED
SUCCEEDED
FAILED
TIMEOUT
}
enum TaskType {
EVALUATION
LABELING_SET_GENERATION
IMPORT_LABELS
EXPORT_LABELS
FIND_MATCHES
}
@range(
min: 1
)
integer Timeout
timestamp Timestamp
timestamp TimestampValue
string Token
@range(
min: 0
max: 100
)
integer Topk
@default(0)
@range(
min: 1
max: 10
)
integer TotalSegmentsInteger
@length(
min: 1
max: 255
)
@pattern("^[\\p{L}\\p{N}\\p{P}]*$")
string TransactionIdString
enum TransformSortColumnType {
NAME
TRANSFORM_TYPE
STATUS
CREATED
LAST_MODIFIED
}
enum TransformStatusType {
NOT_READY
READY
DELETING
}
enum TransformType {
FIND_MATCHES
}
enum TriggerState {
CREATING
CREATED
ACTIVATING
ACTIVATED
DEACTIVATING
DEACTIVATED
DELETING
UPDATING
}
enum TriggerType {
SCHEDULED
CONDITIONAL
ON_DEMAND
EVENT
}
@length(
min: 0
max: 20000
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string TypeString
enum UnionType {
ALL
DISTINCT
}
enum UpdateBehavior {
LOG
UPDATE_IN_DATABASE
}
enum UpdateCatalogBehavior {
UPDATE_IN_DATABASE
LOG
}
string UpdatedTimestamp
@length(
min: 1
max: 1024
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*$")
string URI
string UriString
@length(
min: 0
max: 1024
)
string ValueString
@default(0)
long VersionId
@default(0)
@range(
min: 1
max: 100000
)
long VersionLongNumber
@length(
min: 1
max: 100000
)
@pattern("^[1-9][0-9]*|[1-9][0-9]*-[1-9][0-9]*$")
string VersionsString
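// Accepts a single version number ("2") or a "from-to" range ("1-5").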
@length(
min: 1
max: 255
)
@pattern("^[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*$")
string VersionString
@length(
min: 0
max: 409600
)
string ViewTextString
enum WorkerType {
Standard
G_1X = "G.1X"
G_2X = "G.2X"
G_025X = "G.025X"
G_4X = "G.4X"
G_8X = "G.8X"
Z_2X = "Z.2X"
}
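// Smithy identifiers cannot contain ".", so worker types such as G.1X are
// modeled as G_1X = "G.1X". Illustrative only (not part of the published
// model): a structure member targeting this shape would be declared as
//
//   workerType: WorkerType
//
// and carry values such as "G.1X" on the wire.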
enum WorkflowRunStatus {
RUNNING
COMPLETED
STOPPING
STOPPED
ERROR
}