// commonMain.aws.sdk.kotlin.services.glue.model.S3CsvSource.kt (Maven / Gradle / Ivy)
//
// Go to download
// Show more of this group | Show more artifacts with this name
// Show all versions of glue-jvm | Show documentation
// The AWS SDK for Kotlin client for Glue
// There is a newer version: 1.3.76
// Code generated by smithy-kotlin-codegen. DO NOT EDIT!

package aws.sdk.kotlin.services.glue.model

import aws.smithy.kotlin.runtime.SdkDsl

/**
 * Specifies a comma-separated value (CSV) data store stored in Amazon S3.
 *
 * Instances are immutable and are created through the [Builder] DSL, either with the companion
 * `invoke` operator (`S3CsvSource { ... }`) or by deriving a modified copy with [copy].
 *
 * The members [name], [paths], [quoteChar] and [separator] are required: constructing an instance
 * while any of them is still `null` on the builder fails with an [IllegalArgumentException].
 */
public class S3CsvSource private constructor(builder: Builder) {
    /**
     * Specifies additional connection options.
     */
    public val additionalOptions: aws.sdk.kotlin.services.glue.model.S3DirectSourceAdditionalOptions? = builder.additionalOptions
    /**
     * Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are `"gzip"` and `"bzip"`.
     */
    public val compressionType: aws.sdk.kotlin.services.glue.model.CompressionType? = builder.compressionType
    /**
     * Specifies a character to use for escaping. This option is used only when reading CSV files. The default value is `none`. If enabled, the character which immediately follows is used as-is, except for a small set of well-known escapes (`\n`, `\r`, `\t`, and `\0`).
     */
    public val escaper: kotlin.String? = builder.escaper
    /**
     * A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.
     */
    public val exclusions: List<String>? = builder.exclusions
    /**
     * Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to `"none"`.
     */
    public val groupFiles: kotlin.String? = builder.groupFiles
    /**
     * The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, `"groupFiles"` must be set to `"inPartition"` for this to take effect.
     */
    public val groupSize: kotlin.String? = builder.groupSize
    /**
     * This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
     */
    public val maxBand: kotlin.Int? = builder.maxBand
    /**
     * This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.
     */
    public val maxFilesInBand: kotlin.Int? = builder.maxFilesInBand
    /**
     * A Boolean value that specifies whether a single record can span multiple lines. This can occur when a field contains a quoted new-line character. You must set this option to True if any record spans multiple lines. The default value is `False`, which allows for more aggressive file-splitting during parsing.
     */
    public val multiline: kotlin.Boolean? = builder.multiline
    /**
     * The name of the data store.
     */
    public val name: kotlin.String = requireNotNull(builder.name) { "A non-null value must be provided for name" }
    /**
     * A Boolean value that specifies whether to use the advanced SIMD CSV reader along with Apache Arrow based columnar memory formats. Only available in Glue version 3.0.
     */
    public val optimizePerformance: kotlin.Boolean = builder.optimizePerformance
    /**
     * Specifies the data schema for the S3 CSV source.
     */
    public val outputSchemas: List<aws.sdk.kotlin.services.glue.model.GlueSchema>? = builder.outputSchemas
    /**
     * A list of the Amazon S3 paths to read from.
     */
    public val paths: List<String> = requireNotNull(builder.paths) { "A non-null value must be provided for paths" }
    /**
     * Specifies the character to use for quoting. The default is a double quote: `'"'`. Set this to `-1` to turn off quoting entirely.
     */
    public val quoteChar: aws.sdk.kotlin.services.glue.model.QuoteChar = requireNotNull(builder.quoteChar) { "A non-null value must be provided for quoteChar" }
    /**
     * If set to true, recursively reads files in all subdirectories under the specified paths.
     */
    public val recurse: kotlin.Boolean? = builder.recurse
    /**
     * Specifies the delimiter character. The default is a comma: ",", but any other character can be specified.
     */
    public val separator: aws.sdk.kotlin.services.glue.model.Separator = requireNotNull(builder.separator) { "A non-null value must be provided for separator" }
    /**
     * A Boolean value that specifies whether to skip the first data line. The default value is `False`.
     */
    public val skipFirst: kotlin.Boolean? = builder.skipFirst
    /**
     * A Boolean value that specifies whether to treat the first line as a header. The default value is `False`.
     */
    public val withHeader: kotlin.Boolean? = builder.withHeader
    /**
     * A Boolean value that specifies whether to write the header to output. The default value is `True`.
     */
    public val writeHeader: kotlin.Boolean? = builder.writeHeader

    public companion object {
        /**
         * Creates an [S3CsvSource] by applying [block] to a fresh [Builder] and building the result.
         */
        public operator fun invoke(block: Builder.() -> kotlin.Unit): aws.sdk.kotlin.services.glue.model.S3CsvSource = Builder().apply(block).build()
    }

    /**
     * Renders every member as `name=value`, comma-separated, wrapped in `S3CsvSource(...)`.
     */
    override fun toString(): kotlin.String = buildString {
        append("S3CsvSource(")
        append("additionalOptions=$additionalOptions,")
        append("compressionType=$compressionType,")
        append("escaper=$escaper,")
        append("exclusions=$exclusions,")
        append("groupFiles=$groupFiles,")
        append("groupSize=$groupSize,")
        append("maxBand=$maxBand,")
        append("maxFilesInBand=$maxFilesInBand,")
        append("multiline=$multiline,")
        append("name=$name,")
        append("optimizePerformance=$optimizePerformance,")
        append("outputSchemas=$outputSchemas,")
        append("paths=$paths,")
        append("quoteChar=$quoteChar,")
        append("recurse=$recurse,")
        append("separator=$separator,")
        append("skipFirst=$skipFirst,")
        append("withHeader=$withHeader,")
        append("writeHeader=$writeHeader")
        append(")")
    }

    /**
     * Combines the hash codes of all members (a `null` member contributes `0`) using a
     * multiplier of 31, in the same member order that [equals] compares them.
     */
    override fun hashCode(): kotlin.Int {
        var result = additionalOptions?.hashCode() ?: 0
        result = 31 * result + (compressionType?.hashCode() ?: 0)
        result = 31 * result + (escaper?.hashCode() ?: 0)
        result = 31 * result + (exclusions?.hashCode() ?: 0)
        result = 31 * result + (groupFiles?.hashCode() ?: 0)
        result = 31 * result + (groupSize?.hashCode() ?: 0)
        result = 31 * result + (maxBand ?: 0)
        result = 31 * result + (maxFilesInBand ?: 0)
        result = 31 * result + (multiline?.hashCode() ?: 0)
        result = 31 * result + (name.hashCode())
        result = 31 * result + (optimizePerformance.hashCode())
        result = 31 * result + (outputSchemas?.hashCode() ?: 0)
        result = 31 * result + (paths.hashCode())
        result = 31 * result + (quoteChar.hashCode())
        result = 31 * result + (recurse?.hashCode() ?: 0)
        result = 31 * result + (separator.hashCode())
        result = 31 * result + (skipFirst?.hashCode() ?: 0)
        result = 31 * result + (withHeader?.hashCode() ?: 0)
        result = 31 * result + (writeHeader?.hashCode() ?: 0)
        return result
    }

    /**
     * Two instances are equal when they have exactly the same class and every member is equal.
     */
    override fun equals(other: kotlin.Any?): kotlin.Boolean {
        if (this === other) return true
        if (other == null || this::class != other::class) return false

        other as S3CsvSource

        if (additionalOptions != other.additionalOptions) return false
        if (compressionType != other.compressionType) return false
        if (escaper != other.escaper) return false
        if (exclusions != other.exclusions) return false
        if (groupFiles != other.groupFiles) return false
        if (groupSize != other.groupSize) return false
        if (maxBand != other.maxBand) return false
        if (maxFilesInBand != other.maxFilesInBand) return false
        if (multiline != other.multiline) return false
        if (name != other.name) return false
        if (optimizePerformance != other.optimizePerformance) return false
        if (outputSchemas != other.outputSchemas) return false
        if (paths != other.paths) return false
        if (quoteChar != other.quoteChar) return false
        if (recurse != other.recurse) return false
        if (separator != other.separator) return false
        if (skipFirst != other.skipFirst) return false
        if (withHeader != other.withHeader) return false
        if (writeHeader != other.writeHeader) return false

        return true
    }

    /**
     * Returns a new [S3CsvSource] whose builder starts from this instance's values and is then
     * modified by [block]. With the default empty [block] the result is an equal copy.
     */
    public inline fun copy(block: Builder.() -> kotlin.Unit = {}): aws.sdk.kotlin.services.glue.model.S3CsvSource = Builder(this).apply(block).build()

    /**
     * Mutable builder for [S3CsvSource].
     *
     * Every member starts as `null`, except [optimizePerformance], which starts as `false`.
     */
    @SdkDsl
    public class Builder {
        /**
         * Specifies additional connection options.
         */
        public var additionalOptions: aws.sdk.kotlin.services.glue.model.S3DirectSourceAdditionalOptions? = null
        /**
         * Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension. Possible values are `"gzip"` and `"bzip"`.
         */
        public var compressionType: aws.sdk.kotlin.services.glue.model.CompressionType? = null
        /**
         * Specifies a character to use for escaping. This option is used only when reading CSV files. The default value is `none`. If enabled, the character which immediately follows is used as-is, except for a small set of well-known escapes (`\n`, `\r`, `\t`, and `\0`).
         */
        public var escaper: kotlin.String? = null
        /**
         * A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all PDF files.
         */
        public var exclusions: List<String>? = null
        /**
         * Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000 files, set this parameter to `"none"`.
         */
        public var groupFiles: kotlin.String? = null
        /**
         * The target group size in bytes. The default is computed based on the input data size and the size of your cluster. When there are fewer than 50,000 input files, `"groupFiles"` must be set to `"inPartition"` for this to take effect.
         */
        public var groupSize: kotlin.String? = null
        /**
         * This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
         */
        public var maxBand: kotlin.Int? = null
        /**
         * This option specifies the maximum number of files to save from the last maxBand seconds. If this number is exceeded, extra files are skipped and only processed in the next job run.
         */
        public var maxFilesInBand: kotlin.Int? = null
        /**
         * A Boolean value that specifies whether a single record can span multiple lines. This can occur when a field contains a quoted new-line character. You must set this option to True if any record spans multiple lines. The default value is `False`, which allows for more aggressive file-splitting during parsing.
         */
        public var multiline: kotlin.Boolean? = null
        /**
         * The name of the data store.
         */
        public var name: kotlin.String? = null
        /**
         * A Boolean value that specifies whether to use the advanced SIMD CSV reader along with Apache Arrow based columnar memory formats. Only available in Glue version 3.0.
         */
        public var optimizePerformance: kotlin.Boolean = false
        /**
         * Specifies the data schema for the S3 CSV source.
         */
        public var outputSchemas: List<aws.sdk.kotlin.services.glue.model.GlueSchema>? = null
        /**
         * A list of the Amazon S3 paths to read from.
         */
        public var paths: List<String>? = null
        /**
         * Specifies the character to use for quoting. The default is a double quote: `'"'`. Set this to `-1` to turn off quoting entirely.
         */
        public var quoteChar: aws.sdk.kotlin.services.glue.model.QuoteChar? = null
        /**
         * If set to true, recursively reads files in all subdirectories under the specified paths.
         */
        public var recurse: kotlin.Boolean? = null
        /**
         * Specifies the delimiter character. The default is a comma: ",", but any other character can be specified.
         */
        public var separator: aws.sdk.kotlin.services.glue.model.Separator? = null
        /**
         * A Boolean value that specifies whether to skip the first data line. The default value is `False`.
         */
        public var skipFirst: kotlin.Boolean? = null
        /**
         * A Boolean value that specifies whether to treat the first line as a header. The default value is `False`.
         */
        public var withHeader: kotlin.Boolean? = null
        /**
         * A Boolean value that specifies whether to write the header to output. The default value is `True`.
         */
        public var writeHeader: kotlin.Boolean? = null

        /**
         * Creates a builder with every member at its initial value.
         */
        @PublishedApi
        internal constructor()

        /**
         * Creates a builder pre-populated with every member of [x]; used by [S3CsvSource.copy].
         */
        @PublishedApi
        internal constructor(x: aws.sdk.kotlin.services.glue.model.S3CsvSource) : this() {
            this.additionalOptions = x.additionalOptions
            this.compressionType = x.compressionType
            this.escaper = x.escaper
            this.exclusions = x.exclusions
            this.groupFiles = x.groupFiles
            this.groupSize = x.groupSize
            this.maxBand = x.maxBand
            this.maxFilesInBand = x.maxFilesInBand
            this.multiline = x.multiline
            this.name = x.name
            this.optimizePerformance = x.optimizePerformance
            this.outputSchemas = x.outputSchemas
            this.paths = x.paths
            this.quoteChar = x.quoteChar
            this.recurse = x.recurse
            this.separator = x.separator
            this.skipFirst = x.skipFirst
            this.withHeader = x.withHeader
            this.writeHeader = x.writeHeader
        }

        /**
         * Builds the immutable [S3CsvSource] from the current builder state.
         *
         * Throws [IllegalArgumentException] if [name], [paths], [quoteChar] or [separator] is `null`.
         */
        @PublishedApi
        internal fun build(): aws.sdk.kotlin.services.glue.model.S3CsvSource = S3CsvSource(this)

        /**
         * construct an [aws.sdk.kotlin.services.glue.model.S3DirectSourceAdditionalOptions] inside the given [block]
         */
        public fun additionalOptions(block: aws.sdk.kotlin.services.glue.model.S3DirectSourceAdditionalOptions.Builder.() -> kotlin.Unit) {
            this.additionalOptions = aws.sdk.kotlin.services.glue.model.S3DirectSourceAdditionalOptions.invoke(block)
        }

        /**
         * Replaces each required member that is still `null` with a placeholder so that [build]
         * does not throw: an empty string for [name], an empty list for [paths], and an
         * `SdkUnknown("no value provided")` value for [quoteChar] and [separator].
         *
         * @return this builder
         */
        internal fun correctErrors(): Builder {
            if (name == null) name = ""
            if (paths == null) paths = emptyList()
            if (quoteChar == null) quoteChar = QuoteChar.SdkUnknown("no value provided")
            if (separator == null) separator = Separator.SdkUnknown("no value provided")
            return this
        }
    }
}