software.amazon.awssdk.services.firehose.model.OrcSerDe Maven / Gradle / Ivy
Show all versions of firehose Show documentation
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package software.amazon.awssdk.services.firehose.model;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.Function;
import software.amazon.awssdk.annotations.Generated;
import software.amazon.awssdk.core.SdkField;
import software.amazon.awssdk.core.SdkPojo;
import software.amazon.awssdk.core.protocol.MarshallLocation;
import software.amazon.awssdk.core.protocol.MarshallingType;
import software.amazon.awssdk.core.traits.ListTrait;
import software.amazon.awssdk.core.traits.LocationTrait;
import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList;
import software.amazon.awssdk.core.util.SdkAutoConstructList;
import software.amazon.awssdk.utils.ToString;
import software.amazon.awssdk.utils.builder.CopyableBuilder;
import software.amazon.awssdk.utils.builder.ToCopyableBuilder;
/**
*
* A serializer to use for converting data to the ORC format before storing it in Amazon S3. For more information, see
* Apache ORC.
*
*/
@Generated("software.amazon.awssdk:codegen")
public final class OrcSerDe implements SdkPojo, Serializable, ToCopyableBuilder {
private static final SdkField STRIPE_SIZE_BYTES_FIELD = SdkField. builder(MarshallingType.INTEGER)
.memberName("StripeSizeBytes").getter(getter(OrcSerDe::stripeSizeBytes)).setter(setter(Builder::stripeSizeBytes))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("StripeSizeBytes").build()).build();
private static final SdkField BLOCK_SIZE_BYTES_FIELD = SdkField. builder(MarshallingType.INTEGER)
.memberName("BlockSizeBytes").getter(getter(OrcSerDe::blockSizeBytes)).setter(setter(Builder::blockSizeBytes))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("BlockSizeBytes").build()).build();
private static final SdkField ROW_INDEX_STRIDE_FIELD = SdkField. builder(MarshallingType.INTEGER)
.memberName("RowIndexStride").getter(getter(OrcSerDe::rowIndexStride)).setter(setter(Builder::rowIndexStride))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("RowIndexStride").build()).build();
private static final SdkField ENABLE_PADDING_FIELD = SdkField. builder(MarshallingType.BOOLEAN)
.memberName("EnablePadding").getter(getter(OrcSerDe::enablePadding)).setter(setter(Builder::enablePadding))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("EnablePadding").build()).build();
private static final SdkField PADDING_TOLERANCE_FIELD = SdkField. builder(MarshallingType.DOUBLE)
.memberName("PaddingTolerance").getter(getter(OrcSerDe::paddingTolerance)).setter(setter(Builder::paddingTolerance))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("PaddingTolerance").build()).build();
private static final SdkField COMPRESSION_FIELD = SdkField. builder(MarshallingType.STRING)
.memberName("Compression").getter(getter(OrcSerDe::compressionAsString)).setter(setter(Builder::compression))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("Compression").build()).build();
private static final SdkField> BLOOM_FILTER_COLUMNS_FIELD = SdkField
.> builder(MarshallingType.LIST)
.memberName("BloomFilterColumns")
.getter(getter(OrcSerDe::bloomFilterColumns))
.setter(setter(Builder::bloomFilterColumns))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("BloomFilterColumns").build(),
ListTrait
.builder()
.memberLocationName(null)
.memberFieldInfo(
SdkField. builder(MarshallingType.STRING)
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
.locationName("member").build()).build()).build()).build();
private static final SdkField BLOOM_FILTER_FALSE_POSITIVE_PROBABILITY_FIELD = SdkField
. builder(MarshallingType.DOUBLE)
.memberName("BloomFilterFalsePositiveProbability")
.getter(getter(OrcSerDe::bloomFilterFalsePositiveProbability))
.setter(setter(Builder::bloomFilterFalsePositiveProbability))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD)
.locationName("BloomFilterFalsePositiveProbability").build()).build();
private static final SdkField DICTIONARY_KEY_THRESHOLD_FIELD = SdkField. builder(MarshallingType.DOUBLE)
.memberName("DictionaryKeyThreshold").getter(getter(OrcSerDe::dictionaryKeyThreshold))
.setter(setter(Builder::dictionaryKeyThreshold))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("DictionaryKeyThreshold").build())
.build();
private static final SdkField FORMAT_VERSION_FIELD = SdkField. builder(MarshallingType.STRING)
.memberName("FormatVersion").getter(getter(OrcSerDe::formatVersionAsString)).setter(setter(Builder::formatVersion))
.traits(LocationTrait.builder().location(MarshallLocation.PAYLOAD).locationName("FormatVersion").build()).build();
private static final List> SDK_FIELDS = Collections.unmodifiableList(Arrays.asList(STRIPE_SIZE_BYTES_FIELD,
BLOCK_SIZE_BYTES_FIELD, ROW_INDEX_STRIDE_FIELD, ENABLE_PADDING_FIELD, PADDING_TOLERANCE_FIELD, COMPRESSION_FIELD,
BLOOM_FILTER_COLUMNS_FIELD, BLOOM_FILTER_FALSE_POSITIVE_PROBABILITY_FIELD, DICTIONARY_KEY_THRESHOLD_FIELD,
FORMAT_VERSION_FIELD));
private static final long serialVersionUID = 1L;
// Immutable member state, populated once from the builder in the private constructor.
private final Integer stripeSizeBytes;

private final Integer blockSizeBytes;

private final Integer rowIndexStride;

private final Boolean enablePadding;

private final Double paddingTolerance;

// Raw string form of the compression codec; exposed typed via compression().
private final String compression;

// Column names are strings (see the STRING member descriptor of BLOOM_FILTER_COLUMNS_FIELD).
private final List<String> bloomFilterColumns;

private final Double bloomFilterFalsePositiveProbability;

private final Double dictionaryKeyThreshold;

// Raw string form of the ORC format version; exposed typed via formatVersion().
private final String formatVersion;
/**
 * Creates an instance by copying every member value out of the given builder.
 *
 * @param builder
 *        the builder whose current member values are copied into this instance
 */
private OrcSerDe(BuilderImpl builder) {
    // Sizing and indexing members.
    stripeSizeBytes = builder.stripeSizeBytes;
    blockSizeBytes = builder.blockSizeBytes;
    rowIndexStride = builder.rowIndexStride;
    // Padding members.
    enablePadding = builder.enablePadding;
    paddingTolerance = builder.paddingTolerance;
    // Encoding members.
    compression = builder.compression;
    bloomFilterColumns = builder.bloomFilterColumns;
    bloomFilterFalsePositiveProbability = builder.bloomFilterFalsePositiveProbability;
    dictionaryKeyThreshold = builder.dictionaryKeyThreshold;
    formatVersion = builder.formatVersion;
}
/**
 * <p>
 * The number of bytes in each stripe. The default is 64 MiB and the minimum is 8 MiB.
 * </p>
 *
 * @return The stripe size in bytes, as stored on this object.
 */
public final Integer stripeSizeBytes() {
    return this.stripeSizeBytes;
}
/**
 * <p>
 * The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from
 * Amazon S3 to HDFS before querying. The default is 256 MiB and the minimum is 64 MiB. Firehose uses this
 * value for padding calculations.
 * </p>
 *
 * @return The HDFS block size in bytes, as stored on this object.
 */
public final Integer blockSizeBytes() {
    return this.blockSizeBytes;
}
/**
 * <p>
 * The number of rows between index entries. The default is 10,000 and the minimum is 1,000.
 * </p>
 *
 * @return The row index stride, as stored on this object.
 */
public final Integer rowIndexStride() {
    return this.rowIndexStride;
}
/**
 * <p>
 * Set this to {@code true} to indicate that you want stripes to be padded to the HDFS block boundaries. This
 * is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is
 * {@code false}.
 * </p>
 *
 * @return {@code true} if stripes should be padded to the HDFS block boundaries; the documented default is
 *         {@code false}.
 */
public final Boolean enablePadding() {
    return this.enablePadding;
}
/**
 * <p>
 * A number between 0 and 1 that defines the tolerance for block padding as a decimal fraction of stripe size.
 * The default value is 0.05, which means 5 percent of stripe size.
 * </p>
 * <p>
 * For the default values of 64 MiB ORC stripes and 256 MiB HDFS blocks, the default block padding tolerance of
 * 5 percent reserves a maximum of 3.2 MiB for padding within the 256 MiB block. In such a case, if the
 * available size within the block is more than 3.2 MiB, a new, smaller stripe is inserted to fit within that
 * space. This ensures that no stripe crosses block boundaries and causes remote reads within a node-local
 * task.
 * </p>
 * <p>
 * Firehose ignores this parameter when {@link #enablePadding() EnablePadding} is {@code false}.
 * </p>
 *
 * @return The block padding tolerance as a decimal fraction of stripe size (between 0 and 1; the documented
 *         default is 0.05). Ignored by Firehose when {@link #enablePadding() EnablePadding} is
 *         {@code false}.
 */
public final Double paddingTolerance() {
    return this.paddingTolerance;
}
/**
 * <p>
 * The compression code to use over data blocks. The default is {@code SNAPPY}.
 * </p>
 * <p>
 * If the service returns an enum value that is not available in the current SDK version, this method will
 * return {@link OrcCompression#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available
 * from {@link #compressionAsString}.
 * </p>
 *
 * @return The compression code, converted from its stored string form via
 *         {@link OrcCompression#fromValue(String)}.
 * @see OrcCompression
 */
public final OrcCompression compression() {
    final String rawValue = this.compression;
    return OrcCompression.fromValue(rawValue);
}
/**
 * <p>
 * The compression code to use over data blocks, in its raw string form. The default is {@code SNAPPY}.
 * </p>
 * <p>
 * If the service returns an enum value that is not available in the current SDK version,
 * {@link #compression} will return {@link OrcCompression#UNKNOWN_TO_SDK_VERSION}, while this method still
 * returns the raw value exactly as stored.
 * </p>
 *
 * @return The compression code as a string, without any enum conversion.
 * @see OrcCompression
 */
public final String compressionAsString() {
    return this.compression;
}
/**
 * Reports whether a value for the BloomFilterColumns property is actually present.
 * <p>
 * For responses, this returns true if the service returned a value for the property. For requests, it returns
 * true if a value was specified in the request builder. This does NOT check that the value is non-empty; use
 * {@code isEmpty()} on the property for that. The distinction matters because the SDK never returns a null
 * collection or map, so this method is the way to tell "nothing (or null) was returned" apart from "an empty
 * collection was returned".
 * </p>
 *
 * @return {@code true} when the list is non-null and is not an {@link SdkAutoConstructList} placeholder.
 */
public final boolean hasBloomFilterColumns() {
    if (bloomFilterColumns == null) {
        return false;
    }
    // An auto-constructed list stands in for "no value was provided".
    return !(bloomFilterColumns instanceof SdkAutoConstructList);
}
/**
 * <p>
 * The column names for which you want Firehose to create bloom filters. The default is {@code null}.
 * </p>
 * <p>
 * Attempts to modify the collection returned by this method will result in an UnsupportedOperationException.
 * </p>
 * <p>
 * This method will never return null. If you would like to know whether the service returned this field (so
 * that you can differentiate between null and empty), you can use the {@link #hasBloomFilterColumns} method.
 * </p>
 *
 * @return The column names for which you want Firehose to create bloom filters.
 */
public final List<String> bloomFilterColumns() {
    // Typed as List<String>: the member descriptor declares each element as a STRING.
    return bloomFilterColumns;
}
/**
 * <p>
 * The Bloom filter false positive probability (FPP). The lower the FPP, the bigger the Bloom filter. The
 * default value is 0.05, the minimum is 0, and the maximum is 1.
 * </p>
 *
 * @return The Bloom filter false positive probability, as stored on this object.
 */
public final Double bloomFilterFalsePositiveProbability() {
    return this.bloomFilterFalsePositiveProbability;
}
/**
 * <p>
 * Represents the fraction of the total number of non-null rows. To turn off dictionary encoding, set this
 * fraction to a number that is less than the number of distinct keys in a dictionary. To always use
 * dictionary encoding, set this threshold to 1.
 * </p>
 *
 * @return The dictionary key threshold, as stored on this object.
 */
public final Double dictionaryKeyThreshold() {
    return this.dictionaryKeyThreshold;
}
/**
 * <p>
 * The version of the file to write. The possible values are {@code V0_11} and {@code V0_12}. The default is
 * {@code V0_12}.
 * </p>
 * <p>
 * If the service returns an enum value that is not available in the current SDK version, this method will
 * return {@link OrcFormatVersion#UNKNOWN_TO_SDK_VERSION}. The raw value returned by the service is available
 * from {@link #formatVersionAsString}.
 * </p>
 *
 * @return The file format version, converted from its stored string form via
 *         {@link OrcFormatVersion#fromValue(String)}.
 * @see OrcFormatVersion
 */
public final OrcFormatVersion formatVersion() {
    final String rawValue = this.formatVersion;
    return OrcFormatVersion.fromValue(rawValue);
}
/**
 * <p>
 * The version of the file to write, in its raw string form. The possible values are {@code V0_11} and
 * {@code V0_12}. The default is {@code V0_12}.
 * </p>
 * <p>
 * If the service returns an enum value that is not available in the current SDK version,
 * {@link #formatVersion} will return {@link OrcFormatVersion#UNKNOWN_TO_SDK_VERSION}, while this method still
 * returns the raw value exactly as stored.
 * </p>
 *
 * @return The file format version as a string, without any enum conversion.
 * @see OrcFormatVersion
 */
public final String formatVersionAsString() {
    return this.formatVersion;
}
/**
 * Creates a builder that is constructed from this instance.
 *
 * @return a new {@link Builder} created via {@code new BuilderImpl(this)}
 */
@Override
public Builder toBuilder() {
    final Builder copy = new BuilderImpl(this);
    return copy;
}
/**
 * Creates a new builder for {@link OrcSerDe}.
 *
 * @return a new {@link Builder} created with the no-argument {@code BuilderImpl} constructor
 */
public static Builder builder() {
    final Builder fresh = new BuilderImpl();
    return fresh;
}
public static Class extends Builder> serializableBuilderClass() {
return BuilderImpl.class;
}
/**
 * Computes a hash over every member, in declaration order.
 * <p>
 * Enum-backed members are hashed through their raw string form, and a bloom filter column list for which
 * {@link #hasBloomFilterColumns()} is false contributes as {@code null}.
 * </p>
 *
 * @return the combined hash code
 */
@Override
public final int hashCode() {
    // Objects.hash folds its arguments as 31 * h + hash(element), seeded with 1.
    return Objects.hash(
            stripeSizeBytes(),
            blockSizeBytes(),
            rowIndexStride(),
            enablePadding(),
            paddingTolerance(),
            compressionAsString(),
            hasBloomFilterColumns() ? bloomFilterColumns() : null,
            bloomFilterFalsePositiveProbability(),
            dictionaryKeyThreshold(),
            formatVersionAsString());
}
/**
 * Compares this object with another for equality by delegating to {@link #equalsBySdkFields(Object)}.
 *
 * @param obj
 *        the object to compare against; may be {@code null}
 * @return the result of {@link #equalsBySdkFields(Object)} for {@code obj}
 */
@Override
public final boolean equals(Object obj) {
    return this.equalsBySdkFields(obj);
}
/**
 * Member-by-member equality check against another {@link OrcSerDe}.
 * <p>
 * Enum-backed members are compared through their raw string form. For the bloom filter columns, both the
 * presence flag ({@link #hasBloomFilterColumns()}) and the list contents must match.
 * </p>
 *
 * @param obj
 *        the object to compare against; may be {@code null}
 * @return {@code true} if {@code obj} is this instance, or an {@link OrcSerDe} whose members all equal this
 *         object's members; {@code false} otherwise (including for {@code null})
 */
@Override
public final boolean equalsBySdkFields(Object obj) {
    if (obj == this) {
        return true;
    }
    // instanceof is false for null, so this guard also rejects a null argument.
    if (!(obj instanceof OrcSerDe)) {
        return false;
    }
    final OrcSerDe that = (OrcSerDe) obj;
    return Objects.equals(stripeSizeBytes(), that.stripeSizeBytes())
            && Objects.equals(blockSizeBytes(), that.blockSizeBytes())
            && Objects.equals(rowIndexStride(), that.rowIndexStride())
            && Objects.equals(enablePadding(), that.enablePadding())
            && Objects.equals(paddingTolerance(), that.paddingTolerance())
            && Objects.equals(compressionAsString(), that.compressionAsString())
            && hasBloomFilterColumns() == that.hasBloomFilterColumns()
            && Objects.equals(bloomFilterColumns(), that.bloomFilterColumns())
            && Objects.equals(bloomFilterFalsePositiveProbability(), that.bloomFilterFalsePositiveProbability())
            && Objects.equals(dictionaryKeyThreshold(), that.dictionaryKeyThreshold())
            && Objects.equals(formatVersionAsString(), that.formatVersionAsString());
}
/**
 * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data
 * will be redacted from this string using a placeholder value.
 * <p>
 * Enum-backed members are reported in their raw string form, and the bloom filter columns are reported as
 * {@code null} when {@link #hasBloomFilterColumns()} is false.
 * </p>
 */
@Override
public final String toString() {
    final Object bloomFilterColumnsOrNull = hasBloomFilterColumns() ? bloomFilterColumns() : null;
    return ToString.builder("OrcSerDe")
            .add("StripeSizeBytes", stripeSizeBytes())
            .add("BlockSizeBytes", blockSizeBytes())
            .add("RowIndexStride", rowIndexStride())
            .add("EnablePadding", enablePadding())
            .add("PaddingTolerance", paddingTolerance())
            .add("Compression", compressionAsString())
            .add("BloomFilterColumns", bloomFilterColumnsOrNull)
            .add("BloomFilterFalsePositiveProbability", bloomFilterFalsePositiveProbability())
            .add("DictionaryKeyThreshold", dictionaryKeyThreshold())
            .add("FormatVersion", formatVersionAsString())
            .build();
}
/**
 * Looks up a member value by its member name (the same names used by the field descriptors, e.g.
 * {@code "StripeSizeBytes"}) and returns it cast to the requested type.
 * <p>
 * Enum-backed members ({@code "Compression"}, {@code "FormatVersion"}) are returned in their raw string form.
 * </p>
 *
 * @param fieldName
 *        the member name to look up; matching is case-sensitive
 * @param clazz
 *        the type the value is cast to
 * @param <T>
 *        the expected value type
 * @return an {@link Optional} holding the member value, or an empty {@link Optional} if the name is not a
 *         known member or the member's value is {@code null}
 * @throws ClassCastException
 *         if the member value is not an instance of {@code clazz}
 */
public final <T> Optional<T> getValueForField(String fieldName, Class<T> clazz) {
    final Object value;
    switch (fieldName) {
    case "StripeSizeBytes":
        value = stripeSizeBytes();
        break;
    case "BlockSizeBytes":
        value = blockSizeBytes();
        break;
    case "RowIndexStride":
        value = rowIndexStride();
        break;
    case "EnablePadding":
        value = enablePadding();
        break;
    case "PaddingTolerance":
        value = paddingTolerance();
        break;
    case "Compression":
        value = compressionAsString();
        break;
    case "BloomFilterColumns":
        value = bloomFilterColumns();
        break;
    case "BloomFilterFalsePositiveProbability":
        value = bloomFilterFalsePositiveProbability();
        break;
    case "DictionaryKeyThreshold":
        value = dictionaryKeyThreshold();
        break;
    case "FormatVersion":
        value = formatVersionAsString();
        break;
    default:
        // Unknown member name: nothing to cast, report absence.
        return Optional.empty();
    }
    // Class.cast passes null through unchanged, so an unset member becomes an empty Optional.
    return Optional.ofNullable(clazz.cast(value));
}
@Override
public final List> sdkFields() {
return SDK_FIELDS;
}
private static Function