com.amazonaws.athena.connector.lambda.serde.BlockSerializer (aws-athena-federation-sdk)
The Athena Query Federation SDK defines a set of interfaces and wire protocols that you can implement to enable Athena to delegate portions of its query execution plan to code that you write and deploy.
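Because BlockSerializer (shown below) is a Jackson StdSerializer, it would typically be registered on an ObjectMapper through a module. The following is a minimal sketch of that wiring, not part of the SDK: the class and method names (BlockSerializerExample, serializeBlock, buildMapper) are illustrative, and constructing the Block itself (which requires a BlockAllocator from the SDK) is assumed to happen elsewhere.

import com.amazonaws.athena.connector.lambda.data.Block;
import com.amazonaws.athena.connector.lambda.serde.BlockSerializer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import java.io.IOException;

public class BlockSerializerExample
{
    // Build an ObjectMapper that serializes Block via the (deprecated) BlockSerializer.
    private static ObjectMapper buildMapper()
    {
        SimpleModule module = new SimpleModule();
        module.addSerializer(Block.class, new BlockSerializer());
        ObjectMapper mapper = new ObjectMapper();
        mapper.registerModule(module);
        return mapper;
    }

    // Serialize a Block to JSON bytes with the layout {"aId": ..., "schema": ..., "records": ...}.
    public static byte[] serializeBlock(Block block) throws IOException
    {
        return buildMapper().writeValueAsBytes(block);
    }
}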
package com.amazonaws.athena.connector.lambda.serde;
/*-
* #%L
* Amazon Athena Query Federation SDK
* %%
* Copyright (C) 2019 Amazon Web Services
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.amazonaws.athena.connector.lambda.data.Block;
import com.amazonaws.athena.connector.lambda.data.RecordBatchSerDe;
import com.amazonaws.athena.connector.lambda.data.SchemaSerDe;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
* Uses either an explicit BlockAllocator or a BlockAllocatorRegistry to handle memory pooling associated with
* serializing blocks. Blocks are serialized as an Apache Arrow Schema + an Apache Arrow Record Batch. If you
* need to serialize multiple Blocks of the same Schema, we do not recommend using this class, since it will
* result in the same Schema being serialized multiple times. It may be more efficient to serialize the Schema
* once, separately, and then each Record Batch (a sketch of that pattern follows this class).
*
* This class also attempts to make use of the allocator_id field of the Block so that the same allocator id
* can be reused when deserializing. This helps limit the number of copy operations required to move a Block
* around, and it gives you tighter control over which parts of your query execution get which memory pool / limit.
*
* @deprecated {@link com.amazonaws.athena.connector.lambda.serde.v3.BlockSerDeV3} should be used instead
*/
@Deprecated
public class BlockSerializer
        extends StdSerializer<Block>
{
    // JSON field names used in the serialized form of a Block.
    protected static final String ALLOCATOR_ID_FIELD_NAME = "aId";
    protected static final String SCHEMA_FIELD_NAME = "schema";
    protected static final String BATCH_FIELD_NAME = "records";

    private final SchemaSerDe schemaSerDe;
    private final RecordBatchSerDe recordBatchSerDe;

    public BlockSerializer()
    {
        super(Block.class);
        this.schemaSerDe = new SchemaSerDe();
        // No allocator is supplied because this SerDe is only used to serialize here.
        this.recordBatchSerDe = new RecordBatchSerDe(null);
    }
    @Override
    public void serialize(Block block, JsonGenerator jsonGenerator, SerializerProvider serializerProvider)
            throws IOException
    {
        jsonGenerator.writeStartObject();
        // Record the allocator id so that deserialization can reuse the same allocator.
        jsonGenerator.writeStringField(ALLOCATOR_ID_FIELD_NAME, block.getAllocatorId());

        // The Arrow Schema is written as a binary field.
        ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
        schemaSerDe.serialize(block.getSchema(), schemaOut);
        jsonGenerator.writeBinaryField(SCHEMA_FIELD_NAME, schemaOut.toByteArray());
        schemaOut.close();

        // The Arrow Record Batch is only serialized when the Block has rows;
        // otherwise an empty binary field is written.
        ByteArrayOutputStream batchOut = new ByteArrayOutputStream();
        if (block.getRowCount() > 0) {
            recordBatchSerDe.serialize(block.getRecordBatch(), batchOut);
        }
        jsonGenerator.writeBinaryField(BATCH_FIELD_NAME, batchOut.toByteArray());

        jsonGenerator.writeEndObject();
    }
}
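As the class Javadoc recommends, when many Blocks share one Schema it can be cheaper to serialize the Schema once and then each Record Batch on its own, using the same SchemaSerDe and RecordBatchSerDe that BlockSerializer relies on. The sketch below is a minimal illustration of that pattern, not part of the SDK; the class and method names (SchemaOnceExample, serializeSchemaOnce) and the use of ByteArrayOutputStream targets are assumptions.

import com.amazonaws.athena.connector.lambda.data.Block;
import com.amazonaws.athena.connector.lambda.data.RecordBatchSerDe;
import com.amazonaws.athena.connector.lambda.data.SchemaSerDe;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SchemaOnceExample
{
    // Serialize the shared Schema a single time, then each Block's Record Batch separately.
    // Assumes a non-empty list of Blocks that all share one Schema.
    public static List<byte[]> serializeSchemaOnce(List<Block> blocksWithSameSchema) throws IOException
    {
        List<byte[]> payloads = new ArrayList<>();

        // One Schema payload for the whole set of Blocks.
        ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
        new SchemaSerDe().serialize(blocksWithSameSchema.get(0).getSchema(), schemaOut);
        payloads.add(schemaOut.toByteArray());

        // One Record Batch payload per Block, mirroring the null allocator BlockSerializer uses on the serialize path.
        RecordBatchSerDe batchSerDe = new RecordBatchSerDe(null);
        for (Block block : blocksWithSameSchema) {
            ByteArrayOutputStream batchOut = new ByteArrayOutputStream();
            if (block.getRowCount() > 0) {
                batchSerDe.serialize(block.getRecordBatch(), batchOut);
            }
            payloads.add(batchOut.toByteArray());
        }
        return payloads;
    }
}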