io.kestra.plugin.aws.kinesis.PutRecords Maven / Gradle / Ivy
The newest version!
package io.kestra.plugin.aws.kinesis;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Strings;
import io.kestra.core.exceptions.IllegalVariableEvaluationException;
import io.kestra.core.models.annotations.Example;
import io.kestra.core.models.annotations.Plugin;
import io.kestra.core.models.annotations.PluginProperty;
import io.kestra.core.models.executions.metrics.Counter;
import io.kestra.core.models.executions.metrics.Timer;
import io.kestra.core.models.flows.State;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.runners.RunContext;
import io.kestra.core.serializers.FileSerde;
import io.kestra.core.serializers.JacksonMapper;
import io.kestra.plugin.aws.AbstractConnection;
import io.kestra.plugin.aws.ConnectionUtils;
import io.kestra.plugin.aws.kinesis.model.Record;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotNull;
import lombok.*;
import lombok.experimental.SuperBuilder;
import reactor.core.publisher.Flux;
import software.amazon.awssdk.services.kinesis.KinesisClient;
import software.amazon.awssdk.services.kinesis.model.PutRecordsRequest;
import software.amazon.awssdk.services.kinesis.model.PutRecordsRequestEntry;
import software.amazon.awssdk.services.kinesis.model.PutRecordsResponse;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import static io.kestra.core.utils.Rethrow.throwConsumer;
import static io.kestra.core.utils.Rethrow.throwFunction;
@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
@Plugin(
examples = {
@Example(
title = "Send multiple records as maps to Amazon Kinesis Data Streams. Check the following AWS API reference for the structure of the [PutRecordsRequestEntry](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecordsRequestEntry.html) request payload.",
full = true,
code = """
id: aws_kinesis_put_records
namespace: company.team
tasks:
- id: put_records
type: io.kestra.plugin.aws.kinesis.PutRecords
accessKeyId: ""
secretKeyId: ""
region: "eu-central-1"
streamName: "mystream"
records:
- data: "user sign-in event"
explicitHashKey: "optional hash value overriding the partition key"
partitionKey: "user1"
- data: "user sign-out event"
partitionKey: "user1"
"""
),
@Example(
title = "Send multiple records from an internal storage ion file to Amazon Kinesis Data Streams.",
full = true,
code = """
id: aws_kinesis_put_records
namespace: company.team
tasks:
- id: put_records
type: io.kestra.plugin.aws.kinesis.PutRecords
accessKeyId: ""
secretKeyId: ""
region: "eu-central-1"
streamName: "mystream"
records: kestra:///myfile.ion
"""
)
}
)
@Schema(
title = "Send multiple records to Amazon Kinesis Data Streams."
)
public class PutRecords extends AbstractConnection implements RunnableTask {
// Jackson mapper obtained from JacksonMapper.ofIon(), configured to always include properties
// when serializing. Within this class it is used to convert an inline `records` list.
private static final ObjectMapper MAPPER = JacksonMapper.ofIon()
.setSerializationInclusion(JsonInclude.Include.ALWAYS);
// When true (the builder default), run() throws as soon as the response reports at least one
// failed record.
@PluginProperty
@NotNull
@Schema(
title = "Mark the task as failed when sending a record is unsuccessful.",
description = "If true, the task will fail when any record fails to be sent."
)
@Builder.Default
private boolean failOnUnsuccessfulRecords = true;
// Target stream name. Only consulted when streamArn is null or empty.
@PluginProperty(dynamic = true)
@Schema(
title = "The name of the stream to push the records.",
description = "Make sure to set either `streamName` or `streamArn`. One of those must be provided."
)
private String streamName;
// Target stream ARN. Takes precedence over streamName when both are set.
@PluginProperty(dynamic = true)
@Schema(
title = "The ARN of the stream to push the records.",
description = "Make sure to set either `streamName` or `streamArn`. One of those must be provided."
)
private String streamArn;
// Either a String (rendered, then read as a `kestra` internal storage URI) or a List
// (converted with MAPPER). Any other runtime type is rejected when the records are loaded.
@PluginProperty(dynamic = true)
@Schema(
title = "List of records (i.e., list of maps) or internal storage URI of the file that defines the records to be sent to AWS Kinesis Data Streams.",
description = "A list of at least one record with a map including `data` and `partitionKey` properties (those two are required arguments). Check the [PutRecordsRequestEntry](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecordsRequestEntry.html) API reference for a detailed description of required fields.",
anyOf = {String.class, Record[].class}
)
@NotNull
private Object records;
/**
 * Loads the configured records, sends them in a single PutRecords call, publishes metrics and
 * stores a per-record result file.
 *
 * @param runContext the execution context used for rendering, logging, metrics and storage
 * @return the stored result file URI together with the failed and total record counts
 * @throws RuntimeException if {@code failOnUnsuccessfulRecords} is set and the response reports
 *                          at least one failed record
 * @throws Exception        if the records cannot be loaded, sent, or the result file cannot be stored
 */
@Override
public Output run(RunContext runContext) throws Exception {
    final long start = System.nanoTime();

    List<Record> records = getRecordList(this.records, runContext);
    PutRecordsResponse putRecordsResponse = putRecords(runContext, records);

    // The SDK exposes the count as a boxed Integer. Read it once and treat a missing value as
    // "no failures" rather than failing with a NullPointerException on auto-unboxing.
    final int failedCount = Optional.ofNullable(putRecordsResponse.failedRecordCount()).orElse(0);
    final int totalCount = records.size();

    // Fail if failOnUnsuccessfulRecords
    if (failOnUnsuccessfulRecords && failedCount > 0) {
        var logger = runContext.logger();
        logger.error("Response show {} record failed: {}", failedCount, putRecordsResponse);
        throw new RuntimeException(String.format("Response show %d record failed: %s", failedCount, putRecordsResponse));
    }

    // Set metrics
    runContext.metric(Timer.of("duration", Duration.ofNanos(System.nanoTime() - start)));
    runContext.metric(Counter.of("failedRecordCount", failedCount));
    runContext.metric(Counter.of("successfulRecordCount", totalCount - failedCount));
    runContext.metric(Counter.of("recordCount", totalCount));

    File tempFile = writeOutputFile(runContext, putRecordsResponse, records);
    return Output.builder()
        .uri(runContext.storage().putFile(tempFile))
        .failedRecordsCount(failedCount)
        .recordCount(totalCount)
        .build();
}
/**
 * Builds a PutRecords request for the configured stream and sends it with a short-lived client.
 *
 * <p>{@code streamArn} takes precedence over {@code streamName}. Both are declared as dynamic
 * properties, so each is rendered through the run context before use. The name is only
 * rendered when the ARN is absent or renders to an empty string.
 *
 * @param runContext the execution context used to render properties and build the client
 * @param records    the records to convert into request entries
 * @return the response returned by the Kinesis client
 * @throws IllegalVariableEvaluationException if a property or record cannot be rendered
 * @throws IllegalArgumentException           if neither a stream ARN nor a stream name is available
 */
private PutRecordsResponse putRecords(RunContext runContext, List<Record> records) throws IllegalVariableEvaluationException {
    try (KinesisClient client = client(runContext)) {
        PutRecordsRequest.Builder builder = PutRecordsRequest.builder();

        final String renderedArn = streamArn == null ? null : runContext.render(streamArn);
        if (!Strings.isNullOrEmpty(renderedArn)) {
            builder.streamARN(renderedArn);
        } else {
            final String renderedName = streamName == null ? null : runContext.render(streamName);
            if (Strings.isNullOrEmpty(renderedName)) {
                throw new IllegalArgumentException("Either streamName or streamArn has to be set.");
            }
            builder.streamName(renderedName);
        }

        List<PutRecordsRequestEntry> requestEntries = records.stream()
            .map(throwFunction(rec -> rec.toPutRecordsRequestEntry(runContext)))
            .collect(Collectors.toList());
        builder.records(requestEntries);

        return client.putRecords(builder.build());
    }
}
/**
 * Resolves the {@code records} property into a list of {@link Record} objects.
 *
 * <ul>
 *   <li>A {@code String} is rendered, parsed as a URI and must use the {@code kestra} scheme;
 *       the referenced file is then read with {@code FileSerde}.</li>
 *   <li>A {@code List} is converted element by element with the class mapper.</li>
 * </ul>
 *
 * @param records    the raw property value (a String URI or a List)
 * @param runContext the execution context used for rendering and storage access
 * @return the parsed records
 * @throws IllegalArgumentException           if the string is not a {@code kestra} URI
 * @throws IllegalVariableEvaluationException if rendering fails or the value has an unsupported type
 * @throws URISyntaxException                 if the rendered string is not a valid URI
 * @throws IOException                        if the storage file cannot be read
 */
private List<Record> getRecordList(Object records, RunContext runContext) throws IllegalVariableEvaluationException, URISyntaxException, IOException {
    if (records instanceof String) {
        URI from = new URI(runContext.render((String) records));
        // getScheme() is null for scheme-less input such as "myfile.ion". Comparing
        // constant-first reports that as an invalid parameter instead of a NullPointerException.
        if (!"kestra".equals(from.getScheme())) {
            throw new IllegalArgumentException("Invalid records parameter, must be a Kestra internal storage URI, or a list of records.");
        }
        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(runContext.storage().getFile(from), java.nio.charset.StandardCharsets.UTF_8))) {
            return FileSerde.readAll(reader, Record.class)
                .collectList()
                .block();
        }
    }

    if (records instanceof List) {
        // Spell out the target type so list elements are converted to Record.
        return MAPPER.convertValue(records, new TypeReference<List<Record>>() {
        });
    }

    // Guard the getClass() call so a null value yields the intended exception type.
    throw new IllegalVariableEvaluationException("Invalid records type '" + (records == null ? null : records.getClass()) + "'");
}
/**
 * Writes one {@link OutputEntry} per sent record to a temporary {@code .ion} file.
 *
 * <p>Records and response entries are paired positionally. Pairing stops at the end of the
 * shorter sequence.
 *
 * @param runContext         the execution context providing the working directory
 * @param putRecordsResponse the response whose per-record results are paired with the input
 * @param records            the records that were sent, in request order
 * @return the temporary file containing the serialized entries
 * @throws IOException if the temporary file cannot be created or written
 */
private File writeOutputFile(RunContext runContext, PutRecordsResponse putRecordsResponse, List<Record> records) throws IOException {
    // Create Output
    File tempFile = runContext.workingDir().createTempFile(".ion").toFile();
    try (var stream = new FileOutputStream(tempFile)) {
        Flux.fromIterable(records)
            .zipWithIterable(putRecordsResponse.records(), (rec, result) -> OutputEntry.builder()
                .record(rec)
                .sequenceNumber(result.sequenceNumber())
                .shardId(result.shardId())
                .errorCode(result.errorCode())
                .errorMessage(result.errorMessage())
                .build())
            // doOnNext only sees data signals. doOnEach also fires for the terminal signal,
            // whose get() is null, which would hand a null row to the serializer.
            .doOnNext(throwConsumer(entry -> FileSerde.write(stream, entry)))
            // Drain the stream without buffering every entry in an intermediate list.
            .blockLast();
    }
    return tempFile;
}
/**
 * Creates a synchronous Kinesis client from this task's AWS connection settings.
 *
 * @param runContext the execution context used to resolve the connection configuration
 * @return a newly built client; the caller is responsible for closing it
 * @throws IllegalVariableEvaluationException if the connection configuration cannot be resolved
 */
protected KinesisClient client(final RunContext runContext) throws IllegalVariableEvaluationException {
    return ConnectionUtils
        .configureSyncClient(awsClientConfig(runContext), KinesisClient.builder())
        .build();
}
/**
 * Task output: where the per-record result file was stored, plus the failed and total counts.
 */
@Builder
@Getter
public static class Output implements io.kestra.core.models.tasks.Output {
    // The literals are concatenated, so each sentence needs its own trailing space.
    @Schema(
        title = "The URI of stored data",
        description = "The successfully and unsuccessfully ingested records. " +
            "If the ingestion was successful, the output includes the record sequence number. " +
            "Otherwise, the output provides the error code and error message for troubleshooting."
    )
    private URI uri;

    @Schema(
        title = "The number of failed records."
    )
    private int failedRecordsCount;

    @Schema(
        title = "The total number of records sent to AWS Kinesis Data Streams."
    )
    private int recordCount;

    /**
     * Reports {@code WARNING} when at least one record failed; otherwise defers to the
     * interface default.
     *
     * @return the final state override, if any
     */
    @Override
    public Optional<State.Type> finalState() {
        if (this.failedRecordsCount > 0) {
            return Optional.of(State.Type.WARNING);
        }
        return io.kestra.core.models.tasks.Output.super.finalState();
    }
}
@Builder
@Getter
public static class OutputEntry {
@Schema(
title = "The sequence number for an individual record result."
)
private final String sequenceNumber;
@Schema(
title = "The shard ID for an individual record result."
)
private final String shardId;
@Schema(
title = "The error code that indicates the failure."
)
private final String errorCode;
@Schema(
title = "The error message that explains the failure."
)
private final String errorMessage;
@Schema(
title = "The original record."
)
private final Record record;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy