// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
 * Copyright (c) 2018-2019 Snowplow Analytics Ltd. All rights reserved.
 *
 * This program is licensed to you under the Apache License Version 2.0,
 * and you may not use this file except in compliance with the Apache License Version 2.0.
 * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the Apache License Version 2.0 is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
 */
package com.snowplowanalytics.snowplow.eventsmanifest
// java.time
import java.time.Instant
import java.util.UUID
// Scala
import scala.jdk.CollectionConverters._
// AWS
/**
 * Wrapper for a DynamoDB client that handles interactions with the events manifest table.
 * Due to containing lots of mutable state, references and unserializable objects this wrapper
 * should be constructed as late as possible - straight inside a `ShredJob`.
 * Initialized via `EventsManifest.initStorage()`.
 *
 * @param client AWS DynamoDB client object
 * @param table AWS DynamoDB table name
 */
case class DynamoDbManifest(client: AmazonDynamoDB, table: String) extends EventsManifest {
  import DynamoDbManifest._

  /**
   * Try to store parts of an event into a previously specified table.
   *
   * @param eventId Snowplow event ID (UUID)
   * @param eventFingerprint enriched event's fingerprint
   * @param etlTstamp enrichment job's timestamp
   * @return true if the event is successfully stored in the table,
   *         false if both eventId and fingerprint are already in the table
   */
  def put(eventId: UUID, eventFingerprint: String, etlTstamp: Instant): Boolean = {
    // Epoch seconds are kept as Long: narrowing to Int wraps for timestamps after
    // 2038-01-19, and adding the TTL offset would already wrap from mid-2037.
    val etl: Long = etlTstamp.toEpochMilli / 1000
    // Item expiry: 180 days after the ETL timestamp, in epoch seconds.
    val ttl: Long = etl + 180L * 24 * 60 * 60

    // NOTE(review): the `.withItem(attributeValues(...))` wrapper was reconstructed from the
    // surviving key/value pairs and the `attributeValues` helper - confirm against the original.
    // NOTE(review): the SDK's `with*` setters presumably mutate `putRequestDummy` in place, so
    // the shared request is reused (and overwritten) by every call - confirm before using this
    // instance from several threads.
    val putRequest = putRequestDummy
      .withExpressionAttributeValues(Map(":tst" -> new AttributeValue().withN(etl.toString)).asJava)
      .withItem(attributeValues(Seq(
        EventIdColumn -> eventId.toString,
        FingerprintColumn -> eventFingerprint,
        EtlTstampColumn -> etl,
        TimeToLiveColumn -> ttl)
      ))

    try {
      client.putItem(putRequest)
      true
    } catch {
      // The condition expression rejected the write: the event is a duplicate.
      case _: ConditionalCheckFailedException => false
    }
  }

  /**
   * A conditional write request that will pass if both `eventId` AND `fingerprint` are not present in the table,
   * effectively meaning that only non-duplicates will be written.
   * Dupes can still pass if an event's `etl_timestamp` matches, effectively meaning that a previous shred was
   * interrupted and an event is being overwritten.
   */
  val putRequestDummy: PutItemRequest = new PutItemRequest()
    .withTableName(table)
    .withConditionExpression(s"(attribute_not_exists($EventIdColumn) AND attribute_not_exists($FingerprintColumn)) OR $EtlTstampColumn = :tst")
}
/** Column names and table-management helpers for the DynamoDB-backed events manifest. */
object DynamoDbManifest {

  val EventIdColumn = "eventId"
  val FingerprintColumn = "fingerprint"
  val EtlTstampColumn = "etlTime"
  val TimeToLiveColumn = "ttl"

  /**
   * Check that a table is available (block for some time if necessary).
   *
   * @param client AWS DynamoDB client with an established connection
   * @param name DynamoDB table name
   * @return either the same table name or an exception
   * @throws IllegalStateException if the table does not exist
   */
  def checkTable(client: AmazonDynamoDB, name: String): String = {
    val request = new DescribeTableRequest().withTableName(name)
    // NOTE(review): the body of this `try` was reconstructed - confirm against the original.
    val result = try {
      Option(client.describeTable(request).getTable)
    } catch {
      case _: ResourceNotFoundException => None
    }
    result match {
      case Some(description) =>
        waitForActive(client, name, description)
        name
      case None =>
        throw new IllegalStateException("Amazon DynamoDB table for event manifest is unavailable")
    }
  }

  /**
   * Create a DynamoDB table with indices designed to store event manifests.
   * Unlike `DynamoDB#createTable`, this is a blocking operation.
   *
   * @param client AWS DynamoDB client with an established connection
   * @param name DynamoDB table name
   * @param readCapacity DynamoDB reads/second (20 when not specified)
   * @param writeCapacity DynamoDB writes/second (100 when not specified)
   * @return table description object
   */
  def createTable(client: AmazonDynamoDB, name: String, readCapacity: Option[Long], writeCapacity: Option[Long]): TableDescription = {
    val pks = List(
      new AttributeDefinition(EventIdColumn, ScalarAttributeType.S),
      new AttributeDefinition(FingerprintColumn, ScalarAttributeType.S))
    val schema = List(
      new KeySchemaElement(EventIdColumn, KeyType.HASH),
      new KeySchemaElement(FingerprintColumn, KeyType.RANGE))

    val readCapacityUnits: Long = readCapacity.getOrElse(20L)
    val writeCapacityUnits: Long = writeCapacity.getOrElse(100L)

    // NOTE(review): table name, attribute definitions and key schema setters were
    // reconstructed from the otherwise unused `name`, `pks` and `schema` - confirm.
    val request = new CreateTableRequest()
      .withTableName(name)
      .withAttributeDefinitions(pks.asJava)
      .withKeySchema(schema.asJava)
      .withProvisionedThroughput(new ProvisionedThroughput(readCapacityUnits, writeCapacityUnits))
    val response = client.createTable(request)

    val description = waitForActive(client, name, response.getTableDescription)

    // NOTE(review): the TTL specification and request fields were reconstructed - confirm.
    val ttlSpecification = new TimeToLiveSpecification()
      .withAttributeName(TimeToLiveColumn)
      .withEnabled(true)
    val ttlRequest = new UpdateTimeToLiveRequest()
      .withTableName(name)
      .withTimeToLiveSpecification(ttlSpecification)
    client.updateTimeToLive(ttlRequest) // Update when the table is active

    description
  }

  /**
   * Blocking method to reassure that a table is available for read.
   *
   * @param client AWS DynamoDB client with an established connection
   * @param name DynamoDB table name
   * @param description description of the table to wait for
   * @return the result of `Table#waitForActive`
   */
  def waitForActive(client: AmazonDynamoDB, name: String, description: TableDescription): TableDescription = {
    new Table(client, name, description).waitForActive()
  }

  /**
   * Helper method to transform list arguments into a DynamoDB-compatible hash map.
   *
   * @param attributes list of key-value pairs (where values can only be strings or integers)
   * @return Java-compatible Hash-map
   */
  def attributeValues(attributes: Seq[(String, Any)]): java.util.Map[String, AttributeValue] =
    attributes
      .map { case (k, v) => (k, asAttributeValue(v)) }
      .toMap
      .asJava

  /**
   * Convert **only** strings and numbers to DynamoDB-compatible attribute data.
   *
   * @param v value to convert
   * @return a string (`S`) attribute for a `String`, a number (`N`) attribute for a
   *         `java.lang.Number`, and `null` for any other type
   */
  def asAttributeValue(v: Any): AttributeValue = {
    val value = new AttributeValue
    v match {
      case s: String => value.withS(s)
      case n: java.lang.Number => value.withN(n.toString)
      // Unsupported types yield null; kept as-is because callers may rely on it.
      case _ => null
    }
  }
}