software.amazon.kinesis.retrieval.AggregatorUtil Maven / Gradle / Ivy
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.retrieval;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import com.google.protobuf.InvalidProtocolBufferException;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.retrieval.kpl.Messages;
/**
*
*/
@Slf4j
public class AggregatorUtil {
public static final byte[] AGGREGATED_RECORD_MAGIC = new byte[]{-13, -119, -102, -62};
private static final int DIGEST_SIZE = 16;
private static final BigInteger STARTING_HASH_KEY = new BigInteger("0");
// largest hash key = 2^128-1
private static final BigInteger ENDING_HASH_KEY = new BigInteger(StringUtils.repeat("FF", 16), 16);
/**
* This method deaggregates the given list of Amazon Kinesis records into a
* list of KPL user records. This method will then return the resulting list
* of KPL user records.
*
* @param records A list of Amazon Kinesis records, each possibly aggregated.
* @return A resulting list of deaggregated KPL user records.
*/
public List deaggregate(List records) {
return deaggregate(records, STARTING_HASH_KEY, ENDING_HASH_KEY);
}
/**
* Deaggregate any KPL records found. This method converts the starting and ending hash keys to {@link BigInteger}s
* before passing them on to {@link #deaggregate(List, BigInteger, BigInteger)}
*
* @param records
* the records to potentially deaggreate
* @param startingHashKey
* the starting hash key of the shard
* @param endingHashKey
* the ending hash key of the shard
* @return A list of records with any aggregate records deaggregated
*/
public List deaggregate(List records, String startingHashKey,
String endingHashKey) {
return deaggregate(records, new BigInteger(startingHashKey), new BigInteger(endingHashKey));
}
/**
* This method deaggregates the given list of Amazon Kinesis records into a
* list of KPL user records. Any KPL user records whose explicit hash key or
* partition key falls outside the range of the startingHashKey and the
* endingHashKey are discarded from the resulting list. This method will
* then return the resulting list of KPL user records.
*
* @param records A list of Amazon Kinesis records, each possibly aggregated.
* @param startingHashKey A BigInteger representing the starting hash key that the
* explicit hash keys or partition keys of retained resulting KPL
* user records must be greater than or equal to.
* @param endingHashKey A BigInteger representing the ending hash key that the the
* explicit hash keys or partition keys of retained resulting KPL
* user records must be smaller than or equal to.
* @return A resulting list of KPL user records whose explicit hash keys or
* partition keys fall within the range of the startingHashKey and
* the endingHashKey.
*/
// CHECKSTYLE:OFF NPathComplexity
public List deaggregate(List records,
BigInteger startingHashKey,
BigInteger endingHashKey) {
List result = new ArrayList<>();
byte[] magic = new byte[AGGREGATED_RECORD_MAGIC.length];
byte[] digest = new byte[DIGEST_SIZE];
for (KinesisClientRecord r : records) {
boolean isAggregated = true;
long subSeqNum = 0;
ByteBuffer bb = r.data();
if (bb.remaining() >= magic.length) {
bb.get(magic);
} else {
isAggregated = false;
}
if (!Arrays.equals(AGGREGATED_RECORD_MAGIC, magic) || bb.remaining() <= DIGEST_SIZE) {
isAggregated = false;
}
if (isAggregated) {
int oldLimit = bb.limit();
bb.limit(oldLimit - DIGEST_SIZE);
byte[] messageData = new byte[bb.remaining()];
bb.get(messageData);
bb.limit(oldLimit);
bb.get(digest);
byte[] calculatedDigest = calculateTailCheck(messageData);
if (!Arrays.equals(digest, calculatedDigest)) {
isAggregated = false;
} else {
try {
Messages.AggregatedRecord ar = Messages.AggregatedRecord.parseFrom(messageData);
List pks = ar.getPartitionKeyTableList();
List ehks = ar.getExplicitHashKeyTableList();
long aat = r.approximateArrivalTimestamp() == null
? -1 : r.approximateArrivalTimestamp().toEpochMilli();
try {
int recordsInCurrRecord = 0;
for (Messages.Record mr : ar.getRecordsList()) {
String explicitHashKey = null;
String partitionKey = pks.get((int) mr.getPartitionKeyIndex());
if (mr.hasExplicitHashKeyIndex()) {
explicitHashKey = ehks.get((int) mr.getExplicitHashKeyIndex());
}
BigInteger effectiveHashKey = effectiveHashKey(partitionKey, explicitHashKey);
if (effectiveHashKey.compareTo(startingHashKey) < 0
|| effectiveHashKey.compareTo(endingHashKey) > 0) {
for (int toRemove = 0; toRemove < recordsInCurrRecord; ++toRemove) {
result.remove(result.size() - 1);
}
break;
}
++recordsInCurrRecord;
KinesisClientRecord record = r.toBuilder()
.data(ByteBuffer.wrap(mr.getData().toByteArray()))
.partitionKey(partitionKey)
.explicitHashKey(explicitHashKey)
.build();
result.add(convertRecordToKinesisClientRecord(record, true, subSeqNum++, explicitHashKey));
}
} catch (Exception e) {
StringBuilder sb = new StringBuilder();
sb.append("Unexpected exception during deaggregation, record was:\n");
sb.append("PKS:\n");
for (String s : pks) {
sb.append(s).append("\n");
}
sb.append("EHKS: \n");
for (String s : ehks) {
sb.append(s).append("\n");
}
for (Messages.Record mr : ar.getRecordsList()) {
sb.append("Record: [hasEhk=").append(mr.hasExplicitHashKeyIndex()).append(", ")
.append("ehkIdx=").append(mr.getExplicitHashKeyIndex()).append(", ")
.append("pkIdx=").append(mr.getPartitionKeyIndex()).append(", ")
.append("dataLen=").append(mr.getData().toByteArray().length).append("]\n");
}
sb.append("Sequence number: ").append(r.sequenceNumber()).append("\n")
.append("Raw data: ")
.append(javax.xml.bind.DatatypeConverter.printBase64Binary(messageData)).append("\n");
log.error(sb.toString(), e);
}
} catch (InvalidProtocolBufferException e) {
isAggregated = false;
}
}
}
if (!isAggregated) {
bb.rewind();
result.add(r);
}
}
return result;
}
protected byte[] calculateTailCheck(byte[] data) {
return md5(data);
}
protected BigInteger effectiveHashKey(String partitionKey, String explicitHashKey) throws UnsupportedEncodingException {
if (explicitHashKey == null) {
return new BigInteger(1, md5(partitionKey.getBytes("UTF-8")));
}
return new BigInteger(explicitHashKey);
}
private byte[] md5(byte[] data) {
try {
MessageDigest d = MessageDigest.getInstance("MD5");
return d.digest(data);
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
}
public KinesisClientRecord convertRecordToKinesisClientRecord(@NonNull final KinesisClientRecord record,
final boolean aggregated,
final long subSequenceNumber,
final String explicitHashKey) {
return KinesisClientRecord.builder()
.data(record.data())
.partitionKey(record.partitionKey())
.approximateArrivalTimestamp(record.approximateArrivalTimestamp())
.encryptionType(record.encryptionType())
.sequenceNumber(record.sequenceNumber())
.aggregated(aggregated)
.subSequenceNumber(subSequenceNumber)
.explicitHashKey(explicitHashKey)
.build();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy