package net.quasardb.kinesis;

import software.amazon.awssdk.services.kinesis.KinesisClient;

import software.amazon.awssdk.services.kinesis.model.*;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.ByteBuffer;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.atomic.AtomicLong;
import java.util.AbstractCollection;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.time.Instant;
import java.time.Duration;
import java.time.temporal.TemporalAmount;

import net.quasardb.qdb.Session;
import net.quasardb.qdb.SessionFactory;
import net.quasardb.qdb.ts.WritableRow;
import net.quasardb.qdb.ts.Timespec;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// import com.codahale.metrics.*;
import io.micrometer.core.instrument.*;
import io.micrometer.core.instrument.distribution.*;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.binder.BaseUnits;



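// Polls one or more Kinesis shards via the GetRecords API, parses the records
// into QuasarDB rows with the configured Parser, and forwards them in batches
// to a Relay acquired from the shared RelayPool. Progress is checkpointed per
// shard through the Checkpointer so polling can resume from the last stored
// sequence number, and per-shard metrics are published through Micrometer.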
class Consumer extends Thread {

    private static final Logger logger = LoggerFactory.getLogger(Consumer.class);

    private SessionFactory sessionFactory;
    private Session session = null;
    private RelayPool relayPool;
    private int batchSize;
    private Class<? extends Parser> parser;

    private List<String> shardIds;
    private Map<String, String> shardIteratorsById = new HashMap<>();
    private String prefix;
    private AbstractCollection<String> tableNames;
    private String streamName;

    private KinesisClient client;
    private CompletableFuture<Boolean> isStopped;
    private AtomicReference<Boolean> shouldStop;
    private CompletableFuture<?> subscriptionFuture;
    private Set<String> blacklist;
    private long rejectionAgeMs;

    private Map<String, AtomicLong>          lastLagByShardId = new HashMap<>();
    private Map<String, DistributionSummary> batchSizeDistributionByShardId = new HashMap<>();
    private Map<String, DistributionSummary> kinesisResponseSizeDistributionByShardId = new HashMap<>();
    private Map<String, Counter>             kinesisRequestsCounterByShardId = new HashMap<>();
    private Map<String, DistributionSummary> recordAgeDistributionByShardId = new HashMap<>();
    private Map<String, Counter>             recordsCounterByShardId = new HashMap<>();
    private Map<String, Counter>             emptyKinesisResponseCounterByShardId = new HashMap<>();
    private Map<String, Counter>             rejectedRecordsCounterByShardId = new HashMap<>();

    private Counter recordsCounter;

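    // Creates a consumer for the given shards and registers the per-shard meters
    // (record/request/rejection counters, a lag gauge, and batch/response/record-age
    // distributions), all tagged with the prefix and shard id.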
    public Consumer (KinesisClient client,
                     SessionFactory sessionFactory,
                     RelayPool relayPool,

                     int batchSize,
                     long rejectionAgeMs,
                     Set<String> blacklist,

                     Class<? extends Parser> parser,
                     List<String> shardIds,
                     String prefix,
                     Counter recordsCounter,

                     String streamName) throws Exception {
        this.client         = client;
        this.sessionFactory = sessionFactory;
        this.batchSize      = batchSize;
        this.rejectionAgeMs = rejectionAgeMs;
        this.blacklist      = blacklist;

        this.parser         = parser;
        this.shardIds       = shardIds;
        this.prefix         = prefix;
        this.recordsCounter = recordsCounter;

        this.streamName     = streamName;

        this.relayPool      = relayPool;
        this.isStopped      = new CompletableFuture<>();
        this.shouldStop     = new AtomicReference<>(false);

        for (String shardId : shardIds) {
            List<Tag> tags = Arrays.asList(Tag.of("prefix", prefix),
                                           Tag.of("shardId", shardId));

            this.recordsCounterByShardId.put(shardId, Metrics.registry.counter("records",
                                                                               tags));
            this.emptyKinesisResponseCounterByShardId.put(shardId, Metrics.registry.counter("empty_responses",
                                                                                            tags));
            this.rejectedRecordsCounterByShardId.put(shardId,
                                                     Metrics.registry.counter("rejected_records",
                                                                              tags));

            this.kinesisRequestsCounterByShardId.put(shardId, Metrics.registry.counter("requests",
                                                                                       tags));


            // `lastLag` is an AtomicLong that recordResultMetrics() updates with the
            // `millisBehindLatest` value of every GetRecords response
            AtomicLong lastLag = new AtomicLong();
            this.lastLagByShardId.put(shardId, lastLag);
            Metrics.registry.more().timeGauge("lag",
                                              tags,
                                              lastLag,
                                              TimeUnit.MILLISECONDS,
                                              (x) -> x.doubleValue()
                                              );

            this.batchSizeDistributionByShardId.put(shardId,
                                                    Metrics.registry.summary("batch.size",
                                                                             tags));

            this.kinesisResponseSizeDistributionByShardId.put(shardId,
                                                              Metrics.registry.summary("response.size",
                                                                                       tags));


            this.recordAgeDistributionByShardId.put(shardId,
                                                    DistributionSummary
                                                    .builder("record.age")
                                                    .baseUnit(BaseUnits.MILLISECONDS)

                                                    // 1ms min -- Micrometer requirement of being at
                                                    // least 1
                                                    .minimumExpectedValue((long)(1))

                                                    // 1000 days max; use a long so the
                                                    // multiplication does not overflow int
                                                    .maximumExpectedValue(1000L * 86400 * 1000)
                                                    .publishPercentiles(0.99999,
                                                                        0.9999,
                                                                        0.999,
                                                                        0.99,
                                                                        0.9)
                                                    .tags(tags)
                                                    .distributionStatisticExpiry(Duration.ofMinutes(15))
                                                    .register(Metrics.registry));

        }

    }


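    // Main loop: polls every shard in turn until gracefulStop() is called. Sleeps
    // 5s when no shard returned data or a shard was rate limited, backs off
    // linearly on unexpected errors, and exits the process after too many
    // consecutive failures.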
    @Override
    public void run() {

        long errorCount = 0;
        long maxErrors = 30;

        try {
            Parser parser = PluginLoader.instantiate(this.parser);

            while (this.shouldStop.get() == false) {
                if (this.session == null) {
                    logger.info("No session initialized yet, initializing new session");
                    this.session = this.sessionFactory.newSession();
                }

                try {
                    boolean  noneHaveData = true;
                    boolean  rateLimited = false;


                    for (String shardId : this.shardIds) {
                        if (this.shouldStop.get() == true) {
                            break;
                        }

                        try {
                            boolean  hasData = this.doPoll(parser, shardId);

                            if (hasData == true) {
                                noneHaveData = false;
                            }
                        } catch (ProvisionedThroughputExceededException e) {
                            logger.warn("rate limit exceeded, waiting 5s");
                            rateLimited = true;
                        } catch (ExpiredIteratorException e) {
                            logger.warn("Iterator expired for shard {}, doing nothing", shardId);
                        }
                    }


                    if (noneHaveData == true) {
                        logger.info("no data for any shards, waiting 5s");
                        Thread.sleep(5000);
                    }

                    if (rateLimited == true) {
                        logger.info("one of the shards was rate limited, waiting for 5s");
                        Thread.sleep(5000);
                    }

                    // Everything went fine, reset to default value
                    errorCount = 0;
                } catch (Exception e) {
                    logger.error("Unexpected exception while supervising consumers: ", e);

                    errorCount++;
                    long errorSleepMs = errorCount * 15000;

                    if (errorCount >= maxErrors) {
                        logger.error("too many errors!");
                        this.isStopped.completeExceptionally(e);
                        System.exit(-1);

                    }

                    logger.info("error count = {}, sleeping for {} ms", errorCount, errorSleepMs);
                    Thread.sleep(errorSleepMs);

                    // Exponential backoff in case errors keep repeating
                }
            }

            this.isStopped.complete(true);

        } catch (Exception e) {
            logger.error("Unexpected exception", e);
            logger.error("Consumer will stop");
            try {
                this.isStopped.completeExceptionally(e);
            } catch (Exception e2) {
                logger.error("Error while completing isStopped!", e2);
            }
        }

        logger.info("Consumer is completed");
    }

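    // Signals the polling loop to stop after the current iteration; completion
    // can be awaited through isStopped().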
    public void gracefulStop() {
        this.shouldStop.set(true);
    }


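    // Returns a future that completes once the polling loop has terminated,
    // either normally or exceptionally.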
    public CompletableFuture<Boolean> isStopped() {
        return this.isStopped;
    }


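    // Looks up the starting position for a shard via the Checkpointer.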
    private synchronized StartingPosition getStartingPosition(String shardId) {
        return Checkpointer.startingPosition(this.session,
                                             this.prefix,
                                             shardId);
    }

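    // Persists the last processed sequence number for a shard via the Checkpointer.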
    private synchronized void storeCheckpoint(String shardId, String sequenceNumber) {
        Checkpointer.store(this.session,
                           this.prefix,
                           shardId,
                           sequenceNumber);
    }


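    // Requests a fresh shard iterator from Kinesis for the checkpointed starting
    // position, passing the starting sequence number when the position type is
    // AFTER_SEQUENCE_NUMBER.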
    private String getShardIterator(String shardId) {
        StartingPosition pos = this.getStartingPosition(shardId);

        GetShardIteratorRequest.Builder builder = GetShardIteratorRequest.builder()
            .streamName(this.streamName)
            .shardIteratorType(pos.type())
            .shardId(shardId);

        if (pos.type () == ShardIteratorType.AFTER_SEQUENCE_NUMBER) {
            builder = builder.startingSequenceNumber(pos.sequenceNumber());
        }

        GetShardIteratorRequest itReq = builder.build();

        String shardIterator = this.client.getShardIterator(itReq).shardIterator();
        logger.debug("shard id {} has iterator iterator = {}", shardId, shardIterator);
        return shardIterator;
    }

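    // Updates the per-shard meters (request count, response size, lag, record
    // counts, empty responses) for a single GetRecords response.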
    private void recordResultMetrics(String shardId, GetRecordsResponse result) {
        this.kinesisRequestsCounterByShardId.get(shardId)
            .increment();
        this.kinesisResponseSizeDistributionByShardId.get(shardId)
            .record((double)(result.records().size()));
        this.lastLagByShardId.get(shardId)
            .set(result.millisBehindLatest());
        this.recordsCounterByShardId.get(shardId)
            .increment(result.records().size());
        this.recordsCounter.increment(result.records().size());

        if (result.records().size() == 0) {
            this.emptyKinesisResponseCounterByShardId.get(shardId).increment();
        }
    }

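    // Polls a single shard once, reusing the cached shard iterator when available.
    // Returns false when the shard has ended (null iterator); drops the cached
    // iterator before rethrowing when it has expired.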
    private boolean doPoll(Parser parser, String shardId) throws Exception {
        String shardIterator = null;

        try {
            if (this.shardIteratorsById.containsKey(shardId)) {
                shardIterator = this.shardIteratorsById.get(shardId);
            } else {
                shardIterator = this.getShardIterator(shardId);
            }

            if (shardIterator == null) {
                logger.debug("shard {} has ended", shardId);
                return false;
            } else {
                return this.doPoll(parser, shardId, shardIterator);
            }
        } catch (ExpiredIteratorException e) {
            logger.warn("Iterator expired for shard {}, removing entry", shardId);
            this.shardIteratorsById.remove(shardId);

            throw e;
        }
    }

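    // Fetches up to 10,000 records from the given shard iterator, hands any
    // records off for parsing and relaying, stores the next iterator for the
    // shard, and records response metrics. Returns whether the response
    // contained records.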
    private boolean doPoll(Parser parser, String shardId, String shardIterator) throws Exception {

        GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder()
            .shardIterator(shardIterator)
            // We always request 10,000 messages because it's more efficient and it's the
            // maximum kinesis supports
            .limit(10000)
            .build();

        GetRecordsResponse result = this.client.getRecords(getRecordsRequest);
        logger.debug("has result, hasRecords = {}, records size = {}", result.hasRecords(), result.records().size());

        if (result.hasRecords() && result.records().size() > 0) {
            this.handleResult(parser, shardId, result);
        }

        logger.debug("putting next shard iterator for shard {}", shardId);
        this.shardIteratorsById.put(shardId, result.nextShardIterator());
        this.recordResultMetrics(shardId, result);

        return result.hasRecords();
    }

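    // Age of a row in milliseconds, relative to the current wall-clock time.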
    private static long rowAgeMs(WritableRow row) {
        Timespec rowTs = row.getTimestamp();
        Timespec nowTs = Timespec.now();

        return nowTs.toEpochMillis () - rowTs.toEpochMillis();
    }

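    // Records the row's age in the per-shard record-age distribution.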
    private void measureRecordAge(String shardId, WritableRow row) throws IOException {

        this.recordAgeDistributionByShardId.get(shardId).record(rowAgeMs(row));
    }

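    // Pushes one batch of work items through a relay from the pool, flushing
    // before the relay is released. Skipped when a stop has been requested.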
    private void handleBatch(List<Relay.WorkItem> batch) throws Exception {
        if (this.shouldStop.get() == true) {
            return;
        }

        Relay relay = this.relayPool.acquire();
        try {
            relay.enqueue(batch);
            relay.flush();
        } finally {
            this.relayPool.release(relay);
        }
    }

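    // A row is rejected when its sensor id is blacklisted or, if a rejection age
    // is configured, when the row is older than that threshold.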
    private boolean isRowRejected(String sensorId, WritableRow row) {
        if (this.blacklist.contains(sensorId)) {
            return true;
        }

        if (this.rejectionAgeMs != 0 && this.rowAgeMs(row) > this.rejectionAgeMs) {
            return true;
        }

        return false;
    }

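    // Converts all records in a GetRecords response into relay work items,
    // dropping rejected rows, partitions them into batches of at most batchSize,
    // relays each batch, and checkpoints the last sequence number seen.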
    private void handleResult(Parser parser, String shardId, GetRecordsResponse result) throws Exception {
        logger.debug("handling result for shard {} with {} records", shardId, result.records().size());
        List<Relay.WorkItem> workItems = new ArrayList<>(result.records().size() * 2000);

        String lastSequenceNumber = null;
        for (software.amazon.awssdk.services.kinesis.model.Record record : result.records()) {

            lastSequenceNumber = record.sequenceNumber();
            String sensorId = parser.recordToSensorId(record);
            String tableName = parser.sensorIdToTableName(sensorId);

            for (WritableRow row : parser.kinesisToQdb(record)) {
                this.measureRecordAge(shardId, row);

                if (this.isRowRejected(sensorId, row)) {
                    this.rejectedRecordsCounterByShardId.get(shardId).increment();
                } else {
                    workItems.add(new Relay.WorkItem(tableName, row));
                }
            }
        }

        assert(lastSequenceNumber != null);

        logger.debug("have total of {} work items", workItems.size(), shardId);

        List<List<Relay.WorkItem>> partitions = Util.partitionList(workItems, this.batchSize);

        for (List<Relay.WorkItem> batch : partitions) {
            logger.debug("about to handle batch with size {} for shard {}", batch.size(), shardId);
            this.batchSizeDistributionByShardId.get(shardId).record((double)(batch.size()));
            this.handleBatch(batch);
        }

        this.storeCheckpoint(shardId, lastSequenceNumber);
    }
}



