All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.edgar615.util.eventbus.KafkaEventConsumer Maven / Gradle / Ivy

package com.github.edgar615.util.eventbus;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;

import com.github.edgar615.util.concurrent.NamedThreadFactory;
import com.github.edgar615.util.event.Event;

import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;

/**
 * kafka的consumer对象不是线程安全的,如果在不同的线程里使用consumer会抛出异常.
 * 

* 消息的消费有两种方式:每个线程维护一个KafkaConsumer 或者 维护一个或多个KafkaConsumer,同时维护多个事件处理线程(worker thread) *

* 每个线程维护一个KafkaConsumer * 一个或多个Consumer线程,Consumer除了读取消息外,还包括具体的业务逻辑处理,同一个Consumer线程里对事件串行处理, * 每个事件完成之后再commit. *

* 同一个主题的线程数受限于主题的分区数,多余的线程不会接收任何消息。 *

* 如果对消息的处理比较耗时,容易导致消费者的rebalance,因为如果在一段事件内没有收到Consumer的poll请求,会触发kafka的rebalance. *

*

* 维护一个或多个KafkaConsumer,同时维护多个事件处理线程(worker thread) * 一个或者多个Consumer线程,Consumer只用来从kafka读取消息,并不涉及具体的业务逻辑处理, 具体的业务逻辑由Consumer转发给工作线程来处理. *

* 使用工作线程处理事件的时候,需要注意commit的正确的offset。 * 如果有两个工作线程处理事件,工作线程A,处理事件 1,工作线程B,处理事件2. 如果工作线程的2先处理完,不能立刻commit。 * 否则有可能导致1的丢失.所以这种模式需要一个协调器来检测各个工作线程的消费状态,来对合适的offset进行commit *

*

* Eventbus采用第二种方案消费消息. * * @author Edgar Date 2017/4/5 */ public class KafkaEventConsumer extends EventConsumerImpl implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(KafkaEventConsumer.class); private final KafkaConsumerOptions options; private final ExecutorService consumerExecutor; private final AtomicLong eventCount = new AtomicLong(0); private final AtomicBoolean pause = new AtomicBoolean(false); private final Function blackListFilter; /** * 正在处理的消息(不包括已经处理完成或者还在线程池排队的任务) */ private Map> process = new HashMap<>(); private KafkaConsumer consumer; private volatile boolean running = true; private volatile boolean started = false; public KafkaEventConsumer(KafkaConsumerOptions options) { super(options); this.consumerExecutor = Executors.newFixedThreadPool(1, NamedThreadFactory.create("eventbus-consumer")); this.options = options; consumerExecutor.submit(this); if (options.getBlackListFilter() == null) { blackListFilter = e -> false; } else { blackListFilter = options.getBlackListFilter(); } } /** * 将消息标记为完成. 
* * @param record */ private synchronized void complete(ConsumerRecord record) { TopicPartition tp = new TopicPartition(record.topic(), record.partition()); Set metas = process.get(tp); metas.stream() .filter(m -> m.offset() == record.offset()) .forEach(m -> m.completed()); } /** * 将消息入队 * * @param record */ private synchronized void enqueue(ConsumerRecord record) { TopicPartition tp = new TopicPartition(record.topic(), record.partition()); RecordFuture meta = RecordFuture.create(record); if (process.containsKey(tp)) { Set metas = process.get(tp); metas.add(meta); } else { TreeSet metas = new TreeSet<>(); metas.add(meta); process.put(tp, metas); } } /** * commit完成的消息 */ private synchronized void commit() { if (process.isEmpty()) { return; } Map commited = new HashMap<>(); for (TopicPartition tp : process.keySet()) { long offset = -1; Set metas = process.get(tp); for (RecordFuture meta : metas) { if (meta.isCompleted()) { offset = meta.offset(); } else { break; } } if (offset > -1) { commited.put(tp, new OffsetAndMetadata(offset + 1)); } } if (commited.isEmpty()) { return; } consumer.commitAsync( ImmutableMap.copyOf(commited), (offsets, exception) -> { if (exception != null) { LOGGER.error("[consumer] [commit: {}]", commited, exception.getMessage(), exception); } else { synchronized (this) { for (TopicPartition tp : offsets.keySet()) { OffsetAndMetadata data = offsets.get(tp); Set metas = process.get(tp); long count = metas.stream() .filter(m -> m.offset() < data.offset()) .count(); metas.removeIf(m -> m.offset() < data.offset()); eventCount.accumulateAndGet(count, (l, r) -> l - r); } } if (!offsets.isEmpty()) { LOGGER.info("[consumer] [commit: {}] [{}]", offsets, eventCount); } } }); //https://issues.apache.org/jira/browse/KAFKA-3412 // 线上有个BUG:偶尔会跳过一条消息,猜测是这个方法引起,测试一段时间发现没有这个方法commitAsync也没有像以前一样报异常,先注释 // consumer.poll(0); } @Override public void run() { try { startConsumer(); } catch (Exception e) { LOGGER.error("[consumer] [Starting]", e); } } public void 
close() { running = false; consumer.close(); } private void startConsumer() { consumer = new KafkaConsumer<>(options.consumerProps()); List partitions; for (String topic : options.getTopics()) { while ((partitions = consumer.partitionsFor(topic)) == null) { try { LOGGER.info("[consumer] [topic {} since no metadata is available, wait 5s]", topic); TimeUnit.SECONDS.sleep(5); } catch (InterruptedException e) { e.printStackTrace(); } } LOGGER.info("[consumer] [topic:{} is available] [partitions:{}]", topic, partitions); } consumer.subscribe(options.getTopics(), new ConsumerRebalanceListener() { @Override public void onPartitionsRevoked(Collection partitions) { LOGGER.info( "[consumer] [onPartitionsRevoked] [partitions:{}]", partitions); } @Override public void onPartitionsAssigned(Collection partitions) { Iterator topicPartitionIterator = partitions.iterator(); while (topicPartitionIterator.hasNext()) { TopicPartition topicPartition = topicPartitionIterator.next(); long position = consumer.position(topicPartition); OffsetAndMetadata lastCommitedOffsetAndMetadata = consumer.committed(topicPartition); LOGGER.info( "[consumer] [onPartitionsAssigned] [topic:{}, parition:{}, offset:{}, " + "commited:{}]", topicPartition.topic(), topicPartition.partition(), position, lastCommitedOffsetAndMetadata); if (!started) { setStartOffset(topicPartition); } } started = true; } }); try { while (running) { try { ConsumerRecords records = consumer.poll(100); if (records.count() > 0) { LOGGER.info( "[consumer] [poll {} messages]", records.count()); } List> recordList = new ArrayList<>(); for (ConsumerRecord record : records) { Event event = record.value(); LOGGER.info("<====== [{}] [{},{},{}] [{}] [{}] [{}]", event.head().id(), record.topic(), record.partition(), record.offset(), event.head().action(), Helper.toHeadString(event), Helper.toActionString(event)); if (!blackListFilter.apply(event)) { recordList.add(record); } else { LOGGER.info("---| [{}] [BLACKLIST]", event.head().id()); } } 
ratelimit(recordList.size()); recordList.forEach(r -> { handle(r.value(), () -> enqueue(r), () -> complete(r)); }); commit(); } catch (Exception e) { LOGGER.error("[consumer] [ERROR]", e); } } } catch (Exception e) { LOGGER.error("[consumer] [ERROR]", e); } finally { LOGGER.warn("[consumer] [EXIT]"); } } private void ratelimit(int receivedCount) { long totalCount = eventCount.accumulateAndGet(receivedCount, (l, r) -> l + r); if (totalCount > options.getMaxQuota() && pause.compareAndSet(false, true)) { consumer.pause(Iterables.toArray(process.keySet(), TopicPartition.class)); LOGGER.info( "[consumer] [pause] [{}]", totalCount); } else if (totalCount < options.getMaxQuota() / 2 && pause.compareAndSet(true, false)) { consumer.resume(Iterables.toArray(process.keySet(), TopicPartition.class)); LOGGER.info( "[consumer] [resume] [{}]", totalCount); } } private void setStartOffset(TopicPartition topicPartition) { long startingOffset = options.getStartingOffset(topicPartition); if (startingOffset == -2) { LOGGER.info( "[consumer] [StartingOffset] [topic:{}, parition:{}, offset:{}]", topicPartition.topic(), topicPartition.partition(), "none"); } else if (startingOffset == 0) { consumer.seekToBeginning(topicPartition); LOGGER.info( "[consumer] [StartingOffset] [topic:{}, parition:{}, offset:{}]", topicPartition.topic(), topicPartition.partition(), "beginning"); } else if (startingOffset == -1) { consumer.seekToEnd(topicPartition); LOGGER.info( "[consumer] [StartingOffset] [topic:{}, parition:{}, offset:{}]", topicPartition.topic(), topicPartition.partition(), "end"); } else { consumer.seek(topicPartition, startingOffset); LOGGER.info( "[consumer] [StartingOffset] [topic:{}, parition:{}, offset:{}]", topicPartition.topic(), topicPartition.partition(), startingOffset); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy