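// NOTE: the five lines below are residue from the web page this file was downloaded from;
// they are kept as comments only so that the file remains valid Java.
// Please wait. This can take some minutes ...
// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
// Project price only 1 $
// You can buy this project and download/modify it how often you want.
// com.xiaomi.infra.galaxy.talos.consumer.PartitionFetcher Maven / Gradle / Ivy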
/**
* Copyright 2015, Xiaomi.
* All rights reserved.
* Author: [email protected]
*/
package com.xiaomi.infra.galaxy.talos.consumer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.xiaomi.infra.galaxy.talos.thrift.ConsumeUnit;
import com.xiaomi.infra.galaxy.talos.thrift.ConsumerService;
import com.xiaomi.infra.galaxy.talos.thrift.LockPartitionRequest;
import com.xiaomi.infra.galaxy.talos.thrift.LockPartitionResponse;
import com.xiaomi.infra.galaxy.talos.thrift.MessageService;
import com.xiaomi.infra.galaxy.talos.thrift.TopicAndPartition;
import com.xiaomi.infra.galaxy.talos.thrift.TopicTalosResourceName;
import com.xiaomi.infra.galaxy.talos.thrift.UnlockPartitionRequest;
/**
 * PartitionFetcher
 *
 * One PartitionFetcher per partition.
 *
 * A PartitionFetcher is the message process task for one partition. It is always in
 * one of five states: INIT, LOCKED, UNLOCKING, UNLOCKED, SHUTDOWNED.
 * Every successful {@link #lock()} submits one runnable FetcherStateMachine to a
 * single-thread executor; that task fetches messages continuously.
 *
 * While LOCKED, the task keeps calling MessageReader.fetchData();
 * while UNLOCKING, the task stops reading, cleans the reader and releases the partition lock;
 * while UNLOCKED, it does not serve any partition and waits to be invoked again by lock();
 * once SHUTDOWNED (set at the end of {@link #shutDown()}), lock()/unlock() are rejected
 * by the state check.
 *
 * State reads and writes go through synchronized methods of this object.
 */
public class PartitionFetcher {
  private static final Logger LOG = LoggerFactory.getLogger(PartitionFetcher.class);

  /**
   * State of PartitionFetcher
   *
   * The state evolution is as follows:
   * INIT -> LOCKED;
   * LOCKED -> UNLOCKING;
   * LOCKED -> UNLOCKED;
   * UNLOCKING -> UNLOCKED;
   * UNLOCKED -> LOCKED;
   * any state -> SHUTDOWNED (only done by shutDown()).
   */
  private enum TASK_STATE {
    INIT,
    LOCKED,
    UNLOCKING,
    UNLOCKED,
    SHUTDOWNED,
  }

  /**
   * The task submitted by lock(): steals the partition lock, initializes the start
   * offset, reads until the state leaves LOCKED, then cleans up.
   */
  private class FetcherStateMachine implements Runnable {
    private final MessageReader messageReader;

    private FetcherStateMachine(MessageReader messageReader) {
      this.messageReader = messageReader;
      LOG.info("initialize FetcherStateMachine for partition: " + partitionId);
    }

    @Override
    public void run() {
      // try to lock the partition through consumerClient; if failed, set to UNLOCKED and return
      if (!stealPartition()) {
        updateState(TASK_STATE.UNLOCKED);
        return;
      }

      // query start offset to read; if failed, release the lock and return
      try {
        messageReader.initStartOffset();
      } catch (Throwable e) {
        // message text is unchanged; the throwable is passed too so the stack trace is kept
        LOG.error("Worker: " + workerId + " query partition offset error: " +
            e.toString() + " skip this partition", e);
        clean();
        return;
      }

      // reading data
      LOG.info("The workerId: " + workerId + " is serving partition: " +
          partitionId + " from offset: " + messageReader.getStartOffset().get());
      try {
        while (getCurState() == TASK_STATE.LOCKED) {
          messageReader.fetchData();
        }
      } catch (Throwable e) {
        // Without this catch the failure would vanish inside the Future and the state
        // would stay LOCKED with the partition lock still held and nobody reading.
        LOG.error("Worker: " + workerId + " fetch data error for partition: " +
            partitionId + ", stop serving it", e);
      } finally {
        // task quits gracefully: clean the reader, release the lock, set UNLOCKED
        stopServing();
      }
      LOG.info("The MessageProcessTask for topic: " + topicTalosResourceName +
          " partition: " + partitionId + " is finished");
    }

    // clean the reader, and always release the partition even if the reader cleanup fails
    private void stopServing() {
      try {
        messageReader.cleanReader();
      } catch (Throwable e) {
        LOG.error("Worker: " + workerId + " clean reader error for partition: " +
            partitionId, e);
      } finally {
        clean();
      }
    }
  } // FetcherStateMachine

  private final String consumerGroup;
  private final TopicTalosResourceName topicTalosResourceName;
  private final int partitionId;
  private final String workerId;
  private final ConsumerService.Iface consumerClient;
  // only accessed through the synchronized methods of this object
  private TASK_STATE curState;
  private final ExecutorService singleExecutor;
  // written by lock(), read by shutDown(); volatile because the two do not share a lock
  private volatile Future<?> fetcherFuture;
  private final TopicAndPartition topicAndPartition;
  private final SimpleConsumer simpleConsumer;
  private final MessageReader messageReader;

  /**
   * Creates a fetcher in state INIT with its own single-thread executor, builds the
   * SimpleConsumer for the partition and configures the given messageReader
   * (worker id, consumer group, topic/partition, simple consumer, processor, client).
   */
  public PartitionFetcher(String consumerGroup, String topicName,
      TopicTalosResourceName topicTalosResourceName, int partitionId,
      TalosConsumerConfig talosConsumerConfig, String workerId,
      ConsumerService.Iface consumerClient, MessageService.Iface messageClient,
      MessageProcessor messageProcessor, MessageReader messageReader) {
    this.consumerGroup = consumerGroup;
    this.topicTalosResourceName = topicTalosResourceName;
    this.partitionId = partitionId;
    this.workerId = workerId;
    this.consumerClient = consumerClient;
    curState = TASK_STATE.INIT;
    singleExecutor = Executors.newSingleThreadExecutor();
    fetcherFuture = null;

    topicAndPartition = new TopicAndPartition(topicName,
        topicTalosResourceName, partitionId);
    simpleConsumer = new SimpleConsumer(talosConsumerConfig, topicAndPartition,
        messageClient);

    // set MessageReader
    messageReader.setWorkerId(workerId)
        .setConsumerGroup(consumerGroup)
        .setTopicAndPartition(topicAndPartition)
        .setSimpleConsumer(simpleConsumer)
        .setMessageProcessor(messageProcessor)
        .setConsumerClient(consumerClient);
    this.messageReader = messageReader;

    LOG.info("The PartitionFetcher for topic: " + topicTalosResourceName +
        " partition: " + partitionId + " init.");
  }

  // for test: takes a ready SimpleConsumer and does not configure the messageReader
  public PartitionFetcher(String consumerGroup, String topicName,
      TopicTalosResourceName topicTalosResourceName, int partitionId,
      String workerId, ConsumerService.Iface consumerClient,
      SimpleConsumer simpleConsumer, MessageReader messageReader) {
    this.consumerGroup = consumerGroup;
    this.topicTalosResourceName = topicTalosResourceName;
    this.partitionId = partitionId;
    this.workerId = workerId;
    this.consumerClient = consumerClient;
    this.messageReader = messageReader;
    curState = TASK_STATE.INIT;
    singleExecutor = Executors.newSingleThreadExecutor();
    fetcherFuture = null;

    topicAndPartition = new TopicAndPartition(topicName,
        topicTalosResourceName, partitionId);
    this.simpleConsumer = simpleConsumer;
  }

  // used to know whether it is serving and reading data (state is LOCKED)
  public synchronized boolean isServing() {
    return (curState == TASK_STATE.LOCKED);
  }

  // used to know whether the lock needs to be renewed (state is LOCKED or UNLOCKING)
  public synchronized boolean isHoldingLock() {
    return (curState == TASK_STATE.LOCKED || curState == TASK_STATE.UNLOCKING);
  }

  /**
   * we want to guarantee the operation order for partitionFetcher,
   * such as process the following operation call:
   * 1) lock -> lock: the second 'lock' will be useless
   * 2) unlock -> unlock: the second 'unlock' will be useless
   * 3) lock -> unlock: every step within 'lock' can gracefully exit by unlock
   * 4) unlock -> lock: the 'lock' operation is useless before 'unlock' process done
   */

  // used to invoke this partition fetcher: on INIT/UNLOCKED -> LOCKED, submit a fetch task
  public void lock() {
    if (updateState(TASK_STATE.LOCKED)) {
      FetcherStateMachine fetcherStateMachine = new FetcherStateMachine(
          messageReader);
      fetcherFuture = singleExecutor.submit(fetcherStateMachine);
      LOG.info("Worker: " + workerId + " invoke partition: " +
          partitionId + " to 'LOCKED', try to serve it.");
    }
  }

  // used to revoke this partition fetcher asynchronously: only flips LOCKED -> UNLOCKING,
  // the running fetch task notices the new state and cleans up by itself
  public void unlock() {
    if (updateState(TASK_STATE.UNLOCKING)) {
      LOG.info("Worker: " + workerId + " has set partition: " +
          partitionId + " to 'UNLOCKING', it is revoking gracefully.");
    }
  }

  /**
   * Stops the fetcher and blocks until its executor has terminated, then sets the
   * state to SHUTDOWNED.
   *
   * The wait is not abandoned when the calling thread is interrupted; the interrupt
   * is remembered and the thread's interrupt status is restored before returning.
   */
  public void shutDown() {
    // set UNLOCKING to stop read and wait fetcher gracefully quit
    updateState(TASK_STATE.UNLOCKING);
    Future<?> future = fetcherFuture;
    if (future != null) {
      LOG.info("worker: " + workerId + " try to shutdown partition: " +
          partitionId);
      // 'false' means not stop the running task;
      future.cancel(false);
    }

    singleExecutor.shutdown();
    boolean interrupted = false;
    try {
      while (true) {
        try {
          if (singleExecutor.awaitTermination(100, TimeUnit.MILLISECONDS)) {
            break;
          }
        } catch (InterruptedException e) {
          // keep waiting for the fetch task to finish, but do not lose the interrupt;
          // re-interrupting right here would make awaitTermination throw again at once
          interrupted = true;
        }
      }
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
    updateState(TASK_STATE.SHUTDOWNED);
  }

  private synchronized TASK_STATE getCurState() {
    return curState;
  }

  /**
   * Tries to move curState to targetState.
   *
   * @param targetState the state to move to
   * @return true if curState was changed to targetState; false if the transition is
   *         not allowed from the current state (an error is logged in that case)
   */
  private synchronized boolean updateState(TASK_STATE targetState) {
    LOG.info("PartitionFetcher for Partition: " + partitionId + " update " +
        "status from: " + curState + " to: " + targetState);
    switch (targetState) {
      case INIT:
        LOG.error("targetState can never be INIT, " +
            "updateState error for: " + partitionId);
        break;
      case LOCKED:
        if (curState == TASK_STATE.INIT || curState == TASK_STATE.UNLOCKED) {
          curState = TASK_STATE.LOCKED;
          return true;
        }
        LOG.error("targetState is LOCKED, but curState is: " + curState +
            " for partition: " + partitionId);
        break;
      case UNLOCKING:
        if (curState == TASK_STATE.LOCKED) {
          curState = TASK_STATE.UNLOCKING;
          return true;
        }
        LOG.error("targetState is UNLOCKING, but curState is: " + curState +
            " for partition: " + partitionId);
        break;
      case UNLOCKED:
        if (curState == TASK_STATE.UNLOCKING || curState == TASK_STATE.LOCKED) {
          curState = TASK_STATE.UNLOCKED;
          return true;
        }
        LOG.error("targetState is UNLOCKED, but curState is: " + curState +
            " for partition: " + partitionId);
        break;
      case SHUTDOWNED:
        // allowed from any state; report the applied transition like the other cases
        curState = TASK_STATE.SHUTDOWNED;
        return true;
      default:
        break;
    }
    return false;
  }

  /**
   * conditions for releasePartition:
   * 1) LOCKED, stealPartition success but get startOffset failed
   * 2) UNLOCKING, stop to serve this partition
   *
   * A failure of the unlock call is only logged.
   */
  private void releasePartition() {
    // release lock, if unlock failed, we just wait ttl work.
    List<Integer> toReleaseList = new ArrayList<Integer>();
    toReleaseList.add(partitionId);
    ConsumeUnit consumeUnit = new ConsumeUnit(consumerGroup,
        topicTalosResourceName, toReleaseList, workerId);
    UnlockPartitionRequest unlockRequest = new UnlockPartitionRequest(consumeUnit);
    try {
      consumerClient.unlockPartition(unlockRequest);
    } catch (Throwable e) {
      LOG.warn("Worker: " + workerId + " release partition error: " + e.toString(), e);
      return;
    }
    LOG.info("Worker: " + workerId + " success to release partition: " + partitionId);
  }

  /**
   * Asks consumerClient to lock this partition for this worker.
   *
   * @return true only if the state is LOCKED and the response lists a successfully
   *         locked partition; false on wrong state, call failure or empty response
   */
  private boolean stealPartition() {
    TASK_STATE state = getCurState();
    if (state != TASK_STATE.LOCKED) {
      LOG.error("Worker: " + workerId + " try to stealPartitionLock: " +
          partitionId + " but got state: " + state);
      return false;
    }

    // steal lock, if lock failed, we skip it and wait next re-balance
    List<Integer> toStealList = new ArrayList<Integer>();
    toStealList.add(partitionId);
    ConsumeUnit consumeUnit = new ConsumeUnit(consumerGroup,
        topicTalosResourceName, toStealList, workerId);
    LockPartitionRequest lockRequest = new LockPartitionRequest(consumeUnit);

    LockPartitionResponse lockResponse = null;
    try {
      lockResponse = consumerClient.lockPartition(lockRequest);
    } catch (Throwable e) {
      LOG.error("Worker: " + workerId + " steal partition error: " + e.toString(), e);
      return false;
    }

    // get the successfully locked partition; a missing list is treated like an empty one
    List<Integer> successPartitionList = lockResponse.getSuccessPartitions();
    if (successPartitionList != null && !successPartitionList.isEmpty()) {
      // only one partition was requested, so the first entry must be ours
      Preconditions.checkArgument(successPartitionList.get(0) == partitionId);
      LOG.info("Worker: " + workerId + " success to lock partitions: " +
          partitionId);
      return true;
    }
    LOG.error("Worker: " + workerId + " failed to lock partitions: " + partitionId);
    return false;
  }

  // unlock partitionLock, then revoke this task and set it to 'UNLOCKED'
  private void clean() {
    releasePartition();
    updateState(TASK_STATE.UNLOCKED);
  }
}