
com.pinterest.secor.parser.PartitionFinalizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of secor Show documentation
Show all versions of secor Show documentation
Kafka to s3/gs/swift logs exporter
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.pinterest.secor.parser;
import com.pinterest.secor.common.KafkaClient;
import com.pinterest.secor.common.LogFilePath;
import com.pinterest.secor.common.SecorConfig;
import com.pinterest.secor.common.TopicPartition;
import com.pinterest.secor.common.ZookeeperConnector;
import com.pinterest.secor.message.Message;
import com.pinterest.secor.util.CompressionUtil;
import com.pinterest.secor.util.FileUtil;
import com.pinterest.secor.util.ReflectionUtil;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Stack;
/**
* Partition finalizer writes _SUCCESS files to date partitions that very likely won't be receiving
* any new messages. It also adds those partitions to Hive.
*
* @author Pawel Garbacki (pawel@pinterest.com)
*/
public class PartitionFinalizer {
    private static final Logger LOG = LoggerFactory.getLogger(PartitionFinalizer.class);

    private final SecorConfig mConfig;
    private final ZookeeperConnector mZookeeperConnector;
    private final TimestampedMessageParser mMessageParser;
    private final KafkaClient mKafkaClient;
    private final QuboleClient mQuboleClient;
    // Extension handed to LogFilePath when building partition paths; "" when neither a file
    // extension nor a compression codec is configured.
    private final String mFileExtension;
    // Upper bound on how many previous partitions one finalization pass walks back through.
    private final int mLookbackPeriods;

    /**
     * Builds a finalizer and all of its collaborators from the given configuration.
     *
     * <p>The Kafka client class is loaded by name from the configuration and initialized with
     * it. The file extension is taken from the configured file extension if it is non-empty,
     * otherwise from the default extension of the configured compression codec, otherwise it is
     * the empty string.
     *
     * @param config configuration used by this finalizer and passed to its collaborators
     * @throws Exception if the Kafka client class cannot be loaded or instantiated, or if any
     *                   collaborator fails during construction
     */
    public PartitionFinalizer(SecorConfig config) throws Exception {
        mConfig = config;

        Class<?> kafkaClientClass = Class.forName(mConfig.getKafkaClientClass());
        // Class.newInstance() is deprecated and rethrows checked constructor exceptions
        // undeclared; going through the Constructor object avoids both problems.
        mKafkaClient = (KafkaClient) kafkaClientClass.getDeclaredConstructor().newInstance();
        mKafkaClient.init(config);

        mZookeeperConnector = new ZookeeperConnector(mConfig);
        mMessageParser = (TimestampedMessageParser) ReflectionUtil.createMessageParser(
                mConfig.getMessageParserClass(), mConfig);
        mQuboleClient = new QuboleClient(mConfig);

        String configuredExtension = mConfig.getFileExtension();
        if (configuredExtension != null && !configuredExtension.isEmpty()) {
            mFileExtension = configuredExtension;
        } else {
            // Only consult the codec when no explicit extension is configured.
            String codecName = mConfig.getCompressionCodec();
            if (codecName != null && !codecName.isEmpty()) {
                CompressionCodec codec = CompressionUtil.createCompressionCodec(codecName);
                mFileExtension = codec.getDefaultExtension();
            } else {
                mFileExtension = "";
            }
        }

        mLookbackPeriods = config.getFinalizerLookbackPeriods();
        LOG.info("Lookback periods: {}", mLookbackPeriods);
    }

    /**
     * Collects the last and the committed message of every Kafka partition of {@code topic} and
     * asks the message parser for the partition up to which the topic can be finalized.
     *
     * <p>Kafka partitions for which either message is unavailable are logged and left out of
     * both lists.
     *
     * @param topic the Kafka topic to inspect
     * @return whatever the message parser returns for the collected messages; the caller in
     *         this class treats {@code null} as "nothing to finalize"
     * @throws Exception if the Kafka client or the message parser fails
     */
    private String[] getFinalizedUptoPartitions(String topic) throws Exception {
        final int numPartitions = mKafkaClient.getNumPartitions(topic);
        List<Message> lastMessages = new ArrayList<Message>(numPartitions);
        List<Message> committedMessages = new ArrayList<Message>(numPartitions);
        for (int partition = 0; partition < numPartitions; ++partition) {
            TopicPartition topicPartition = new TopicPartition(topic, partition);
            Message lastMessage = mKafkaClient.getLastMessage(topicPartition);
            Message committedMessage = mKafkaClient.getCommittedMessage(topicPartition);
            if (lastMessage == null || committedMessage == null) {
                // This will happen if no messages have been posted to the given topic partition.
                LOG.error("For topic {} partition {}, lastMessage: {}, committed: {}",
                        topicPartition.getTopic(), topicPartition.getPartition(),
                        lastMessage, committedMessage);
                continue;
            }
            lastMessages.add(lastMessage);
            committedMessages.add(committedMessage);
        }
        return mMessageParser.getFinalizedUptoPartitions(lastMessages, committedMessages);
    }

    /**
     * Renders a partition array of {@code key=value} entries as a comma-separated list of
     * {@code key='value'} pairs, the form handed to the Qubole client for Hive registration.
     *
     * @param partition partition components, each expected to be exactly {@code key=value}
     * @return the joined partition specification
     * @throws IllegalArgumentException if a component does not split into exactly two parts
     *                                  around {@code '='}
     */
    private static String toHivePartitionSpec(String[] partition) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < partition.length; i++) {
            String par = partition[i];
            String[] parts = par.split("=");
            // An explicit check instead of `assert`: assertions are disabled by default, which
            // would let a malformed component through with a truncated value.
            if (parts.length != 2) {
                throw new IllegalArgumentException("wrong partition format: " + par);
            }
            if (i > 0) {
                sb.append(",");
            }
            sb.append(parts[0]);
            sb.append("='");
            sb.append(parts[1]);
            sb.append("'");
        }
        return sb.toString();
    }

    /**
     * Finalizes the partitions of {@code topic} that precede {@code uptoPartitions}.
     *
     * <p>Starting from the partition just before {@code uptoPartitions}, walks backwards for at
     * most the configured number of lookback periods, collecting every partition whose folder
     * exists, and stops at the first folder that already holds a {@code _SUCCESS} file. The
     * collected partitions are then processed oldest first: partitions with the same number of
     * dimensions as {@code uptoPartitions} are registered in Hive (when a table name is
     * available and Qubole is enabled), and a {@code _SUCCESS} file is touched in the folder.
     * If Hive registration of a partition fails, the error is logged and that partition gets no
     * {@code _SUCCESS} file.
     *
     * @param topic          the Kafka topic being finalized
     * @param uptoPartitions the first partition that must NOT be finalized
     * @throws Exception if the message parser or a file-system operation fails
     */
    private void finalizePartitionsUpTo(String topic, String[] uptoPartitions) throws Exception {
        String prefix = FileUtil.getPrefix(topic, mConfig);
        LOG.info("Finalize up to (but not include) {}, dim: {}",
                uptoPartitions, uptoPartitions.length);

        String[] previous = mMessageParser.getPreviousPartitions(uptoPartitions);
        Stack<String[]> toBeFinalized = new Stack<String[]>();
        // Walk backwards to collect all partitions which are previous to the upTo partition
        // Do not include the upTo partition
        // Stop at the first partition which already have the SUCCESS file
        for (int i = 0; i < mLookbackPeriods; i++) {
            LOG.info("Looking for partition: {}", Arrays.toString(previous));
            LogFilePath logFilePath = new LogFilePath(prefix, topic, previous,
                    mConfig.getGeneration(), 0, 0, mFileExtension);
            if (FileUtil.s3PathPrefixIsAltered(logFilePath.getLogFilePath(), mConfig)) {
                logFilePath = logFilePath.withPrefix(FileUtil.getS3AlternativePrefix(mConfig));
            }
            String logFileDir = logFilePath.getLogFileDir();
            if (FileUtil.exists(logFileDir)) {
                String successFilePath = logFileDir + "/_SUCCESS";
                if (FileUtil.exists(successFilePath)) {
                    LOG.info("SuccessFile exist already, short circuit return. {}",
                            successFilePath);
                    break;
                }
                LOG.info("Folder {} exists and ready to be finalized.", logFileDir);
                toBeFinalized.push(previous);
            } else {
                LOG.info("Folder {} doesn't exist, skip", logFileDir);
            }
            previous = mMessageParser.getPreviousPartitions(previous);
        }

        LOG.info("To be finalized partitions: {}", toBeFinalized);
        if (toBeFinalized.isEmpty()) {
            LOG.warn("There is no partitions to be finalized.");
            return;
        }

        // Now walk forward the collected partitions to do the finalization
        // Note we are deliberately walking backwards and then forwards to make sure we don't
        // end up in a situation that a later date partition is finalized and then the system
        // crashes (which creates unfinalized partition folders in between)
        while (!toBeFinalized.isEmpty()) {
            String[] current = toBeFinalized.pop();
            LOG.info("Finalizing partition: {}", Arrays.toString(current));
            // We only perform hive registration on the last dimension of the partition array
            // i.e. only do hive registration for the hourly folder, but not for the daily
            if (uptoPartitions.length == current.length) {
                try {
                    // We expect the partition array in the form of key=value if
                    // they need to go through hive registration
                    String partitionSpec = toHivePartitionSpec(current);
                    LOG.info("Hive partition string: {}", partitionSpec);

                    String hiveTableName = mConfig.getHiveTableName(topic);
                    LOG.info("Hive table name from config: {}", hiveTableName);
                    if (hiveTableName == null) {
                        try {
                            hiveTableName = mConfig.getHivePrefix() + topic;
                            LOG.info("Hive table name from prefix: {}", hiveTableName);
                        } catch (RuntimeException ex) {
                            // Table name stays null, which skips the registration below.
                            LOG.warn("HivePrefix is not defined. Skip hive registration");
                        }
                    }
                    if (hiveTableName != null && mConfig.getQuboleEnabled()) {
                        mQuboleClient.addPartition(hiveTableName, partitionSpec);
                    }
                } catch (Exception e) {
                    // A partition that could not be registered must not be marked as done.
                    LOG.error("failed to finalize topic " + topic, e);
                    continue;
                }
            }

            // Generate the SUCCESS file at the end
            LogFilePath logFilePath = new LogFilePath(prefix, topic, current,
                    mConfig.getGeneration(), 0, 0, mFileExtension);
            if (FileUtil.s3PathPrefixIsAltered(logFilePath.getLogFilePath(), mConfig)) {
                logFilePath = logFilePath.withPrefix(FileUtil.getS3AlternativePrefix(mConfig));
                LOG.info("Will finalize alternative s3 logFilePath {}", logFilePath);
            }
            String logFileDir = logFilePath.getLogFileDir();
            String successFilePath = logFileDir + "/_SUCCESS";
            LOG.info("touching file {}", successFilePath);
            FileUtil.touch(successFilePath);
        }
    }

    /**
     * Finalizes every topic that has committed offsets and matches the configured Kafka topic
     * filter. Topics that do not match the filter are logged and skipped; topics for which no
     * finalization boundary can be determined are left untouched.
     *
     * @throws Exception if looking up topics or finalizing any matching topic fails
     */
    public void finalizePartitions() throws Exception {
        List<String> topics = mZookeeperConnector.getCommittedOffsetTopics();
        for (String topic : topics) {
            if (!topic.matches(mConfig.getKafkaTopicFilter())) {
                LOG.info("skipping topic {}", topic);
                continue;
            }
            LOG.info("finalizing topic {}", topic);
            String[] partitions = getFinalizedUptoPartitions(topic);
            LOG.info("finalized timestamp for topic {} is {}", topic, partitions);
            if (partitions != null) {
                finalizePartitionsUpTo(topic, partitions);
            }
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy