com.bazaarvoice.emodb.sor.audit.s3.AthenaAuditWriter Maven / Gradle / Ivy
package com.bazaarvoice.emodb.sor.audit.s3;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.bazaarvoice.emodb.common.dropwizard.log.RateLimitedLog;
import com.bazaarvoice.emodb.common.dropwizard.log.RateLimitedLogFactory;
import com.bazaarvoice.emodb.sor.api.Audit;
import com.bazaarvoice.emodb.sor.audit.AuditStore;
import com.bazaarvoice.emodb.sor.audit.AuditWriterConfiguration;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Inject;
import io.dropwizard.util.Size;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;
/**
* Audit writer implementation which writes all audits as GZIP'd jsonl documents to S3 partitioned by date.
* This format allows audit queries to be carried out by Athena, Amazon's Presto implementation over S3 documents.
*
* This audit writer favors fast, non-blocking calls to {@link #persist(String, String, Audit, long)} over guaranteeing
* a completely loss-less audit history. To achieve this all audits are written to an in-memory queue. That queue
* is written to a local log file until it has reached a maximum size or age, both configurable in the constructor.
* At this time the file is asynchronously GZIP'd then delivered to S3. Once the file is delivered it is deleted from
* the local host.
*
* Each stage has multiple layers of recovery, ensuring that once a line is written to a file that file will eventually
* be delivered to S3. The exceptions to this which can cause audit loss are:
*
*
* - The process is terminated while unwritten audits are still in the audit queue.
* - The host itself terminates before all files are delivered to S3.
*
*/
public class AthenaAuditWriter implements AuditStore {
private final static Logger _log = LoggerFactory.getLogger(AthenaAuditWriter.class);
private final static DateTimeFormatter LOG_FILE_DATE_FORMATTER =
DateTimeFormatter.ofPattern("yyyyMMddHHmmss").withZone(ZoneOffset.UTC);
private final static String OPEN_FILE_SUFFIX = ".log.tmp";
private final static String CLOSED_FILE_SUFFIX = ".log";
private final static String COMPRESSED_FILE_SUFFIX = ".log.gz";
private final static long DEFAULT_MAX_FILE_SIZE = Size.megabytes(10).toBytes();
private final static Duration DEFAULT_MAX_BATCH_TIME = Duration.ofMinutes(2);
private final AmazonS3 _s3;
private final String _s3Bucket;
private final String _s3AuditRoot;
private final long _maxFileSize;
private final long _maxBatchTimeMs;
private final File _stagingDir;
private final String _logFilePrefix;
private final BlockingQueue _auditQueue;
private final Clock _clock;
private final ObjectWriter _objectWriter;
private final ConcurrentMap _openAuditOutputs = Maps.newConcurrentMap();
private ScheduledExecutorService _auditService;
private ExecutorService _fileTransferService;
private AuditOutput _mruAuditOutput;
private boolean _fileTransfersEnabled;
private final RateLimitedLog _rateLimitedLog;
@Inject
public AthenaAuditWriter(AuditWriterConfiguration config, ObjectMapper objectMapper, Clock clock,
RateLimitedLogFactory rateLimitedLogFactory, MetricRegistry metricRegistry) {
this(getAmazonS3Client(config), config.getLogBucket(), config.getLogPath(), config.getMaxFileSize(),
Duration.ofMillis(config.getMaxBatchTime().toMillis()),
config.getStagingDir() != null ? new File(config.getStagingDir()) : com.google.common.io.Files.createTempDir(),
config.getLogFilePrefix(), objectMapper, clock, config.isFileTransfersEnabled(), rateLimitedLogFactory,
metricRegistry, null, null);
}
@VisibleForTesting
AthenaAuditWriter(AmazonS3 s3, String s3Bucket, String s3Path, long maxFileSize, Duration maxBatchTime,
File stagingDir, String logFilePrefix, ObjectMapper objectMapper, Clock clock,
boolean fileTransfersEnabled, RateLimitedLogFactory rateLimitedLogFactory,
MetricRegistry metricRegistry, ScheduledExecutorService auditService,
ExecutorService fileTransferService) {
_s3 = requireNonNull(s3);
_s3Bucket = requireNonNull(s3Bucket);
String s3AuditRoot = s3Path;
if (s3AuditRoot.startsWith("/")) {
s3AuditRoot = s3AuditRoot.substring(1);
}
if (s3AuditRoot.endsWith("/")) {
s3AuditRoot = s3AuditRoot.substring(0, s3AuditRoot.length() - 1);
}
_s3AuditRoot = s3AuditRoot;
checkArgument(stagingDir.exists(), "Staging directory must exist");
_maxFileSize = maxFileSize > 0 ? maxFileSize : DEFAULT_MAX_FILE_SIZE;
_maxBatchTimeMs = (maxBatchTime != null && maxBatchTime.compareTo(Duration.ZERO) > 0 ? maxBatchTime : DEFAULT_MAX_BATCH_TIME).toMillis();
_stagingDir = requireNonNull(stagingDir, "stagingDir");
_logFilePrefix = requireNonNull(logFilePrefix, "logFilePrefix");
_clock = requireNonNull(clock, "clock");
// Audit queue isn't completely unbounded but is large enough to ensure at several times the normal write rate
// it can accept audits without blocking.
_auditQueue = new ArrayBlockingQueue<>(4096);
// Need to ensure the object mapper keeps the file stream open after each audit is written.
_objectWriter = objectMapper.copy()
.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false)
.writer();
_fileTransfersEnabled = fileTransfersEnabled;
// Two threads for the audit service: once to drain queued audits and one to close audit logs files and submit
// them for transfer. Normally these are initially null and locally managed, but unit tests may provide
// pre-configured instances.
_auditService = auditService != null ? auditService :
Executors.newScheduledThreadPool(2, new ThreadFactoryBuilder().setNameFormat("audit-log-%d").build());
_fileTransferService = fileTransferService != null ? fileTransferService :
Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("audit-transfer-%d").build());
long now = _clock.millis();
long msToNextBatch = _maxBatchTimeMs - (now % _maxBatchTimeMs);
// Do a one-time closing of all orphaned log files which may have been left behind if a previous EmoDB process
// terminated before closing them.
for (final File logFile : _stagingDir.listFiles((dir, name) -> name.startsWith(_logFilePrefix) && name.endsWith(OPEN_FILE_SUFFIX))) {
if (logFile.length() > 0) {
try {
renameClosedLogFile(logFile);
} catch (IOException e) {
_log.warn("Failed to close orphaned audit log file: {}", logFile, e);
}
} else {
if (!logFile.delete()) {
_log.debug("Failed to delete empty orphaned log file: {}", logFile);
}
}
}
_rateLimitedLog = requireNonNull(rateLimitedLogFactory.from(_log));
// Guage metric to measure the size of the audit queue
metricRegistry.register(MetricRegistry.name(AthenaAuditWriter.class, "auditQueue", "size"), (Gauge) _auditQueue::size);
_auditService.scheduleWithFixedDelay(() -> processQueuedAudits(true),
0, 1, TimeUnit.SECONDS);
_auditService.scheduleAtFixedRate(this::doLogFileMaintenance,
msToNextBatch, _maxBatchTimeMs, TimeUnit.MILLISECONDS);
}
private static AmazonS3 getAmazonS3Client(AuditWriterConfiguration configuration) {
AWSCredentialsProvider credentialsProvider;
if (configuration.getS3AccessKey() != null && configuration.getS3SecretKey() != null) {
credentialsProvider = new AWSStaticCredentialsProvider(
new BasicAWSCredentials(configuration.getS3AccessKey(), configuration.getS3SecretKey()));
} else {
credentialsProvider = new DefaultAWSCredentialsProviderChain();
}
AmazonS3ClientBuilder amazonS3ClientBuilder = AmazonS3ClientBuilder.standard()
.withCredentials(credentialsProvider);
if (configuration.getS3Endpoint() != null) {
amazonS3ClientBuilder.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(configuration.getS3Endpoint(), Regions.getCurrentRegion().getName()));
} else {
amazonS3ClientBuilder.withRegion(Regions.fromName(configuration.getLogBucketRegion()));
}
return amazonS3ClientBuilder.build();
}
@Override
public void flushAndShutdown() {
_auditService.shutdown();
try {
if (_auditService.awaitTermination(5, TimeUnit.SECONDS)) {
// Poll the queue one last time and drain anything that is still remaining
processQueuedAudits(false);
} else {
_log.warn("Audit service did not shutdown cleanly.");
_auditService.shutdownNow();
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
// Close all log files and prepare them for transfer.
closeCompleteLogFiles(true);
prepareClosedLogFilesForTransfer();
transferLogFilesToS3();
_fileTransferService.shutdown();
try {
if (_fileTransferService.awaitTermination(30, TimeUnit.SECONDS)) {
_log.info("All audits were successfully persisted prior to shutdown");
} else {
_log.warn("All audits could not be persisted prior to shutdown");
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
@Override
public void persist(String table, String key, Audit audit, long auditTime) {
try {
_auditQueue.put(new QueuedAudit(table, key, audit, auditTime));
} catch (InterruptedException e) {
// Don't error out if the audit was interrupted since this implementation does not guarantee 100%
// audit retention, just warn that it happened.
_log.warn("Interrupted attempting to write audit for {}/{}", table, key);
}
}
/**
* This method is run at regular intervals to close log files, gzip them and initiate their transfer to S3.
*/
private void doLogFileMaintenance() {
// Close all files that have either exceeded their maximum size or age but have not closed due to lack of
// audit activity.
closeCompleteLogFiles(false);
prepareClosedLogFilesForTransfer();
transferLogFilesToS3();
}
private void closeCompleteLogFiles(boolean forceClose) {
for (AuditOutput auditOutput : ImmutableList.copyOf(_openAuditOutputs.values())) {
if (forceClose || auditOutput.shouldClose()) {
auditOutput.close();
}
}
}
/**
* This method takes all closed log files and GZIPs and renames them in preparation for transfer. If the operation
* fails the original file is unmodified so the next call should attempt to prepare the file again. This means
* the same file may be transferred more than once, but this guarantees that so long as the host remains active the
* file will eventually be transferred.
*/
private void prepareClosedLogFilesForTransfer() {
for (final File logFile : _stagingDir.listFiles((dir, name) -> name.startsWith(_logFilePrefix) && name.endsWith(CLOSED_FILE_SUFFIX))) {
boolean moved;
String fileName = logFile.getName().substring(0, logFile.getName().length() - CLOSED_FILE_SUFFIX.length()) + COMPRESSED_FILE_SUFFIX;
try (FileInputStream fileIn = new FileInputStream(logFile);
FileOutputStream fileOut = new FileOutputStream(new File(logFile.getParentFile(), fileName));
GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(fileOut)) {
ByteStreams.copy(fileIn, gzipOut);
moved = true;
} catch (IOException e) {
_log.warn("Failed to compress audit log file: {}", logFile, e);
moved = false;
}
if (moved) {
if (!logFile.delete()) {
_log.warn("Failed to delete audit log file: {}", logFile);
}
}
}
}
/**
* This method takes all log files prepared by {@link #prepareClosedLogFilesForTransfer()} and initiates their
* transfer to S3. The transfer itself is performed asynchronously.
*/
private void transferLogFilesToS3() {
if (_fileTransfersEnabled) {
// Find all closed log files in the staging directory and move them to S3
for (final File logFile : _stagingDir.listFiles((dir, name) -> name.startsWith(_logFilePrefix) && name.endsWith(COMPRESSED_FILE_SUFFIX))) {
// Extract the date portion of the file name and is it to partition the file in S3
String auditDate = logFile.getName().substring(_logFilePrefix.length() + 1, _logFilePrefix.length() + 9);
String dest = String.format("%s/date=%s/%s", _s3AuditRoot, auditDate, logFile.getName());
_fileTransferService.submit(() -> {
// Since file transfers are done in a single thread, there shouldn't be any concurrency issues,
// but verify the same file wasn't submitted previously and is already transferred.
if (logFile.exists()) {
try {
PutObjectResult result = _s3.putObject(_s3Bucket, dest, logFile);
_log.debug("Audit log copied: {}, ETag={}", logFile, result.getETag());
if (!logFile.delete()) {
_log.warn("Failed to delete file after copying to s3: {}", logFile);
}
} catch (Exception e) {
// Log the error, try again on the next iteration
_rateLimitedLog.error(e, "Failed to copy log file {}", logFile);
}
}
});
}
}
}
/**
* This method is run at regular intervals to remove audits from the audit queue and write them to a local file.
*/
private void processQueuedAudits(boolean interruptable) {
QueuedAudit audit;
try {
while (!(_auditService.isShutdown() && interruptable) && ((audit = _auditQueue.poll()) != null)) {
boolean written = false;
while (!written) {
AuditOutput auditOutput = getAuditOutputForTime(audit.time);
written = auditOutput.writeAudit(audit);
}
}
} catch (Exception e) {
_log.error("Processing of queued audits failed", e);
}
}
private AuditOutput getAuditOutputForTime(long time) {
// Truncate the time based on the batch duration
long batchTime = time - (time % _maxBatchTimeMs);
// The most common case is that audits are written in time order, so optimize by caching the most recently
// used AuditOutput and return it if it is usable for an audit at the given time.
AuditOutput mruAuditOutput = _mruAuditOutput;
if (mruAuditOutput != null && batchTime == mruAuditOutput.getBatchTime() && !mruAuditOutput.isClosed()) {
return mruAuditOutput;
}
return _mruAuditOutput = _openAuditOutputs.computeIfAbsent(batchTime, this::createNewAuditLogOut);
}
private AuditOutput createNewAuditLogOut(long batchTime) {
// Set the batch to close at the end of the next batch time from now, regardless of what time the batch is for.
long now = _clock.millis();
long nextBatchCycleCloseTime = now - (now % _maxBatchTimeMs) + _maxBatchTimeMs;
return new AuditOutput(LOG_FILE_DATE_FORMATTER.format(Instant.ofEpochMilli(batchTime)), batchTime, nextBatchCycleCloseTime);
}
private void renameClosedLogFile(File logFile) throws IOException {
// Move the file to a new file without the ".tmp" suffix
String closedFileName = logFile.getName().substring(0, logFile.getName().length() - OPEN_FILE_SUFFIX.length()) + CLOSED_FILE_SUFFIX;
Files.move(logFile.toPath(), new File(logFile.getParentFile(), closedFileName).toPath());
}
/**
* In-memory holder for an audit in the queue.
*/
private static class QueuedAudit {
final String table;
final String key;
final Audit audit;
final long time;
QueuedAudit(String table, String key, Audit audit, long time) {
this.table = table;
this.key = key;
this.audit = audit;
this.time = time;
}
}
/**
* Class holder for a single audit output file, with most of the file details abstracted by {@link #writeAudit(QueuedAudit)}.
*/
private class AuditOutput {
private final File _auditLogFile;
private final long _batchTime;
private final long _closeTime;
private final ReentrantLock _lock = new ReentrantLock();
private volatile boolean _closed;
private volatile int _auditsWritten = 0;
private volatile CountingOutputStream _auditLogOut;
AuditOutput(String datePrefix, long batchTime, long closeTime) {
String fileName = String.format("%s.%s.%s%s", _logFilePrefix, datePrefix, UUID.randomUUID(), OPEN_FILE_SUFFIX);
_auditLogFile = new File(_stagingDir, fileName);
_batchTime = batchTime;
_closeTime = closeTime;
}
void createAuditLogOut() throws IOException {
FileOutputStream fileOut = new FileOutputStream(_auditLogFile);
_auditLogOut = new CountingOutputStream(fileOut);
}
/**
* Writes a single audit to the log file.
*
* @return True if the audit was written, false if the audit could not be written because the file was closed
* or can no longer accept writes due to file size or age.
*/
boolean writeAudit(QueuedAudit audit) {
Map auditMap = Maps.newLinkedHashMap();
// This is an intentional break from convention to use "tablename" instead of "table". This is because
// "table" is a reserved word in Presto and complicates queries for that column.
auditMap.put("tablename", audit.table);
auditMap.put("key", audit.key);
auditMap.put("time", audit.time);
// Even though the content of the audit is valid JSON the potential key set is unbounded. This makes
// it difficult to define a schema for Presto. So create values for the conventional keys and store
// the rest in an opaque blob.
Map custom = new HashMap<>(audit.audit.getAll());
if (custom.remove(Audit.COMMENT) != null) {
auditMap.put("comment", audit.audit.getComment());
}
if (custom.remove(Audit.HOST) != null) {
auditMap.put("host", audit.audit.getHost());
}
if (custom.remove(Audit.PROGRAM) != null) {
auditMap.put("program", audit.audit.getProgram());
}
if (custom.remove(Audit.SHA1) != null) {
auditMap.put("sha1", audit.audit.getCustom(Audit.SHA1));
}
if (custom.remove(Audit.TAGS) != null) {
auditMap.put("tags", audit.audit.getTags());
}
if (!custom.isEmpty()) {
try {
auditMap.put("custom", _objectWriter.writeValueAsString(custom));
} catch (JsonProcessingException e) {
_log.info("Failed to write custom audit information", e);
}
}
// Lock critical section to ensure the file isn't closed while writing the audit
_lock.lock();
try {
if (shouldClose()) {
close();
}
if (isClosed()) {
return false;
}
// Lazily create the audit log file on the first write. This way if a race occurs and more than
// one AuditOutput is created for a batch only the one which wins will actually generate a file to
// be transferred to S3.
if (_auditLogOut == null) {
createAuditLogOut();
}
_objectWriter.writeValue(_auditLogOut, auditMap);
_auditLogOut.write('\n');
//noinspection NonAtomicOperationOnVolatileField
_auditsWritten += 1;
} catch (IOException e) {
_log.warn("Failed to write audit to logs", e);
} finally {
_lock.unlock();
}
return true;
}
boolean isClosed() {
return _closed;
}
void close() {
_lock.lock();
try {
if (!_closed) {
_closed = true;
if (_auditLogOut != null) {
_auditLogOut.close();
}
if (_auditsWritten != 0) {
renameClosedLogFile(_auditLogFile);
}
}
} catch (IOException e) {
_log.warn("Failed to close log file", e);
} finally {
_openAuditOutputs.remove(_batchTime, this);
_lock.unlock();
}
}
long getBatchTime() {
return _batchTime;
}
boolean isBatchTimedOut() {
return _clock.millis() >= _closeTime;
}
boolean isOversized() {
return _auditLogOut != null && _auditLogOut.getCount() > _maxFileSize;
}
boolean shouldClose() {
return isBatchTimedOut() || isOversized();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy