/*
* Copyright (c) 2018 Uber Technologies, Inc. ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util.queue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.util.DefaultSizeEstimator;
import com.uber.hoodie.common.util.SizeEstimator;
import com.uber.hoodie.exception.HoodieException;
import java.util.Iterator;
import java.util.Optional;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Used for enqueueing input records. Queue limit is controlled by {@link #memoryLimit}.
* Unlike standard bounded queue implementations, this queue bounds the size by memory bytes occupied by its
* tenants. The standard implementation bounds by the number of entries in the queue.
*
* It internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts number of records in
* queue accordingly. This is done to ensure that we don't OOM.
*
* This queue supports multiple producer single consumer pattern.
*
* @param input payload data type
* @param output payload data type
*/
public class BoundedInMemoryQueue implements Iterable {
// interval used for polling records in the queue.
public static final int RECORD_POLL_INTERVAL_SEC = 1;
// rate used for sampling records to determine avg record size in bytes.
public static final int RECORD_SAMPLING_RATE = 64;
// maximum records that will be cached
private static final int RECORD_CACHING_LIMIT = 128 * 1024;
private static Logger logger = LogManager.getLogger(BoundedInMemoryQueue.class);
// It indicates number of records to cache. We will be using sampled record's average size to
// determine how many
// records we should cache and will change (increase/decrease) permits accordingly.
@VisibleForTesting
public final Semaphore rateLimiter = new Semaphore(1);
// used for sampling records with "RECORD_SAMPLING_RATE" frequency.
public final AtomicLong samplingRecordCounter = new AtomicLong(-1);
// internal queue for records.
private final LinkedBlockingQueue> queue = new
LinkedBlockingQueue<>();
// maximum amount of memory to be used for queueing records.
private final long memoryLimit;
// it holds the root cause of the exception in case either queueing records (consuming from
// inputIterator) fails or
// thread reading records from queue fails.
private final AtomicReference hasFailed = new AtomicReference(null);
// used for indicating that all the records from queue are read successfully.
private final AtomicBoolean isReadDone = new AtomicBoolean(false);
// used for indicating that all records have been enqueued
private final AtomicBoolean isWriteDone = new AtomicBoolean(false);
// Function to transform the input payload to the expected output payload
private final Function transformFunction;
// Payload Size Estimator
private final SizeEstimator payloadSizeEstimator;
// Singleton (w.r.t this instance) Iterator for this queue
private final QueueIterator iterator;
// indicates rate limit (number of records to cache). it is updated whenever there is a change
// in avg record size.
@VisibleForTesting
public int currentRateLimit = 1;
// indicates avg record size in bytes. It is updated whenever a new record is sampled.
@VisibleForTesting
public long avgRecordSizeInBytes = 0;
// indicates number of samples collected so far.
private long numSamples = 0;
/**
* Construct BoundedInMemoryQueue with default SizeEstimator
*
* @param memoryLimit MemoryLimit in bytes
* @param transformFunction Transformer Function to convert input payload type to stored payload type
*/
public BoundedInMemoryQueue(final long memoryLimit, final Function transformFunction) {
this(memoryLimit, transformFunction, new DefaultSizeEstimator() {
});
}
/**
* Construct BoundedInMemoryQueue with passed in size estimator
*
* @param memoryLimit MemoryLimit in bytes
* @param transformFunction Transformer Function to convert input payload type to stored payload type
* @param payloadSizeEstimator Payload Size Estimator
*/
public BoundedInMemoryQueue(
final long memoryLimit,
final Function transformFunction,
final SizeEstimator payloadSizeEstimator) {
this.memoryLimit = memoryLimit;
this.transformFunction = transformFunction;
this.payloadSizeEstimator = payloadSizeEstimator;
this.iterator = new QueueIterator();
}
@VisibleForTesting
public int size() {
return this.queue.size();
}
/**
* Samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in bytes. It is used
* for determining how many maximum records to queue. Based on change in avg size it ma increase or decrease
* available permits.
*
* @param payload Payload to size
*/
private void adjustBufferSizeIfNeeded(final O payload) throws InterruptedException {
if (this.samplingRecordCounter.incrementAndGet() % RECORD_SAMPLING_RATE != 0) {
return;
}
final long recordSizeInBytes = payloadSizeEstimator.sizeEstimate(payload);
final long newAvgRecordSizeInBytes = Math
.max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
final int newRateLimit = (int) Math
.min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));
// If there is any change in number of records to cache then we will either release (if it increased) or acquire
// (if it decreased) to adjust rate limiting to newly computed value.
if (newRateLimit > currentRateLimit) {
rateLimiter.release(newRateLimit - currentRateLimit);
} else if (newRateLimit < currentRateLimit) {
rateLimiter.acquire(currentRateLimit - newRateLimit);
}
currentRateLimit = newRateLimit;
avgRecordSizeInBytes = newAvgRecordSizeInBytes;
numSamples++;
}
/**
* Inserts record into queue after applying transformation
*
* @param t Item to be queueed
*/
public void insertRecord(I t) throws Exception {
// If already closed, throw exception
if (isWriteDone.get()) {
throw new IllegalStateException("Queue closed for enqueueing new entries");
}
// We need to stop queueing if queue-reader has failed and exited.
throwExceptionIfFailed();
rateLimiter.acquire();
// We are retrieving insert value in the record queueing thread to offload computation
// around schema validation
// and record creation to it.
final O payload = transformFunction.apply(t);
adjustBufferSizeIfNeeded(payload);
queue.put(Optional.of(payload));
}
/**
* Checks if records are either available in the queue or expected to be written in future
*/
private boolean expectMoreRecords() {
return !isWriteDone.get() || (isWriteDone.get() && !queue.isEmpty());
}
/**
* Reader interface but never exposed to outside world as this is a single consumer queue.
* Reading is done through a singleton iterator for this queue.
*/
private Optional readNextRecord() {
if (this.isReadDone.get()) {
return Optional.empty();
}
rateLimiter.release();
Optional newRecord = Optional.empty();
while (expectMoreRecords()) {
try {
throwExceptionIfFailed();
newRecord = queue.poll(RECORD_POLL_INTERVAL_SEC, TimeUnit.SECONDS);
if (newRecord != null) {
break;
}
} catch (InterruptedException e) {
logger.error("error reading records from queue", e);
throw new HoodieException(e);
}
}
// Check one more time here as it is possible producer errored out and closed immediately
throwExceptionIfFailed();
if (newRecord != null && newRecord.isPresent()) {
return newRecord;
} else {
// We are done reading all the records from internal iterator.
this.isReadDone.set(true);
return Optional.empty();
}
}
/**
* Puts an empty entry to queue to denote termination
*/
public void close() throws InterruptedException {
// done queueing records notifying queue-reader.
isWriteDone.set(true);
}
private void throwExceptionIfFailed() {
if (this.hasFailed.get() != null) {
throw new HoodieException("operation has failed", this.hasFailed.get());
}
}
/**
* API to allow producers and consumer to communicate termination due to failure
*/
public void markAsFailed(Exception e) {
this.hasFailed.set(e);
// release the permits so that if the queueing thread is waiting for permits then it will
// get it.
this.rateLimiter.release(RECORD_CACHING_LIMIT + 1);
}
@Override
public Iterator iterator() {
return iterator;
}
/**
* Iterator for the memory bounded queue
*/
private final class QueueIterator implements Iterator {
// next record to be read from queue.
private O nextRecord;
@Override
public boolean hasNext() {
if (this.nextRecord == null) {
Optional res = readNextRecord();
this.nextRecord = res.orElse(null);
}
return this.nextRecord != null;
}
@Override
public O next() {
Preconditions.checkState(hasNext() && this.nextRecord != null);
final O ret = this.nextRecord;
this.nextRecord = null;
return ret;
}
}
}