
org.apache.hadoop.fs.azurebfs.services.ReadBufferManager

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.azurebfs.services;

import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Stack;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;

/**
 * The Read Buffer Manager for Rest AbfsClient.
 */
final class ReadBufferManager {
  private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class);
  private static final int ONE_KB = 1024;
  private static final int ONE_MB = ONE_KB * ONE_KB;

  private static final int NUM_BUFFERS = 16;
  private static final int NUM_THREADS = 8;
  private static final int DEFAULT_THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold

  private static int blockSize = 4 * ONE_MB;
  private static int thresholdAgeMilliseconds = DEFAULT_THRESHOLD_AGE_MILLISECONDS;
  private Thread[] threads = new Thread[NUM_THREADS];
  private byte[][] buffers;    // array of byte[] buffers, to hold the data that is read
  private Stack<Integer> freeList = new Stack<>();   // indices in buffers[] array that are available

  private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet
  private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads
  private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading
  private static ReadBufferManager bufferManager; // singleton, lazily initialized in getBufferManager()
  private static final ReentrantLock LOCK = new ReentrantLock();

  static ReadBufferManager getBufferManager() {
    if (bufferManager == null) {
      LOCK.lock();
      try {
        if (bufferManager == null) {
          bufferManager = new ReadBufferManager();
          bufferManager.init();
        }
      } finally {
        LOCK.unlock();
      }
    }
    return bufferManager;
  }

  static void setReadBufferManagerConfigs(int readAheadBlockSize) {
    if (bufferManager == null) {
      LOGGER.debug(
          "ReadBufferManager not initialized yet. Overriding readAheadBlockSize as {}",
          readAheadBlockSize);
      blockSize = readAheadBlockSize;
    }
  }

  private void init() {
    buffers = new byte[NUM_BUFFERS][];
    for (int i = 0; i < NUM_BUFFERS; i++) {
      buffers[i] = new byte[blockSize];  // same buffers are reused. The byte array never goes back to GC
      freeList.add(i);
    }
    for (int i = 0; i < NUM_THREADS; i++) {
      Thread t = new Thread(new ReadBufferWorker(i));
      t.setDaemon(true);
      threads[i] = t;
      t.setName("ABFS-prefetch-" + i);
      t.start();
    }
    ReadBufferWorker.UNLEASH_WORKERS.countDown();
  }

  // hide instance constructor
  private ReadBufferManager() {
  }

  /*
   *
   *  AbfsInputStream-facing methods
   *
   */

  /**
   * {@link AbfsInputStream} calls this method to queue read-aheads.
   *
   * @param stream          The {@link AbfsInputStream} for which to do the read-ahead
   * @param requestedOffset The offset in the file which should be read
   * @param requestedLength The length to read
   */
  void queueReadAhead(final AbfsInputStream stream, final long requestedOffset,
      final int requestedLength, TracingContext tracingContext) {
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("Start Queueing readAhead for {} offset {} length {}",
          stream.getPath(), requestedOffset, requestedLength);
    }
    ReadBuffer buffer;
    synchronized (this) {
      if (isAlreadyQueued(stream, requestedOffset)) {
        return; // already queued, do not queue again
      }
      if (freeList.isEmpty() && !tryEvict()) {
        return; // no buffers available, cannot queue anything
      }

      buffer = new ReadBuffer();
      buffer.setStream(stream);
      buffer.setOffset(requestedOffset);
      buffer.setLength(0);
      buffer.setRequestedLength(requestedLength);
      buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE);
      buffer.setLatch(new CountDownLatch(1));
      buffer.setTracingContext(tracingContext);

      Integer bufferIndex = freeList.pop();  // will return a value, since we have checked size > 0 already
      buffer.setBuffer(buffers[bufferIndex]);
      buffer.setBufferindex(bufferIndex);
      readAheadQueue.add(buffer);
      notifyAll();
      if (LOGGER.isTraceEnabled()) {
        LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}",
            stream.getPath(), requestedOffset, buffer.getBufferindex());
      }
    }
  }

  /**
   * {@link AbfsInputStream} calls this method to read any bytes already available in a buffer (thereby saving a
   * remote read). This returns the bytes if the data already exists in buffer. If there is a buffer that is reading
   * the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead
   * but not picked up by a worker thread yet, then it cancels that read-ahead and reports cache miss. This is because
   * depending on worker thread availability, the read-ahead may take a while - the calling thread can do its own
   * read to get the data faster (compared to the read waiting in queue for an indeterminate amount of time).
   *
   * @param stream   the file to read bytes for
   * @param position the offset in the file to do a read for
   * @param length   the length to read
   * @param buffer   the buffer to read data into. Note that the buffer will be written into from offset 0.
   * @return the number of bytes read
   */
  int getBlock(final AbfsInputStream stream, final long position, final int length, final byte[] buffer)
      throws IOException {
    // not synchronized, so have to be careful with locking
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("getBlock for file {} position {} thread {}",
          stream.getPath(), position, Thread.currentThread().getName());
    }

    waitForProcess(stream, position);

    int bytesRead = 0;
    synchronized (this) {
      bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer);
    }
    if (bytesRead > 0) {
      if (LOGGER.isTraceEnabled()) {
        LOGGER.trace("Done read from Cache for {} position {} length {}",
            stream.getPath(), position, bytesRead);
      }
      return bytesRead;
    }

    // otherwise, just say we got nothing - calling thread can do its own read
    return 0;
  }

  /*
   *
   *  Internal methods
   *
   */

  private void waitForProcess(final AbfsInputStream stream, final long position) {
    ReadBuffer readBuf;
    synchronized (this) {
      clearFromReadAheadQueue(stream, position);
      readBuf = getFromList(inProgressList, stream, position);
    }
    if (readBuf != null) {         // if in in-progress queue, then block for it
      try {
        if (LOGGER.isTraceEnabled()) {
          LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}",
              stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex());
        }
        readBuf.getLatch().await();  // blocking wait on the caller stream's thread
        // Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread
        // is done processing it (in doneReading). There, the latch is set after removing the buffer from
        // inProgressList. So this latch is safe to be outside the synchronized block.
        // Putting it in synchronized would result in a deadlock, since this thread would be holding the lock
        // while waiting, so no one will be able to change any state. If this becomes more complex in the future,
        // then the latch can be removed and replaced with wait/notify whenever inProgressList is touched.
      } catch (InterruptedException ex) {
        Thread.currentThread().interrupt();
      }
      if (LOGGER.isTraceEnabled()) {
        LOGGER.trace("latch done for file {} buffer idx {} length {}",
            stream.getPath(), readBuf.getBufferindex(), readBuf.getLength());
      }
    }
  }

  /**
   * If any buffer in the completed list can be reclaimed then reclaim it and return the buffer to free list.
   * The objective is to find just one buffer - there is no advantage to evicting more than one.
   *
   * @return whether the eviction succeeded - i.e., were we able to free up one buffer
   */
  private synchronized boolean tryEvict() {
    ReadBuffer nodeToEvict = null;
    if (completedReadList.size() <= 0) {
      return false;  // there are no evict-able buffers
    }

    long currentTimeInMs = currentTimeMillis();

    // first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed)
    for (ReadBuffer buf : completedReadList) {
      if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) {
        nodeToEvict = buf;
        break;
      }
    }
    if (nodeToEvict != null) {
      return evict(nodeToEvict);
    }

    // next, try buffers where any bytes have been consumed (may be a bad idea? have to experiment and see)
    for (ReadBuffer buf : completedReadList) {
      if (buf.isAnyByteConsumed()) {
        nodeToEvict = buf;
        break;
      }
    }
    if (nodeToEvict != null) {
      return evict(nodeToEvict);
    }

    // next, try any old nodes that have not been consumed
    // Failed read buffers (with buffer index=-1) that are older than
    // thresholdAge should be cleaned up, but at the same time should not
    // report successful eviction.
    // Queue logic expects that a buffer is freed up for read ahead when
    // eviction is successful, whereas a failed ReadBuffer would have released
    // its buffer when its status was set to READ_FAILED.
    long earliestBirthday = Long.MAX_VALUE;
    ArrayList<ReadBuffer> oldFailedBuffers = new ArrayList<>();
    for (ReadBuffer buf : completedReadList) {
      if ((buf.getBufferindex() != -1)
          && (buf.getTimeStamp() < earliestBirthday)) {
        nodeToEvict = buf;
        earliestBirthday = buf.getTimeStamp();
      } else if ((buf.getBufferindex() == -1)
          && (currentTimeInMs - buf.getTimeStamp()) > thresholdAgeMilliseconds) {
        oldFailedBuffers.add(buf);
      }
    }

    for (ReadBuffer buf : oldFailedBuffers) {
      evict(buf);
    }

    if ((currentTimeInMs - earliestBirthday > thresholdAgeMilliseconds) && (nodeToEvict != null)) {
      return evict(nodeToEvict);
    }

    LOGGER.trace("No buffer eligible for eviction");
    // nothing can be evicted
    return false;
  }

  private boolean evict(final ReadBuffer buf) {
    // As failed ReadBuffers (bufferIndex = -1) are saved in completedReadList,
    // avoid adding it to freeList.
    if (buf.getBufferindex() != -1) {
      freeList.push(buf.getBufferindex());
    }

    completedReadList.remove(buf);
    buf.setTracingContext(null);
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}",
          buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength());
    }
    return true;
  }

  private boolean isAlreadyQueued(final AbfsInputStream stream, final long requestedOffset) {
    // returns true if any part of the buffer is already queued
    return (isInList(readAheadQueue, stream, requestedOffset)
        || isInList(inProgressList, stream, requestedOffset)
        || isInList(completedReadList, stream, requestedOffset));
  }

  private boolean isInList(final Collection<ReadBuffer> list, final AbfsInputStream stream,
      final long requestedOffset) {
    return (getFromList(list, stream, requestedOffset) != null);
  }

  private ReadBuffer getFromList(final Collection<ReadBuffer> list, final AbfsInputStream stream,
      final long requestedOffset) {
    for (ReadBuffer buffer : list) {
      if (buffer.getStream() == stream) {
        if (buffer.getStatus() == ReadBufferStatus.AVAILABLE
            && requestedOffset >= buffer.getOffset()
            && requestedOffset < buffer.getOffset() + buffer.getLength()) {
          return buffer;
        } else if (requestedOffset >= buffer.getOffset()
            && requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) {
          return buffer;
        }
      }
    }
    return null;
  }

  /**
   * Returns buffers that failed or passed from completed queue.
   * @param stream          the stream whose buffer is being looked up
   * @param requestedOffset the offset that the returned buffer must cover
   * @return the matching completed {@link ReadBuffer}, or null if none exists
   */
  private ReadBuffer getBufferFromCompletedQueue(final AbfsInputStream stream, final long requestedOffset) {
    for (ReadBuffer buffer : completedReadList) {
      // Buffer is returned if the requestedOffset is at or above buffer's
      // offset but less than buffer's length or the actual requestedLength
      if ((buffer.getStream() == stream)
          && (requestedOffset >= buffer.getOffset())
          && ((requestedOffset < buffer.getOffset() + buffer.getLength())
              || (requestedOffset < buffer.getOffset() + buffer.getRequestedLength()))) {
        return buffer;
      }
    }
    return null;
  }

  private void clearFromReadAheadQueue(final AbfsInputStream stream, final long requestedOffset) {
    ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset);
    if (buffer != null) {
      readAheadQueue.remove(buffer);
      notifyAll();   // lock is held in calling method
      freeList.push(buffer.getBufferindex());
    }
  }

  private int getBlockFromCompletedQueue(final AbfsInputStream stream, final long position,
      final int length, final byte[] buffer) throws IOException {
    ReadBuffer buf = getBufferFromCompletedQueue(stream, position);

    if (buf == null) {
      return 0;
    }

    if (buf.getStatus() == ReadBufferStatus.READ_FAILED) {
      // To prevent new read requests from failing due to old read-ahead attempts,
      // return exception only from buffers that failed within last thresholdAgeMilliseconds
      if ((currentTimeMillis() - (buf.getTimeStamp()) < thresholdAgeMilliseconds)) {
        throw buf.getErrException();
      } else {
        return 0;
      }
    }

    if ((buf.getStatus() != ReadBufferStatus.AVAILABLE)
        || (position >= buf.getOffset() + buf.getLength())) {
      return 0;
    }

    int cursor = (int) (position - buf.getOffset());
    int availableLengthInBuffer = buf.getLength() - cursor;
    int lengthToCopy = Math.min(length, availableLengthInBuffer);
    System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy);
    if (cursor == 0) {
      buf.setFirstByteConsumed(true);
    }
    if (cursor + lengthToCopy == buf.getLength()) {
      buf.setLastByteConsumed(true);
    }
    buf.setAnyByteConsumed(true);
    return lengthToCopy;
  }

  /*
   *
   *  ReadBufferWorker-thread-facing methods
   *
   */

  /**
   * ReadBufferWorker thread calls this to get the next buffer that it should work on.
   *
   * @return {@link ReadBuffer}
   * @throws InterruptedException if thread is interrupted
   */
  ReadBuffer getNextBlockToRead() throws InterruptedException {
    ReadBuffer buffer = null;
    synchronized (this) {
      // buffer = readAheadQueue.take();  // blocking method
      while (readAheadQueue.size() == 0) {
        wait();
      }
      buffer = readAheadQueue.remove();
      notifyAll();
      if (buffer == null) {
        return null;  // should never happen
      }
      buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS);
      inProgressList.add(buffer);
    }
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("ReadBufferWorker picked file {} for offset {}",
          buffer.getStream().getPath(), buffer.getOffset());
    }
    return buffer;
  }

  /**
   * ReadBufferWorker thread calls this method to post completion.
   *
   * @param buffer            the buffer whose read was completed
   * @param result            the {@link ReadBufferStatus} after the read operation in the worker thread
   * @param bytesActuallyRead the number of bytes that the worker thread was actually able to read
   */
  void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) {
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}",
          buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead);
    }
    synchronized (this) {
      // If this buffer has already been purged during
      // close of InputStream then we don't update the lists.
      if (inProgressList.contains(buffer)) {
        inProgressList.remove(buffer);
        if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) {
          buffer.setStatus(ReadBufferStatus.AVAILABLE);
          buffer.setLength(bytesActuallyRead);
        } else {
          freeList.push(buffer.getBufferindex());
          // buffer will be deleted as per the eviction policy.
        }
        // completed list also contains FAILED read buffers
        // for sending exception message to clients.
        buffer.setStatus(result);
        buffer.setTimeStamp(currentTimeMillis());
        completedReadList.add(buffer);
      }
    }

    // outside the synchronized block, since anyone receiving a wake-up from the latch must see safe-published results
    buffer.getLatch().countDown(); // wake up waiting threads (if any)
  }

  /**
   * Similar to System.currentTimeMillis, except implemented with System.nanoTime().
   * System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization),
   * making it unsuitable for measuring time intervals. nanoTime is strictly monotonically increasing per CPU core.
   * Note: it is not monotonic across sockets, and even within a CPU, it is only the
   * more recent parts which share a clock across all cores.
   *
   * @return current time in milliseconds
   */
  private long currentTimeMillis() {
    return System.nanoTime() / 1000 / 1000;
  }

  @VisibleForTesting
  int getThresholdAgeMilliseconds() {
    return thresholdAgeMilliseconds;
  }

  @VisibleForTesting
  static void setThresholdAgeMilliseconds(int thresholdAgeMs) {
    thresholdAgeMilliseconds = thresholdAgeMs;
  }

  @VisibleForTesting
  int getCompletedReadListSize() {
    return completedReadList.size();
  }

  @VisibleForTesting
  public synchronized List<ReadBuffer> getCompletedReadListCopy() {
    return new ArrayList<>(completedReadList);
  }

  @VisibleForTesting
  public synchronized List<Integer> getFreeListCopy() {
    return new ArrayList<>(freeList);
  }

  @VisibleForTesting
  public synchronized List<ReadBuffer> getReadAheadQueueCopy() {
    return new ArrayList<>(readAheadQueue);
  }

  @VisibleForTesting
  public synchronized List<ReadBuffer> getInProgressCopiedList() {
    return new ArrayList<>(inProgressList);
  }

  @VisibleForTesting
  void callTryEvict() {
    tryEvict();
  }

  /**
   * Purging the buffers associated with an {@link AbfsInputStream}
   * from {@link ReadBufferManager} when stream is closed.
   * @param stream input stream.
   */
  public synchronized void purgeBuffersForStream(AbfsInputStream stream) {
    LOGGER.debug("Purging stale buffers for AbfsInputStream {} ", stream);
    readAheadQueue.removeIf(readBuffer -> readBuffer.getStream() == stream);
    purgeList(stream, completedReadList);
    purgeList(stream, inProgressList);
  }

  /**
   * Method to remove buffers associated with an {@link AbfsInputStream}
   * when its close method is called.
   * NOTE: This method is not threadsafe and must be called inside a
   * synchronised block. See caller.
   * @param stream associated input stream.
   * @param list   list of buffers like {@link this#completedReadList}
   *               or {@link this#inProgressList}.
   */
  private void purgeList(AbfsInputStream stream, LinkedList<ReadBuffer> list) {
    for (Iterator<ReadBuffer> it = list.iterator(); it.hasNext();) {
      ReadBuffer readBuffer = it.next();
      if (readBuffer.getStream() == stream) {
        it.remove();
        // As failed ReadBuffers (bufferIndex = -1) are already pushed to free
        // list in doneReading method, we will skip adding those here again.
        if (readBuffer.getBufferindex() != -1) {
          freeList.push(readBuffer.getBufferindex());
        }
      }
    }
  }

  /**
   * Test method that can clean up the current state of readAhead buffers and
   * the lists. Will also trigger a fresh init.
   */
  @VisibleForTesting
  void testResetReadBufferManager() {
    synchronized (this) {
      ArrayList<ReadBuffer> completedBuffers = new ArrayList<>();
      for (ReadBuffer buf : completedReadList) {
        if (buf != null) {
          completedBuffers.add(buf);
        }
      }

      for (ReadBuffer buf : completedBuffers) {
        evict(buf);
      }

      readAheadQueue.clear();
      inProgressList.clear();
      completedReadList.clear();
      freeList.clear();
      for (int i = 0; i < NUM_BUFFERS; i++) {
        buffers[i] = null;
      }
      buffers = null;
      resetBufferManager();
    }
  }

  /**
   * Reset buffer manager to null.
   */
  @VisibleForTesting
  static void resetBufferManager() {
    bufferManager = null;
  }

  /**
   * Reset readAhead buffer to needed readAhead block size and
   * thresholdAgeMilliseconds.
   * @param readAheadBlockSize       the readAhead block size to use
   * @param thresholdAgeMilliseconds the buffer age threshold in milliseconds
   */
  @VisibleForTesting
  void testResetReadBufferManager(int readAheadBlockSize, int thresholdAgeMilliseconds) {
    setBlockSize(readAheadBlockSize);
    setThresholdAgeMilliseconds(thresholdAgeMilliseconds);
    testResetReadBufferManager();
  }

  @VisibleForTesting
  static void setBlockSize(int readAheadBlockSize) {
    blockSize = readAheadBlockSize;
  }

  @VisibleForTesting
  int getReadAheadBlockSize() {
    return blockSize;
  }

  /**
   * Test method that can mimic no free buffers scenario and also add a ReadBuffer
   * into completedReadList. This readBuffer will get picked up by tryEvict()
   * next time a new queue request comes in.
   * @param buf the buffer that needs to be added to completedReadList
   */
  @VisibleForTesting
  void testMimicFullUseAndAddFailedBuffer(ReadBuffer buf) {
    freeList.clear();
    completedReadList.add(buf);
  }
}
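
For orientation, the sketch below shows roughly how a caller in the same package, such as AbfsInputStream, might drive this manager during a read: queue a read-ahead for an upcoming block, try to serve the current position from the prefetch cache, and fall back to its own remote read on a miss. It is an illustration only, not part of ReadBufferManager.java; the variables nextBlockOffset, readAheadBlockSize, position, length, destinationBuffer, tracingContext and the readRemote() helper are hypothetical stand-ins for the caller's own state.

// Hypothetical caller-side sketch, assumed to run inside an AbfsInputStream-like class
// in the org.apache.hadoop.fs.azurebfs.services package. Illustration only.
ReadBufferManager bufferManager = ReadBufferManager.getBufferManager();

// Opportunistically queue a prefetch of an upcoming block. This is a no-op if the range is
// already queued, in progress, or completed, or if no buffer is free and none can be evicted.
bufferManager.queueReadAhead(this, nextBlockOffset, readAheadBlockSize, tracingContext);

// Try to serve the current read from an already-prefetched buffer. This may block while a
// worker thread finishes reading the requested range.
int bytesRead = bufferManager.getBlock(this, position, length, destinationBuffer);

if (bytesRead == 0) {
  // Cache miss: getBlock returns 0 and the caller performs its own remote read.
  bytesRead = readRemote(position, destinationBuffer, 0, length, tracingContext); // hypothetical helper
}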




