
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.twitter.distributedlog;
import com.twitter.distributedlog.io.AsyncCloseable;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
/**
* LogReader is a `synchronous` reader reading records from a DL log.
*
* <h3>Lifecycle of a Reader</h3>
*
* A reader is a sequential reader that reads records from a DL log starting
* from a given position. The position could be a DLSN (via {@link DistributedLogManager#getInputStream(DLSN)})
* or a Transaction ID (via {@link DistributedLogManager#getInputStream(long)}).
*
* After the reader is open, it could call {@link #readNext(boolean)} or {@link #readBulk(boolean, int)}
* to read records out of the log from the provided position.
*
* Closing the reader (via {@link #close()}) will release all the resources occupied
* by this reader instance.
*
* Exceptions could be thrown during reading records. Once an exception is thrown,
* the reader is set to an error state and isn't usable anymore. It is the application's
* responsibility to handle the exceptions and re-create readers if necessary.
*
* Example:
*
* <pre>{@code
* DistributedLogManager dlm = ...;
* long nextTxId = ...;
* LogReader reader = dlm.getInputStream(nextTxId);
*
* while (true) { // keep reading & processing records
*     LogRecord record;
*     try {
*         record = reader.readNext(false);
*         if (null == record) { // no record available yet, the reader has caught up
*             continue;
*         }
*         nextTxId = record.getTransactionId();
*         // process the record
*         ...
*     } catch (IOException ioe) {
*         // handle the exception
*         ...
*         reader = dlm.getInputStream(nextTxId + 1);
*     }
* }
* }</pre>
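*
* A reader could equally be opened from a DLSN position. For instance, a minimal
* sketch opening a reader from the beginning of the log (via {@code DLSN.InitialDLSN})
* and releasing its resources when done:
*
* <pre>{@code
* LogReader reader = dlm.getInputStream(DLSN.InitialDLSN);
* try {
*     // read & process records
*     ...
* } finally {
*     reader.close(); // release the resources occupied by the reader
* }
* }</pre>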
*
*
*
* <h3>Read Records</h3>
*
* Reading records from an endless log in a `synchronous` way isn't as
* trivial as in an `asynchronous` way (via {@link AsyncLogReader}), because it
* lacks a callback mechanism. LogReader introduces a flag `nonBlocking` to
* control the waiting behavior of `synchronous` reads.
*
* <h4>Blocking vs NonBlocking</h4>
*
* Blocking (nonBlocking = false) means the reads will wait for records
* before returning from read calls, while NonBlocking (nonBlocking = true)
* means the reads will only check the readahead cache and return whatever records
* are available in the readahead cache.
*
* The waiting period varies in blocking mode. If the reader is
* catching up with the writer (there are records in the log), the read call will
* wait until records are read and returned. If the reader is caught up with the
* writer (there are no more records in the log at read time), the read call
* will wait for a small period of time (defined in
* {@link DistributedLogConfiguration#getReadAheadWaitTime()}) and return whatever
* records are available in the readahead cache. In other words, if a reader sees
* no record on blocking reads, it means the reader is `caught-up` with the
* writer.
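*
* For illustration, a minimal sketch of tuning this wait time (assuming the
* configuration exposes a {@code setReadAheadWaitTime(int)} setter paired with
* {@link DistributedLogConfiguration#getReadAheadWaitTime()}; adjust to the actual
* configuration API if it differs):
*
* <pre>{@code
* DistributedLogConfiguration conf = new DistributedLogConfiguration();
* // bound how long a blocking read waits once the reader has caught up (millis)
* conf.setReadAheadWaitTime(200);
* }</pre>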
*
* Blocking and NonBlocking modes are useful for building replicated
* state machines. Applications could use blocking reads until they catch up
* with the latest data. Once they are caught up, they could start
* serving their service, switch to non-blocking read mode, and tail-read
* data from the logs.
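*
* A minimal sketch of that switch-over, combining both read modes in one loop
* (the {@code caughtUp} flag is only an illustration, not part of this API):
*
* <pre>{@code
* LogReader reader = dlm.getInputStream(nextTxId);
* boolean caughtUp = false;
* while (true) {
*     // blocking reads while catching up, non-blocking reads once tailing
*     LogRecord record = reader.readNext(caughtUp);
*     if (null == record) {
*         if (!caughtUp) {
*             caughtUp = true; // a null blocking read means the reader reached the tail
*             // start serving the service from here
*         }
*         // optionally back off before polling again
*         continue;
*     }
*     // process the record
* }
* }</pre>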
*
* See examples below.
*
*
* <h4>Read Single Record</h4>
*
* {@link #readNext(boolean)} reads individual records from a DL log.
*
* <pre>{@code
* LogReader reader = ...
*
* // keep reading records in a blocking way until no records are available in the log
* LogRecord record = reader.readNext(false);
* while (null != record) {
*     // process the record
*     ...
*     // read the next record
*     record = reader.readNext(false);
* }
*
* ...
*
* // reader is caught up with writer, doing non-blocking reads to tail the log
* while (true) {
*     record = reader.readNext(true);
*     // process the new records
*     ...
* }
* }</pre>
*
*
* <h4>Read Batch of Records</h4>
*
* {@link #readBulk(boolean, int)} is a convenient way to read a batch of records
* from a DL log.
*
* <pre>{@code
* LogReader reader = ...
* int N = 10;
*
* // keep reading N records in a blocking way until no records are available in the log
* List<LogRecordWithDLSN> records = reader.readBulk(false, N);
* while (!records.isEmpty()) {
*     // process the list of records
*     ...
*     if (records.size() < N) { // no more records available in the log
*         break;
*     }
*     // read the next N records
*     records = reader.readBulk(false, N);
* }
*
* ...
*
* // reader is caught up with writer, doing non-blocking reads to tail the log
* while (true) {
*     records = reader.readBulk(true, N);
*     // process the new records
*     ...
* }
* }</pre>
*
*
*
* @see AsyncLogReader
*
* NOTE:
* 1. Extending {@link AsyncCloseable}: BKSyncLogReader is implemented based on BKAsyncLogReader, exposing
* the {@link AsyncCloseable} interface so the reader could be closed asynchronously
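*
* For illustration, a minimal sketch of an asynchronous close (assuming the
* {@code com.twitter.util.Future} returned by {@code asyncClose()} and the
* {@code FutureEventListener} callback used elsewhere in DistributedLog):
*
* <pre>{@code
* // release the reader's resources without blocking the calling thread
* reader.asyncClose().addEventListener(new FutureEventListener<Void>() {
*     public void onSuccess(Void value) {
*         // reader is closed
*     }
*     public void onFailure(Throwable cause) {
*         // handle the close failure
*     }
* });
* }</pre>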
*/
public interface LogReader extends Closeable, AsyncCloseable {
/**
* Read the next log record from the stream.
*
* If nonBlocking is set to true, the call returns immediately by just polling
* records from the read ahead cache. It returns null if there aren't any records
* available in the read ahead cache.
*
* If nonBlocking is set to false, it makes a blocking call. The call will
* block until a record is returned if there are records in the stream (aka catching up).
* Otherwise it waits up to {@link DistributedLogConfiguration#getReadAheadWaitTime()}
* milliseconds and returns null if there aren't any more records in the stream.
*
* @param nonBlocking if true, only poll the read ahead cache; if false, block
*                    waiting for records as described above
* @return the next log record in the stream, or null if no record is currently available
* @throws IOException if there is an error reading from the stream
*/
public LogRecordWithDLSN readNext(boolean nonBlocking) throws IOException;
/**
* Read the next numLogRecords log records from the stream.
*
* @param nonBlocking if true, only poll the read ahead cache; if false, block
*                    waiting for records as described in {@link #readNext(boolean)}
* @param numLogRecords maximum number of log records returned by this call.
* @return the next batch of log records from the stream, or an empty list if no records are currently available
* @throws IOException if there is an error reading from the stream
* @see #readNext(boolean)
*/
public List<LogRecordWithDLSN> readBulk(boolean nonBlocking, int numLogRecords) throws IOException;
}