
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.twitter.distributedlog;
import com.twitter.distributedlog.io.AsyncCloseable;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
/**
* LogReader is a `synchronous` reader reading records from a DL log.
*
* <h3>Lifecycle of a Reader</h3>
*
* A reader is a sequential reader that reads records from a DL log starting
* from a given position. The position could be a DLSN (via {@link DistributedLogManager#getInputStream(DLSN)})
* or a Transaction ID (via {@link DistributedLogManager#getInputStream(long)}).
*
* After the reader is open, it could call {@link #readNext(boolean)} or {@link #readBulk(boolean, int)}
* to read records out of the log from the provided position.
*
* Closing the reader (via {@link #close()}) will release all the resources occupied
* by this reader instance.
*
* Exceptions could be thrown during reading records. Once an exception is thrown,
* the reader is set to an error state and isn't usable anymore. It is the application's
* responsibility to handle the exceptions and re-create readers if necessary.
*
* Example:
*
* <pre>{@code
* DistributedLogManager dlm = ...;
* long nextTxId = ...;
* LogReader reader = dlm.getInputStream(nextTxId);
*
* while (true) { // keep reading & processing records
*     LogRecord record;
*     try {
*         record = reader.readNext(false);
*         if (null == record) { // no record available yet, the reader has caught up
*             continue;
*         }
*         nextTxId = record.getTransactionId();
*         // process the record
*         ...
*     } catch (IOException ioe) {
*         // handle the exception
*         ...
*         reader = dlm.getInputStream(nextTxId + 1);
*     }
* }
* }</pre>
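*
* A reader could equally be opened from a DLSN position. For instance, a minimal
* sketch opening a reader from the beginning of the log (via {@code DLSN.InitialDLSN})
* and releasing its resources when done:
*
* <pre>{@code
* LogReader reader = dlm.getInputStream(DLSN.InitialDLSN);
* try {
*     // read & process records
*     ...
* } finally {
*     reader.close(); // release the resources occupied by the reader
* }
* }</pre>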
*
*
*
* <h3>Read Records</h3>
*
* Reading records from an endless log in a `synchronous` way isn't as
* trivial as in an `asynchronous` way (via {@link AsyncLogReader}), because it
* lacks a callback mechanism. LogReader introduces a flag `nonBlocking` to
* control the waiting behavior of `synchronous` reads.
*
* <h4>Blocking vs NonBlocking</h4>
*
* Blocking (nonBlocking = false) means the reads will wait for records
* before returning from read calls, while NonBlocking (nonBlocking = true)
* means the reads will only check the readahead cache and return whatever records
* are available in the readahead cache.
*
* The waiting period varies in blocking mode. If the reader is
* catching up with the writer (there are records in the log), the read call will
* wait until records are read and returned. If the reader is caught up with the
* writer (there are no more records in the log at read time), the read call
* will wait for a small period of time (defined in
* {@link DistributedLogConfiguration#getReadAheadWaitTime()}) and return whatever
* records are available in the readahead cache. In other words, if a reader sees
* no record on blocking reads, it means the reader is `caught-up` with the
* writer.
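*
* For illustration, a minimal sketch of tuning this wait time (assuming the
* configuration exposes a {@code setReadAheadWaitTime(int)} setter paired with
* {@link DistributedLogConfiguration#getReadAheadWaitTime()}; adjust to the actual
* configuration API if it differs):
*
* <pre>{@code
* DistributedLogConfiguration conf = new DistributedLogConfiguration();
* // bound how long a blocking read waits once the reader has caught up (millis)
* conf.setReadAheadWaitTime(200);
* }</pre>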
*
* Blocking and NonBlocking modes are useful for building replicated
* state machines. Applications could use blocking reads until they catch up
* with the latest data. Once they are caught up, they could start
* serving their service, switch to non-blocking read mode, and tail-read
* data from the logs.
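*
* A minimal sketch of that switch-over, combining both read modes in one loop
* (the {@code caughtUp} flag is only an illustration, not part of this API):
*
* <pre>{@code
* LogReader reader = dlm.getInputStream(nextTxId);
* boolean caughtUp = false;
* while (true) {
*     // blocking reads while catching up, non-blocking reads once tailing
*     LogRecord record = reader.readNext(caughtUp);
*     if (null == record) {
*         if (!caughtUp) {
*             caughtUp = true; // a null blocking read means the reader reached the tail
*             // start serving the service from here
*         }
*         // optionally back off before polling again
*         continue;
*     }
*     // process the record
* }
* }</pre>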
*
* See examples below.
*
*
* <h4>Read Single Record</h4>
*
* {@link #readNext(boolean)} reads individual records from a DL log.
*
* <pre>{@code
* LogReader reader = ...
*
* // keep reading records in a blocking way until no records are available in the log
* LogRecord record = reader.readNext(false);
* while (null != record) {
*     // process the record
*     ...
*     // read the next record
*     record = reader.readNext(false);
* }
*
* ...
*
* // reader is caught up with writer, doing non-blocking reads to tail the log
* while (true) {
*     record = reader.readNext(true);
*     // process the new records
*     ...
* }
* }</pre>
*
*
* <h4>Read Batch of Records</h4>
*
* {@link #readBulk(boolean, int)} is a convenient way to read a batch of records
* from a DL log.
*
* <pre>{@code
* LogReader reader = ...
* int N = 10;
*
* // keep reading N records in a blocking way until no records are available in the log
* List<LogRecordWithDLSN> records = reader.readBulk(false, N);
* while (!records.isEmpty()) {
*     // process the list of records
*     ...
*     if (records.size() < N) { // no more records available in the log
*         break;
*     }
*     // read the next N records
*     records = reader.readBulk(false, N);
* }
*
* ...
*
* // reader is caught up with writer, doing non-blocking reads to tail the log
* while (true) {
*     records = reader.readBulk(true, N);
*     // process the new records
*     ...
* }
* }</pre>
*
*
*
* @see AsyncLogReader
*
* NOTE:
* 1. Extending {@link AsyncCloseable}: BKSyncLogReader is implemented based on BKAsyncLogReader, exposing
* the {@link AsyncCloseable} interface so the reader could be closed asynchronously
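*
* For illustration, a minimal sketch of an asynchronous close (assuming the
* {@code com.twitter.util.Future} returned by {@code asyncClose()} and the
* {@code FutureEventListener} callback used elsewhere in DistributedLog):
*
* <pre>{@code
* // release the reader's resources without blocking the calling thread
* reader.asyncClose().addEventListener(new FutureEventListener<Void>() {
*     public void onSuccess(Void value) {
*         // reader is closed
*     }
*     public void onFailure(Throwable cause) {
*         // handle the close failure
*     }
* });
* }</pre>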
*/
public interface LogReader extends Closeable, AsyncCloseable {
/**
* Read the next log record from the stream.
*
* If nonBlocking is set to true, the call returns immediately by just polling
* records from the read ahead cache. It returns null if there aren't any records
* available in the read ahead cache.
*
* If nonBlocking is set to false, it makes a blocking call. The call will
* block until a record is returned if there are records in the stream (aka catching up).
* Otherwise it waits up to {@link DistributedLogConfiguration#getReadAheadWaitTime()}
* milliseconds and returns null if there aren't any more records in the stream.
*
* @param nonBlocking if true, only poll the read ahead cache; if false, block
*                    waiting for records as described above
* @return the next log record in the stream, or null if no record is currently available
* @throws IOException if there is an error reading from the stream
*/
public LogRecordWithDLSN readNext(boolean nonBlocking) throws IOException;
/**
* Read the next numLogRecords log records from the stream.
*
* @param nonBlocking if true, only poll the read ahead cache; if false, block
*                    waiting for records as described in {@link #readNext(boolean)}
* @param numLogRecords maximum number of log records returned by this call.
* @return the next batch of log records from the stream, or an empty list if no records are currently available
* @throws IOException if there is an error reading from the stream
* @see #readNext(boolean)
*/
public List<LogRecordWithDLSN> readBulk(boolean nonBlocking, int numLogRecords) throws IOException;
}