All downloads are free. Search and download functionality uses the official Maven repository.

alluxio.master.journal.JournalWriter Maven / Gradle / Ivy

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.journal;

import alluxio.Configuration;
import alluxio.Constants;
import alluxio.exception.ExceptionMessage;
import alluxio.master.MasterContext;
import alluxio.proto.journal.Journal.JournalEntry;
import alluxio.underfs.UnderFileSystem;

import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import javax.annotation.concurrent.ThreadSafe;

/**
 * This class manages all the writes to the journal. Journal writes happen in two phases:
 *
 * 1. First the checkpoint file is written. The checkpoint file contains entries reflecting the
 * state of the master with all of the completed logs applied.
 *
 * 2. Afterwards, entries are appended to log files. The checkpoint file must be written before the
 * log files.
 *
 * The latest state can be reconstructed by reading the checkpoint file, and applying all the
 * completed logs and then the remaining log in progress.
 *
 * Thread safety: every synchronized method of this class, and every method of the output streams
 * it hands out, runs under the monitor of the owning {@link JournalWriter} instance. Using a
 * single monitor keeps the shared sequence number consistently guarded and avoids acquiring the
 * writer monitor and a stream monitor in opposite orders.
 */
@ThreadSafe
public final class JournalWriter {
  private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE);

  private final Journal mJournal;
  /** Absolute path to the directory storing all of the journal data. */
  private final String mJournalDirectory;
  /** Absolute path to the directory storing all completed logs. */
  private final String mCompletedDirectory;
  /** Absolute path to the temporary checkpoint file. */
  private final String mTempCheckpointPath;
  /** The UFS where the journal is being written to. */
  private final UnderFileSystem mUfs;
  /** Size threshold (in bytes) at or above which the current log file is rotated on flush. */
  private final long mMaxLogSize;

  /** The log number to assign to the next complete log. */
  private long mNextCompleteLogNumber = Journal.FIRST_COMPLETED_LOG_NUMBER;

  /** The output stream singleton for the checkpoint file. */
  private CheckpointOutputStream mCheckpointOutputStream = null;
  /** The output stream singleton for the entry log files. */
  private EntryOutputStream mEntryOutputStream = null;

  /** The sequence number for the next entry in the log. Guarded by this writer's monitor. */
  private long mNextEntrySequenceNumber = 1;

  /**
   * Creates a new instance of {@link JournalWriter}.
   *
   * @param journal the handle to the journal
   */
  JournalWriter(Journal journal) {
    mJournal = Preconditions.checkNotNull(journal);
    mJournalDirectory = mJournal.getDirectory();
    mCompletedDirectory = mJournal.getCompletedDirectory();
    // The checkpoint is first written to a temporary file next to the real checkpoint file.
    mTempCheckpointPath = mJournal.getCheckpointFilePath() + ".tmp";
    Configuration conf = MasterContext.getConf();
    mUfs = UnderFileSystem.get(mJournalDirectory, conf);
    mMaxLogSize = conf.getBytes(Constants.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX);
  }

  /**
   * Marks all logs as completed. The next completed log number is recomputed by probing the
   * completed log files in sequence, and then the current log (if any) is moved to that number.
   *
   * @throws IOException if an I/O error occurs
   */
  public synchronized void completeAllLogs() throws IOException {
    LOG.info("Marking all logs as complete.");
    // Loop over all complete logs starting from the beginning, to determine the next log number.
    mNextCompleteLogNumber = Journal.FIRST_COMPLETED_LOG_NUMBER;
    String logFilename = mJournal.getCompletedLogFilePath(mNextCompleteLogNumber);
    while (mUfs.exists(logFilename)) {
      mNextCompleteLogNumber++;
      // generate the next completed log filename in the sequence.
      logFilename = mJournal.getCompletedLogFilePath(mNextCompleteLogNumber);
    }
    completeCurrentLog();
  }

  /**
   * Returns an output stream for the journal checkpoint. The returned output stream is a singleton
   * for this writer: the temporary checkpoint file is created on the first call only, and later
   * calls return the same stream without looking at {@code latestSequenceNumber}.
   *
   * @param latestSequenceNumber the sequence number of the latest journal entry. This sequence
   *        number will be used to determine the next sequence numbers for the subsequent journal
   *        entries.
   * @return the output stream for the journal checkpoint
   * @throws IOException if an I/O error occurs
   */
  public synchronized JournalOutputStream getCheckpointOutputStream(long latestSequenceNumber)
      throws IOException {
    if (mCheckpointOutputStream == null) {
      LOG.info("Creating tmp checkpoint file: {}", mTempCheckpointPath);
      if (!mUfs.exists(mJournalDirectory)) {
        LOG.info("Creating journal folder: {}", mJournalDirectory);
        mUfs.mkdirs(mJournalDirectory, true);
      }
      mNextEntrySequenceNumber = latestSequenceNumber + 1;
      LOG.info("Latest journal sequence number: {} Next journal sequence number: {}",
          latestSequenceNumber, mNextEntrySequenceNumber);
      mCheckpointOutputStream =
          new CheckpointOutputStream(new DataOutputStream(mUfs.create(mTempCheckpointPath)));
    }
    return mCheckpointOutputStream;
  }

  /**
   * Returns an output stream for the journal entries. The returned output stream is a singleton for
   * this writer. The checkpoint stream must have been obtained and closed before this is called.
   *
   * @return the output stream for the journal entries
   * @throws IOException if the checkpoint has not been written and closed yet, or if an I/O error
   *         occurs while opening the current log
   */
  public synchronized JournalOutputStream getEntryOutputStream() throws IOException {
    if (mCheckpointOutputStream == null || !mCheckpointOutputStream.isClosed()) {
      throw new IOException("The checkpoint must be written and closed before writing entries.");
    }
    if (mEntryOutputStream == null) {
      mEntryOutputStream = new EntryOutputStream(openCurrentLog());
    }
    return mEntryOutputStream;
  }

  /**
   * Closes the journal: the checkpoint stream (if created), the entry stream (if created), and
   * finally the under file system handle.
   *
   * Every resource is attempted even when closing an earlier one fails, so a failure while
   * closing the checkpoint cannot leak the entry stream or the UFS handle. If more than one close
   * fails, the exception from the last failing close is the one propagated.
   *
   * @throws IOException if an I/O error occurs
   */
  public synchronized void close() throws IOException {
    try {
      if (mCheckpointOutputStream != null) {
        mCheckpointOutputStream.close();
      }
    } finally {
      try {
        if (mEntryOutputStream != null) {
          mEntryOutputStream.close();
        }
      } finally {
        // Close the ufs.
        mUfs.close();
      }
    }
  }

  /**
   * Returns the current log file output stream.
   *
   * @return the output stream for the current log file
   * @throws IOException if an I/O error occurs
   */
  private synchronized OutputStream openCurrentLog() throws IOException {
    String currentLogFile = mJournal.getCurrentLogFilePath();
    OutputStream os = mUfs.create(currentLogFile);
    LOG.info("Opened current log file: {}", currentLogFile);
    return os;
  }

  /**
   * Deletes all of the logs in the completed folder, then resets the completed log counter.
   *
   * @throws IOException if an I/O error occurs
   */
  private synchronized void deleteCompletedLogs() throws IOException {
    LOG.info("Deleting all completed log files...");
    // Loop over all complete logs starting from the beginning.
    // TODO(gpang): should the deletes start from the end?
    long logNumber = Journal.FIRST_COMPLETED_LOG_NUMBER;
    String logFilename = mJournal.getCompletedLogFilePath(logNumber);
    while (mUfs.exists(logFilename)) {
      LOG.info("Deleting completed log: {}", logFilename);
      mUfs.delete(logFilename, true);
      logNumber++;
      // generate the next completed log filename in the sequence.
      logFilename = mJournal.getCompletedLogFilePath(logNumber);
    }
    LOG.info("Finished deleting all completed log files.");

    // All complete logs are deleted. Reset the log number counter.
    mNextCompleteLogNumber = Journal.FIRST_COMPLETED_LOG_NUMBER;
  }

  /**
   * Moves the current log file to the completed folder, marking it as complete. If successful, the
   * current log file will no longer exist. The current log must already be closed before this call.
   *
   * @throws IOException if an I/O error occurs
   */
  private synchronized void completeCurrentLog() throws IOException {
    String currentLog = mJournal.getCurrentLogFilePath();
    if (!mUfs.exists(currentLog)) {
      // All logs are already complete, so nothing to do.
      return;
    }

    if (!mUfs.exists(mCompletedDirectory)) {
      mUfs.mkdirs(mCompletedDirectory, true);
    }

    String completedLog = mJournal.getCompletedLogFilePath(mNextCompleteLogNumber);
    // NOTE(review): any result reported by rename is not inspected here; confirm how
    // UnderFileSystem signals a failed rename before relying on the log line below.
    mUfs.rename(currentLog, completedLog);
    LOG.info("Completed current log: {} to completed log: {}", currentLog, completedLog);

    mNextCompleteLogNumber++;
  }

  /**
   * This is the output stream for the journal checkpoint file. When this stream is closed, it will
   * delete the completed logs, and then mark the current log as complete.
   *
   * All methods synchronize on the enclosing {@link JournalWriter}, because they read and update
   * writer state (the shared sequence number, the completed log counter).
   */
  private class CheckpointOutputStream implements JournalOutputStream {
    private final DataOutputStream mOutputStream;
    /** Whether {@link #close()} has completed. Guarded by the enclosing writer's monitor. */
    private boolean mIsClosed = false;

    CheckpointOutputStream(DataOutputStream outputStream) {
      mOutputStream = outputStream;
    }

    /**
     * @return true once {@link #close()} has run to completion
     */
    boolean isClosed() {
      synchronized (JournalWriter.this) {
        return mIsClosed;
      }
    }

    /**
     * Writes an entry to the checkpoint file.
     *
     * The entry should not have its sequence number set. This method will add the proper sequence
     * number to the passed in entry.
     *
     * @param entry an entry to write to the journal checkpoint file
     * @throws IOException if the stream is already closed or an I/O error occurs
     */
    @Override
    public void writeEntry(JournalEntry entry) throws IOException {
      synchronized (JournalWriter.this) {
        if (mIsClosed) {
          throw new IOException(ExceptionMessage.JOURNAL_WRITE_AFTER_CLOSE.getMessage());
        }
        mJournal.getJournalFormatter().serialize(
            entry.toBuilder().setSequenceNumber(mNextEntrySequenceNumber++).build(),
            mOutputStream);
      }
    }

    /**
     * Closes the checkpoint file. The entries in the checkpoint should already reflect all the
     * state of the complete log files (but not the last, current log file).
     *
     * Closing the checkpoint file will delete all the completed log files, since the checkpoint
     * already reflects that state.
     *
     * The current log file (if it exists) will be closed and marked as complete.
     *
     * The stream is only marked closed after every step has succeeded; calling this again after
     * a successful close is a no-op.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
      synchronized (JournalWriter.this) {
        if (mIsClosed) {
          return;
        }
        mOutputStream.flush();
        mOutputStream.close();

        LOG.info("Successfully created tmp checkpoint file: {}", mTempCheckpointPath);
        // Remove the previous checkpoint so the temporary file can take its place.
        mUfs.delete(mJournal.getCheckpointFilePath(), false);
        // TODO(gpang): the real checkpoint should not be overwritten here, but after all
        // operations.
        // NOTE(review): any results reported by delete/rename are not inspected; confirm how
        // UnderFileSystem signals failure, since the completed logs are deleted right after.
        mUfs.rename(mTempCheckpointPath, mJournal.getCheckpointFilePath());
        mUfs.delete(mTempCheckpointPath, false);
        LOG.info("Renamed checkpoint file {} to {}", mTempCheckpointPath,
            mJournal.getCheckpointFilePath());

        // The checkpoint already reflects the information in the completed logs.
        deleteCompletedLogs();

        // Consider the current log to be complete.
        completeCurrentLog();

        mIsClosed = true;
      }
    }

    /**
     * Flushes the underlying data stream. Does nothing once the stream is closed.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void flush() throws IOException {
      synchronized (JournalWriter.this) {
        if (mIsClosed) {
          return;
        }
        mOutputStream.flush();
      }
    }
  }

  /**
   * This is the output stream for the journal entries after the checkpoint. This output stream
   * handles rotating full log files, and creating the next log file.
   *
   * All methods synchronize on the enclosing {@link JournalWriter}, because they update the
   * shared sequence number and call the writer's log rotation helpers.
   */
  @ThreadSafe
  private class EntryOutputStream implements JournalOutputStream {
    /** The direct output stream created by {@link UnderFileSystem}. */
    private OutputStream mRawOutputStream;
    /** The output stream that wraps around {@link #mRawOutputStream}. */
    private DataOutputStream mDataOutputStream;
    /** Whether {@link #close()} has completed. Guarded by the enclosing writer's monitor. */
    private boolean mIsClosed = false;

    EntryOutputStream(OutputStream outputStream) {
      mRawOutputStream = outputStream;
      mDataOutputStream = new DataOutputStream(outputStream);
    }

    /**
     * The given entry should not have its sequence number set. This method will add the proper
     * sequence number to the passed in entry.
     *
     * @param entry an entry to write to the current journal log file
     * @throws IOException if the stream is already closed or an I/O error occurs
     */
    @Override
    public void writeEntry(JournalEntry entry) throws IOException {
      synchronized (JournalWriter.this) {
        if (mIsClosed) {
          throw new IOException(ExceptionMessage.JOURNAL_WRITE_AFTER_CLOSE.getMessage());
        }
        mJournal.getJournalFormatter().serialize(
            entry.toBuilder().setSequenceNumber(mNextEntrySequenceNumber++).build(),
            mDataOutputStream);
      }
    }

    /**
     * Closes the current log file. Calling this again after a successful close is a no-op.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
      synchronized (JournalWriter.this) {
        if (mIsClosed) {
          return;
        }
        if (mDataOutputStream != null) {
          // Close the current log file.
          mDataOutputStream.close();
        }

        mIsClosed = true;
      }
    }

    /**
     * Flushes buffered entries and rotates the current log when required. Nothing happens if the
     * stream is closed or no bytes have been written to the current log file.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void flush() throws IOException {
      synchronized (JournalWriter.this) {
        if (mIsClosed || mDataOutputStream.size() == 0) {
          // There is nothing to flush.
          return;
        }
        mDataOutputStream.flush();
        if (mRawOutputStream instanceof FSDataOutputStream) {
          // The output stream directly created by {@link UnderFileSystem} may be
          // {@link FSDataOutputStream}, which means the under filesystem is HDFS, but
          // {@link DataOutputStream#flush} won't flush the data to HDFS, so we need to call
          // {@link FSDataOutputStream#sync} to actually flush data to HDFS.
          ((FSDataOutputStream) mRawOutputStream).sync();
        }
        boolean overSize = mDataOutputStream.size() >= mMaxLogSize;
        if (overSize || mUfs.getUnderFSType() == UnderFileSystem.UnderFSType.S3
            || mUfs.getUnderFSType() == UnderFileSystem.UnderFSType.OSS) {
          // (1) The log file is oversize, needs to be rotated. Or
          // (2) Underfs is S3 or OSS, flush on S3OutputStream/OSSOutputStream will only flush to
          // local temporary file,
          // call close and complete the log to sync the journal entry to S3/OSS.
          if (overSize) {
            LOG.info("Rotating log file. size: {} maxSize: {}", mDataOutputStream.size(),
                mMaxLogSize);
          }
          // rotate the current log.
          // NOTE(review): if completing or reopening the log fails, this stream keeps a
          // reference to the already-closed data stream; confirm callers treat that as fatal.
          mDataOutputStream.close();
          completeCurrentLog();
          mRawOutputStream = openCurrentLog();
          mDataOutputStream = new DataOutputStream(mRawOutputStream);
        }
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy