/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DeprecatedUTF8;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import static org.apache.hadoop.hdfs.server.common.Util.now;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.JournalStream.JournalType;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.security.token.delegation.DelegationKey;

/**
 * FSEditLog maintains a log of the namespace modifications.
 * 
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLog {
  
  abstract static class Ops {
    public static final byte OP_INVALID = -1;
    public static final byte OP_ADD = 0;
    public static final byte OP_RENAME_OLD = 1;  // rename
    public static final byte OP_DELETE = 2;  // delete
    public static final byte OP_MKDIR = 3;   // create directory
    public static final byte OP_SET_REPLICATION = 4; // set replication
    //the following two are used only for backward compatibility :
    @Deprecated public static final byte OP_DATANODE_ADD = 5;
    @Deprecated public static final byte OP_DATANODE_REMOVE = 6;
    public static final byte OP_SET_PERMISSIONS = 7;
    public static final byte OP_SET_OWNER = 8;
    public static final byte OP_CLOSE = 9;    // close after write
    public static final byte OP_SET_GENSTAMP = 10;    // store genstamp
    /* The following two are not used any more. Should be removed once
     * LAST_UPGRADABLE_LAYOUT_VERSION is -17 or newer. */
    public static final byte OP_SET_NS_QUOTA = 11; // set namespace quota
    public static final byte OP_CLEAR_NS_QUOTA = 12; // clear namespace quota
    public static final byte OP_TIMES = 13; // sets mod & access time on a file
    public static final byte OP_SET_QUOTA = 14; // sets name and disk quotas.
    public static final byte OP_RENAME = 15;  // new rename
    public static final byte OP_CONCAT_DELETE = 16; // concat files.
    public static final byte OP_SYMLINK = 17; // a symbolic link
    public static final byte OP_GET_DELEGATION_TOKEN = 18; //new delegation token
    public static final byte OP_RENEW_DELEGATION_TOKEN = 19; //renew delegation token
    public static final byte OP_CANCEL_DELEGATION_TOKEN = 20; //cancel delegation token
    public static final byte OP_UPDATE_MASTER_KEY = 21; //update master key
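
    // Note: opcode values are part of the on-disk edit log format, so
    // existing codes must never be renumbered or reused; new operations
    // must take fresh values.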
  
    /* 
     * The following operations are used to control remote edit log streams,
     * and not logged into file streams.
     */
    static final byte OP_JSPOOL_START = // start journal spool
                                      NamenodeProtocol.JA_JSPOOL_START;
    static final byte OP_CHECKPOINT_TIME = // incr checkpoint time
                                      NamenodeProtocol.JA_CHECKPOINT_TIME;
  }

  static final String NO_JOURNAL_STREAMS_WARNING = "!!! WARNING !!!" +
      " File system changes are not persistent. No journal streams.";

  private static final Log LOG = LogFactory.getLog(FSEditLog.class);

  private volatile int sizeOutputFlushBuffer = 512*1024;

  private ArrayList<EditLogOutputStream> editStreams = null;
  private FSImage fsimage = null;

  // a monotonically increasing counter that represents transactionIds.
  private long txid = 0;

  // stores the last synced transactionId.
  private long synctxid = 0;

  // the time of printing the statistics to the log file.
  private long lastPrintTime;

  // is a sync currently running?
  private volatile boolean isSyncRunning;

  // is an automatic sync scheduled?
  private volatile boolean isAutoSyncScheduled = false;

  // these are statistics counters.
  private long numTransactions;        // number of transactions
  private long numTransactionsBatchedInSync;
  private long totalTimeTransactions;  // total time for all transactions
  private NameNodeMetrics metrics;

  private static class TransactionId {
    public long txid;

    TransactionId(long value) {
      this.txid = value;
    }
  }

  // stores the most current transactionId of this thread.
  private static final ThreadLocal<TransactionId> myTransactionId = 
    new ThreadLocal<TransactionId>() {
    protected synchronized TransactionId initialValue() {
      return new TransactionId(Long.MAX_VALUE);
    }
  };
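
  // Design note: the initial value is Long.MAX_VALUE, so a thread that calls
  // logSync() without having recorded a transaction of its own never sees
  // mytxid <= synctxid and therefore flushes everything buffered so far
  // instead of returning early.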

  FSEditLog(FSImage image) {
    fsimage = image;
    isSyncRunning = false;
    metrics = NameNode.getNameNodeMetrics();
    lastPrintTime = now();
  }
  
  private File getEditFile(StorageDirectory sd) {
    return fsimage.getEditFile(sd);
  }
  
  private File getEditNewFile(StorageDirectory sd) {
    return fsimage.getEditNewFile(sd);
  }
  
  private int getNumEditsDirs() {
    return fsimage.getNumStorageDirs(NameNodeDirType.EDITS);
  }

  synchronized int getNumEditStreams() {
    return editStreams == null ? 0 : editStreams.size();
  }

  /**
   * Return the currently active edit streams.
   * This should be used only by unit tests.
   */
  ArrayList<EditLogOutputStream> getEditStreams() {
    return editStreams;
  }

  boolean isOpen() {
    return getNumEditStreams() > 0;
  }

  /**
   * Create empty edit log files.
   * Initialize the output stream for logging.
   * 
   * @throws IOException
   */
  synchronized void open() throws IOException {
    numTransactions = totalTimeTransactions = numTransactionsBatchedInSync = 0;
    if (editStreams == null)
      editStreams = new ArrayList<EditLogOutputStream>();
    
    ArrayList<StorageDirectory> al = null;
    for (Iterator<StorageDirectory> it = 
           fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
      StorageDirectory sd = it.next();
      File eFile = getEditFile(sd);
      try {
        addNewEditLogStream(eFile);
      } catch (IOException e) {
        LOG.warn("Unable to open edit log file " + eFile);
        // Remove the directory from list of storage directories
        if(al == null) al = new ArrayList<StorageDirectory>(1);
        al.add(sd);
      }
    }
    
    if(al != null) fsimage.processIOError(al, false);
    
    // If there was an error in every storage dir, each one will have
    // been removed from the list of storage directories.
    if (fsimage.getNumStorageDirs(NameNodeDirType.EDITS) == 0) {
      throw new IOException(
          "Failed to initialize edits log in any storage directory.");
    }
  }
  
  
  synchronized void addNewEditLogStream(File eFile) throws IOException {
    EditLogOutputStream eStream = new EditLogFileOutputStream(eFile,
        sizeOutputFlushBuffer);
    editStreams.add(eStream);
  }

  synchronized void createEditLogFile(File name) throws IOException {
    waitForSyncToFinish();

    EditLogOutputStream eStream = new EditLogFileOutputStream(name,
        sizeOutputFlushBuffer);
    eStream.create();
    eStream.close();
  }

  /**
   * Shutdown the file store.
   */
  synchronized void close() {
    waitForSyncToFinish();
    if (editStreams == null || editStreams.isEmpty()) {
      return;
    }
    printStatistics(true);
    numTransactions = totalTimeTransactions = numTransactionsBatchedInSync = 0;

    ArrayList<EditLogOutputStream> errorStreams = null;
    Iterator<EditLogOutputStream> it = getOutputStreamIterator(null);
    while(it.hasNext()) {
      EditLogOutputStream eStream = it.next();
      try {
        closeStream(eStream);
      } catch (IOException e) {
        LOG.warn("FSEditLog:close - failed to close stream " 
            + eStream.getName());
        if(errorStreams == null)
          errorStreams = new ArrayList<EditLogOutputStream>(1);
        errorStreams.add(eStream);
      }
    }
    processIOError(errorStreams, true);
    editStreams.clear();
  }

  /**
   * Close and remove edit log stream.
   * @param index of the stream
   */
  synchronized private void removeStream(int index) {
    EditLogOutputStream eStream = editStreams.get(index);
    try {
      eStream.close();
    } catch (Exception e) {}
    editStreams.remove(index);
  }

  /**
   * The specified streams have IO errors. Close and remove them.
   * If propagate is true - close related StorageDirectories.
   * (is called with propagate value true from everywhere
   *  except fsimage.processIOError)
   */
  synchronized void processIOError(
      List<EditLogOutputStream> errorStreams,
      boolean propagate) {
    
    if (errorStreams == null || errorStreams.size() == 0) {
      return;                       // nothing to do
    }

    String lsd = fsimage.listStorageDirectories();
    LOG.info("current list of storage dirs:" + lsd);

    ArrayList<StorageDirectory> al = null;
    for (EditLogOutputStream eStream : errorStreams) {
      LOG.error("Unable to log edits to " + eStream.getName()
          + "; removing it");     

      StorageDirectory storageDir;
      if(propagate && eStream.getType() == JournalType.FILE && //find SD
          (storageDir = getStorage(eStream)) != null) {
        LOG.info("about to remove corresponding storage:" 
            + storageDir.getRoot().getAbsolutePath());
        // remove corresponding storage dir
        if(al == null) al = new ArrayList<StorageDirectory>(1);
        al.add(storageDir);
      }
      Iterator<EditLogOutputStream> ies = editStreams.iterator();
      while (ies.hasNext()) {
        EditLogOutputStream es = ies.next();
        if (es == eStream) {  
          try { eStream.close(); } catch (IOException e) {
            // nothing to do.
            LOG.warn("Failed to close eStream " + eStream.getName()
                + " before removing it (might be ok)");
          }
          ies.remove();
          break;
        }
      } 
    }
    
    if (editStreams == null || editStreams.size() <= 0) {
      String msg = "Fatal Error: All storage directories are inaccessible.";
      LOG.fatal(msg, new IOException(msg)); 
      Runtime.getRuntime().exit(-1);
    }

    // remove failed storage directories
    if(propagate && al != null) fsimage.processIOError(al, false);
    
    //for the rest of the streams
    if(propagate) incrementCheckpointTime();
    
    lsd = fsimage.listStorageDirectories();
    LOG.info("at the end current list of storage dirs:" + lsd);
  }


  /**
   * Get the storage directory corresponding to an edit stream.
   * @param es an edit output stream
   * @return the matching storage directory, or null if none matches
   */
  StorageDirectory getStorage(EditLogOutputStream es) {
    String parentStorageDir = ((EditLogFileOutputStream)es).getFile()
    .getParentFile().getParentFile().getAbsolutePath();

    Iterator<StorageDirectory> it = fsimage.dirIterator(); 
    while (it.hasNext()) {
      StorageDirectory sd = it.next();
      LOG.info("comparing: " + parentStorageDir + " and " + sd.getRoot().getAbsolutePath()); 
      if (parentStorageDir.equals(sd.getRoot().getAbsolutePath()))
        return sd;
    }
    return null;
  }
  
  /**
   * Get the edit stream corresponding to a storage directory.
   * @param sd a storage directory
   * @return the matching edit stream, or null if none matches
   */
  synchronized EditLogOutputStream getEditsStream(StorageDirectory sd) {
    for (EditLogOutputStream es : editStreams) {
      File parentStorageDir = ((EditLogFileOutputStream)es).getFile()
        .getParentFile().getParentFile();
      if (parentStorageDir.getName().equals(sd.getRoot().getName()))
        return es;
    }
    return null;
  }

  /**
   * Check if the edits.new log exists in the specified storage directory.
   */
  boolean existsNew(StorageDirectory sd) {
    return getEditNewFile(sd).exists(); 
  }

  /**
   * Write an operation to the edit log. Do not sync to persistent
   * store yet.
   */
  void logEdit(byte op, Writable ... writables) {
    synchronized (this) {
      // wait if an automatic sync is scheduled
      waitIfAutoSyncScheduled();
      
      if(getNumEditStreams() == 0)
        throw new java.lang.IllegalStateException(NO_JOURNAL_STREAMS_WARNING);
      ArrayList<EditLogOutputStream> errorStreams = null;
      long start = now();
      for(EditLogOutputStream eStream : editStreams) {
        if(!eStream.isOperationSupported(op))
          continue;
        try {
          eStream.write(op, writables);
        } catch (IOException ie) {
          LOG.error("logEdit: removing "+ eStream.getName(), ie);
          if(errorStreams == null)
            errorStreams = new ArrayList<EditLogOutputStream>(1);
          errorStreams.add(eStream);
        }
      }
      processIOError(errorStreams, true);
      recordTransaction(start);
      
      // check if it is time to schedule an automatic sync
      if (!shouldForceSync()) {
        return;
      }
      isAutoSyncScheduled = true;
    }
    
    // sync buffered edit log entries to persistent store
    logSync();
  }
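
  // Illustrative call pattern (a sketch, not code from this file): callers
  // such as FSNamesystem buffer a record under their own lock and force the
  // durable sync afterwards, e.g.
  //
  //   editLog.logMkDir(path, inode);  // buffered; may schedule an auto-sync
  //   // ... release the namesystem lock ...
  //   editLog.logSync();              // flush this thread's edits to disk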

  /**
   * Wait if an automatic sync is scheduled.
   */
  synchronized void waitIfAutoSyncScheduled() {
    try {
      while (isAutoSyncScheduled) {
        this.wait(1000);
      }
    } catch (InterruptedException e) {
    }
  }
  
  /**
   * Signal that an automatic sync scheduling is done if it is scheduled
   */
  synchronized void doneWithAutoSyncScheduling() {
    if (isAutoSyncScheduled) {
      isAutoSyncScheduled = false;
      notifyAll();
    }
  }
  
  /**
   * Check if should automatically sync buffered edits to 
   * persistent store
   * 
   * @return true if any of the edit stream says that it should sync
   */
  private boolean shouldForceSync() {
    for (EditLogOutputStream eStream : editStreams) {
      if (eStream.shouldForceSync()) {
        return true;
      }
    }
    return false;
  }
  
  private void recordTransaction(long start) {
    // get a new transactionId
    txid++;

    //
    // record the transactionId when new data was written to the edits log
    //
    TransactionId id = myTransactionId.get();
    id.txid = txid;

    // update statistics
    long end = now();
    numTransactions++;
    totalTimeTransactions += (end-start);
    if (metrics != null) // Metrics is non-null only when used inside name node
      metrics.transactions.inc((end-start));
  }
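
  // Note: recordTransaction is only reached while holding the FSEditLog
  // monitor (logEdit(byte, ...) calls it inside a synchronized block and
  // logEdit(int, byte[]) is a synchronized method), which keeps the txid
  // increment and the ThreadLocal bookkeeping race-free.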

  /**
   * Blocks until all ongoing edits have been synced to disk.
   * This differs from logSync in that it waits for edits that have been
   * written by other threads, not just edits from the calling thread.
   *
   * NOTE: this should be done while holding the FSNamesystem lock, or
   * else more operations can start writing while this is in progress.
   */
  void logSyncAll() throws IOException {
    // Record the most recent transaction ID as our own id
    synchronized (this) {
      TransactionId id = myTransactionId.get();
      id.txid = txid;
    }
    // Then make sure we're synced up to this point
    logSync();
  }
  
  /**
   * Sync all modifications done by this thread.
   *
   * The internal concurrency design of this class is as follows:
   *   - Log items are written synchronized into an in-memory buffer,
   *     and each assigned a transaction ID.
   *   - When a thread (client) would like to sync all of its edits, logSync()
   *     uses a ThreadLocal transaction ID to determine what edit number must
   *     be synced to.
   *   - The isSyncRunning volatile boolean tracks whether a sync is currently
   *     under progress.
   *
   * The data is double-buffered within each edit log implementation so that
   * in-memory writing can occur in parallel with the on-disk writing.
   *
   * Each sync occurs in three steps:
   *   1. synchronized, it swaps the double buffer and sets the isSyncRunning
   *      flag.
   *   2. unsynchronized, it flushes the data to storage
   *   3. synchronized, it resets the flag and notifies anyone waiting on the
   *      sync.
   *
   * The lack of synchronization on step 2 allows other threads to continue
   * to write into the memory buffer while the sync is in progress.
   * Because this step is unsynchronized, actions that need to avoid
   * concurrency with sync() should be synchronized and also call
   * waitForSyncToFinish() before assuming they are running alone.
   */
  public void logSync() {
    ArrayList<EditLogOutputStream> errorStreams = null;
    long syncStart = 0;

    // Fetch the transactionId of this thread. 
    long mytxid = myTransactionId.get().txid;
    ArrayList<EditLogOutputStream> streams = new ArrayList<EditLogOutputStream>();
    boolean sync = false;
    try {
      synchronized (this) {
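        // Step 1 (see the javadoc above): under the monitor, wait out any
        // running sync, claim this one via isSyncRunning, and swap the
        // double buffers so they are ready to flush.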
        try {
        printStatistics(false);
  
        // if somebody is already syncing, then wait
        while (mytxid > synctxid && isSyncRunning) {
          try {
            wait(1000);
          } catch (InterruptedException ie) { 
          }
        }
  
        //
        // If this transaction was already flushed, then nothing to do
        //
        if (mytxid <= synctxid) {
          numTransactionsBatchedInSync++;
          if (metrics != null) // Metrics is non-null only when used inside name node
            metrics.transactionsBatchedInSync.inc();
          return;
        }
     
        // now, this thread will do the sync
        syncStart = txid;
        isSyncRunning = true;
        sync = true;
  
        // swap buffers
        assert editStreams.size() > 0 : "no editlog streams";
        for(EditLogOutputStream eStream : editStreams) {
          try {
            eStream.setReadyToFlush();
            streams.add(eStream);
          } catch (IOException ie) {
            LOG.error("Unable to get ready to flush.", ie);
            //
            // remember the streams that encountered an error.
            //
            if (errorStreams == null) {
              errorStreams = new ArrayList<EditLogOutputStream>(1);
            }
            errorStreams.add(eStream);
          }
        }
        } finally {
          // Prevent RuntimeException from blocking other log edit write 
          doneWithAutoSyncScheduling();
        }
      }
  
      // Step 2: do the sync outside the monitor, so other threads can keep
      // writing into the swapped-in buffer while this one flushes to disk.
      long start = now();
      for (EditLogOutputStream eStream : streams) {
        try {
          eStream.flush();
        } catch (IOException ie) {
          LOG.error("Unable to sync edit log.", ie);
          //
          // remember the streams that encountered an error.
          //
          if (errorStreams == null) {
            errorStreams = new ArrayList<EditLogOutputStream>(1);
          }
          errorStreams.add(eStream);
        }
      }
      long elapsed = now() - start;
      processIOError(errorStreams, true);
  
      if (metrics != null) // Metrics non-null only when used inside name node
        metrics.syncs.inc(elapsed);
    } finally {
      // Prevent RuntimeException from blocking other log edit sync 
      synchronized (this) {
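        // Step 3: back under the monitor - publish the synced txid, clear
        // isSyncRunning, and wake threads blocked in logSync() or
        // waitForSyncToFinish().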
        if (sync) {
          synctxid = syncStart;
          isSyncRunning = false;
        }
        this.notifyAll();
     }
    }
  }
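
  // Worked example of sync batching (hypothetical txids): threads T1, T2, T3
  // buffer transactions 5, 6 and 7, then each calls logSync(). If T1 wins the
  // race it sets syncStart = txid = 7 and flushes all three records; when T2
  // and T3 get the monitor back they see mytxid <= synctxid and return
  // immediately, counted as transactionsBatchedInSync.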

  //
  // print statistics every 1 minute.
  //
  private void printStatistics(boolean force) {
    long now = now();
    if (lastPrintTime + 60000 > now && !force) {
      return;
    }
    if (editStreams == null || editStreams.size()==0) {
      return;
    }
    lastPrintTime = now;
    StringBuilder buf = new StringBuilder();
    buf.append("Number of transactions: ");
    buf.append(numTransactions);
    buf.append(" Total time for transactions(ms): ");
    buf.append(totalTimeTransactions);
    buf.append("Number of transactions batched in Syncs: ");
    buf.append(numTransactionsBatchedInSync);
    buf.append(" Number of syncs: ");
    buf.append(editStreams.get(0).getNumSync());
    buf.append(" SyncTimes(ms): ");

    int numEditStreams = editStreams.size();
    for (int idx = 0; idx < numEditStreams; idx++) {
      EditLogOutputStream eStream = editStreams.get(idx);
      buf.append(eStream.getTotalSyncTime());
      buf.append(" ");
    }
    LOG.info(buf);
  }

  /** 
   * Add open lease record to edit log. 
   * Records the block locations of the last block.
   */
  public void logOpenFile(String path, INodeFileUnderConstruction newNode) {

    DeprecatedUTF8 nameReplicationPair[] = new DeprecatedUTF8[] { 
      new DeprecatedUTF8(path), 
      FSEditLog.toLogReplication(newNode.getReplication()),
      FSEditLog.toLogLong(newNode.getModificationTime()),
      FSEditLog.toLogLong(newNode.getAccessTime()),
      FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
    logEdit(Ops.OP_ADD,
            new ArrayWritable(DeprecatedUTF8.class, nameReplicationPair), 
            new ArrayWritable(Block.class, newNode.getBlocks()),
            newNode.getPermissionStatus(),
            new DeprecatedUTF8(newNode.getClientName()),
            new DeprecatedUTF8(newNode.getClientMachine()));
  }

  /** 
   * Add close lease record to edit log.
   */
  public void logCloseFile(String path, INodeFile newNode) {
    DeprecatedUTF8 nameReplicationPair[] = new DeprecatedUTF8[] {
      new DeprecatedUTF8(path),
      FSEditLog.toLogReplication(newNode.getReplication()),
      FSEditLog.toLogLong(newNode.getModificationTime()),
      FSEditLog.toLogLong(newNode.getAccessTime()),
      FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
    logEdit(Ops.OP_CLOSE,
            new ArrayWritable(DeprecatedUTF8.class, nameReplicationPair),
            new ArrayWritable(Block.class, newNode.getBlocks()),
            newNode.getPermissionStatus());
  }
  
  /** 
   * Add create directory record to edit log
   */
  public void logMkDir(String path, INode newNode) {
    DeprecatedUTF8 info[] = new DeprecatedUTF8[] {
      new DeprecatedUTF8(path),
      FSEditLog.toLogLong(newNode.getModificationTime()),
      FSEditLog.toLogLong(newNode.getAccessTime())
    };
    logEdit(Ops.OP_MKDIR, new ArrayWritable(DeprecatedUTF8.class, info),
        newNode.getPermissionStatus());
  }
  
  /** 
   * Add rename record to edit log
   * TODO: use String parameters until just before writing to disk
   */
  void logRename(String src, String dst, long timestamp) {
    DeprecatedUTF8 info[] = new DeprecatedUTF8[] { 
      new DeprecatedUTF8(src),
      new DeprecatedUTF8(dst),
      FSEditLog.toLogLong(timestamp)};
    logEdit(Ops.OP_RENAME_OLD, new ArrayWritable(DeprecatedUTF8.class, info));
  }
  
  /** 
   * Add rename record to edit log
   */
  void logRename(String src, String dst, long timestamp, Options.Rename... options) {
    DeprecatedUTF8 info[] = new DeprecatedUTF8[] { 
      new DeprecatedUTF8(src),
      new DeprecatedUTF8(dst),
      FSEditLog.toLogLong(timestamp)};
    logEdit(Ops.OP_RENAME, new ArrayWritable(DeprecatedUTF8.class, info),
        toBytesWritable(options));
  }
  
  /** 
   * Add set replication record to edit log
   */
  void logSetReplication(String src, short replication) {
    logEdit(Ops.OP_SET_REPLICATION, 
            new DeprecatedUTF8(src), 
            FSEditLog.toLogReplication(replication));
  }
  
  /** Add set quota record to edit log
   * 
   * @param src the string representation of the path to a directory
   * @param nsQuota the namespace quota
   * @param dsQuota the diskspace quota
   */
  void logSetQuota(String src, long nsQuota, long dsQuota) {
    logEdit(Ops.OP_SET_QUOTA, new DeprecatedUTF8(src), 
            new LongWritable(nsQuota), new LongWritable(dsQuota));
  }

  /**  Add set permissions record to edit log */
  void logSetPermissions(String src, FsPermission permissions) {
    logEdit(Ops.OP_SET_PERMISSIONS, new DeprecatedUTF8(src), permissions);
  }

  /**  Add set owner record to edit log */
  void logSetOwner(String src, String username, String groupname) {
    DeprecatedUTF8 u = new DeprecatedUTF8(username == null? "": username);
    DeprecatedUTF8 g = new DeprecatedUTF8(groupname == null? "": groupname);
    logEdit(Ops.OP_SET_OWNER, new DeprecatedUTF8(src), u, g);
  }
  
  /**
   * concat(trg,src..) log
   */
  void logConcat(String trg, String [] srcs, long timestamp) {
    int size = 1 + srcs.length + 1; // trg, srcs, timestamp
    DeprecatedUTF8 info[] = new DeprecatedUTF8[size];
    int idx = 0;
    info[idx++] = new DeprecatedUTF8(trg);
    for(int i = 0; i < srcs.length; i++) {
      info[idx++] = new DeprecatedUTF8(srcs[i]);
    }
    info[idx] = FSEditLog.toLogLong(timestamp);
    logEdit(Ops.OP_CONCAT_DELETE, new ArrayWritable(DeprecatedUTF8.class, info));
  }

  /** 
   * Add delete file record to edit log
   */
  void logDelete(String src, long timestamp) {
    DeprecatedUTF8 info[] = new DeprecatedUTF8[] { 
      new DeprecatedUTF8(src),
      FSEditLog.toLogLong(timestamp)};
    logEdit(Ops.OP_DELETE, new ArrayWritable(DeprecatedUTF8.class, info));
  }

  /** 
   * Add generation stamp record to edit log
   */
  void logGenerationStamp(long genstamp) {
    logEdit(Ops.OP_SET_GENSTAMP, new LongWritable(genstamp));
  }

  /** 
   * Add access time record to edit log
   */
  void logTimes(String src, long mtime, long atime) {
    DeprecatedUTF8 info[] = new DeprecatedUTF8[] { 
      new DeprecatedUTF8(src),
      FSEditLog.toLogLong(mtime),
      FSEditLog.toLogLong(atime)};
    logEdit(Ops.OP_TIMES, new ArrayWritable(DeprecatedUTF8.class, info));
  }

  /** 
   * Add a create symlink record.
   */
  void logSymlink(String path, String value, long mtime, 
                  long atime, INodeSymlink node) {
    DeprecatedUTF8 info[] = new DeprecatedUTF8[] { 
      new DeprecatedUTF8(path),
      new DeprecatedUTF8(value),
      FSEditLog.toLogLong(mtime),
      FSEditLog.toLogLong(atime)};
    logEdit(Ops.OP_SYMLINK, 
            new ArrayWritable(DeprecatedUTF8.class, info),
            node.getPermissionStatus());
  }

  /**
   * Add a "get delegation token" record to the edit log.
   */
  void logGetDelegationToken(DelegationTokenIdentifier id, long expiryTime) {
    logEdit(Ops.OP_GET_DELEGATION_TOKEN, id, FSEditLog.toLogLong(expiryTime));
  }

  /**
   * Add a "renew delegation token" record to the edit log.
   */
  void logRenewDelegationToken(DelegationTokenIdentifier id, long expiryTime) {
    logEdit(Ops.OP_RENEW_DELEGATION_TOKEN, id, FSEditLog.toLogLong(expiryTime));
  }

  /**
   * Add a "cancel delegation token" record to the edit log.
   */
  void logCancelDelegationToken(DelegationTokenIdentifier id) {
    logEdit(Ops.OP_CANCEL_DELEGATION_TOKEN, id);
  }

  /**
   * Add an "update master key" record to the edit log.
   */
  void logUpdateMasterKey(DelegationKey key) {
    logEdit(Ops.OP_UPDATE_MASTER_KEY, key);
  }

  static private DeprecatedUTF8 toLogReplication(short replication) {
    return new DeprecatedUTF8(Short.toString(replication));
  }

  static private DeprecatedUTF8 toLogLong(long timestamp) {
    return new DeprecatedUTF8(Long.toString(timestamp));
  }

  /**
   * Return the size of the current edit log in bytes.
   */
  synchronized long getEditLogSize() throws IOException {
    assert getNumEditsDirs() <= getNumEditStreams() :
      "Number of edits directories should not exceed the number of streams.";
    long size = 0;
    ArrayList<EditLogOutputStream> al = null;
    for (int idx = 0; idx < getNumEditStreams(); idx++) {
      EditLogOutputStream es = editStreams.get(idx);
      try {
        long curSize = es.length();
        assert (size == 0 || size == curSize || curSize ==0) :
          "Wrong streams size";
        size = Math.max(size, curSize);
      } catch (IOException e) {
        LOG.error("getEditLogSize: editstream.length failed. removing editlog (" +
            idx + ") " + es.getName());
        if(al==null) al = new ArrayList<EditLogOutputStream>(1);
        al.add(es);
      }
    }
    if(al!=null) processIOError(al, true);
    return size;
  }
  
  /**
   * Closes the current edit log and opens edits.new. 
   */
  synchronized void rollEditLog() throws IOException {
    waitForSyncToFinish();
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    if(!it.hasNext()) 
      return;
    //
    // If edits.new already exists in some directory, verify it
    // exists in all directories.
    //
    boolean alreadyExists = existsNew(it.next());
    while(it.hasNext()) {
      StorageDirectory sd = it.next();
      if(alreadyExists != existsNew(sd))
        throw new IOException(getEditNewFile(sd) 
              + " should " + (alreadyExists ? "" : "not ") + "exist.");
    }
    if(alreadyExists)
      return; // nothing to do, edits.new exists!

    // check if any of failed storage is now available and put it back
    fsimage.attemptRestoreRemovedStorage();

    divertFileStreams(
        Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS_NEW.getName());
  }
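
  // Roll protocol in brief: rollEditLog() redirects all file journals to
  // edits.new while a checkpoint is taken from the frozen edits file; once
  // the checkpoint is saved, purgeEditLog() renames edits.new back to edits
  // and reopens the streams (see divertFileStreams/revertFileStreams below).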

  /**
   * Divert file streams from file edits to file edits.new.
   * Close file streams, which are currently writing into edits files.
   * Create new streams based on file getRoot()/dest.
   * @param dest new stream path relative to the storage directory root.
   * @throws IOException
   */
  synchronized void divertFileStreams(String dest) throws IOException {
    waitForSyncToFinish();

    assert getNumEditStreams() >= getNumEditsDirs() :
      "Inconsistent number of streams";
    ArrayList<EditLogOutputStream> errorStreams = null;
    EditStreamIterator itE = 
      (EditStreamIterator)getOutputStreamIterator(JournalType.FILE);
    Iterator<StorageDirectory> itD = 
      fsimage.dirIterator(NameNodeDirType.EDITS);
    while(itE.hasNext() && itD.hasNext()) {
      EditLogOutputStream eStream = itE.next();
      StorageDirectory sd = itD.next();
      if(!eStream.getName().startsWith(sd.getRoot().getPath()))
        throw new IOException("Inconsistent order of edit streams: " + eStream);
      try {
        // close old stream
        closeStream(eStream);
        // create new stream
        eStream = new EditLogFileOutputStream(new File(sd.getRoot(), dest),
            sizeOutputFlushBuffer);
        eStream.create();
        // replace by the new stream
        itE.replace(eStream);
      } catch (IOException e) {
        LOG.warn("Error in editStream " + eStream.getName(), e);
        if(errorStreams == null)
          errorStreams = new ArrayList<EditLogOutputStream>(1);
        errorStreams.add(eStream);
      }
    }
    processIOError(errorStreams, true);
  }

  /**
   * Removes the old edit log and renames edits.new to edits.
   * Reopens the edits file.
   */
  synchronized void purgeEditLog() throws IOException {
    waitForSyncToFinish();
    revertFileStreams(
        Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS_NEW.getName());
  }

  /**
   * The actual sync activity happens while not synchronized on this object.
   * Thus, synchronized activities that require that they are not concurrent
   * with file operations should wait for any running sync to finish.
   */
  synchronized void waitForSyncToFinish() {
    while (isSyncRunning) {
      try {
        wait(1000);
      } catch (InterruptedException ie) {}
    }
  }

  /**
   * Revert file streams from file edits.new back to file edits.
   * Close file streams, which are currently writing into getRoot()/source.
   * Rename getRoot()/source to edits.
   * Reopen streams so that they start writing into edits files.
   * @param source old stream path relative to the storage directory root.
   * @throws IOException
   */
  synchronized void revertFileStreams(String source) throws IOException {
    waitForSyncToFinish();

    assert getNumEditStreams() >= getNumEditsDirs() :
      "Inconsistent number of streams";
    ArrayList<EditLogOutputStream> errorStreams = null;
    EditStreamIterator itE = 
      (EditStreamIterator)getOutputStreamIterator(JournalType.FILE);
    Iterator<StorageDirectory> itD = 
      fsimage.dirIterator(NameNodeDirType.EDITS);
    while(itE.hasNext() && itD.hasNext()) {
      EditLogOutputStream eStream = itE.next();
      StorageDirectory sd = itD.next();
      if(!eStream.getName().startsWith(sd.getRoot().getPath()))
        throw new IOException("Inconsistent order of edit streams: " + eStream +
            " does not start with " + sd.getRoot().getPath());
      try {
        // close old stream
        closeStream(eStream);
        // rename edits.new to edits
        File editFile = getEditFile(sd);
        File prevEditFile = new File(sd.getRoot(), source);
        if(prevEditFile.exists()) {
          if(!prevEditFile.renameTo(editFile)) {
            //
            // renameTo() fails on Windows if the destination
            // file exists.
            //
            if(!editFile.delete() || !prevEditFile.renameTo(editFile)) {
              throw new IOException("Rename failed for " + sd.getRoot());
            }
          }
        }
        // open new stream
        eStream = new EditLogFileOutputStream(editFile, sizeOutputFlushBuffer);
        // replace by the new stream
        itE.replace(eStream);
      } catch (IOException e) {
        LOG.warn("Error in editStream " + eStream.getName(), e);
        if(errorStreams == null)
          errorStreams = new ArrayList<EditLogOutputStream>(1);
        errorStreams.add(eStream);
      }
    }
    processIOError(errorStreams, true);
  }

  /**
   * Return the name of the edit file.
   */
  synchronized File getFsEditName() {
    StorageDirectory sd = null;
    for (Iterator<StorageDirectory> it = 
           fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
      sd = it.next();
      if(sd.getRoot().canRead())
        return getEditFile(sd);
    }
    return null;
  }

  /**
   * Returns the timestamp of the edit log.
   */
  synchronized long getFsEditTime() {
    Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
    if(it.hasNext())
      return getEditFile(it.next()).lastModified();
    return 0;
  }

  /**
   * Return the txid of the last synced transaction.
   * For test use only.
   */
  synchronized long getSyncTxId() {
    return synctxid;
  }

  // sets the initial capacity of the flush buffer.
  public void setBufferCapacity(int size) {
    sizeOutputFlushBuffer = size;
  }

  boolean isEmpty() throws IOException {
    return getEditLogSize() <= 0;
  }

  /**
   * Create (or find if already exists) an edit output stream, which
   * streams journal records (edits) to the specified backup node.
   * Send a record, prescribing to start journal spool.
   * This should be sent via regular stream of journal records so that
   * the backup node knows exactly after which record it should start spooling.
   * 
   * @param bnReg the backup node registration information.
   * @param nnReg this (active) name-node registration.
   * @throws IOException
   */
  synchronized void logJSpoolStart(NamenodeRegistration bnReg, // backup node
                                   NamenodeRegistration nnReg) // active name-node
  throws IOException {
    if(bnReg.isRole(NamenodeRole.CHECKPOINT))
      return; // checkpoint node does not stream edits
    if(editStreams == null)
      editStreams = new ArrayList<EditLogOutputStream>();
    EditLogOutputStream boStream = null;
    for(EditLogOutputStream eStream : editStreams) {
      if(eStream.getName().equals(bnReg.getAddress())) {
        boStream = eStream; // already there
        break;
      }
    }
    if(boStream == null) {
      boStream = new EditLogBackupOutputStream(bnReg, nnReg);
      editStreams.add(boStream);
    }
    logEdit(Ops.OP_JSPOOL_START, (Writable[])null);
  }

  /**
   * Write an operation to the edit log. Do not sync to persistent
   * store yet.
   */
  synchronized void logEdit(int length, byte[] data) {
    if(getNumEditStreams() == 0)
      throw new java.lang.IllegalStateException(NO_JOURNAL_STREAMS_WARNING);
    ArrayList<EditLogOutputStream> errorStreams = null;
    long start = now();
    for(EditLogOutputStream eStream : editStreams) {
      try {
        eStream.write(data, 0, length);
      } catch (IOException ie) {
        LOG.warn("Error in editStream " + eStream.getName(), ie);
        if(errorStreams == null)
          errorStreams = new ArrayList<EditLogOutputStream>(1);
        errorStreams.add(eStream);
      }
    }
    processIOError(errorStreams, true);
    recordTransaction(start);
  }

  /**
   * Iterates over output streams of the given type.
   * A null type iterates over all streams.
   */
  private class EditStreamIterator implements Iterator<EditLogOutputStream> {
    JournalType type;
    int prevIndex; // for remove()
    int nextIndex; // for next()

    EditStreamIterator(JournalType streamType) {
      this.type = streamType;
      this.nextIndex = 0;
      this.prevIndex = 0;
    }

    public boolean hasNext() {
      synchronized(FSEditLog.this) {
        if(editStreams == null || 
           editStreams.isEmpty() || nextIndex >= editStreams.size())
          return false;
        while(nextIndex < editStreams.size()
              && !editStreams.get(nextIndex).getType().isOfType(type))
          nextIndex++;
        return nextIndex < editStreams.size();
      }
    }

    public EditLogOutputStream next() {
      EditLogOutputStream stream = null;
      synchronized(FSEditLog.this) {
        stream = editStreams.get(nextIndex);
        prevIndex = nextIndex;
        nextIndex++;
        while(nextIndex < editStreams.size()
              && !editStreams.get(nextIndex).getType().isOfType(type))
          nextIndex++;
      }
      return stream;
    }

    public void remove() {
      nextIndex = prevIndex; // restore previous state
      removeStream(prevIndex); // remove last returned element
      hasNext(); // reset nextIndex to correct place
    }

    void replace(EditLogOutputStream newStream) {
      synchronized (FSEditLog.this) {
        assert 0 <= prevIndex && prevIndex < editStreams.size() :
          "Index out of bound.";
        editStreams.set(prevIndex, newStream);
      }
    }
  }

  /**
   * Get a stream iterator for the specified journal type.
   */
  public Iterator<EditLogOutputStream>
  getOutputStreamIterator(JournalType streamType) {
    return new EditStreamIterator(streamType);
  }

  private void closeStream(EditLogOutputStream eStream) throws IOException {
    eStream.setReadyToFlush();
    eStream.flush();
    eStream.close();
  }

  void incrementCheckpointTime() {
    fsimage.incrementCheckpointTime();
    Writable[] args = {new LongWritable(fsimage.getCheckpointTime())};
    logEdit(Ops.OP_CHECKPOINT_TIME, args);
  }

  synchronized void releaseBackupStream(NamenodeRegistration registration) {
    Iterator<EditLogOutputStream> it =
      getOutputStreamIterator(JournalType.BACKUP);
    ArrayList<EditLogOutputStream> errorStreams = null;
    NamenodeRegistration backupNode = null;
    while(it.hasNext()) {
      EditLogBackupOutputStream eStream = (EditLogBackupOutputStream)it.next();
      backupNode = eStream.getRegistration();
      if(backupNode.getAddress().equals(registration.getAddress()) &&
          backupNode.isRole(registration.getRole())) {
        errorStreams = new ArrayList<EditLogOutputStream>(1);
        errorStreams.add(eStream);
        break;
      }
    }
    assert backupNode == null || backupNode.isRole(NamenodeRole.BACKUP) :
      "Not a backup node corresponds to a backup stream";
    processIOError(errorStreams, true);
  }

  synchronized boolean checkBackupRegistration(
      NamenodeRegistration registration) {
    Iterator<EditLogOutputStream> it =
      getOutputStreamIterator(JournalType.BACKUP);
    boolean regAllowed = !it.hasNext();
    NamenodeRegistration backupNode = null;
    ArrayList<EditLogOutputStream> errorStreams = null;
    while(it.hasNext()) {
      EditLogBackupOutputStream eStream = (EditLogBackupOutputStream)it.next();
      backupNode = eStream.getRegistration();
      if(backupNode.getAddress().equals(registration.getAddress()) &&
          backupNode.isRole(registration.getRole())) {
        regAllowed = true; // same node re-registers
        break;
      }
      if(!eStream.isAlive()) {
        if(errorStreams == null)
          errorStreams = new ArrayList<EditLogOutputStream>(1);
        errorStreams.add(eStream);
        regAllowed = true; // previous backup node failed
      }
    }
    assert backupNode == null || backupNode.isRole(NamenodeRole.BACKUP) :
      "Not a backup node corresponds to a backup stream";
    processIOError(errorStreams, true);
    return regAllowed;
  }

  static BytesWritable toBytesWritable(Options.Rename... options) {
    byte[] bytes = new byte[options.length];
    for (int i = 0; i < options.length; i++) {
      bytes[i] = options[i].value();
    }
    return new BytesWritable(bytes);
  }
}