 
                        
        
                        
        org.apache.hadoop.hbase.wal.WAL Maven / Gradle / Ivy
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.wal;
import static org.apache.commons.lang3.StringUtils.isNumeric;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost;
import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
/**
 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some
 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine
 * if they have already seen a given WAL.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface WAL extends Closeable, WALFileLengthProvider {
  /**
   * Registers WALActionsListener
   */
  void registerWALActionsListener(final WALActionsListener listener);
  /**
   * Unregisters WALActionsListener
   */
  boolean unregisterWALActionsListener(final WALActionsListener listener);
  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   * 
   * The implementation is synchronized in order to make sure there's one rollWriter running at any
   * given time.
   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
   *         {@link RegionInfo#getEncodedName()}
   */
  Map> rollWriter() throws FailedLogCloseException, IOException;
  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   * 
   * The implementation is synchronized in order to make sure there's one rollWriter running at any
   * given time. n * If true, force creation of a new writer even if no entries have been written to
   * the current writer
   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
   *         {@link RegionInfo#getEncodedName()}
   */
  Map> rollWriter(boolean force) throws IOException;
  /**
   * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits
   * are left in place in backing storage to be replayed later.
   */
  void shutdown() throws IOException;
  /**
   * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying
   * resources after this call; i.e. filesystem based WALs can archive or delete files.
   */
  @Override
  void close() throws IOException;
  /**
   * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will
   * also have transitioned through the memstore.
   * 
   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
   * sequenceid. On return key will have the region edit/sequence id filled in.
   * @param info  the regioninfo associated with append
   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
   *              sequence id that is after all currently appended edits.
   * @return Returns a 'transaction id' and key will have the region edit/sequence id
   *         in it.
   * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit)
   */
  long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
  /**
   * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a
   * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region
   * close. The difference between a 'marker' append and a 'data' append as in
   * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have transitioned
   * through the memstore.
   * 
   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
   * sequenceid. On return key will have the region edit/sequence id filled in.
   * @param info  the regioninfo associated with append
   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
   *              sequence id that is after all currently appended edits.
   * @return Returns a 'transaction id' and key will have the region edit/sequence id
   *         in it.
   * @see #appendData(RegionInfo, WALKeyImpl, WALEdit)
   */
  long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
  /**
   * updates the seuence number of a specific store. depending on the flag: replaces current seq
   * number if the given seq id is bigger, or even if it is lower than existing one
   */
  void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid,
    boolean onlyIfGreater);
  /**
   * Sync what we have in the WAL.
   */
  void sync() throws IOException;
  /**
   * Sync the WAL if the txId was not already sync'd.
   * @param txid Transaction id to sync to.
   */
  void sync(long txid) throws IOException;
  /**
   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
   *                  vs hsync.
   */
  default void sync(boolean forceSync) throws IOException {
    sync();
  }
  /**
   * @param txid      Transaction id to sync to.
   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
   *                  vs hsync.
   */
  default void sync(long txid, boolean forceSync) throws IOException {
    sync(txid);
  }
  /**
   * WAL keeps track of the sequence numbers that are as yet not flushed im memstores in order to be
   * able to do accounting to figure which WALs can be let go. This method tells WAL that some
   * region is about to flush. The flush can be the whole region or for a column family of the
   * region only.
   * 
   * Currently, it is expected that the update lock is held for the region; i.e. no concurrent
   * appends while we set up cache flush.
   * @param families Families to flush. May be a subset of all families in the region.
   * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are
   *         flushing a subset of all families but there are no edits in those families not being
   *         flushed; in other words, this is effectively same as a flush of all of the region
   *         though we were passed a subset of regions. Otherwise, it returns the sequence id of the
   *         oldest/lowest outstanding edit.
   * @see #completeCacheFlush(byte[], long)
   * @see #abortCacheFlush(byte[])
   */
  Long startCacheFlush(final byte[] encodedRegionName, Set families);
  Long startCacheFlush(final byte[] encodedRegionName, Map familyToSeq);
  /**
   * Complete the cache flush.
   * @param encodedRegionName Encoded region name.
   * @param maxFlushedSeqId   The maxFlushedSeqId for this flush. There is no edit in memory that is
   *                          less that this sequence id.
   * @see #startCacheFlush(byte[], Set)
   * @see #abortCacheFlush(byte[])
   */
  void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId);
  /**
   * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush
   * currently is a restart of the regionserver so the snapshot content dropped by the failure gets
   * restored to the memstore.
   * @param encodedRegionName Encoded region name.
   */
  void abortCacheFlush(byte[] encodedRegionName);
  /**
   * @return Coprocessor host.
   */
  WALCoprocessorHost getCoprocessorHost();
  /**
   * Gets the earliest unflushed sequence id in the memstore for the region.
   * @param encodedRegionName The region to get the number for.
   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
   * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal
   *             workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])}
   */
  @Deprecated
  long getEarliestMemStoreSeqNum(byte[] encodedRegionName);
  /**
   * Gets the earliest unflushed sequence id in the memstore for the store.
   * @param encodedRegionName The region to get the number for.
   * @param familyName        The family to get the number for.
   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
   */
  long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName);
  /**
   * Human readable identifying information about the state of this WAL. Implementors are encouraged
   * to include information appropriate for debugging. Consumers are advised not to rely on the
   * details of the returned String; it does not have a defined structure.
   */
  @Override
  String toString();
  /**
   * When outside clients need to consume persisted WALs, they rely on a provided Reader.
   */
  interface Reader extends Closeable {
    Entry next() throws IOException;
    Entry next(Entry reuse) throws IOException;
    void seek(long pos) throws IOException;
    long getPosition() throws IOException;
    void reset() throws IOException;
  }
  /**
   * Utility class that lets us keep track of the edit with it's key.
   */
  class Entry {
    private final WALEdit edit;
    private final WALKeyImpl key;
    public Entry() {
      this(new WALKeyImpl(), new WALEdit());
    }
    /**
     * Constructor for both params
     * @param edit log's edit
     * @param key  log's key
     */
    public Entry(WALKeyImpl key, WALEdit edit) {
      this.key = key;
      this.edit = edit;
    }
    /**
     * Gets the edit n
     */
    public WALEdit getEdit() {
      return edit;
    }
    /**
     * Gets the key n
     */
    public WALKeyImpl getKey() {
      return key;
    }
    /**
     * Set compression context for this entry. n * Compression context
     * @deprecated deparcated since hbase 2.1.0
     */
    @Deprecated
    public void setCompressionContext(CompressionContext compressionContext) {
      key.setCompressionContext(compressionContext);
    }
    @Override
    public String toString() {
      return this.key + "=" + this.edit;
    }
  }
  /**
   * Split a WAL filename to get a start time. WALs usually have the time we start writing to them
   * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it
   * is a WAL for the meta table. For example, WALs might look like this
   * 10.20.20.171%3A60020.1277499063250 where 1277499063250 is the
   * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a synchronous replication
   * WAL which adds a '.syncrep' suffix. Check for these. File also may have no timestamp on it. For
   * example the recovered.edits files are WALs but are named in ascending order. Here is an
   * example: 0000000000000016310. Allow for this.
   * @param name Name of the WAL file.
   * @return Timestamp or -1.
   */
  public static long getTimestamp(String name) {
    String[] splits = name.split("\\.");
    if (splits.length <= 1) {
      return -1;
    }
    String timestamp = splits[splits.length - 1];
    if (!isNumeric(timestamp)) {
      // Its a '.meta' or a '.syncrep' suffix.
      timestamp = splits[splits.length - 2];
      if (!isNumeric(timestamp)) {
        return -1;
      }
    }
    return Long.parseLong(timestamp);
  }
}