
org.apache.hadoop.hbase.regionserver.wal.FSHLog Maven / Gradle / Ivy

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.DrainBarrier;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.util.StringUtils;
import org.cloudera.htrace.Trace;
import org.cloudera.htrace.TraceScope;

import com.google.common.annotations.VisibleForTesting;

/**
 * HLog stores all the edits to the HStore. It's the hbase write-ahead-log
 * implementation.
 *
 * It performs logfile-rolling, so external callers are not aware that the
 * underlying file is being rolled.
 *
 *
 * <p>
 * There is one HLog per RegionServer. All edits for all Regions carried by
 * a particular RegionServer are entered first in the HLog.
 *
 * <p>
 * Each HRegion is identified by a unique long int. HRegions do
 * not need to declare themselves before using the HLog; they simply include
 * their HRegion-id in the append or
 * completeCacheFlush calls.
 *
 * <p>
 * An HLog consists of multiple on-disk files, which have a chronological order.
 * As data is flushed to other (better) on-disk structures, the log becomes
 * obsolete. We can destroy all the log messages for a given HRegion-id up to
 * the most-recent CACHEFLUSH message from that HRegion.
 *
 * <p>
 * It's only practical to delete entire files. Thus, we delete an entire on-disk
 * file F when all of the messages in F have a log-sequence-id that's older
 * (smaller) than the most-recent CACHEFLUSH message for every HRegion that has
 * a message in F.
 *
 * <p>
 * Synchronized methods can never execute in parallel. However, between the
 * start of a cache flush and the completion point, appends are allowed but log
 * rolling is not. To prevent log rolling taking place during this period, a
 * separate reentrant lock is used.
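 *
 * <p>
 * (Editorial note, not part of the original Apache javadoc: the snippet below is a minimal
 * sketch of the append-then-sync pattern described above, using the {@code appendNoSync(...)}
 * and {@code sync(long)} methods declared later in this class. Variable names and argument
 * values are illustrative only.)
 * <pre>
 *   // hlog is an FSHLog owned by the region server; sequenceId belongs to the region
 *   long txid = hlog.appendNoSync(regionInfo, tableName, walEdit, clusterIds,
 *       EnvironmentEdgeManager.currentTimeMillis(), tableDescriptor, sequenceId,
 *       true, HConstants.NO_NONCE, HConstants.NO_NONCE);
 *   hlog.sync(txid);   // blocks until the edit is durable on HDFS
 * </pre>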
 *
 * <p>To read an HLog, call {@link HLogFactory#createReader(org.apache.hadoop.fs.FileSystem,
 * org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration)}.
 *
 */
@InterfaceAudience.Private
class FSHLog implements HLog, Syncable {
  static final Log LOG = LogFactory.getLog(FSHLog.class);

  private static final int DEFAULT_SLOW_SYNC_TIME_MS = 100; // in ms

  private final FileSystem fs;
  private final Path rootDir;
  private final Path dir;
  private final Configuration conf;
  // Listeners that are called on WAL events.
  private List<WALActionsListener> listeners =
    new CopyOnWriteArrayList<WALActionsListener>();
  private final long blocksize;
  private final String prefix;
  private final AtomicLong unflushedEntries = new AtomicLong(0);
  private final AtomicLong syncedTillHere = new AtomicLong(0);
  private long lastUnSyncedTxid;
  private final Path oldLogDir;

  // all writes pending on AsyncWriter/AsyncSyncer thread with
  // txid <= failedTxid will fail by throwing asyncIOE
  private final AtomicLong failedTxid = new AtomicLong(-1);
  private volatile IOException asyncIOE = null;

  private WALCoprocessorHost coprocessorHost;

  private FSDataOutputStream hdfs_out; // FSDataOutputStream associated with the current SequenceFile.writer
  // Minimum tolerable replicas, if the actual value is lower than it,
  // rollWriter will be triggered
  private int minTolerableReplication;
  private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
  private final Method getPipeLine; // refers to DFSOutputStream.getPipeLine
  private final int slowSyncNs;
  final static Object [] NO_ARGS = new Object []{};

  /** The barrier used to ensure that close() waits for all log rolls and flushes to finish. */
  private DrainBarrier closeBarrier = new DrainBarrier();

  /**
   * Current log file.
   */
  Writer writer;

  /**
   * This lock synchronizes all operations on oldestUnflushedSeqNums and oldestFlushingSeqNums,
   * with the exception of append's putIfAbsent into oldestUnflushedSeqNums.
   * We only use these to find out the low bound seqNum, or to find regions with old seqNums to
   * force flush them, so we don't care about these numbers messing with anything.
   */
  private final Object oldestSeqNumsLock = new Object();

  /**
   * This lock makes sure only one log roll runs at the same time. Should not be taken while
   * any other lock is held. We don't just use synchronized because that results in bogus and
   * tedious findbugs warning when it thinks synchronized controls writer thread safety
   */
  private final ReentrantLock rollWriterLock = new ReentrantLock(true);

  /**
   * Map of encoded region names to their most recent sequence/edit id in their memstore.
   */
  private final ConcurrentSkipListMap<byte[], Long> oldestUnflushedSeqNums =
    new ConcurrentSkipListMap<byte[], Long>(Bytes.BYTES_COMPARATOR);

  /**
   * Map of encoded region names to their most recent sequence/edit id in their memstore;
   * contains the regions that are currently flushing. That way we can store two numbers for
   * flushing and non-flushing (oldestUnflushedSeqNums) memstore for the same region.
   */
  private final Map<byte[], Long> oldestFlushingSeqNums =
    new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);

  private volatile boolean closed = false;

  private boolean forMeta = false;

  // The timestamp (in ms) when the log file was created.
  private volatile long filenum = -1;

  // number of transactions in the current Hlog.
  private final AtomicInteger numEntries = new AtomicInteger(0);

  // If live datanode count is lower than the default replicas value,
  // RollWriter will be triggered in each sync (so the RollWriter will be
  // triggered one by one in a short time). Using it as a workaround to slow
  // down the roll frequency triggered by checkLowReplication().
  private AtomicInteger consecutiveLogRolls = new AtomicInteger(0);
  private final int lowReplicationRollLimit;

  // If consecutiveLogRolls is larger than lowReplicationRollLimit,
  // then disable the rolling in checkLowReplication().
  // Enable it if the replications recover.
  private volatile boolean lowReplicationRollEnabled = true;

  // If > than this size, roll the log. This is typically 0.95 times the size
  // of the default Hdfs block size.
  private final long logrollsize;

  /** size of current log */
  private long curLogSize = 0;

  /**
   * The total size of hlog
   */
  private AtomicLong totalLogSize = new AtomicLong(0);

  // We synchronize on updateLock to prevent updates and to prevent a log roll
  // during an update
  // locked during appends
  private final Object updateLock = new Object();
  private final Object pendingWritesLock = new Object();

  private final boolean enabled;

  /*
   * If more than this many logs, force flush of oldest region to oldest edit
   * goes to disk. If too many and we crash, then will take forever replaying.
   * Keep the number of logs tidy.
   */
  private final int maxLogs;

  // List of pending writes to the HLog. These correspond to transactions
  // that have not yet returned to the client. We keep them cached here
  // instead of writing them to HDFS piecemeal. The goal is to increase
  // the batchsize for writing-to-hdfs as well as sync-to-hdfs, so that
  // we can get better system throughput.
  private List<Entry> pendingWrites = new LinkedList<Entry>();

  private final AsyncWriter asyncWriter;

  // since AsyncSyncer takes much longer than the other phases (add WALEdits to local
  // buffer, write local buffer to HDFS, notify pending write handler threads),
  // when a sync is ongoing all other phases pend, so we use multiple parallel
  // AsyncSyncer threads to improve overall throughput.
  private final AsyncSyncer[] asyncSyncers;

  private final AsyncNotifier asyncNotifier;

  /** Number of log close errors tolerated before we abort */
  private final int closeErrorsTolerated;

  private final AtomicInteger closeErrorCount = new AtomicInteger();
  private final MetricsWAL metrics;

  /**
   * Map of region encoded names to the latest sequence num obtained from them while appending
   * WALEdits to the wal. We create one map for each WAL file at the time it is rolled.
   * <p>
   * When deciding whether to archive a WAL file, we compare the sequence IDs in this map to
   * {@link #oldestFlushingSeqNums} and {@link #oldestUnflushedSeqNums}.
   * See {@link FSHLog#areAllRegionsFlushed(Map, Map, Map)} for more info.
   * <p>
* This map uses byte[] as the key, and uses reference equality. It works in our use case as we * use {@link HRegionInfo#getEncodedNameAsBytes()} as keys. For a given region, it always returns * the same array. */ private Map latestSequenceNums = new HashMap(); /** * WAL Comparator; it compares the timestamp (log filenum), present in the log file name. */ public final Comparator LOG_NAME_COMPARATOR = new Comparator() { @Override public int compare(Path o1, Path o2) { long t1 = getFileNumFromFileName(o1); long t2 = getFileNumFromFileName(o2); if (t1 == t2) return 0; return (t1 > t2) ? 1 : -1; } }; /** * Map of log file to the latest sequence nums of all regions it has entries of. * The map is sorted by the log file creation timestamp (contained in the log file name). */ private NavigableMap> hlogSequenceNums = new ConcurrentSkipListMap>(LOG_NAME_COMPARATOR); /** * Constructor. * * @param fs filesystem handle * @param root path for stored and archived hlogs * @param logDir dir where hlogs are stored * @param conf configuration to use * @throws IOException */ public FSHLog(final FileSystem fs, final Path root, final String logDir, final Configuration conf) throws IOException { this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME, conf, null, true, null, false); } /** * Constructor. * * @param fs filesystem handle * @param root path for stored and archived hlogs * @param logDir dir where hlogs are stored * @param oldLogDir dir where hlogs are archived * @param conf configuration to use * @throws IOException */ public FSHLog(final FileSystem fs, final Path root, final String logDir, final String oldLogDir, final Configuration conf) throws IOException { this(fs, root, logDir, oldLogDir, conf, null, true, null, false); } /** * Create an edit log at the given dir location. * * You should never have to load an existing log. If there is a log at * startup, it should have already been processed and deleted by the time the * HLog object is started up. * * @param fs filesystem handle * @param root path for stored and archived hlogs * @param logDir dir where hlogs are stored * @param conf configuration to use * @param listeners Listeners on WAL events. Listeners passed here will * be registered before we do anything else; e.g. the * Constructor {@link #rollWriter()}. * @param prefix should always be hostname and port in distributed env and * it will be URL encoded before being used. * If prefix is null, "hlog" will be used * @throws IOException */ public FSHLog(final FileSystem fs, final Path root, final String logDir, final Configuration conf, final List listeners, final String prefix) throws IOException { this(fs, root, logDir, HConstants.HREGION_OLDLOGDIR_NAME, conf, listeners, true, prefix, false); } /** * Create an edit log at the given dir location. * * You should never have to load an existing log. If there is a log at * startup, it should have already been processed and deleted by the time the * HLog object is started up. * * @param fs filesystem handle * @param root path to where logs and oldlogs * @param logDir dir where hlogs are stored * @param oldLogDir dir where hlogs are archived * @param conf configuration to use * @param listeners Listeners on WAL events. Listeners passed here will * be registered before we do anything else; e.g. the * Constructor {@link #rollWriter()}. * @param failIfLogDirExists If true IOException will be thrown if dir already exists. * @param prefix should always be hostname and port in distributed env and * it will be URL encoded before being used. 
* If prefix is null, "hlog" will be used * @param forMeta if this hlog is meant for meta updates * @throws IOException */ public FSHLog(final FileSystem fs, final Path root, final String logDir, final String oldLogDir, final Configuration conf, final List listeners, final boolean failIfLogDirExists, final String prefix, boolean forMeta) throws IOException { super(); this.fs = fs; this.rootDir = root; this.dir = new Path(this.rootDir, logDir); this.oldLogDir = new Path(this.rootDir, oldLogDir); this.forMeta = forMeta; this.conf = conf; if (listeners != null) { for (WALActionsListener i: listeners) { registerWALActionsListener(i); } } this.blocksize = this.conf.getLong("hbase.regionserver.hlog.blocksize", FSUtils.getDefaultBlockSize(this.fs, this.dir)); // Roll at 95% of block size. float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f); this.logrollsize = (long)(this.blocksize * multi); this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32); this.minTolerableReplication = conf.getInt( "hbase.regionserver.hlog.tolerable.lowreplication", FSUtils.getDefaultReplication(fs, this.dir)); this.lowReplicationRollLimit = conf.getInt( "hbase.regionserver.hlog.lowreplication.rolllimit", 5); this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true); this.closeErrorsTolerated = conf.getInt( "hbase.regionserver.logroll.errors.tolerated", 0); LOG.info("WAL/HLog configuration: blocksize=" + StringUtils.byteDesc(this.blocksize) + ", rollsize=" + StringUtils.byteDesc(this.logrollsize) + ", enabled=" + this.enabled); // If prefix is null||empty then just name it hlog this.prefix = prefix == null || prefix.isEmpty() ? "hlog" : URLEncoder.encode(prefix, "UTF8"); boolean dirExists = false; if (failIfLogDirExists && (dirExists = this.fs.exists(dir))) { throw new IOException("Target HLog directory already exists: " + dir); } if (!dirExists && !fs.mkdirs(dir)) { throw new IOException("Unable to mkdir " + dir); } if (!fs.exists(this.oldLogDir)) { if (!fs.mkdirs(this.oldLogDir)) { throw new IOException("Unable to mkdir " + this.oldLogDir); } } // rollWriter sets this.hdfs_out if it can. rollWriter(); this.slowSyncNs = 1000000 * conf.getInt("hbase.regionserver.hlog.slowsync.ms", DEFAULT_SLOW_SYNC_TIME_MS); // handle the reflection necessary to call getNumCurrentReplicas() this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out); this.getPipeLine = getGetPipeline(this.hdfs_out); final String n = Thread.currentThread().getName(); asyncWriter = new AsyncWriter(n + "-WAL.AsyncWriter"); asyncWriter.start(); int syncerNums = conf.getInt("hbase.hlog.asyncer.number", 5); asyncSyncers = new AsyncSyncer[syncerNums]; for (int i = 0; i < asyncSyncers.length; ++i) { asyncSyncers[i] = new AsyncSyncer(n + "-WAL.AsyncSyncer" + i); asyncSyncers[i].start(); } asyncNotifier = new AsyncNotifier(n + "-WAL.AsyncNotifier"); asyncNotifier.start(); coprocessorHost = new WALCoprocessorHost(this, conf); this.metrics = new MetricsWAL(); registerWALActionsListener(metrics); } /** * Find the 'getNumCurrentReplicas' on the passed os stream. * @return Method or null. 
*/ private Method getGetNumCurrentReplicas(final FSDataOutputStream os) { Method m = null; if (os != null) { Class wrappedStreamClass = os.getWrappedStream() .getClass(); try { m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas", new Class[] {}); m.setAccessible(true); } catch (NoSuchMethodException e) { LOG.info("FileSystem's output stream doesn't support" + " getNumCurrentReplicas; --HDFS-826 not available; fsOut=" + wrappedStreamClass.getName()); } catch (SecurityException e) { LOG.info("Doesn't have access to getNumCurrentReplicas on " + "FileSystems's output stream --HDFS-826 not available; fsOut=" + wrappedStreamClass.getName(), e); m = null; // could happen on setAccessible() } } if (m != null) { if (LOG.isTraceEnabled()) LOG.trace("Using getNumCurrentReplicas--HDFS-826"); } return m; } @Override public void registerWALActionsListener(final WALActionsListener listener) { this.listeners.add(listener); } @Override public boolean unregisterWALActionsListener(final WALActionsListener listener) { return this.listeners.remove(listener); } @Override public long getFilenum() { return this.filenum; } /** * Method used internal to this class and for tests only. * @return The wrapped stream our writer is using; its not the * writer's 'out' FSDatoOutputStream but the stream that this 'out' wraps * (In hdfs its an instance of DFSDataOutputStream). * * usage: see TestLogRolling.java */ OutputStream getOutputStream() { return this.hdfs_out.getWrappedStream(); } @Override public byte [][] rollWriter() throws FailedLogCloseException, IOException { return rollWriter(false); } @Override public byte [][] rollWriter(boolean force) throws FailedLogCloseException, IOException { rollWriterLock.lock(); try { // Return if nothing to flush. if (!force && this.writer != null && this.numEntries.get() <= 0) { return null; } byte [][] regionsToFlush = null; if (closed) { LOG.debug("HLog closed. Skipping rolling of writer"); return null; } try { if (!closeBarrier.beginOp()) { LOG.debug("HLog closing. Skipping rolling of writer"); return regionsToFlush; } // Do all the preparation outside of the updateLock to block // as less as possible the incoming writes long currentFilenum = this.filenum; Path oldPath = null; if (currentFilenum > 0) { //computeFilename will take care of meta hlog filename oldPath = computeFilename(currentFilenum); } this.filenum = System.currentTimeMillis(); Path newPath = computeFilename(); while (fs.exists(newPath)) { this.filenum++; newPath = computeFilename(); } // Tell our listeners that a new log is about to be created if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.preLogRoll(oldPath, newPath); } } FSHLog.Writer nextWriter = this.createWriterInstance(fs, newPath, conf); // Can we get at the dfsclient outputstream? FSDataOutputStream nextHdfsOut = null; if (nextWriter instanceof ProtobufLogWriter) { nextHdfsOut = ((ProtobufLogWriter)nextWriter).getStream(); // perform the costly sync before we get the lock to roll writers. try { nextWriter.sync(); } catch (IOException e) { // optimization failed, no need to abort here. LOG.warn("pre-sync failed", e); } } Path oldFile = null; int oldNumEntries = 0; synchronized (updateLock) { // Clean up current writer. 
oldNumEntries = this.numEntries.get(); oldFile = cleanupCurrentWriter(currentFilenum); this.writer = nextWriter; this.hdfs_out = nextHdfsOut; this.numEntries.set(0); if (oldFile != null) { this.hlogSequenceNums.put(oldFile, this.latestSequenceNums); this.latestSequenceNums = new HashMap(); } } if (oldFile == null) LOG.info("New WAL " + FSUtils.getPath(newPath)); else { long oldFileLen = this.fs.getFileStatus(oldFile).getLen(); this.totalLogSize.addAndGet(oldFileLen); LOG.info("Rolled WAL " + FSUtils.getPath(oldFile) + " with entries=" + oldNumEntries + ", filesize=" + StringUtils.humanReadableInt(oldFileLen) + "; new WAL " + FSUtils.getPath(newPath)); } // Tell our listeners that a new log was created if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.postLogRoll(oldPath, newPath); } } // Can we delete any of the old log files? if (getNumRolledLogFiles() > 0) { cleanOldLogs(); regionsToFlush = findRegionsToForceFlush(); } } finally { closeBarrier.endOp(); } return regionsToFlush; } finally { rollWriterLock.unlock(); } } /** * This method allows subclasses to inject different writers without having to * extend other methods like rollWriter(). * * @param fs * @param path * @param conf * @return Writer instance * @throws IOException */ protected Writer createWriterInstance(final FileSystem fs, final Path path, final Configuration conf) throws IOException { if (forMeta) { //TODO: set a higher replication for the hlog files (HBASE-6773) } return HLogFactory.createWALWriter(fs, path, conf); } /** * Archive old logs that could be archived: a log is eligible for archiving if all its WALEdits * are already flushed by the corresponding regions. *
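 * <p>
 * (Editorial illustration, not part of the original javadoc, with invented numbers: suppose a
 * rolled file's region-to-sequenceId map records region R at sequence id 120. If R's entries in
 * {@link #oldestFlushingSeqNums} and {@link #oldestUnflushedSeqNums} are now both greater than
 * 120, or absent, then nothing unflushed for R remains in that file, so R no longer blocks its
 * archiving.)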

* For each log file, it compares its region to sequenceId map * (@link {@link FSHLog#latestSequenceNums} with corresponding region entries in * {@link FSHLog#oldestFlushingSeqNums} and {@link FSHLog#oldestUnflushedSeqNums}. * If all the regions in the map are flushed past of their value, then the wal is eligible for * archiving. * @throws IOException */ private void cleanOldLogs() throws IOException { Map oldestFlushingSeqNumsLocal = null; Map oldestUnflushedSeqNumsLocal = null; List logsToArchive = new ArrayList(); // make a local copy so as to avoid locking when we iterate over these maps. synchronized (oldestSeqNumsLock) { oldestFlushingSeqNumsLocal = new HashMap(this.oldestFlushingSeqNums); oldestUnflushedSeqNumsLocal = new HashMap(this.oldestUnflushedSeqNums); } for (Map.Entry> e : hlogSequenceNums.entrySet()) { // iterate over the log file. Path log = e.getKey(); Map sequenceNums = e.getValue(); // iterate over the map for this log file, and tell whether it should be archive or not. if (areAllRegionsFlushed(sequenceNums, oldestFlushingSeqNumsLocal, oldestUnflushedSeqNumsLocal)) { logsToArchive.add(log); LOG.debug("log file is ready for archiving " + log); } } for (Path p : logsToArchive) { this.totalLogSize.addAndGet(-this.fs.getFileStatus(p).getLen()); archiveLogFile(p); this.hlogSequenceNums.remove(p); } } /** * Takes a region:sequenceId map for a WAL file, and checks whether the file can be archived. * It compares the region entries present in the passed sequenceNums map with the local copy of * {@link #oldestUnflushedSeqNums} and {@link #oldestFlushingSeqNums}. If, for all regions, * the value is lesser than the minimum of values present in the oldestFlushing/UnflushedSeqNums, * then the wal file is eligible for archiving. * @param sequenceNums for a HLog, at the time when it was rolled. * @param oldestFlushingMap * @param oldestUnflushedMap * @return true if wal is eligible for archiving, false otherwise. */ static boolean areAllRegionsFlushed(Map sequenceNums, Map oldestFlushingMap, Map oldestUnflushedMap) { for (Map.Entry regionSeqIdEntry : sequenceNums.entrySet()) { // find region entries in the flushing/unflushed map. If there is no entry, it means // a region doesn't have any unflushed entry. long oldestFlushing = oldestFlushingMap.containsKey(regionSeqIdEntry.getKey()) ? oldestFlushingMap.get(regionSeqIdEntry.getKey()) : Long.MAX_VALUE; long oldestUnFlushed = oldestUnflushedMap.containsKey(regionSeqIdEntry.getKey()) ? oldestUnflushedMap.get(regionSeqIdEntry.getKey()) : Long.MAX_VALUE; // do a minimum to be sure to contain oldest sequence Id long minSeqNum = Math.min(oldestFlushing, oldestUnFlushed); if (minSeqNum <= regionSeqIdEntry.getValue()) return false;// can't archive } return true; } /** * Iterates over the given map of regions, and compares their sequence numbers with corresponding * entries in {@link #oldestUnflushedSeqNums}. If the sequence number is greater or equal, the * region is eligible to flush, otherwise, there is no benefit to flush (from the perspective of * passed regionsSequenceNums map), because the region has already flushed the entries present * in the WAL file for which this method is called for (typically, the oldest wal file). * @param regionsSequenceNums * @return regions which should be flushed (whose sequence numbers are larger than their * corresponding un-flushed entries. 
*/ private byte[][] findEligibleMemstoresToFlush(Map regionsSequenceNums) { List regionsToFlush = null; // Keeping the old behavior of iterating unflushedSeqNums under oldestSeqNumsLock. synchronized (oldestSeqNumsLock) { for (Map.Entry e : regionsSequenceNums.entrySet()) { Long unFlushedVal = this.oldestUnflushedSeqNums.get(e.getKey()); if (unFlushedVal != null && unFlushedVal <= e.getValue()) { if (regionsToFlush == null) regionsToFlush = new ArrayList(); regionsToFlush.add(e.getKey()); } } } return regionsToFlush == null ? null : regionsToFlush .toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY }); } /** * If the number of un-archived WAL files is greater than maximum allowed, it checks * the first (oldest) WAL file, and returns the regions which should be flushed so that it could * be archived. * @return regions to flush in order to archive oldest wal file. * @throws IOException */ byte[][] findRegionsToForceFlush() throws IOException { byte [][] regions = null; int logCount = getNumRolledLogFiles(); if (logCount > this.maxLogs && logCount > 0) { Map.Entry> firstWALEntry = this.hlogSequenceNums.firstEntry(); regions = findEligibleMemstoresToFlush(firstWALEntry.getValue()); } if (regions != null) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < regions.length; i++) { if (i > 0) sb.append(", "); sb.append(Bytes.toStringBinary(regions[i])); } LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" + this.maxLogs + "; forcing flush of " + regions.length + " regions(s): " + sb.toString()); } return regions; } /* * Cleans up current writer closing. * Presumes we're operating inside an updateLock scope. * @return Path to current writer or null if none. * @throws IOException */ Path cleanupCurrentWriter(final long currentfilenum) throws IOException { Path oldFile = null; if (this.writer != null) { // Close the current writer, get a new one. try { // Wait till all current transactions are written to the hlog. // No new transactions can occur because we have the updatelock. if (this.unflushedEntries.get() != this.syncedTillHere.get()) { LOG.debug("cleanupCurrentWriter " + " waiting for transactions to get synced " + " total " + this.unflushedEntries.get() + " synced till here " + this.syncedTillHere.get()); sync(); } this.writer.close(); this.writer = null; closeErrorCount.set(0); } catch (IOException e) { LOG.error("Failed close of HLog writer", e); int errors = closeErrorCount.incrementAndGet(); if (errors <= closeErrorsTolerated && !hasUnSyncedEntries()) { LOG.warn("Riding over HLog close failure! error count="+errors); } else { if (hasUnSyncedEntries()) { LOG.error("Aborting due to unflushed edits in HLog"); } // Failed close of log file. Means we're losing edits. For now, // shut ourselves down to minimize loss. Alternative is to try and // keep going. See HBASE-930. FailedLogCloseException flce = new FailedLogCloseException("#" + currentfilenum); flce.initCause(e); throw flce; } } if (currentfilenum >= 0) { oldFile = computeFilename(currentfilenum); } } return oldFile; } private void archiveLogFile(final Path p) throws IOException { Path newPath = getHLogArchivePath(this.oldLogDir, p); // Tell our listeners that a log is going to be archived. if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.preLogArchive(p, newPath); } } if (!FSUtils.renameAndSetModifyTime(this.fs, p, newPath)) { throw new IOException("Unable to rename " + p + " to " + newPath); } // Tell our listeners that a log has been archived. 
if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.postLogArchive(p, newPath); } } } /** * This is a convenience method that computes a new filename with a given * using the current HLog file-number * @return Path */ protected Path computeFilename() { return computeFilename(this.filenum); } /** * This is a convenience method that computes a new filename with a given * file-number. * @param filenum to use * @return Path */ protected Path computeFilename(long filenum) { if (filenum < 0) { throw new RuntimeException("hlog file number can't be < 0"); } String child = prefix + "." + filenum; if (forMeta) { child += HLog.META_HLOG_FILE_EXTN; } return new Path(dir, child); } /** * A log file has a creation timestamp (in ms) in its file name ({@link #filenum}. * This helper method returns the creation timestamp from a given log file. * It extracts the timestamp assuming the filename is created with the * {@link #computeFilename(long filenum)} method. * @param fileName * @return timestamp, as in the log file name. */ protected long getFileNumFromFileName(Path fileName) { if (fileName == null) throw new IllegalArgumentException("file name can't be null"); // The path should start with dir/. String prefixPathStr = new Path(dir, prefix + ".").toString(); if (!fileName.toString().startsWith(prefixPathStr)) { throw new IllegalArgumentException("The log file " + fileName + " doesn't belong to" + " this regionserver " + prefixPathStr); } String chompedPath = fileName.toString().substring(prefixPathStr.length()); if (forMeta) chompedPath = chompedPath.substring(0, chompedPath.indexOf(META_HLOG_FILE_EXTN)); return Long.parseLong(chompedPath); } @Override public void closeAndDelete() throws IOException { close(); if (!fs.exists(this.dir)) return; FileStatus[] files = fs.listStatus(this.dir); if (files != null) { for(FileStatus file : files) { Path p = getHLogArchivePath(this.oldLogDir, file.getPath()); // Tell our listeners that a log is going to be archived. if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.preLogArchive(file.getPath(), p); } } if (!FSUtils.renameAndSetModifyTime(fs, file.getPath(), p)) { throw new IOException("Unable to rename " + file.getPath() + " to " + p); } // Tell our listeners that a log was archived. if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.postLogArchive(file.getPath(), p); } } } LOG.debug("Moved " + files.length + " WAL file(s) to " + FSUtils.getPath(this.oldLogDir)); } if (!fs.delete(dir, true)) { LOG.info("Unable to delete " + dir); } } @Override public void close() throws IOException { if (this.closed) { return; } try { asyncNotifier.interrupt(); asyncNotifier.join(); } catch (InterruptedException e) { LOG.error("Exception while waiting for " + asyncNotifier.getName() + " threads to die", e); } for (int i = 0; i < asyncSyncers.length; ++i) { try { asyncSyncers[i].interrupt(); asyncSyncers[i].join(); } catch (InterruptedException e) { LOG.error("Exception while waiting for " + asyncSyncers[i].getName() + " threads to die", e); } } try { asyncWriter.interrupt(); asyncWriter.join(); } catch (InterruptedException e) { LOG.error("Exception while waiting for " + asyncWriter.getName() + " thread to die", e); } try { // Prevent all further flushing and rolling. 
closeBarrier.stopAndDrainOps(); } catch (InterruptedException e) { LOG.error("Exception while waiting for cache flushes and log rolls", e); Thread.currentThread().interrupt(); } // Tell our listeners that the log is closing if (!this.listeners.isEmpty()) { for (WALActionsListener i : this.listeners) { i.logCloseRequested(); } } synchronized (updateLock) { this.closed = true; if (LOG.isDebugEnabled()) { LOG.debug("Closing WAL writer in " + this.dir.toString()); } if (this.writer != null) { this.writer.close(); this.writer = null; } } } /** * @param now * @param encodedRegionName Encoded name of the region as returned by * HRegionInfo#getEncodedNameAsBytes(). * @param tableName * @param clusterIds that have consumed the change * @return New log key. */ protected HLogKey makeKey(byte[] encodedRegionName, TableName tableName, long seqnum, long now, List clusterIds, long nonceGroup, long nonce) { return new HLogKey(encodedRegionName, tableName, seqnum, now, clusterIds, nonceGroup, nonce); } @Override @VisibleForTesting public void append(HRegionInfo info, TableName tableName, WALEdit edits, final long now, HTableDescriptor htd, AtomicLong sequenceId) throws IOException { append(info, tableName, edits, new ArrayList(), now, htd, true, true, sequenceId, HConstants.NO_NONCE, HConstants.NO_NONCE); } /** * Append a set of edits to the log. Log edits are keyed by (encoded) * regionName, rowname, and log-sequence-id. * * Later, if we sort by these keys, we obtain all the relevant edits for a * given key-range of the HRegion (TODO). Any edits that do not have a * matching COMPLETE_CACHEFLUSH message can be discarded. * *

* Logs cannot be restarted once closed, or once the HLog process dies. Each * time the HLog starts, it must create a new log. This means that other * systems should process the log appropriately upon each startup (and prior * to initializing HLog). * * synchronized prevents appends during the completion of a cache flush or for * the duration of a log roll. * * @param info * @param tableName * @param edits * @param clusterIds that have consumed the change (for replication) * @param now * @param doSync shall we sync? * @param sequenceId of the region. * @return txid of this transaction * @throws IOException */ @SuppressWarnings("deprecation") private long append(HRegionInfo info, TableName tableName, WALEdit edits, List clusterIds, final long now, HTableDescriptor htd, boolean doSync, boolean isInMemstore, AtomicLong sequenceId, long nonceGroup, long nonce) throws IOException { if (edits.isEmpty()) return this.unflushedEntries.get(); if (this.closed) { throw new IOException("Cannot append; log is closed"); } TraceScope traceScope = Trace.startSpan("FSHlog.append"); try { long txid = 0; synchronized (this.updateLock) { // get the sequence number from the passed Long. In normal flow, it is coming from the // region. long seqNum = sequenceId.incrementAndGet(); // The 'lastSeqWritten' map holds the sequence number of the oldest // write for each region (i.e. the first edit added to the particular // memstore). . When the cache is flushed, the entry for the // region being flushed is removed if the sequence number of the flush // is greater than or equal to the value in lastSeqWritten. // Use encoded name. Its shorter, guaranteed unique and a subset of // actual name. byte [] encodedRegionName = info.getEncodedNameAsBytes(); if (isInMemstore) this.oldestUnflushedSeqNums.putIfAbsent(encodedRegionName, seqNum); HLogKey logKey = makeKey( encodedRegionName, tableName, seqNum, now, clusterIds, nonceGroup, nonce); synchronized (pendingWritesLock) { doWrite(info, logKey, edits, htd); txid = this.unflushedEntries.incrementAndGet(); } this.numEntries.incrementAndGet(); this.asyncWriter.setPendingTxid(txid); if (htd.isDeferredLogFlush()) { lastUnSyncedTxid = txid; } this.latestSequenceNums.put(encodedRegionName, seqNum); } // TODO: note that only tests currently call append w/sync. // Therefore, this code here is not actually used by anything. // Sync if catalog region, and if not then check if that table supports // deferred log flushing if (doSync && (info.isMetaRegion() || !htd.isDeferredLogFlush())) { // sync txn to file system this.sync(txid); } return txid; } finally { traceScope.close(); } } @Override public long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits, List clusterIds, final long now, HTableDescriptor htd, AtomicLong sequenceId, boolean isInMemstore, long nonceGroup, long nonce) throws IOException { return append(info, tableName, edits, clusterIds, now, htd, false, isInMemstore, sequenceId, nonceGroup, nonce); } /* The work of current write process of HLog goes as below: * 1). All write handler threads append edits to HLog's local pending buffer; * (it notifies AsyncWriter thread that there is new edits in local buffer) * 2). All write handler threads wait in HLog.syncer() function for underlying threads to * finish the sync that contains its txid; * 3). 
An AsyncWriter thread is responsible for retrieving all edits in HLog's * local pending buffer and writing to the hdfs (hlog.writer.append); * (it notifies AsyncSyncer threads that there is new writes to hdfs which needs a sync) * 4). AsyncSyncer threads are responsible for issuing sync request to hdfs to persist the * writes by AsyncWriter; (they notify the AsyncNotifier thread that sync is done) * 5). An AsyncNotifier thread is responsible for notifying all pending write handler * threads which are waiting in the HLog.syncer() function * 6). No LogSyncer thread any more (since there is always AsyncWriter/AsyncFlusher threads * do the same job it does) * note: more than one AsyncSyncer threads are needed here to guarantee good enough performance * when less concurrent write handler threads. since sync is the most time-consuming * operation in the whole write process, multiple AsyncSyncer threads can provide better * parallelism of sync to get better overall throughput */ // thread to write locally buffered writes to HDFS private class AsyncWriter extends HasThread { private long pendingTxid = 0; private long txidToWrite = 0; private long lastWrittenTxid = 0; private Object writeLock = new Object(); public AsyncWriter(String name) { super(name); } // wake up (called by (write) handler thread) AsyncWriter thread // to write buffered writes to HDFS public void setPendingTxid(long txid) { synchronized (this.writeLock) { if (txid <= this.pendingTxid) return; this.pendingTxid = txid; this.writeLock.notify(); } } public void run() { try { while (!this.isInterrupted()) { // 1. wait until there is new writes in local buffer synchronized (this.writeLock) { while (this.pendingTxid <= this.lastWrittenTxid) { this.writeLock.wait(); } } // 2. get all buffered writes and update 'real' pendingTxid // since maybe newer writes enter buffer as AsyncWriter wakes // up and holds the lock // NOTE! can't hold 'updateLock' here since rollWriter will pend // on 'sync()' with 'updateLock', but 'sync()' will wait for // AsyncWriter/AsyncSyncer/AsyncNotifier series. without updateLock // can leads to pendWrites more than pendingTxid, but not problem List pendWrites = null; synchronized (pendingWritesLock) { this.txidToWrite = unflushedEntries.get(); pendWrites = pendingWrites; pendingWrites = new LinkedList(); } // 3. write all buffered writes to HDFS(append, without sync) try { for (Entry e : pendWrites) { writer.append(e); } } catch(IOException e) { LOG.error("Error while AsyncWriter write, request close of hlog ", e); requestLogRoll(); asyncIOE = e; failedTxid.set(this.txidToWrite); } // 4. 
update 'lastWrittenTxid' and notify AsyncSyncer to do 'sync' this.lastWrittenTxid = this.txidToWrite; boolean hasIdleSyncer = false; for (int i = 0; i < asyncSyncers.length; ++i) { if (!asyncSyncers[i].isSyncing()) { hasIdleSyncer = true; asyncSyncers[i].setWrittenTxid(this.lastWrittenTxid); break; } } if (!hasIdleSyncer) { int idx = (int)(this.lastWrittenTxid % asyncSyncers.length); asyncSyncers[idx].setWrittenTxid(this.lastWrittenTxid); } } } catch (InterruptedException e) { LOG.debug(getName() + " interrupted while waiting for " + "newer writes added to local buffer"); } catch (Exception e) { LOG.error("UNEXPECTED", e); } finally { LOG.info(getName() + " exiting"); } } } // thread to request HDFS to sync the WALEdits written by AsyncWriter // to make those WALEdits durable on HDFS side private class AsyncSyncer extends HasThread { private long writtenTxid = 0; private long txidToSync = 0; private long lastSyncedTxid = 0; private volatile boolean isSyncing = false; private Object syncLock = new Object(); public AsyncSyncer(String name) { super(name); } public boolean isSyncing() { return this.isSyncing; } // wake up (called by AsyncWriter thread) AsyncSyncer thread // to sync(flush) writes written by AsyncWriter in HDFS public void setWrittenTxid(long txid) { synchronized (this.syncLock) { if (txid <= this.writtenTxid) return; this.writtenTxid = txid; this.syncLock.notify(); } } public void run() { try { while (!this.isInterrupted()) { // 1. wait until AsyncWriter has written data to HDFS and // called setWrittenTxid to wake up us synchronized (this.syncLock) { while (this.writtenTxid <= this.lastSyncedTxid) { this.syncLock.wait(); } this.txidToSync = this.writtenTxid; } // if this syncer's writes have been synced by other syncer: // 1. just set lastSyncedTxid // 2. don't do real sync, don't notify AsyncNotifier, don't logroll check // regardless of whether the writer is null or not if (this.txidToSync <= syncedTillHere.get()) { this.lastSyncedTxid = this.txidToSync; continue; } // 2. do 'sync' to HDFS to provide durability long now = EnvironmentEdgeManager.currentTimeMillis(); try { if (writer == null) { // the only possible case where writer == null is as below: // 1. t1: AsyncWriter append writes to hdfs, // envokes AsyncSyncer 1 with writtenTxid==100 // 2. t2: AsyncWriter append writes to hdfs, // envokes AsyncSyncer 2 with writtenTxid==200 // 3. t3: rollWriter starts, it grabs the updateLock which // prevents further writes entering pendingWrites and // wait for all items(200) in pendingWrites to append/sync // to hdfs // 4. t4: AsyncSyncer 2 finishes, now syncedTillHere==200 // 5. t5: rollWriter close writer, set writer=null... // 6. t6: AsyncSyncer 1 starts to use writer to do sync... before // rollWriter set writer to the newly created Writer // // Now writer == null and txidToSync > syncedTillHere here: // we need fail all the writes with txid <= txidToSync to avoid // 'data loss' where user get successful write response but can't // read the writes! 
LOG.error("should never happen: has unsynced writes but writer is null!"); asyncIOE = new IOException("has unsynced writes but writer is null!"); failedTxid.set(this.txidToSync); } else { this.isSyncing = true; writer.sync(); this.isSyncing = false; } postSync(); } catch (IOException e) { LOG.warn("Error while AsyncSyncer sync, request close of hlog ", e); requestLogRoll(); asyncIOE = e; failedTxid.set(this.txidToSync); this.isSyncing = false; } final long took = EnvironmentEdgeManager.currentTimeMillis() - now; metrics.finishSync(took); if (took > (slowSyncNs/1000000)) { String msg = new StringBuilder().append("Slow sync cost: ") .append(took).append(" ms, current pipeline: ") .append(Arrays.toString(getPipeLine())).toString(); Trace.addTimelineAnnotation(msg); LOG.info(msg); } // 3. wake up AsyncNotifier to notify(wake-up) all pending 'put' // handler threads on 'sync()' this.lastSyncedTxid = this.txidToSync; asyncNotifier.setFlushedTxid(this.lastSyncedTxid); // 4. check and do logRoll if needed boolean lowReplication = false; if (rollWriterLock.tryLock()) { try { lowReplication = checkLowReplication(); } finally { rollWriterLock.unlock(); } try { if (lowReplication || writer != null && writer.getLength() > logrollsize) { requestLogRoll(lowReplication); } } catch (IOException e) { LOG.warn("writer.getLength() failed,this failure won't block here"); } } } } catch (InterruptedException e) { LOG.debug(getName() + " interrupted while waiting for " + "notification from AsyncWriter thread"); } catch (Exception e) { LOG.error("UNEXPECTED", e); } finally { LOG.info(getName() + " exiting"); } } } // thread to notify all write handler threads which are pending on // their written WALEdits' durability(sync) // why an extra 'notifier' thread is needed rather than letting // AsyncSyncer thread itself notifies when sync is done is to let // AsyncSyncer thread do next sync as soon as possible since 'notify' // has heavy synchronization with all pending write handler threads private class AsyncNotifier extends HasThread { private long flushedTxid = 0; private long lastNotifiedTxid = 0; private Object notifyLock = new Object(); public AsyncNotifier(String name) { super(name); } public void setFlushedTxid(long txid) { synchronized (this.notifyLock) { if (txid <= this.flushedTxid) { return; } this.flushedTxid = txid; this.notifyLock.notify(); } } public void run() { try { while (!this.isInterrupted()) { synchronized (this.notifyLock) { while (this.flushedTxid <= this.lastNotifiedTxid) { this.notifyLock.wait(); } this.lastNotifiedTxid = this.flushedTxid; } // notify(wake-up) all pending (write) handler thread // (or logroller thread which also may pend on sync()) synchronized (syncedTillHere) { syncedTillHere.set(this.lastNotifiedTxid); syncedTillHere.notifyAll(); } } } catch (InterruptedException e) { LOG.debug(getName() + " interrupted while waiting for " + " notification from AsyncSyncer thread"); } catch (Exception e) { LOG.error("UNEXPECTED", e); } finally { LOG.info(getName() + " exiting"); } } } // sync all known transactions private void syncer() throws IOException { syncer(this.unflushedEntries.get()); // sync all pending items } // sync all transactions upto the specified txid private void syncer(long txid) throws IOException { synchronized (this.syncedTillHere) { while (this.syncedTillHere.get() < txid) { try { this.syncedTillHere.wait(); } catch (InterruptedException e) { LOG.debug("interrupted while waiting for notification from AsyncNotifier"); } } } if (txid <= this.failedTxid.get()) { assert 
asyncIOE != null : "current txid is among(under) failed txids, but asyncIOE is null!"; throw asyncIOE; } } @Override public void postSync() {} @Override public void postAppend(List entries) {} /* * @return whether log roll should be requested */ private boolean checkLowReplication() { boolean logRollNeeded = false; // if the number of replicas in HDFS has fallen below the configured // value, then roll logs. try { int numCurrentReplicas = getLogReplication(); if (numCurrentReplicas != 0 && numCurrentReplicas < this.minTolerableReplication) { if (this.lowReplicationRollEnabled) { if (this.consecutiveLogRolls.get() < this.lowReplicationRollLimit) { LOG.warn("HDFS pipeline error detected. " + "Found " + numCurrentReplicas + " replicas but expecting no less than " + this.minTolerableReplication + " replicas. " + " Requesting close of hlog. current pipeline: " + Arrays.toString(getPipeLine())); logRollNeeded = true; // If rollWriter is requested, increase consecutiveLogRolls. Once it // is larger than lowReplicationRollLimit, disable the // LowReplication-Roller this.consecutiveLogRolls.getAndIncrement(); } else { LOG.warn("Too many consecutive RollWriter requests, it's a sign of " + "the total number of live datanodes is lower than the tolerable replicas."); this.consecutiveLogRolls.set(0); this.lowReplicationRollEnabled = false; } } } else if (numCurrentReplicas >= this.minTolerableReplication) { if (!this.lowReplicationRollEnabled) { // The new writer's log replicas is always the default value. // So we should not enable LowReplication-Roller. If numEntries // is lower than or equals 1, we consider it as a new writer. if (this.numEntries.get() <= 1) { return logRollNeeded; } // Once the live datanode number and the replicas return to normal, // enable the LowReplication-Roller. this.lowReplicationRollEnabled = true; LOG.info("LowReplication-Roller was enabled."); } } } catch (Exception e) { LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas" + e + " still proceeding ahead..."); } return logRollNeeded; } /** * This method gets the datanode replication count for the current HLog. * * If the pipeline isn't started yet or is empty, you will get the default * replication factor. Therefore, if this function returns 0, it means you * are not properly running with the HDFS-826 patch. * @throws InvocationTargetException * @throws IllegalAccessException * @throws IllegalArgumentException * * @throws Exception */ int getLogReplication() throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { if (this.getNumCurrentReplicas != null && this.hdfs_out != null) { Object repl = this.getNumCurrentReplicas.invoke(getOutputStream(), NO_ARGS); if (repl instanceof Integer) { return ((Integer)repl).intValue(); } } return 0; } boolean canGetCurReplicas() { return this.getNumCurrentReplicas != null; } @Override public void hsync() throws IOException { syncer(); } @Override public void hflush() throws IOException { syncer(); } @Override public void sync() throws IOException { syncer(); } @Override public void sync(long txid) throws IOException { syncer(txid); } private void requestLogRoll() { requestLogRoll(false); } private void requestLogRoll(boolean tooFewReplicas) { if (!this.listeners.isEmpty()) { for (WALActionsListener i: this.listeners) { i.logRollRequested(tooFewReplicas); } } } // TODO: Remove info. Unused. 
protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit, HTableDescriptor htd) throws IOException { if (!this.enabled) { return; } if (!this.listeners.isEmpty()) { for (WALActionsListener i: this.listeners) { i.visitLogEntryBeforeWrite(htd, logKey, logEdit); } } try { long now = EnvironmentEdgeManager.currentTimeMillis(); // coprocessor hook: if (!coprocessorHost.preWALWrite(info, logKey, logEdit)) { if (logEdit.isReplay()) { // set replication scope null so that this won't be replicated logKey.setScopes(null); } // write to our buffer for the Hlog file. this.pendingWrites.add(new HLog.Entry(logKey, logEdit)); } long took = EnvironmentEdgeManager.currentTimeMillis() - now; coprocessorHost.postWALWrite(info, logKey, logEdit); long len = 0; for (KeyValue kv : logEdit.getKeyValues()) { len += kv.getLength(); } this.metrics.finishAppend(took, len); } catch (IOException e) { LOG.warn("Could not append. Requesting close of hlog", e); requestLogRoll(); throw e; } } /** @return How many items have been added to the log */ int getNumEntries() { return numEntries.get(); } /** @return the number of rolled log files */ public int getNumRolledLogFiles() { return hlogSequenceNums.size(); } /** @return the number of log files in use */ @Override public int getNumLogFiles() { // +1 for current use log return getNumRolledLogFiles() + 1; } /** @return the size of log files in use */ @Override public long getLogFileSize() { return totalLogSize.get() + curLogSize; } @Override public boolean startCacheFlush(final byte[] encodedRegionName) { Long oldRegionSeqNum = null; if (!closeBarrier.beginOp()) { LOG.info("Flush will not be started for " + Bytes.toString(encodedRegionName) + " - because the server is closing."); return false; } synchronized (oldestSeqNumsLock) { oldRegionSeqNum = this.oldestUnflushedSeqNums.remove(encodedRegionName); if (oldRegionSeqNum != null) { Long oldValue = this.oldestFlushingSeqNums.put(encodedRegionName, oldRegionSeqNum); assert oldValue == null : "Flushing map not cleaned up for " + Bytes.toString(encodedRegionName); } } if (oldRegionSeqNum == null) { // TODO: if we have no oldRegionSeqNum, and WAL is not disabled, presumably either // the region is already flushing (which would make this call invalid), or there // were no appends after last flush, so why are we starting flush? Maybe we should // assert not null, and switch to "long" everywhere. Less rigorous, but safer, // alternative is telling the caller to stop. For now preserve old logic. 
LOG.warn("Couldn't find oldest seqNum for the region we are about to flush: [" + Bytes.toString(encodedRegionName) + "]"); } return true; } @Override public void completeCacheFlush(final byte [] encodedRegionName) { synchronized (oldestSeqNumsLock) { this.oldestFlushingSeqNums.remove(encodedRegionName); } closeBarrier.endOp(); } @Override public void abortCacheFlush(byte[] encodedRegionName) { Long currentSeqNum = null, seqNumBeforeFlushStarts = null; synchronized (oldestSeqNumsLock) { seqNumBeforeFlushStarts = this.oldestFlushingSeqNums.remove(encodedRegionName); if (seqNumBeforeFlushStarts != null) { currentSeqNum = this.oldestUnflushedSeqNums.put(encodedRegionName, seqNumBeforeFlushStarts); } } closeBarrier.endOp(); if ((currentSeqNum != null) && (currentSeqNum.longValue() <= seqNumBeforeFlushStarts.longValue())) { String errorStr = "Region " + Bytes.toString(encodedRegionName) + "acquired edits out of order current memstore seq=" + currentSeqNum + ", previous oldest unflushed id=" + seqNumBeforeFlushStarts; LOG.error(errorStr); assert false : errorStr; Runtime.getRuntime().halt(1); } } @Override public boolean isLowReplicationRollEnabled() { return lowReplicationRollEnabled; } /** * Get the directory we are making logs in. * * @return dir */ protected Path getDir() { return dir; } static Path getHLogArchivePath(Path oldLogDir, Path p) { return new Path(oldLogDir, p.getName()); } static String formatRecoveredEditsFileName(final long seqid) { return String.format("%019d", seqid); } public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + (5 * ClassSize.REFERENCE) + ClassSize.ATOMIC_INTEGER + Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG)); private static void usage() { System.err.println("Usage: HLog "); System.err.println("Arguments:"); System.err.println(" --dump Dump textual representation of passed one or more files"); System.err.println(" For example: HLog --dump hdfs://example.com:9000/hbase/.logs/MACHINE/LOGFILE"); System.err.println(" --split Split the passed directory of WAL logs"); System.err.println(" For example: HLog --split hdfs://example.com:9000/hbase/.logs/DIR"); } private static void split(final Configuration conf, final Path p) throws IOException { FileSystem fs = FileSystem.get(conf); if (!fs.exists(p)) { throw new FileNotFoundException(p.toString()); } if (!fs.getFileStatus(p).isDir()) { throw new IOException(p + " is not a directory"); } final Path baseDir = FSUtils.getRootDir(conf); final Path oldLogDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME); HLogSplitter.split(baseDir, p, oldLogDir, fs, conf); } @Override public WALCoprocessorHost getCoprocessorHost() { return coprocessorHost; } /** Provide access to currently deferred sequence num for tests */ boolean hasUnSyncedEntries() { return this.lastUnSyncedTxid > this.syncedTillHere.get(); } @Override public long getEarliestMemstoreSeqNum(byte[] encodedRegionName) { Long result = oldestUnflushedSeqNums.get(encodedRegionName); return result == null ? HConstants.NO_SEQNUM : result.longValue(); } /** * Pass one or more log file names and it will either dump out a text version * on stdout or split the specified log files. 
* * @param args * @throws IOException */ public static void main(String[] args) throws IOException { if (args.length < 2) { usage(); System.exit(-1); } // either dump using the HLogPrettyPrinter or split, depending on args if (args[0].compareTo("--dump") == 0) { HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length)); } else if (args[0].compareTo("--split") == 0) { Configuration conf = HBaseConfiguration.create(); for (int i = 1; i < args.length; i++) { try { Path logPath = new Path(args[i]); FSUtils.setFsDefault(conf, logPath); split(conf, logPath); } catch (Throwable t) { t.printStackTrace(System.err); System.exit(-1); } } } else { usage(); System.exit(-1); } } /** * Find the 'getPipeline' on the passed os stream. * @return Method or null. */ private Method getGetPipeline(final FSDataOutputStream os) { Method m = null; if (os != null) { Class wrappedStreamClass = os.getWrappedStream() .getClass(); try { m = wrappedStreamClass.getDeclaredMethod("getPipeline", new Class[] {}); m.setAccessible(true); } catch (NoSuchMethodException e) { LOG.info("FileSystem's output stream doesn't support" + " getPipeline; not available; fsOut=" + wrappedStreamClass.getName()); } catch (SecurityException e) { LOG.info( "Doesn't have access to getPipeline on " + "FileSystems's output stream ; fsOut=" + wrappedStreamClass.getName(), e); m = null; // could happen on setAccessible() } } return m; } /** * This method gets the pipeline for the current HLog. * @return */ DatanodeInfo[] getPipeLine() { if (this.getPipeLine != null && this.hdfs_out != null) { Object repl; try { repl = this.getPipeLine.invoke(getOutputStream(), NO_ARGS); if (repl instanceof DatanodeInfo[]) { return ((DatanodeInfo[]) repl); } } catch (Exception e) { LOG.info("Get pipeline failed", e); } } return new DatanodeInfo[0]; } }
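// ---------------------------------------------------------------------------
// Editorial notes (not part of the original Apache source file):
//
// 1. Lifecycle of one edit through the asynchronous write path described in the
//    comment above the AsyncWriter class. The txid value is invented for
//    illustration:
//
//      handler thread : append() buffers the edit under pendingWritesLock, txid = 42,
//                       then blocks in syncer(42)
//      AsyncWriter    : drains pendingWrites and calls writer.append(entry) for each,
//                       covering txids up to 42
//      AsyncSyncer[i] : calls writer.sync(), making all txids <= 42 durable on HDFS
//      AsyncNotifier  : advances syncedTillHere to 42 and wakes the blocked handler
//
// 2. Reading a closed WAL file, as pointed at by the class javadoc. A minimal,
//    hedged sketch; the file path below is made up:
//
//      Configuration conf = HBaseConfiguration.create();
//      FileSystem fs = FileSystem.get(conf);
//      Path walFile = new Path("/hbase/.logs/rs1%2C60020%2C1400000000000/rs1%2C60020%2C1400000000000.1400000123456");
//      HLog.Reader reader = HLogFactory.createReader(fs, walFile, conf);
//      try {
//        HLog.Entry entry;
//        while ((entry = reader.next()) != null) {
//          System.out.println(entry.getKey() + ": " + entry.getEdit());
//        }
//      } finally {
//        reader.close();
//      }
// ---------------------------------------------------------------------------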