/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.backup.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable.WALItem;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
/**
* After a full backup is created, an incremental backup stores only the changes made since the
* last full or incremental backup. Creating the incremental backup copies the log files in .logs
* and .oldlogs written since the last backup timestamp.
*/
@InterfaceAudience.Private
public class IncrementalBackupManager extends BackupManager {
public static final Log LOG = LogFactory.getLog(IncrementalBackupManager.class);
public IncrementalBackupManager(Connection conn, Configuration conf) throws IOException {
super(conn, conf);
}
/**
* Obtain the list of logs that need to be copied out for this incremental backup. The list is set
* in BackupInfo.
* @return the new map of region server log timestamps after the log roll for this incremental backup.
* @throws IOException exception
*/
public HashMap<String, Long> getIncrBackupLogFileMap() throws IOException {
List<String> logList;
HashMap<String, Long> newTimestamps;
HashMap<String, Long> previousTimestampMins;
String savedStartCode = readBackupStartCode();
// key: tableName
// value: <RegionServer, PreviousTimeStamp>
HashMap<TableName, HashMap<String, Long>> previousTimestampMap = readLogTimestampMap();
previousTimestampMins = BackupUtils.getRSLogTimestampMins(previousTimestampMap);
if (LOG.isDebugEnabled()) {
LOG.debug("StartCode " + savedStartCode + "for backupID " + backupInfo.getBackupId());
}
// get all new log files from .logs and .oldlogs after last TS and before new timestamp
if (savedStartCode == null || previousTimestampMins == null
|| previousTimestampMins.isEmpty()) {
throw new IOException(
"Cannot read any previous back up timestamps from backup system table. "
+ "In order to create an incremental backup, at least one full backup is needed.");
}
LOG.info("Execute roll log procedure for incremental backup ...");
HashMap<String, String> props = new HashMap<>();
props.put("backupRoot", backupInfo.getBackupRootDir());
try (Admin admin = conn.getAdmin()) {
admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
}
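// The procedure records each region server's highest log timestamp at roll time in the backup
// system table; read those back as the new upper bounds for this backup.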
newTimestamps = readRegionServerLastLogRollResult();
logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf, savedStartCode);
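// WALs already recorded in the backup system table for this backup root were copied by a
// previous backup session; exclude them (by file name) from the list gathered above.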
List<WALItem> logFromSystemTable =
getLogFilesFromBackupSystem(previousTimestampMins, newTimestamps, getBackupInfo()
.getBackupRootDir());
logList = excludeAlreadyBackedUpWALs(logList, logFromSystemTable);
backupInfo.setIncrBackupFileList(logList);
return newTimestamps;
}
/**
* Get the list of WAL files eligible for incremental backup. Unlike
* {@link #getIncrBackupLogFileMap()}, this method does not trigger a log roll first.
* @return list of WAL files
* @throws IOException if the backup system table cannot be read
*/
public List<String> getIncrBackupLogFileList() throws IOException {
List<String> logList;
HashMap<String, Long> newTimestamps;
HashMap<String, Long> previousTimestampMins;
String savedStartCode = readBackupStartCode();
// key: tableName
// value: <RegionServer, PreviousTimeStamp>
HashMap<TableName, HashMap<String, Long>> previousTimestampMap = readLogTimestampMap();
previousTimestampMins = BackupUtils.getRSLogTimestampMins(previousTimestampMap);
if (LOG.isDebugEnabled()) {
LOG.debug("StartCode " + savedStartCode + "for backupID " + backupInfo.getBackupId());
}
// get all new log files from .logs and .oldlogs after last TS and before new timestamp
if (savedStartCode == null || previousTimestampMins == null
|| previousTimestampMins.isEmpty()) {
throw new IOException(
"Cannot read any previous back up timestamps from backup system table. "
+ "In order to create an incremental backup, at least one full backup is needed.");
}
newTimestamps = readRegionServerLastLogRollResult();
logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf, savedStartCode);
List<WALItem> logFromSystemTable =
getLogFilesFromBackupSystem(previousTimestampMins, newTimestamps, getBackupInfo()
.getBackupRootDir());
logList = excludeAlreadyBackedUpWALs(logList, logFromSystemTable);
backupInfo.setIncrBackupFileList(logList);
return logList;
}
private List<String> excludeAlreadyBackedUpWALs(List<String> logList,
List<WALItem> logFromSystemTable) {
Set<String> walFileNameSet = convertToSet(logFromSystemTable);
List<String> list = new ArrayList<>();
for (int i = 0; i < logList.size(); i++) {
Path p = new Path(logList.get(i));
String name = p.getName();
if (walFileNameSet.contains(name)) continue;
list.add(logList.get(i));
}
return list;
}
/**
* Create Set of WAL file names (not full path names)
* @param logFromSystemTable list of WAL records read from the backup system table
* @return set of WAL file names
*/
private Set<String> convertToSet(List<WALItem> logFromSystemTable) {
Set<String> set = new HashSet<>();
for (int i = 0; i < logFromSystemTable.size(); i++) {
WALItem item = logFromSystemTable.get(i);
set.add(item.walFile);
}
return set;
}
/**
* For each region server: get all log files newer than the last timestamps, but not newer than
* the newest timestamps.
* @param olderTimestamps timestamp map for each region server of the last backup.
* @param newestTimestamps timestamp map for each region server that the backup should lead to.
* @param backupRoot backup root directory; WAL records kept for other backup roots are skipped
* @return list of log files which need to be added to this backup
* @throws IOException if the backup system table cannot be read
*/
private List<WALItem> getLogFilesFromBackupSystem(HashMap<String, Long> olderTimestamps,
HashMap<String, Long> newestTimestamps, String backupRoot) throws IOException {
List<WALItem> logFiles = new ArrayList<>();
Iterator<WALItem> it = getWALFilesFromBackupSystem();
while (it.hasNext()) {
WALItem item = it.next();
String rootDir = item.getBackupRoot();
if (!rootDir.equals(backupRoot)) {
continue;
}
String walFileName = item.getWalFile();
String server = BackupUtils.parseHostNameFromLogFile(new Path(walFileName));
if (server == null) {
continue;
}
Long tss = getTimestamp(walFileName);
Long oldTss = olderTimestamps.get(server);
Long newTss = newestTimestamps.get(server);
if (oldTss == null) {
logFiles.add(item);
continue;
}
if (newTss == null) {
newTss = Long.MAX_VALUE;
}
if (tss > oldTss && tss < newTss) {
logFiles.add(item);
}
}
return logFiles;
}
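// A WAL file name ends with its creation timestamp after the final separator, e.g. something
// like "host%2C16020%2C1472839250423.1473222381218" (name shown only as an illustration);
// getTimestamp() parses that trailing token.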
private Long getTimestamp(String walFileName) {
int index = walFileName.lastIndexOf(BackupUtils.LOGNAME_SEPARATOR);
return Long.parseLong(walFileName.substring(index + 1));
}
/**
* For each region server: get all log files newer than the last timestamps but not newer than the
* newest timestamps.
* @param olderTimestamps the timestamp for each region server of the last backup.
* @param newestTimestamps the timestamp for each region server that the backup should lead to.
* @param conf the Hadoop and HBase configuration
* @param savedStartCode the startcode (timestamp) of last successful backup.
* @return a list of log files to be backed up
* @throws IOException exception
*/
private List<String> getLogFilesForNewBackup(HashMap<String, Long> olderTimestamps,
HashMap<String, Long> newestTimestamps, Configuration conf, String savedStartCode)
throws IOException {
LOG.debug("In getLogFilesForNewBackup()\n" + "olderTimestamps: " + olderTimestamps
+ "\n newestTimestamps: " + newestTimestamps);
Path rootdir = FSUtils.getRootDir(conf);
Path logDir = new Path(rootdir, HConstants.HREGION_LOGDIR_NAME);
Path oldLogDir = new Path(rootdir, HConstants.HREGION_OLDLOGDIR_NAME);
FileSystem fs = rootdir.getFileSystem(conf);
NewestLogFilter pathFilter = new NewestLogFilter();
List<String> resultLogFiles = new ArrayList<>();
List<String> newestLogs = new ArrayList<>();
/*
* The old region servers and timestamps info we kept in backup system table may be out of sync
* if a new region server is added or an existing one is lost. We'll deal with it here when
* processing the logs. If data in backup system table has more hosts, just ignore it. If the
* .logs directory includes more hosts, the additional hosts will not have old timestamps to
* compare with. We'll just use all the logs in that directory. We always write up-to-date
* region server and timestamp info to backup system table at the end of a successful backup.
*/
FileStatus[] rss;
Path p;
String host;
Long oldTimeStamp;
String currentLogFile;
long currentLogTS;
// Get the files in .logs.
rss = fs.listStatus(logDir);
for (FileStatus rs : rss) {
p = rs.getPath();
host = BackupUtils.parseHostNameFromLogFile(p);
if (host == null) {
continue;
}
FileStatus[] logs;
oldTimeStamp = olderTimestamps.get(host);
// It is possible that there is no old timestamp in backup system table for this host if
// this region server is newly added after our last backup.
if (oldTimeStamp == null) {
logs = fs.listStatus(p);
} else {
pathFilter.setLastBackupTS(oldTimeStamp);
logs = fs.listStatus(p, pathFilter);
}
for (FileStatus log : logs) {
LOG.debug("currentLogFile: " + log.getPath().toString());
if (AbstractFSWALProvider.isMetaFile(log.getPath())) {
if (LOG.isDebugEnabled()) {
LOG.debug("Skip hbase:meta log file: " + log.getPath().getName());
}
continue;
}
currentLogFile = log.getPath().toString();
resultLogFiles.add(currentLogFile);
currentLogTS = BackupUtils.getCreationTime(log.getPath());
// newestTimestamps is up-to-date with the current list of hosts
// so newestTimestamps.get(host) will not be null.
if (currentLogTS > newestTimestamps.get(host)) {
newestLogs.add(currentLogFile);
}
}
}
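// Closed WALs are eventually archived from .logs to .oldlogs (for example when a region server
// shuts down or the file is no longer needed), so edits written since the last backup may live
// in either directory.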
// Include the .oldlogs files too.
FileStatus[] oldlogs = fs.listStatus(oldLogDir);
for (FileStatus oldlog : oldlogs) {
p = oldlog.getPath();
currentLogFile = p.toString();
if (AbstractFSWALProvider.isMetaFile(p)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Skip .meta log file: " + currentLogFile);
}
continue;
}
host = BackupUtils.parseHostFromOldLog(p);
if (host == null) {
continue;
}
currentLogTS = BackupUtils.getCreationTime(p);
oldTimeStamp = olderTimestamps.get(host);
/*
* It is possible that there is no old timestamp in backup system table for this host. At the
* time of our last backup operation, this RS did not exist. The reason can be one of two:
* 1. The RS already left/crashed and its logs were moved to .oldlogs. 2. The RS was added after
* our last backup.
*/
if (oldTimeStamp == null) {
if (currentLogTS < Long.parseLong(savedStartCode)) {
// This log file is really old; its region server was gone before our last backup.
continue;
} else {
resultLogFiles.add(currentLogFile);
}
} else if (currentLogTS > oldTimeStamp) {
resultLogFiles.add(currentLogFile);
}
// It is possible that a host in .oldlogs is an obsolete region server
// so newestTimestamps.get(host) here can be null.
// Even if these logs belong to an obsolete region server, we still need
// to include them to avoid losing edits in the backup.
Long newTimestamp = newestTimestamps.get(host);
if (newTimestamp != null && currentLogTS > newTimestamp) {
newestLogs.add(currentLogFile);
}
}
// remove newest log per host because they are still in use
resultLogFiles.removeAll(newestLogs);
return resultLogFiles;
}
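/**
* PathFilter that accepts only WAL files created after the last backup timestamp; used above
* when listing the per-region-server log directories.
*/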
static class NewestLogFilter implements PathFilter {
private Long lastBackupTS = 0L;
public NewestLogFilter() {
}
protected void setLastBackupTS(Long ts) {
this.lastBackupTS = ts;
}
@Override
public boolean accept(Path path) {
// skip meta table log -- ts.meta file
if (AbstractFSWALProvider.isMetaFile(path)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Skip .meta log file: " + path.getName());
}
return false;
}
long timestamp;
try {
timestamp = BackupUtils.getCreationTime(path);
return timestamp > lastBackupTS;
} catch (Exception e) {
LOG.warn("Cannot read timestamp of log file " + path);
return false;
}
}
}
}