hbase-webapps.master.hbck.jsp Maven / Gradle / Ivy
<%--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
--%>
<%@ page contentType="text/html;charset=UTF-8"
import="java.time.Instant"
import="java.time.ZoneId"
import="java.util.List"
import="java.util.Map"
import="java.util.stream.Collectors"
import="java.time.ZonedDateTime"
import="java.time.format.DateTimeFormatter"
%>
<%@ page import="org.apache.hadoop.fs.Path" %>
<%@ page import="org.apache.hadoop.hbase.client.RegionInfo" %>
<%@ page import="org.apache.hadoop.hbase.master.hbck.HbckChore" %>
<%@ page import="org.apache.hadoop.hbase.master.hbck.HbckReport" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.master.ServerManager" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
<%@ page import="org.apache.hadoop.hbase.master.janitor.CatalogJanitor" %>
<%@ page import="org.apache.hadoop.hbase.master.janitor.CatalogJanitorReport" %>
<%@ page import="java.util.Optional" %>
<%@ page import="org.apache.hadoop.hbase.util.EnvironmentEdgeManager" %>
<%
  // Page setup: look up the Master, optionally regenerate both reports, then
  // capture the report objects and display strings used by the rest of the page.
  final String cacheParameterValue = request.getParameter("cache");
  final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
  pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());

  // An absent or non-"true" cache parameter means: run both reporters now, inline
  // with page rendering. A failure is written into the page and the page then
  // falls back to whatever report is cached.
  final boolean serveCachedReports = Boolean.parseBoolean(cacheParameterValue);
  if (!serveCachedReports) {
    try {
      master.getMasterRpcServices().runHbckChore(null, null);
    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException hbckFailure) {
      out.write("Failed generating a new hbck_chore report; using cache; try again or run hbck_chore_run in the shell: " + hbckFailure.getMessage() + "\n");
    }
    try {
      master.getMasterRpcServices().runCatalogScan(null, null);
    } catch (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException janitorFailure) {
      out.write("Failed generating a new catalogjanitor report; using cache; try again or run catalogjanitor_run in the shell: " + janitorFailure.getMessage() + "\n");
    }
  }

  // HBCK chore and its last report; either may be null.
  HbckChore hbckChore = master.getHbckChore();
  HbckReport hbckReport = hbckChore == null ? null : hbckChore.getLastReport();

  // Start/end of the last check as ISO-8601 offset date-times in the JVM default
  // zone; null when there is no report or the timestamp itself is null.
  final Instant hbckCheckingStart =
    hbckReport == null ? null : hbckReport.getCheckingStartTimestamp();
  String hbckReportStartTime = hbckCheckingStart == null
    ? null
    : ZonedDateTime.ofInstant(hbckCheckingStart, ZoneId.systemDefault())
        .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
  final Instant hbckCheckingEnd =
    hbckReport == null ? null : hbckReport.getCheckingEndTimestamp();
  String hbckReportEndTime = hbckCheckingEnd == null
    ? null
    : ZonedDateTime.ofInstant(hbckCheckingEnd, ZoneId.systemDefault())
        .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);

  // CatalogJanitor and its last report; either may be null.
  CatalogJanitor cj = master.getCatalogJanitor();
  CatalogJanitorReport cjReport = cj == null ? null : cj.getLastReport();
  final ServerManager serverManager = master.getServerManager();
%>
<% if (!master.isInitialized()) { %>
Master is not initialized
<% } else { %>
This page displays two reports: the HBCK Chore Report and
the CatalogJanitor Consistency Issues report. Only report titles
show if there are no problems to list. Note some conditions are
transitory as regions migrate. Reports are generated
when you invoke this page unless you add ?cache=true to the URL. Then
we display the reports cached from the last time the reports were run.
Reports are run by Chores that are hosted by the Master on a cadence.
You can also run them on demand from the hbase shell: invoke catalogjanitor_run
and/or hbck_chore_run.
ServerNames will be links if server is live, italic if dead, and plain if unknown.
HBCK Chore Report
<%-- Status line for the HBCK chore report: chore missing, chore disabled, no report yet,
     check still running (start time set, end time not), or check finished. --%>
<% if (hbckChore == null) { %>
HBCK chore has not yet initialized. Try again later.
<% } else if (hbckChore.isDisabled()) { %>
HBCK chore is currently disabled. Set hbase.master.hbck.chore.interval > 0 in the config & do a rolling-restart to enable it.
<% } else if (hbckReport == null) { %>
No Report created.
<% } else if (hbckReportStartTime != null && hbckReportEndTime == null) { %>
Checking started at <%= hbckReportStartTime %>. Please wait for checking to generate a new report.
<% } else { %>
Checking started at <%= hbckReportStartTime %> and generated report at <%= hbckReportEndTime %>.
<% } %>
<%-- Inconsistent Regions section, shown only when the HBCK report lists at least one.
     Each entry maps a region name to a pair: the location recorded in hbase:meta (first)
     and the RegionServers that reported the region online (second). --%>
<% if (hbckReport != null && hbckReport.getInconsistentRegions().size() > 0) { %>
Inconsistent Regions
There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assign
command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
need to check the server still exists. If not, schedule ServerCrashProcedure for it. If exists,
restart Server2 and Server1);
3. More than one regionserver reports opened this region (Fix: restart the RegionServers).
Note: the reported online regionservers may not be up-to-date when there are regions in transition.
Region Name
Location in META
Reported Online RegionServers
<% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : hbckReport.getInconsistentRegions().entrySet()) { %>
<%= entry.getKey() %>
<%= formatServerName(master, serverManager, entry.getValue().getFirst()) %>
<%= entry.getValue().getSecond().stream().map(s -> formatServerName(master, serverManager, s)).
collect(Collectors.joining(", ")) %>
<% } %>
<%= hbckReport.getInconsistentRegions().size() %> region(s) in set.
<% } %>
<%-- Orphan Regions on RegionServer section, shown only when the HBCK report lists at least one.
     Each entry maps a region name to the RegionServer that reported it online. --%>
<% if (hbckReport != null && hbckReport.getOrphanRegionsOnRS().size() > 0) { %>
Orphan Regions on RegionServer
Region Name
Reported Online RegionServer
<% for (Map.Entry<String, ServerName> entry : hbckReport.getOrphanRegionsOnRS().entrySet()) { %>
<%= entry.getKey() %>
<%= formatServerName(master, serverManager, entry.getValue()) %>
<% } %>
<%= hbckReport.getOrphanRegionsOnRS().size() %> region(s) in set.
<% } %>
<%-- Orphan Regions on FileSystem section, shown only when the HBCK report lists at least one.
     Each entry maps a region encoded name to its filesystem path. --%>
<% if (hbckReport != null && hbckReport.getOrphanRegionsOnFS().size() > 0) { %>
Orphan Regions on FileSystem
The below are Regions we've lost account of. To be safe, run bulk load of any data found under these Region orphan directories to have the
cluster re-adopt data.
First make sure hbase:meta is in a healthy state, that there are no holes, overlaps or inconsistencies (else bulk load may fail);
run hbck2 fixMeta. Once this is done, per Region below, run a bulk
load -- $ hbase completebulkload REGION_DIR_PATH TABLE_NAME -- and then delete the desiccated directory content (HFiles are removed upon
successful load; all that is left are empty directories and occasionally a seqid marking file).
Region Encoded Name
FileSystem Path
<% for (Map.Entry<String, Path> entry : hbckReport.getOrphanRegionsOnFS().entrySet()) { %>
<%= entry.getKey() %>
<%= entry.getValue() %>
<% } %>
<%= hbckReport.getOrphanRegionsOnFS().size() %> region(s) in set.
<% } %>
<%
  // Timestamps for the CatalogJanitor section header, both rendered as ISO-8601
  // offset date-times in the JVM default zone.

  // Current time, taken from EnvironmentEdgeManager.
  String iso8601Now = ZonedDateTime
    .ofInstant(Instant.ofEpochMilli(EnvironmentEdgeManager.currentTime()), ZoneId.systemDefault())
    .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);

  // Creation time of the last CatalogJanitor report; null when there is no report.
  final Long cjCreateMillis = cjReport == null ? null : cjReport.getCreateTime();
  String cjReportTime = cjCreateMillis == null
    ? null
    : ZonedDateTime.ofInstant(Instant.ofEpochMilli(cjCreateMillis), ZoneId.systemDefault())
        .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
%>
CatalogJanitor hbase:meta Consistency Issues
<%-- Header line of the CatalogJanitor section: a notice when no report exists,
     otherwise the report creation time alongside the current time. --%>
<% if (cjReport == null) { %>
No catalogJanitorReport created.
<% } else { %>
Report created: <%= cjReportTime %> (now=<%= iso8601Now %>).
<% } %>
<% if (cjReport != null && !cjReport.isEmpty()) { %>
<%-- Holes subsection, shown only when the CatalogJanitor report lists at least one.
     Each hole is a pair of RegionInfos; both region names are printed. --%>
<% if (!cjReport.getHoles().isEmpty()) { %>
Holes
RegionInfo
RegionInfo
<% for (Pair<RegionInfo, RegionInfo> p : cjReport.getHoles()) { %>
<%= p.getFirst().getRegionNameAsString() %>
<%= p.getSecond().getRegionNameAsString() %>
<% } %>
<%= cjReport.getHoles().size() %> hole(s).
<% } %>
<%-- Overlaps subsection, shown only when the CatalogJanitor report lists at least one.
     A region that is a key in the report's merged-regions map is wrapped in a blue span,
     matching the explanatory text below; any other region is printed plain. --%>
<% if (!cjReport.getOverlaps().isEmpty()) { %>
Overlaps
Regions highlighted in blue are recently merged regions, HBase is still doing cleanup for them. Overlaps involving these regions cannot be fixed by hbck2 fixMeta at this moment.
Please wait some time, run catalogjanitor_run in hbase shell, refresh ‘HBCK Report’ page, make sure these regions are not highlighted to start the fix.
RegionInfo
Other RegionInfo
<% for (Pair<RegionInfo, RegionInfo> p : cjReport.getOverlaps()) { %>
<% if (cjReport.getMergedRegions().containsKey(p.getFirst())) { %>
<span style="color:blue;"><%= p.getFirst().getRegionNameAsString() %></span>
<% } else { %>
<%= p.getFirst().getRegionNameAsString() %>
<% } %>
<% if (cjReport.getMergedRegions().containsKey(p.getSecond())) { %>
<span style="color:blue;"><%= p.getSecond().getRegionNameAsString() %></span>
<% } else { %>
<%= p.getSecond().getRegionNameAsString() %>
<% } %>
<% } %>
<%= cjReport.getOverlaps().size() %> overlap(s).
<% } %>
<%-- Unknown Servers subsection, shown only when the CatalogJanitor report lists at least one.
     Each entry pairs a RegionInfo with the server name that hbase:meta references for it. --%>
<% if (!cjReport.getUnknownServers().isEmpty()) { %>
Unknown Servers
The below are servers mentioned in the hbase:meta table that are no longer 'live' or known 'dead'.
The server likely belongs to an older cluster epoch since replaced by a new instance because of a restart/crash.
To clear 'Unknown Servers', run 'hbck2 scheduleRecoveries UNKNOWN_SERVERNAME'. This will schedule a ServerCrashProcedure.
It will clear out 'Unknown Server' references and schedule reassigns of any Regions that were associated with this host.
But first!, be sure the referenced Region is not currently stuck looping trying to OPEN. Does it show as a Region-In-Transition on the
Master home page? Is it mentioned in the 'Procedures and Locks' Procedures list? If so, perhaps it is stuck in a loop
trying to OPEN but unable to because of a missing reference or file.
Read the Master log looking for the most recent
mentions of the associated Region name. Try and address any such complaint first. If successful, a side-effect
should be the clean up of the 'Unknown Servers' list. It may take a while. OPENs are retried forever but the interval
between retries grows. The 'Unknown Server' may be cleared because it is just the last RegionServer the Region was
successfully opened on; on the next open, the 'Unknown Server' will be purged.
RegionInfo
ServerName
<% for (Pair<RegionInfo, ServerName> p : cjReport.getUnknownServers()) { %>
<%= p.getFirst().getRegionNameAsString() %>
<%= p.getSecond() %>
<% } %>
<%= cjReport.getUnknownServers().size() %> unknown server(s).
<% } %>
<%-- Empty info:regioninfo subsection, shown only when the CatalogJanitor report lists at least one
     such row. Row keys are raw bytes, printed via Bytes.toStringBinary. --%>
<% if (!cjReport.getEmptyRegionInfo().isEmpty()) { %>
Empty info:regioninfo
Row
<% for (byte[] rowKey : cjReport.getEmptyRegionInfo()) { %>
<%= Bytes.toStringBinary(rowKey) %>
<% } %>
<%= cjReport.getEmptyRegionInfo().size() %> emptyRegionInfo(s).
<% } %>
<% } %>
<% } %>
<%!
/**
 * Format serverName for display as an HTML fragment.
 * <ul>
 * <li>Live server with a known info port: a link to that server's web UI.</li>
 * <li>Live server without an info port: bold, since there is no link target.</li>
 * <li>Dead server: italic.</li>
 * <li>Unknown server (neither live nor dead): plain text.</li>
 * </ul>
 *
 * @param master used to look up the RegionServer's info (web UI) port
 * @param serverManager used to decide whether the server is online or dead
 * @param serverName the server to render; must not be null
 * @return the server name, wrapped in markup that reflects its state
 */
private static String formatServerName(HMaster master,
    ServerManager serverManager, ServerName serverName) {
  String sn = serverName.toString();
  if (serverManager.isServerOnline(serverName)) {
    int infoPort = master.getRegionServerInfoPort(serverName);
    if (infoPort > 0) {
      // Scheme-relative URL so the link follows whatever scheme (http/https) served this page.
      // NOTE(review): "/rs-status" is assumed to be the RegionServer status path -- confirm
      // against the RegionServer web UI of the deployed HBase version.
      return "<a href=\"//" + serverName.getHostname() + ":" + infoPort + "/rs-status\">"
        + sn + "</a>";
    }
    return "<b>" + sn + "</b>";
  } else if (serverManager.isServerDead(serverName)) {
    return "<i>" + sn + "</i>";
  }
  return sn;
}
%>