All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.org.apache.hadoop.hbase.io.HFileLink Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * HFileLink describes a link to an hfile.
 *
 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
 * HFileLink allows to access the referenced hfile regardless of the location where it is.
 *
 * 

Searches for hfiles in the following order and locations: *

    *
  • /hbase/table/region/cf/hfile
  • *
  • /hbase/.archive/table/region/cf/hfile
  • *
* * The link checks first in the original path if it is not present * it fallbacks to the archived path. */ @InterfaceAudience.Private @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS", justification="To be fixed but warning suppressed for now") public class HFileLink extends FileLink { private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class); /** * A non-capture group, for HFileLink, so that this can be embedded. * The HFileLink describe a link to an hfile in a different table/region * and the name is in the form: table=region-hfile. *

* Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid * character for the table name. * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name. * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) * and the bulk loaded (_SeqId_[0-9]+_) hfiles. * *

Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name * and '4567' is region name and 'abcd' is filename. */ public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s", TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX); /** Define the HFile Link name parser in the form of: table=region-hfile */ public static final Pattern LINK_NAME_PATTERN = Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$", TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX)); /** * The pattern should be used for hfile and reference links * that can be found in /hbase/table/region/family/ */ private static final Pattern REF_OR_HFILE_LINK_PATTERN = Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX)); private final Path archivePath; private final Path originPath; private final Path mobPath; private final Path tempPath; /** * Dead simple hfile link constructor */ public HFileLink(final Path originPath, final Path tempPath, final Path mobPath, final Path archivePath) { this.tempPath = tempPath; this.originPath = originPath; this.mobPath = mobPath; this.archivePath = archivePath; setLocations(originPath, tempPath, mobPath, archivePath); } /** * @param conf {@link Configuration} from which to extract specific archive locations * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile) * @throws IOException on unexpected error. */ public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern) throws IOException { return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf), HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern); } /** * @param rootDir Path to the root directory where hbase files are stored * @param archiveDir Path to the hbase archive directory * @param hFileLinkPattern The path of the HFile Link. */ public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir, final Path hFileLinkPattern) { Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern); Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); Path originPath = new Path(rootDir, hfilePath); Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath); Path archivePath = new Path(archiveDir, hfilePath); return new HFileLink(originPath, tempPath, mobPath, archivePath); } /** * Create an HFileLink relative path for the table/region/family/hfile location * @param table Table name * @param region Region Name * @param family Family Name * @param hfile HFile Name * @return the relative Path to open the specified table/region/family/hfile link */ public static Path createPath(final TableName table, final String region, final String family, final String hfile) { if (HFileLink.isHFileLink(hfile)) { return new Path(family, hfile); } return new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); } /** * Create an HFileLink instance from table/region/family/hfile location * @param conf {@link Configuration} from which to extract specific archive locations * @param table Table name * @param region Region Name * @param family Family Name * @param hfile HFile Name * @return Link to the file with the specified table/region/family/hfile location * @throws IOException on unexpected error. */ public static HFileLink build(final Configuration conf, final TableName table, final String region, final String family, final String hfile) throws IOException { return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile)); } /** * @return the origin path of the hfile. */ public Path getOriginPath() { return this.originPath; } /** * @return the path of the archived hfile. */ public Path getArchivePath() { return this.archivePath; } /** * @return the path of the mob hfiles. */ public Path getMobPath() { return this.mobPath; } /** * @param path Path to check. * @return True if the path is a HFileLink. */ public static boolean isHFileLink(final Path path) { return isHFileLink(path.getName()); } /** * @param fileName File name to check. * @return True if the path is a HFileLink. */ public static boolean isHFileLink(String fileName) { Matcher m = LINK_NAME_PATTERN.matcher(fileName); if (!m.matches()) { return false; } return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null; } /** * Convert a HFileLink path to a table relative path. * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd * becomes: /hbase/testtb/4567/cf/abcd * * @param path HFileLink path * @return Relative table path * @throws IOException on unexpected error. */ private static Path getHFileLinkPatternRelativePath(final Path path) { // table=region-hfile Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); if (!m.matches()) { throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!"); } // Convert the HFileLink name into a real table/region/cf/hfile path. TableName tableName = TableName.valueOf(m.group(1), m.group(2)); String regionName = m.group(3); String hfileName = m.group(4); String familyName = path.getParent().getName(); Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName); return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName))); } /** * Get the HFile name of the referenced link * * @param fileName HFileLink file name * @return the name of the referenced HFile */ public static String getReferencedHFileName(final String fileName) { Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); if (!m.matches()) { throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); } return(m.group(4)); } /** * Get the Region name of the referenced link * * @param fileName HFileLink file name * @return the name of the referenced Region */ public static String getReferencedRegionName(final String fileName) { Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); if (!m.matches()) { throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); } return(m.group(3)); } /** * Get the Table name of the referenced link * * @param fileName HFileLink file name * @return the name of the referenced Table */ public static TableName getReferencedTableName(final String fileName) { Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); if (!m.matches()) { throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); } return(TableName.valueOf(m.group(1), m.group(2))); } /** * Create a new HFileLink name * * @param hfileRegionInfo - Linked HFile Region Info * @param hfileName - Linked HFile name * @return file name of the HFile Link */ public static String createHFileLinkName(final RegionInfo hfileRegionInfo, final String hfileName) { return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(), hfileName); } /** * Create a new HFileLink name * * @param tableName - Linked HFile table name * @param regionName - Linked HFile region name * @param hfileName - Linked HFile name * @return file name of the HFile Link */ public static String createHFileLinkName(final TableName tableName, final String regionName, final String hfileName) { String s = String.format("%s=%s-%s", tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName); return s; } /** * Create a new HFileLink * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param hfileRegionInfo - Linked HFile Region Info * @param hfileName - Linked HFile name * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName) throws IOException { return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true); } /** * Create a new HFileLink * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param hfileRegionInfo - Linked HFile Region Info * @param hfileName - Linked HFile name * @param createBackRef - Whether back reference should be created. Defaults to true. * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName, final boolean createBackRef) throws IOException { TableName linkedTable = hfileRegionInfo.getTable(); String linkedRegion = hfileRegionInfo.getEncodedName(); return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef); } /** * Create a new HFileLink * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param linkedTable - Linked Table Name * @param linkedRegion - Linked Region Name * @param hfileName - Linked HFile name * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, final String hfileName) throws IOException { return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true); } /** * Create a new HFileLink * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param linkedTable - Linked Table Name * @param linkedRegion - Linked Region Name * @param hfileName - Linked HFile name * @param createBackRef - Whether back reference should be created. Defaults to true. * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, final String hfileName, final boolean createBackRef) throws IOException { String familyName = dstFamilyPath.getName(); String regionName = dstFamilyPath.getParent().getName(); String tableName = CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent()) .getNameAsString(); return create(conf, fs, dstFamilyPath, familyName, tableName, regionName, linkedTable, linkedRegion, hfileName, createBackRef); } /** * Create a new HFileLink * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param dstTableName - Destination table name * @param dstRegionName - Destination region name * @param linkedTable - Linked Table Name * @param linkedRegion - Linked Region Name * @param hfileName - Linked HFile name * @param createBackRef - Whether back reference should be created. Defaults to true. * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final String familyName, final String dstTableName, final String dstRegionName, final TableName linkedTable, final String linkedRegion, final String hfileName, final boolean createBackRef) throws IOException { String name = createHFileLinkName(linkedTable, linkedRegion, hfileName); String refName = createBackReferenceName(dstTableName, dstRegionName); // Make sure the destination directory exists fs.mkdirs(dstFamilyPath); // Make sure the FileLink reference directory exists Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, linkedTable, linkedRegion, familyName); Path backRefPath = null; if (createBackRef) { Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName); fs.mkdirs(backRefssDir); // Create the reference for the link backRefPath = new Path(backRefssDir, refName); fs.createNewFile(backRefPath); } try { // Create the link return fs.createNewFile(new Path(dstFamilyPath, name)); } catch (IOException e) { LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e); // Revert the reference if the link creation failed if (createBackRef) { fs.delete(backRefPath, false); } throw e; } } /** * Create a new HFileLink starting from a hfileLink name * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param hfileLinkName - HFileLink name (it contains hfile-region-table) * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final String hfileLinkName) throws IOException { return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true); } /** * Create a new HFileLink starting from a hfileLink name * *

It also adds a back-reference to the hfile back-reference directory * to simplify the reference-count and the cleaning process. * * @param conf {@link Configuration} to read for the archive directory name * @param fs {@link FileSystem} on which to write the HFileLink * @param dstFamilyPath - Destination path (table/region/cf/) * @param hfileLinkName - HFileLink name (it contains hfile-region-table) * @param createBackRef - Whether back reference should be created. Defaults to true. * @return true if the file is created, otherwise the file exists. * @throws IOException on file or parent directory creation failure */ public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef) throws IOException { Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName); if (!m.matches()) { throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!"); } return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), m.group(3), m.group(4), createBackRef); } /** * Create the back reference name */ //package-private for testing static String createBackReferenceName(final String tableNameStr, final String regionName) { return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '='); } /** * Get the full path of the HFile referenced by the back reference * * @param rootDir root hbase directory * @param linkRefPath Link Back Reference path * @return full path of the referenced hfile */ public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { Pair p = parseBackReferenceName(linkRefPath.getName()); TableName linkTableName = p.getFirst(); String linkRegionName = p.getSecond(); String hfileName = getBackReferenceFileName(linkRefPath.getParent()); Path familyPath = linkRefPath.getParent().getParent(); Path regionPath = familyPath.getParent(); Path tablePath = regionPath.getParent(); String linkName = createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName); Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName); Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); return new Path(new Path(regionDir, familyPath.getName()), linkName); } public static Pair parseBackReferenceName(String name) { int separatorIndex = name.indexOf('.'); String linkRegionName = name.substring(0, separatorIndex); String tableSubstr = name.substring(separatorIndex + 1) .replace('=', TableName.NAMESPACE_DELIM); TableName linkTableName = TableName.valueOf(tableSubstr); return new Pair<>(linkTableName, linkRegionName); } /** * Get the full path of the HFile referenced by the back reference * * @param conf {@link Configuration} to read for the archive directory name * @param linkRefPath Link Back Reference path * @return full path of the referenced hfile * @throws IOException on unexpected error. */ public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) throws IOException { return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy