// org.apache.hadoop.fs.viewfs.RegexMountPoint (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.viewfs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.fs.viewfs.InodeTree.SlashPath;
/**
* Regex mount point is build to implement regex based mount point.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
class RegexMountPoint {
private static final Logger LOGGER =
LoggerFactory.getLogger(RegexMountPoint.class.getName());
private InodeTree inodeTree;
private String srcPathRegex;
private Pattern srcPattern;
private String dstPath;
private String interceptorSettingsString;
private List interceptorList;
public static final String SETTING_SRCREGEX_SEP = "#.";
public static final char INTERCEPTOR_SEP = ';';
public static final char INTERCEPTOR_INTERNAL_SEP = ':';
// ${var},$var
public static final Pattern VAR_PATTERN_IN_DEST =
Pattern.compile("\\$((\\{\\w+\\})|(\\w+))");
// Same var might have different representations.
// e.g.
// key => $key or key = > ${key}
private Map> varInDestPathMap;
public Map> getVarInDestPathMap() {
return varInDestPathMap;
}
RegexMountPoint(InodeTree inodeTree, String sourcePathRegex,
String destPath, String settingsStr) {
this.inodeTree = inodeTree;
this.srcPathRegex = sourcePathRegex;
this.dstPath = destPath;
this.interceptorSettingsString = settingsStr;
this.interceptorList = new ArrayList<>();
}
/**
* Initialize regex mount point.
*
* @throws IOException
*/
public void initialize() throws IOException {
try {
srcPattern = Pattern.compile(srcPathRegex);
} catch (PatternSyntaxException ex) {
throw new IOException(
"Failed to initialized mount point due to bad src path regex:"
+ srcPathRegex + ", dstPath:" + dstPath, ex);
}
varInDestPathMap = getVarListInString(dstPath);
initializeInterceptors();
}
private void initializeInterceptors() throws IOException {
if (interceptorSettingsString == null
|| interceptorSettingsString.isEmpty()) {
return;
}
String[] interceptorStrArray =
StringUtils.split(interceptorSettingsString, INTERCEPTOR_SEP);
for (String interceptorStr : interceptorStrArray) {
RegexMountPointInterceptor interceptor =
RegexMountPointInterceptorFactory.create(interceptorStr);
if (interceptor == null) {
throw new IOException(
"Illegal settings String " + interceptorSettingsString);
}
interceptor.initialize();
interceptorList.add(interceptor);
}
}
/**
* Get $var1 and $var2 style variables in string.
*
* @param input - the string to be process.
* @return
*/
public static Map> getVarListInString(String input) {
Map> varMap = new HashMap<>();
Matcher matcher = VAR_PATTERN_IN_DEST.matcher(input);
while (matcher.find()) {
// $var or ${var}
String varName = matcher.group(0);
// var or {var}
String strippedVarName = matcher.group(1);
if (strippedVarName.startsWith("{")) {
// {varName} = > varName
strippedVarName =
strippedVarName.substring(1, strippedVarName.length() - 1);
}
varMap.putIfAbsent(strippedVarName, new HashSet<>());
varMap.get(strippedVarName).add(varName);
}
return varMap;
}
public String getSrcPathRegex() {
return srcPathRegex;
}
public Pattern getSrcPattern() {
return srcPattern;
}
public String getDstPath() {
return dstPath;
}
public static Pattern getVarPatternInDest() {
return VAR_PATTERN_IN_DEST;
}
/**
* Get resolved path from regex mount points.
* E.g. link: ^/user/(?\\w+) => s3://$user.apache.com/_${user}
* srcPath: is /user/hadoop/dir1
* resolveLastComponent: true
* then return value is s3://hadoop.apache.com/_hadoop
* @param srcPath - the src path to resolve
* @param resolveLastComponent - whether resolve the path after last `/`
* @return mapped path of the mount point.
*/
public InodeTree.ResolveResult resolve(final String srcPath,
final boolean resolveLastComponent) {
String pathStrToResolve = getPathToResolve(srcPath, resolveLastComponent);
for (RegexMountPointInterceptor interceptor : interceptorList) {
pathStrToResolve = interceptor.interceptSource(pathStrToResolve);
}
LOGGER.debug("Path to resolve:" + pathStrToResolve + ", srcPattern:"
+ getSrcPathRegex());
Matcher srcMatcher = getSrcPattern().matcher(pathStrToResolve);
String parsedDestPath = getDstPath();
int mappedCount = 0;
String resolvedPathStr = "";
while (srcMatcher.find()) {
resolvedPathStr = pathStrToResolve.substring(0, srcMatcher.end());
Map> varMap = getVarInDestPathMap();
for (Map.Entry> entry : varMap.entrySet()) {
String regexGroupNameOrIndexStr = entry.getKey();
Set groupRepresentationStrSetInDest = entry.getValue();
parsedDestPath = replaceRegexCaptureGroupInPath(
parsedDestPath, srcMatcher,
regexGroupNameOrIndexStr, groupRepresentationStrSetInDest);
}
++mappedCount;
}
if (0 == mappedCount) {
return null;
}
Path remainingPath = getRemainingPathStr(srcPath, resolvedPathStr);
for (RegexMountPointInterceptor interceptor : interceptorList) {
parsedDestPath = interceptor.interceptResolvedDestPathStr(parsedDestPath);
remainingPath =
interceptor.interceptRemainingPath(remainingPath);
}
InodeTree.ResolveResult resolveResult = inodeTree
.buildResolveResultForRegexMountPoint(InodeTree.ResultKind.EXTERNAL_DIR,
resolvedPathStr, parsedDestPath, remainingPath);
return resolveResult;
}
private Path getRemainingPathStr(
String srcPath,
String resolvedPathStr) {
String remainingPathStr = srcPath.substring(resolvedPathStr.length());
if (!remainingPathStr.startsWith("/")) {
remainingPathStr = "/" + remainingPathStr;
}
return new Path(remainingPathStr);
}
private String getPathToResolve(
String srcPath, boolean resolveLastComponent) {
if (resolveLastComponent) {
return srcPath;
}
int lastSlashIndex = srcPath.lastIndexOf(SlashPath.toString());
if (lastSlashIndex == -1) {
return null;
}
return srcPath.substring(0, lastSlashIndex);
}
/**
* Use capture group named regexGroupNameOrIndexStr in mather to replace
* parsedDestPath.
* E.g. link: ^/user/(?\\w+) => s3://$user.apache.com/_${user}
* srcMatcher is from /user/hadoop.
* Then the params will be like following.
* parsedDestPath: s3://$user.apache.com/_${user},
* regexGroupNameOrIndexStr: user
* groupRepresentationStrSetInDest: {user:$user; user:${user}}
* return value will be s3://hadoop.apache.com/_hadoop
* @param parsedDestPath
* @param srcMatcher
* @param regexGroupNameOrIndexStr
* @param groupRepresentationStrSetInDest
* @return return parsedDestPath while ${var},$var replaced or
* parsedDestPath nothing found.
*/
private String replaceRegexCaptureGroupInPath(
String parsedDestPath,
Matcher srcMatcher,
String regexGroupNameOrIndexStr,
Set groupRepresentationStrSetInDest) {
String groupValue = getRegexGroupValueFromMather(
srcMatcher, regexGroupNameOrIndexStr);
if (groupValue == null) {
return parsedDestPath;
}
for (String varName : groupRepresentationStrSetInDest) {
parsedDestPath = parsedDestPath.replace(varName, groupValue);
LOGGER.debug("parsedDestPath value is:" + parsedDestPath);
}
return parsedDestPath;
}
/**
* Get matched capture group value from regex matched string. E.g.
* Regex: ^/user/(?\\w+), regexGroupNameOrIndexStr: userName
* then /user/hadoop should return hadoop while call
* getRegexGroupValueFromMather(matcher, usersName)
* or getRegexGroupValueFromMather(matcher, 1)
*
* @param srcMatcher - the matcher to be use
* @param regexGroupNameOrIndexStr - the regex group name or index
* @return - Null if no matched group named regexGroupNameOrIndexStr found.
*/
private String getRegexGroupValueFromMather(
Matcher srcMatcher, String regexGroupNameOrIndexStr) {
if (regexGroupNameOrIndexStr.matches("\\d+")) {
// group index
int groupIndex = Integer.parseUnsignedInt(regexGroupNameOrIndexStr);
if (groupIndex >= 0 && groupIndex <= srcMatcher.groupCount()) {
return srcMatcher.group(groupIndex);
}
} else {
// named group in regex
return srcMatcher.group(regexGroupNameOrIndexStr);
}
return null;
}
}