All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.fs.viewfs.RegexMountPoint Maven / Gradle / Ivy

There is a newer version: 3.4.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs.viewfs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.fs.viewfs.InodeTree.SlashPath;

/**
 * Regex mount point is built to implement regex based mount point.
 *
 * <p>A source path is matched against a source regex; capture groups of the
 * match (by name or by index) are substituted for {@code $var} /
 * {@code ${var}} placeholders in the destination path.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
class RegexMountPoint {
  private static final Logger LOGGER =
      LoggerFactory.getLogger(RegexMountPoint.class.getName());

  /** Matches a placeholder key that is a pure decimal group index. */
  private static final Pattern GROUP_INDEX_PATTERN = Pattern.compile("\\d+");

  // NOTE(review): InodeTree and InodeTree.ResolveResult are used as raw types
  // here, exactly as they appear in this copy of the file - confirm against
  // the declaration of InodeTree whether type arguments belong here.
  private final InodeTree inodeTree;
  private final String srcPathRegex;
  // Compiled from srcPathRegex by initialize().
  private Pattern srcPattern;
  private final String dstPath;
  private final String interceptorSettingsString;
  private final List<RegexMountPointInterceptor> interceptorList;

  public static final String SETTING_SRCREGEX_SEP = "#.";
  public static final char INTERCEPTOR_SEP = ';';
  public static final char INTERCEPTOR_INTERNAL_SEP = ':';
  // ${var},$var
  public static final Pattern VAR_PATTERN_IN_DEST =
      Pattern.compile("\\$((\\{\\w+\\})|(\\w+))");

  // Same var might have different representations.
  // e.g.
  // key => $key or key => ${key}
  // Populated by initialize().
  private Map<String, Set<String>> varInDestPathMap;

  /**
   * @return map from variable name to every textual representation of that
   *         variable found in the destination path; null until
   *         {@link #initialize()} has run.
   */
  public Map<String, Set<String>> getVarInDestPathMap() {
    return varInDestPathMap;
  }

  RegexMountPoint(InodeTree inodeTree, String sourcePathRegex,
      String destPath, String settingsStr) {
    this.inodeTree = inodeTree;
    this.srcPathRegex = sourcePathRegex;
    this.dstPath = destPath;
    this.interceptorSettingsString = settingsStr;
    this.interceptorList = new ArrayList<>();
  }

  /**
   * Initialize regex mount point: compile the source regex, collect the
   * variables used in the destination path and create the interceptors.
   *
   * @throws IOException if the source regex is not a valid pattern or the
   *                     interceptor settings cannot be turned into
   *                     interceptors.
   */
  public void initialize() throws IOException {
    try {
      srcPattern = Pattern.compile(srcPathRegex);
    } catch (PatternSyntaxException ex) {
      throw new IOException(
          "Failed to initialized mount point due to bad src path regex:"
              + srcPathRegex + ", dstPath:" + dstPath, ex);
    }
    varInDestPathMap = getVarListInString(dstPath);
    initializeInterceptors();
  }

  /**
   * Create and initialize one interceptor per {@link #INTERCEPTOR_SEP}
   * separated entry of the settings string. Does nothing when the settings
   * string is null or empty.
   *
   * @throws IOException if the factory returns null for an entry.
   */
  private void initializeInterceptors() throws IOException {
    if (interceptorSettingsString == null
        || interceptorSettingsString.isEmpty()) {
      return;
    }
    String[] interceptorStrArray =
        StringUtils.split(interceptorSettingsString, INTERCEPTOR_SEP);
    for (String interceptorStr : interceptorStrArray) {
      RegexMountPointInterceptor interceptor =
          RegexMountPointInterceptorFactory.create(interceptorStr);
      if (interceptor == null) {
        throw new IOException(
            "Illegal settings String " + interceptorSettingsString);
      }
      interceptor.initialize();
      interceptorList.add(interceptor);
    }
  }

  /**
   * Get $var1 and ${var2} style variables in string.
   *
   * @param input - the string to be processed.
   * @return map from the bare variable name (no {@code $}, no braces) to the
   *         set of representations of it that occur in {@code input}.
   */
  public static Map<String, Set<String>> getVarListInString(String input) {
    Map<String, Set<String>> varMap = new HashMap<>();
    Matcher matcher = VAR_PATTERN_IN_DEST.matcher(input);
    while (matcher.find()) {
      // $var or ${var}
      String varName = matcher.group(0);
      // var or {var}
      String strippedVarName = matcher.group(1);
      if (strippedVarName.startsWith("{")) {
        // {varName} => varName
        strippedVarName =
            strippedVarName.substring(1, strippedVarName.length() - 1);
      }
      varMap.computeIfAbsent(strippedVarName, k -> new HashSet<>())
          .add(varName);
    }
    return varMap;
  }

  public String getSrcPathRegex() {
    return srcPathRegex;
  }

  public Pattern getSrcPattern() {
    return srcPattern;
  }

  public String getDstPath() {
    return dstPath;
  }

  public static Pattern getVarPatternInDest() {
    return VAR_PATTERN_IN_DEST;
  }

  /**
   * Get resolved path from regex mount points.
   *  E.g. link: ^/user/(?&lt;user&gt;\\w+) => s3://$user.apache.com/_${user}
   *  srcPath: is /user/hadoop/dir1
   *  resolveLastComponent: true
   *  then return value is s3://hadoop.apache.com/_hadoop
   * @param srcPath - the src path to resolve
   * @param resolveLastComponent - whether resolve the path after last `/`
   * @return mapped path of the mount point, or null when the source regex
   *         does not match (including the case where there is nothing to
   *         match because srcPath has no `/` and the last component is
   *         excluded).
   */
  public InodeTree.ResolveResult resolve(final String srcPath,
      final boolean resolveLastComponent) {
    String pathStrToResolve = getPathToResolve(srcPath, resolveLastComponent);
    if (pathStrToResolve == null) {
      // No parent portion to resolve; treat it as "no match" rather than
      // passing null to the interceptors and the matcher.
      return null;
    }
    for (RegexMountPointInterceptor interceptor : interceptorList) {
      pathStrToResolve = interceptor.interceptSource(pathStrToResolve);
    }
    LOGGER.debug("Path to resolve:{}, srcPattern:{}",
        pathStrToResolve, getSrcPathRegex());
    Matcher srcMatcher = getSrcPattern().matcher(pathStrToResolve);
    String parsedDestPath = getDstPath();
    int mappedCount = 0;
    String resolvedPathStr = "";
    while (srcMatcher.find()) {
      // The resolved prefix extends to the end of the latest match.
      resolvedPathStr = pathStrToResolve.substring(0, srcMatcher.end());
      Map<String, Set<String>> varMap = getVarInDestPathMap();
      for (Map.Entry<String, Set<String>> entry : varMap.entrySet()) {
        String regexGroupNameOrIndexStr = entry.getKey();
        Set<String> groupRepresentationStrSetInDest = entry.getValue();
        parsedDestPath = replaceRegexCaptureGroupInPath(
            parsedDestPath, srcMatcher,
            regexGroupNameOrIndexStr, groupRepresentationStrSetInDest);
      }
      ++mappedCount;
    }
    if (0 == mappedCount) {
      return null;
    }
    Path remainingPath = getRemainingPathStr(srcPath, resolvedPathStr);
    for (RegexMountPointInterceptor interceptor : interceptorList) {
      parsedDestPath = interceptor.interceptResolvedDestPathStr(parsedDestPath);
      remainingPath = interceptor.interceptRemainingPath(remainingPath);
    }
    InodeTree.ResolveResult resolveResult = inodeTree
        .buildResolveResultForRegexMountPoint(InodeTree.ResultKind.EXTERNAL_DIR,
            resolvedPathStr, parsedDestPath, remainingPath);
    return resolveResult;
  }

  /**
   * Build the part of srcPath that follows the resolved prefix, always
   * starting with a `/`.
   */
  private Path getRemainingPathStr(
      String srcPath,
      String resolvedPathStr) {
    String remainingPathStr = srcPath.substring(resolvedPathStr.length());
    if (!remainingPathStr.startsWith("/")) {
      remainingPathStr = "/" + remainingPathStr;
    }
    return new Path(remainingPathStr);
  }

  /**
   * Select the portion of srcPath that should be matched.
   *
   * @return srcPath itself when the last component is to be resolved,
   *         otherwise everything before the last slash; null when the last
   *         component is excluded and srcPath contains no slash.
   */
  private String getPathToResolve(
      String srcPath, boolean resolveLastComponent) {
    if (resolveLastComponent) {
      return srcPath;
    }
    int lastSlashIndex = srcPath.lastIndexOf(SlashPath.toString());
    if (lastSlashIndex == -1) {
      return null;
    }
    return srcPath.substring(0, lastSlashIndex);
  }

  /**
   * Use capture group named regexGroupNameOrIndexStr in matcher to replace
   * parsedDestPath.
   * E.g. link: ^/user/(?&lt;user&gt;\\w+) => s3://$user.apache.com/_${user}
   * srcMatcher is from /user/hadoop.
   * Then the params will be like following.
   * parsedDestPath: s3://$user.apache.com/_${user},
   * regexGroupNameOrIndexStr: user
   * groupRepresentationStrSetInDest: {user:$user; user:${user}}
   * return value will be s3://hadoop.apache.com/_hadoop
   * @param parsedDestPath - destination path, possibly already partially
   *                       substituted
   * @param srcMatcher - matcher positioned on a successful match
   * @param regexGroupNameOrIndexStr - capture group name or index
   * @param groupRepresentationStrSetInDest - the placeholder spellings of
   *                                        that group in the destination
   * @return return parsedDestPath while ${var},$var replaced or
   * parsedDestPath nothing found.
   */
  private String replaceRegexCaptureGroupInPath(
      String parsedDestPath,
      Matcher srcMatcher,
      String regexGroupNameOrIndexStr,
      Set<String> groupRepresentationStrSetInDest) {
    String groupValue = getRegexGroupValueFromMather(
        srcMatcher, regexGroupNameOrIndexStr);
    if (groupValue == null) {
      return parsedDestPath;
    }
    // NOTE(review): this is a literal replace of every occurrence, so a
    // placeholder such as $user also rewrites the prefix of a longer
    // placeholder such as $username.
    for (String varName : groupRepresentationStrSetInDest) {
      parsedDestPath = parsedDestPath.replace(varName, groupValue);
      LOGGER.debug("parsedDestPath value is:{}", parsedDestPath);
    }
    return parsedDestPath;
  }

  /**
   * Get matched capture group value from regex matched string. E.g.
   * Regex: ^/user/(?&lt;userName&gt;\\w+), regexGroupNameOrIndexStr: userName
   * then /user/hadoop should return hadoop while call
   * getRegexGroupValueFromMather(matcher, userName)
   * or getRegexGroupValueFromMather(matcher, 1)
   *
   * @param srcMatcher - the matcher to be used
   * @param regexGroupNameOrIndexStr - the regex group name or index
   * @return - Null if no matched group named regexGroupNameOrIndexStr found.
   */
  private String getRegexGroupValueFromMather(
      Matcher srcMatcher, String regexGroupNameOrIndexStr) {
    if (GROUP_INDEX_PATTERN.matcher(regexGroupNameOrIndexStr).matches()) {
      // group index
      final int groupIndex;
      try {
        groupIndex = Integer.parseUnsignedInt(regexGroupNameOrIndexStr);
      } catch (NumberFormatException ex) {
        // Digit string too large to be any valid group index.
        return null;
      }
      if (groupIndex >= 0 && groupIndex <= srcMatcher.groupCount()) {
        return srcMatcher.group(groupIndex);
      }
      return null;
    }
    // named group in regex
    try {
      return srcMatcher.group(regexGroupNameOrIndexStr);
    } catch (IllegalArgumentException ex) {
      // Matcher.group(String) throws when the pattern has no group with
      // this name; report that as "not found" per the documented contract.
      return null;
    }
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy