// org.apache.hadoop.fs.GlobExpander (Maven / Gradle / Ivy)
// Go to download
// Show more of this group. Show more artifacts with this name.
// Show all versions of hadoop-apache. Show documentation.
// Shaded version of Apache Hadoop for Presto.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Private
@InterfaceStability.Unstable
/**
 * Rewrites glob file patterns so that no curly bracket group contains a
 * slash character ("/"). Groups that do contain a slash are expanded into
 * one pattern per alternative; groups without a slash are left untouched.
 */
public class GlobExpander {

  /**
   * A file pattern paired with the index at which the search for the next
   * curly bracket group to expand starts.
   */
  static class StringWithOffset {
    String string;
    int offset;

    public StringWithOffset(String string, int offset) {
      this.string = string;
      this.offset = offset;
    }
  }

  /**
   * Expand globs in the given {@code filePattern} into a collection of
   * file patterns so that in the expanded set no file pattern has a slash
   * character ("/") in a curly bracket pair.
   * <p>
   * Some examples of how the filePattern is expanded:
   * <pre>
   * filePattern         - Expanded file pattern
   * {a/b}               - a/b
   * /}{a/b}             - /}a/b
   * p{a/b,c/d}s         - pa/bs, pc/ds
   * {a/b,c/d,{e,f}}     - a/b, c/d, {e,f}
   * {a/b,c/d}{e,f}      - a/b{e,f}, c/d{e,f}
   * {a,b}/{b,{c/d,e/f}} - {a,b}/b, {a,b}/c/d, {a,b}/e/f
   * {a,b}/{c/\d}        - {a,b}/c/d
   * </pre>
   *
   * @param filePattern the glob pattern to expand
   * @return expanded file patterns, in left-to-right alternative order
   * @throws IOException if the pattern ends with a backslash that has no
   *         character following it
   */
  public static List<String> expand(String filePattern) throws IOException {
    List<String> fullyExpanded = new ArrayList<String>();
    List<StringWithOffset> toExpand = new ArrayList<StringWithOffset>();
    toExpand.add(new StringWithOffset(filePattern, 0));
    while (!toExpand.isEmpty()) {
      StringWithOffset path = toExpand.remove(0);
      List<StringWithOffset> expanded = expandLeftmost(path);
      if (expanded == null) {
        // Nothing left to expand in this pattern.
        fullyExpanded.add(path.string);
      } else {
        // Put the partial expansions back at the front so the output keeps
        // the order of the alternatives in the original pattern.
        toExpand.addAll(0, expanded);
      }
    }
    return fullyExpanded;
  }

  /**
   * Expand the leftmost outer curly bracket pair containing a
   * slash character ("/") in {@code filePattern}.
   * <p>
   * Text before the group and text after its closing bracket are copied
   * verbatim. Inside the group, a backslash is dropped and the character it
   * escapes is kept literally. Nested groups are copied into the alternative
   * unchanged and are handled by a later pass.
   *
   * @param filePatternWithOffset the pattern and the index to start
   *        searching from
   * @return one pattern per alternative of the expanded group, each with its
   *         offset set to the length of the untouched prefix, or
   *         {@code null} if there is no group left to expand
   * @throws IOException if a backslash is the last character scanned
   */
  private static List<StringWithOffset> expandLeftmost(StringWithOffset
      filePatternWithOffset) throws IOException {
    String filePattern = filePatternWithOffset.string;
    int leftmost = leftmostOuterCurlyContainingSlash(filePattern,
        filePatternWithOffset.offset);
    if (leftmost == -1) {
      return null;
    }
    int curlyOpen = 0;
    StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost));
    StringBuilder suffix = new StringBuilder();
    List<String> alts = new ArrayList<String>();
    StringBuilder alt = new StringBuilder();
    // "cur" is the buffer currently being filled: the alternative while
    // inside the group, then the suffix once the group has been closed.
    StringBuilder cur = prefix;
    for (int i = leftmost; i < filePattern.length(); i++) {
      char c = filePattern.charAt(i);
      if (cur == suffix) {
        // Past the expanded group: copy the remainder untouched.
        cur.append(c);
      } else if (c == '\\') {
        i++;
        if (i >= filePattern.length()) {
          throw missingEscapedCharacter(filePattern, i);
        }
        // Keep the escaped character, drop the backslash.
        c = filePattern.charAt(i);
        cur.append(c);
      } else if (c == '{') {
        if (curlyOpen++ == 0) {
          // Opening bracket of the group being expanded.
          alt.setLength(0);
          cur = alt;
        } else {
          // Nested group: keep it as part of the current alternative.
          cur.append(c);
        }
      } else if (c == '}' && curlyOpen > 0) {
        if (--curlyOpen == 0) {
          // Closing bracket of the group: finish the last alternative.
          alts.add(alt.toString());
          alt.setLength(0);
          cur = suffix;
        } else {
          cur.append(c);
        }
      } else if (c == ',') {
        if (curlyOpen == 1) {
          // Top-level comma separates alternatives.
          alts.add(alt.toString());
          alt.setLength(0);
        } else {
          cur.append(c);
        }
      } else {
        cur.append(c);
      }
    }
    List<StringWithOffset> exp = new ArrayList<StringWithOffset>();
    for (String string : alts) {
      exp.add(new StringWithOffset(prefix + string + suffix, prefix.length()));
    }
    return exp;
  }

  /**
   * Finds the index of the leftmost opening curly bracket containing a
   * slash character ("/") in {@code filePattern}.
   *
   * @param filePattern the pattern to scan
   * @param offset the index at which scanning starts
   * @return the index of the leftmost opening curly bracket containing a
   *         slash character ("/"), or -1 if there is no such bracket
   * @throws IOException if a backslash is the last character scanned
   */
  private static int leftmostOuterCurlyContainingSlash(String filePattern,
      int offset) throws IOException {
    int curlyOpen = 0;
    int leftmost = -1;
    boolean seenSlash = false;
    for (int i = offset; i < filePattern.length(); i++) {
      char c = filePattern.charAt(i);
      if (c == '\\') {
        // Skip the escaped character; it can neither open nor close a group.
        i++;
        if (i >= filePattern.length()) {
          throw missingEscapedCharacter(filePattern, i);
        }
      } else if (c == '{') {
        if (curlyOpen++ == 0) {
          leftmost = i;
        }
      } else if (c == '}' && curlyOpen > 0) {
        if (--curlyOpen == 0 && leftmost != -1 && seenSlash) {
          return leftmost;
        }
      } else if (c == '/' && curlyOpen > 0) {
        seenSlash = true;
      }
    }
    return -1;
  }

  /**
   * Builds the exception reported when a backslash has no character after it.
   *
   * @param filePattern the offending pattern
   * @param index the index at which the escaped character was expected
   * @return the exception to throw
   */
  private static IOException missingEscapedCharacter(String filePattern,
      int index) {
    return new IOException("Illegal file pattern: "
        + "An escaped character does not present for glob "
        + filePattern + " at " + index);
  }
}