All downloads are free. The search and download features use the official Maven repository.

org.apache.hadoop.fs.GlobExpander Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

@InterfaceAudience.Private
@InterfaceStability.Unstable
public class GlobExpander {

  /**
   * A file pattern together with the index at which the search for the next
   * curly bracket group to expand starts.
   */
  static class StringWithOffset {
    String string;
    int offset;

    public StringWithOffset(String string, int offset) {
      this.string = string;
      this.offset = offset;
    }
  }

  /**
   * Expand globs in the given <code>filePattern</code> into a collection of
   * file patterns so that in the expanded set no file pattern has a slash
   * character ("/") in a curly bracket pair.
   * <p>
   * Some examples of how the filePattern is expanded:<br>
   * <pre>
   * <b>
   * filePattern         - Expanded file pattern </b>
   * {a/b}               - a/b
   * /}{a/b}             - /}a/b
   * p{a/b,c/d}s         - pa/bs, pc/ds
   * {a/b,c/d,{e,f}}     - a/b, c/d, {e,f}
   * {a/b,c/d}{e,f}      - a/b{e,f}, c/d{e,f}
   * {a,b}/{b,{c/d,e/f}} - {a,b}/b, {a,b}/c/d, {a,b}/e/f
   * {a,b}/{c/\d}        - {a,b}/c/d
   * </pre>
   *
   * @param filePattern the glob pattern to expand
   * @return expanded file patterns, in the order their alternatives appear
   *         in <code>filePattern</code>
   * @throws IOException if a backslash is not followed by a character to
   *         escape
   */
  public static List<String> expand(String filePattern) throws IOException {
    List<String> fullyExpanded = new ArrayList<String>();
    List<StringWithOffset> toExpand = new ArrayList<StringWithOffset>();
    toExpand.add(new StringWithOffset(filePattern, 0));
    while (!toExpand.isEmpty()) {
      StringWithOffset path = toExpand.remove(0);
      List<StringWithOffset> expanded = expandLeftmost(path);
      if (expanded == null) {
        // Nothing left to expand in this pattern.
        fullyExpanded.add(path.string);
      } else {
        // Re-queue at the front so the output keeps left-to-right order.
        toExpand.addAll(0, expanded);
      }
    }
    return fullyExpanded;
  }

  /**
   * Expand the leftmost outer curly bracket pair containing a
   * slash character ("/") in <code>filePattern</code>.
   *
   * @param filePatternWithOffset the pattern and the index to search from
   * @return expanded file patterns, or <code>null</code> if there is no
   *         curly bracket pair containing a slash at or after the offset
   * @throws IOException if a backslash is not followed by a character to
   *         escape
   */
  private static List<StringWithOffset> expandLeftmost(
      StringWithOffset filePatternWithOffset) throws IOException {

    String filePattern = filePatternWithOffset.string;
    int leftmost = leftmostOuterCurlyContainingSlash(filePattern,
        filePatternWithOffset.offset);
    if (leftmost == -1) {
      return null;
    }
    int curlyOpen = 0;
    StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost));
    StringBuilder suffix = new StringBuilder();
    List<String> alts = new ArrayList<String>();
    StringBuilder alt = new StringBuilder();
    // Buffer currently receiving characters: prefix, then alt, then suffix.
    StringBuilder cur = prefix;
    for (int i = leftmost; i < filePattern.length(); i++) {
      char c = filePattern.charAt(i);
      if (cur == suffix) {
        // Everything after the expanded group is copied verbatim.
        cur.append(c);
      } else if (c == '\\') {
        // Inside the group the escaped character is kept, the backslash is not.
        i++;
        if (i >= filePattern.length()) {
          throw missingEscapedChar(filePattern, i);
        }
        c = filePattern.charAt(i);
        cur.append(c);
      } else if (c == '{') {
        if (curlyOpen++ == 0) {
          // Outer opening bracket: start collecting the first alternative.
          alt.setLength(0);
          cur = alt;
        } else {
          // Nested brackets stay part of the current alternative.
          cur.append(c);
        }
      } else if (c == '}' && curlyOpen > 0) {
        if (--curlyOpen == 0) {
          // Outer closing bracket: finish the last alternative.
          alts.add(alt.toString());
          alt.setLength(0);
          cur = suffix;
        } else {
          cur.append(c);
        }
      } else if (c == ',') {
        if (curlyOpen == 1) {
          // Only top-level commas separate alternatives.
          alts.add(alt.toString());
          alt.setLength(0);
        } else {
          cur.append(c);
        }
      } else {
        cur.append(c);
      }
    }
    List<StringWithOffset> exp = new ArrayList<StringWithOffset>();
    for (String string : alts) {
      // The next search resumes where the substituted alternative begins.
      exp.add(new StringWithOffset(prefix + string + suffix, prefix.length()));
    }
    return exp;
  }

  /**
   * Finds the index of the leftmost opening curly bracket containing a
   * slash character ("/") in <code>filePattern</code>.
   *
   * @param filePattern the pattern to scan
   * @param offset the index at which scanning starts
   * @return the index of the leftmost opening curly bracket containing a
   *         slash character ("/"), or -1 if there is no such bracket
   * @throws IOException if a backslash is not followed by a character to
   *         escape
   */
  private static int leftmostOuterCurlyContainingSlash(String filePattern,
      int offset) throws IOException {
    int curlyOpen = 0;
    int leftmost = -1;
    boolean seenSlash = false;
    for (int i = offset; i < filePattern.length(); i++) {
      char c = filePattern.charAt(i);
      if (c == '\\') {
        // Skip the escaped character; it must exist.
        i++;
        if (i >= filePattern.length()) {
          throw missingEscapedChar(filePattern, i);
        }
      } else if (c == '{') {
        if (curlyOpen++ == 0) {
          leftmost = i;
        }
      } else if (c == '}' && curlyOpen > 0) {
        if (--curlyOpen == 0 && leftmost != -1 && seenSlash) {
          return leftmost;
        }
      } else if (c == '/' && curlyOpen > 0) {
        seenSlash = true;
      }
    }
    return -1;
  }

  /**
   * Builds the exception reported when a backslash has no character after it.
   *
   * @param filePattern the offending pattern
   * @param index the index at which the escaped character was expected
   * @return the exception to throw
   */
  private static IOException missingEscapedChar(String filePattern, int index) {
    return new IOException("Illegal file pattern: "
        + "An escaped character does not present for glob "
        + filePattern + " at " + index);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy