de.schlichtherle.io.util.Paths Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of truezip Show documentation
TrueZIP is a Java based Virtual File System (VFS) to enable transparent, multi-threaded read/write access to archive files (ZIP, TAR etc.) as if they were directories. Archive files may be arbitrarily nested and the nesting level is only limited by heap and file system size.
The newest version!
/*
 * Copyright (C) 2006-2010 Schlichtherle IT Services
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.schlichtherle.io.util;

import java.io.File;

/**
 * Static utility methods for path names.
 * In order to enhance interoperability, the methods in this class always
 * detect Windows drives ("[a-zA-Z]:") and UNCs ("\\") in
 * a path name, even on non-Windows platforms.
 *
 * @since TrueZIP 5.1.4
 * @author Christian Schlichtherle
 * @version $Id$
 */
public class Paths {

    /**
     * Equivalent to {@link #normalize(String, char)
     * normalize(path, File.separatorChar)}.
     */
    public static String normalize(final String path) {
        return normalize(path, File.separatorChar);
    }

    /**
     * Removes all redundant separators, dot directories ({@code "."}) and
     * dot-dot directories ({@code ".."}) from the path name and returns
     * the result.
     * If present, a single trailing separator character is retained.
     * An empty path results in {@code "."}.
     * <p>
     * A path may be prefixed by a drive letter followed by a colon, by two
     * leading separators (UNC notation) or by a single leading separator
     * (absolute path). The prefix is detected identically on all platforms
     * and is copied to the result unchanged.
     *
     * @param  path the non-{@code null} path name to normalize.
     * @param  separatorChar the separator character.
     * @return {@code path} (the very same instance) if it was already in
     *         normalized form.
     *         Otherwise, a new String with the normalized form of the given
     *         path.
     * @throws NullPointerException if path is {@code null}.
     */
    public static String normalize(
            final String path,
            final char separatorChar) {
        final int prefixLen = prefixLength(path, separatorChar);
        final int pathLen = path.length();
        final StringBuffer buffer = new StringBuffer(pathLen);

        // Normalize everything after the prefix, then put the prefix back.
        normalize(path.substring(prefixLen, pathLen), separatorChar,
                0, pathLen - prefixLen, buffer);
        buffer.insert(0, path.substring(0, prefixLen));

        // Nothing left after the prefix: denote the current directory,
        // unless the prefix itself already ends with a separator (root).
        if (buffer.length() == prefixLen
                && (prefixLen <= 0
                    || buffer.charAt(prefixLen - 1) != separatorChar)) {
            buffer.append('.');
        }

        // Retain a single trailing separator if the input had one.
        if (pathLen > 0 && path.charAt(pathLen - 1) == separatorChar
                && buffer.charAt(buffer.length() - 1) != separatorChar) {
            buffer.append(separatorChar);
        }

        // Compare by content rather than by length only, so that the
        // returned value is correct even when assertions are disabled.
        final String normalized = buffer.toString();
        final String result = normalized.equals(path) ? path : normalized;
        assert !result.equals(path) || result == path; // postcondition
        return result;
    }

    /**
     * Recursively walks the segments of {@code path} from right to left and
     * appends the surviving segments to {@code buffer} from left to right
     * while the recursion unwinds.
     * The top level call should provide {@code 0} as the {@code collapse}
     * parameter, the length of the path as the {@code end} parameter and an
     * empty string buffer as the {@code buffer} parameter.
     *
     * @param  path the path (without file system prefix) to normalize.
     * @param  separatorChar the separator character.
     * @param  collapse the number of adjacent dir/.. segments in the
     *         path to collapse.
     *         This value must not be negative.
     * @param  end the current position in {@code path}.
     *         Only the string to the left of this index is considered.
     *         If not positive, nothing happens.
     * @param  buffer the non-{@code null} string buffer for the result.
     * @return The number of adjacent segments in the path which have
     *         not been collapsed at this position.
     */
    private static int normalize(
            final String path,
            final char separatorChar,
            final int collapse,
            final int end,
            final StringBuffer buffer) {
        assert collapse >= 0;
        if (0 >= end)
            return collapse;
        final int next = path.lastIndexOf(separatorChar, end - 1);
        final String base = path.substring(next + 1, end);
        int notCollapsed;
        if (0 >= base.length() || ".".equals(base)) {
            // Redundant separator or dot directory: skip it.
            return normalize(path, separatorChar, collapse, next, buffer);
        } else if ("..".equals(base)) {
            // Ask the segments to the left to swallow one more segment.
            notCollapsed = normalize(path, separatorChar, collapse + 1, next, buffer) - 1;
            if (0 > notCollapsed)
                return 0; // this ".." has cancelled out a real segment
            // Otherwise there was nothing to cancel: keep the "..".
        } else if (0 < collapse) {
            // This segment is cancelled out by a ".." to its right.
            return normalize(path, separatorChar, collapse - 1, next, buffer);
        } else {
            assert 0 == collapse;
            notCollapsed = normalize(path, separatorChar, 0, next, buffer);
            assert 0 == notCollapsed;
        }
        if (buffer.length() > 0)
            buffer.append(separatorChar);
        buffer.append(base);
        return notCollapsed;
    }

    /**
     * Cuts off any separator characters at the end of the path, unless the
     * path consists of only separator characters, in which case a single
     * separator character is retained to denote the root directory.
     *
     * @param  path the non-{@code null} path name.
     * @param  separatorChar the separator character.
     * @return {@code path} if it's a path without trailing separators
     *         or contains the separator only.
     *         Otherwise, the leading part of the path up to, but not
     *         including, its run of trailing separator characters.
     * @throws NullPointerException If path is {@code null}.
     */
    public static String cutTrailingSeparators(
            final String path,
            final char separatorChar) {
        int i = path.length();
        if (i <= 0 || path.charAt(--i) != separatorChar)
            return path;
        // Step left over the run of trailing separators; i stops on the
        // last non-separator character or at index zero.
        while (i > 0 && path.charAt(--i) == separatorChar)
            ;
        return path.substring(0, ++i);
    }

    /**
     * Cuts off a trailing separator character of the pathname, unless the
     * pathname consists of only the separator character (i.e. denotes the
     * root directory).
     *
     * @deprecated This method chops off a single trailing separator only.
     *             Use {@link #cutTrailingSeparators} to chop off multiple
     *             trailing separators.
     * @param  path the non-{@code null} path name.
     * @param  separatorChar the separator character.
     * @return {@code path} if it's a path without a trailing separator
     *         or contains the separator only.
     *         Otherwise, the substring up to the last character is returned.
     * @throws NullPointerException If path is {@code null}.
     */
    public final static String cutTrailingSeparator(
            final String path,
            final char separatorChar) {
        final int pathEnd = path.length() - 1;
        if (pathEnd > 0 && path.charAt(pathEnd) == separatorChar)
            return path.substring(0, pathEnd);
        else
            return path;
    }

    /**
     * Equivalent to {@link #split(String, char)
     * split(path, File.separatorChar)}.
     */
    public static String[] split(
            final String path) {
        return split(path, File.separatorChar);
    }

    /**
     * Equivalent to {@link #split(String, char, String[])
     * split(path, separatorChar, new String[2])}.
     */
    public static String[] split(
            final String path,
            final char separatorChar) {
        return split(path, separatorChar, new String[2]);
    }

    /**
     * Splits a path into its parent path and its base name,
     * recognizing platform specific file system roots.
     * Trailing separators are ignored. A path which consists of a file
     * system prefix only (optionally followed by more separators) has no
     * parent and an empty base name.
     *
     * @param path The name of the path whose parent path and base name
     *        are to be returned.
     * @param separatorChar The path separator character to use for this
     *        operation.
     * @param result An array of at least two {@link String} elements to hold
     *        the result upon return.
     * @return {@code result}, holding at least two strings:
     *         <ol>
     *         <li>Index zero holds the parent path or {@code null} if the
     *             path does not specify a parent. This name compares equal
     *             with {@link java.io.File#getParent()}.
     *         <li>Index one holds the base name. This name compares
     *             equal with {@link java.io.File#getName()}.
     *         </ol>
     * @throws NullPointerException If path is {@code null}.
     */
    public static String[] split(
            final String path,
            final char separatorChar,
            final String[] result) {
        final int prefixLen = prefixLength(path, separatorChar);

        // Skip any trailing separators and look for the previous separator.
        int baseBegin = -1;
        int baseEnd = path.length() - 1;
        if (prefixLen <= baseEnd) {
            // The scan must not run back into the prefix: for a path like
            // "///" (UNC prefix plus one more separator) it would otherwise
            // yield an end index below prefixLen and make the substring
            // call below fail.
            baseEnd = Math.max(
                    lastIndexNot(path, separatorChar, baseEnd),
                    prefixLen - 1);
            baseBegin = path.lastIndexOf(separatorChar, baseEnd);
        }
        baseEnd++; // convert end index to interval boundary

        // Finally split according to our findings.
        if (baseBegin >= prefixLen) { // found separator after the prefix?
            final int parentEnd = lastIndexNot(path, separatorChar, baseBegin) + 1;
            // The parent never gets shorter than the prefix, so it may be
            // the prefix (e.g. a single separator) only.
            result[0] = path.substring(0, parentEnd > prefixLen ? parentEnd : prefixLen);
            // Between the separator and any trailing separators.
            result[1] = path.substring(baseBegin + 1, baseEnd);
        } else { // no separator after prefix
            if (0 < prefixLen && prefixLen < baseEnd)       // prefix exists and we have more?
                result[0] = path.substring(0, prefixLen);    // prefix is parent
            else
                result[0] = null;                            // no parent
            result[1] = path.substring(prefixLen, baseEnd);
        }

        return result;
    }

    /**
     * Returns the index of the last character at or before {@code last}
     * which is not the separator character, or {@code -1} if there is no
     * such character.
     */
    private static int lastIndexNot(String path, char separatorChar, int last) {
        while (last >= 0 && path.charAt(last) == separatorChar)
            last--;
        return last;
    }

    /**
     * Returns the length of the file system prefix in {@code path}.
     * File system prefixes are:
     * <ul>
     * <li>A letter followed by a colon and an optional separator.
     *     On Windows, this is the notation for a drive.
     * <li>Two leading separators.
     *     On Windows, this is the notation for a UNC.
     * <li>A single leading separator.
     *     On Windows and POSIX, this is the notation for an absolute path.
     * </ul>
     * This method works identical on all platforms, so even if the separator
     * is {@code '/'}, two leading separators would be considered to
     * be a UNC and hence the return value would be {@code 2}.
     *
     * @param path The file system path.
     * @param separatorChar The file name separator character in {@code path}.
     * @return The number of characters in the prefix.
     * @throws NullPointerException If {@code path} is {@code null}.
     */
    private static int prefixLength(final String path, final char separatorChar) {
        final int pathLen = path.length();
        int len = 0; // default prefix length
        if (pathLen > 0 && path.charAt(0) == separatorChar) {
            len++; // leading separator or first character of a UNC.
        } else if (pathLen > 1 && path.charAt(1) == ':') {
            final char drive = path.charAt(0);
            if ('A' <= drive && drive <= 'Z'
                    || 'a' <= drive && drive <= 'z') { // US-ASCII letters only
                // Path is prefixed with drive, e.g. "C:\\Programs".
                len = 2;
            }
        }
        if (pathLen > len && path.charAt(len) == separatorChar)
            len++; // next separator is considered part of prefix
        return len;
    }

    /** You cannot instantiate this class. */
    protected Paths() {
    }
}