org.apache.cocoon.util.NetUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cocoon-util Show documentation
Utility classes.
There is a newer version: 2.3.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cocoon.util;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.BitSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringUtils;

/**
 * A collection of File, URL and filename
 * utility methods
 *
 * @version $Id: NetUtils.java 490714 2006-12-28 13:08:22Z danielf $
 */
public class NetUtils {

    /**
     * Array containing the safe characters set as defined by RFC 1738
     */
    private static BitSet safeCharacters;

    private static final char[] hexadecimal =
    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
     'A', 'B', 'C', 'D', 'E', 'F'};

    static {
        safeCharacters = new BitSet(256);
        int i;
        // 'lowalpha' rule
        for (i = 'a'; i <= 'z'; i++) {
            safeCharacters.set(i);
        }
        // 'hialpha' rule
        for (i = 'A'; i <= 'Z'; i++) {
            safeCharacters.set(i);
        }
        // 'digit' rule
        for (i = '0'; i <= '9'; i++) {
            safeCharacters.set(i);
        }

        // 'safe' rule
        safeCharacters.set('$');
        safeCharacters.set('-');
        safeCharacters.set('_');
        safeCharacters.set('.');
        safeCharacters.set('+');

        // 'extra' rule
        safeCharacters.set('!');
        safeCharacters.set('*');
        safeCharacters.set('\'');
        safeCharacters.set('(');
        safeCharacters.set(')');
        safeCharacters.set(',');

        // special characters common to http: file: and ftp: URLs ('fsegment' and 'hsegment' rules)
        safeCharacters.set('/');
        safeCharacters.set(':');
        safeCharacters.set('@');
        safeCharacters.set('&');
        safeCharacters.set('=');
    }

    /**
     * Decode a path.
     *
     * Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
     * 
The validity of the input path is not checked (i.e. characters that were not encoded will
     * not be reported as errors).
     * This method differs from URLDecoder.decode in that it always uses UTF-8 (while URLDecoder
     * uses the platform default encoding, often ISO-8859-1), and doesn't translate + characters to spaces.
     *
     * @param path the path to decode
     * @return the decoded path
     */
    public static String decodePath(String path) {
        StringBuffer translatedPath = new StringBuffer(path.length());
        byte[] encodedchars = new byte[path.length() / 3];
        int i = 0;
        int length = path.length();
        int encodedcharsLength = 0;
        while (i < length) {
            if (path.charAt(i) == '%') {
                // we must process all consecutive %-encoded characters in one go, because they represent
                // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
                while (i < length && path.charAt(i) == '%') {
                    if (i + 2 < length) {
                        try {
                            byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
                            encodedchars[encodedcharsLength] = x;
                        } catch (NumberFormatException e) {
                            throw new IllegalArgumentException("NetUtils.decodePath: " +
                                                               "Illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
                        }
                        encodedcharsLength++;
                        i += 3;
                    } else {
                        throw new IllegalArgumentException("NetUtils.decodePath: " +
                                                           "% character should be followed by 2 hexadecimal characters.");
                    }
                }
                try {
                    String translatedPart = new String(encodedchars, 0, encodedcharsLength, "UTF-8");
                    translatedPath.append(translatedPart);
                } catch (UnsupportedEncodingException e) {
                    // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
                    throw new RuntimeException("Problem in decodePath: UTF-8 encoding not supported.");
                }
                encodedcharsLength = 0;
            } else {
                // a normal character
                translatedPath.append(path.charAt(i));
                i++;
            }
        }
        return translatedPath.toString();
    }

    /**
     * Encode a path as required by the URL specification (
     * RFC 1738). This differs from java.net.URLEncoder.encode() which encodes according
     * to the x-www-form-urlencoded MIME format.
     *
     * @param path the path to encode
     * @return the encoded path
     */
    public static String encodePath(String path) {
       // stolen from org.apache.catalina.servlets.DefaultServlet ;)

        /**
         * Note: This code portion is very similar to URLEncoder.encode.
         * Unfortunately, there is no way to specify to the URLEncoder which
         * characters should be encoded. Here, ' ' should be encoded as "%20"
         * and '/' shouldn't be encoded.
         */

        int maxBytesPerChar = 10;
        StringBuffer rewrittenPath = new StringBuffer(path.length());
        ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
        OutputStreamWriter writer = null;
        try {
            writer = new OutputStreamWriter(buf, "UTF8");
        } catch (Exception e) {
            e.printStackTrace();
            writer = new OutputStreamWriter(buf);
        }

        for (int i = 0; i < path.length(); i++) {
            int c = path.charAt(i);
            if (safeCharacters.get(c)) {
                rewrittenPath.append((char)c);
            } else {
                // convert to external encoding before hex conversion
                try {
                    writer.write(c);
                    writer.flush();
                } catch(IOException e) {
                    buf.reset();
                    continue;
                }
                byte[] ba = buf.toByteArray();
                for (int j = 0; j < ba.length; j++) {
                    // Converting each byte in the buffer
                    byte toEncode = ba[j];
                    rewrittenPath.append('%');
                    int low = (toEncode & 0x0f);
                    int high = ((toEncode & 0xf0) >> 4);
                    rewrittenPath.append(hexadecimal[high]);
                    rewrittenPath.append(hexadecimal[low]);
                }
                buf.reset();
            }
        }
        return rewrittenPath.toString();
    }

    /**
     * Returns the path of the given resource.
     *
     * @param uri The URI of the resource
     * @return the resource path
     */
    public static String getPath(String uri) {
        int i = uri.lastIndexOf('/');
        if (i > -1) {
            return uri.substring(0, i);
        }
        i = uri.indexOf(':');
        return (i > -1) ? uri.substring(i + 1, uri.length()) : "";
    }

    /**
     * Remove path and file information from a filename returning only its
     * extension  component
     *
     * @param uri The filename
     * @return The filename extension (with starting dot!) or null if filename extension is not found
     */
    public static String getExtension(String uri) {
        int dot = uri.lastIndexOf('.');
        if (dot > -1) {
            uri = uri.substring(dot);
            int slash = uri.lastIndexOf('/');
            if (slash > -1) {
                return null;
            } else {
                int sharp = uri.lastIndexOf('#');
                if (sharp > -1) {
                    // uri starts with dot already
                    return uri.substring(0, sharp);
                } else {
                    int mark = uri.lastIndexOf('?');
                    if (mark > -1) {
                        // uri starts with dot already
                        return uri.substring(0, mark);
                    } else {
                        return uri;
                    }
                }
            }
        } else {
            return null;
        }
    }

    /**
     * Absolutize a relative resource path on the given absolute base path.
     *
     * @param path The absolute base path
     * @param resource The relative resource path
     * @return The absolutized resource path
     */
    public static String absolutize(String path, String resource) {
        if (StringUtils.isEmpty(path)) {
            return resource;
        } else if (StringUtils.isEmpty(resource)) {
            return path;
        } else if (resource.charAt(0) == '/') {
            // Resource path is already absolute
            return resource;
        }

        boolean slash = (path.charAt(path.length() - 1) == '/');
        
        StringBuffer b = new StringBuffer(path.length() + 1 + resource.length());
        b.append(path);
        if (!slash) {
            b.append('/');
        } 
        b.append(resource);
        return b.toString();
    }

    /**
     * Relativize an absolute resource on a given absolute path.
     *
     * @param path The absolute path
     * @param absoluteResource The absolute resource
     * @return the resource relative to the given path
     */
    public static String relativize(String path, String absoluteResource) {
        if (StringUtils.isEmpty(path)) {
            return absoluteResource;
        }

        if (path.charAt(path.length() - 1) != '/') {
            path += "/";
        }

        if (absoluteResource.startsWith(path)) {
            // resource is direct descentant
            return absoluteResource.substring(path.length());
        } else {
            // resource is not direct descendant
            int index = StringUtils.indexOfDifference(path, absoluteResource);
            if (index > 0 && path.charAt(index-1) != '/') {
                index = path.substring(0, index).lastIndexOf('/');
                index++;
            }
            String pathDiff = path.substring(index);
            String resource = absoluteResource.substring(index);
            int levels = StringUtils.countMatches(pathDiff, "/");
            StringBuffer b = new StringBuffer(levels * 3 + resource.length());
            for (int i = 0; i < levels; i++) {
                b.append("../");
            }
            b.append(resource);
            return b.toString();
        }
    }

    /**
     * Normalize a uri containing ../ and ./ paths.
     *
     * @param uri The uri path to normalize
     * @return The normalized uri
     */
    public static String normalize(String uri) {
        if ("".equals(uri)) {
            return uri;
        }
        int leadingSlashes = 0;
        for (leadingSlashes = 0 ; leadingSlashes < uri.length()
                && uri.charAt(leadingSlashes) == '/' ; ++leadingSlashes) {}
        boolean isDir = (uri.charAt(uri.length() - 1) == '/');
        StringTokenizer st = new StringTokenizer(uri, "/");
        LinkedList clean = new LinkedList();
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if ("..".equals(token)) {
                if (! clean.isEmpty() && ! "..".equals(clean.getLast())) {
                    clean.removeLast();
                    if (! st.hasMoreTokens()) {
                        isDir = true;
                    }
                } else {
                    clean.add("..");
                }
            } else if (! ".".equals(token) && ! "".equals(token)) {
                clean.add(token);
            }
        }
        StringBuffer sb = new StringBuffer();
        while (leadingSlashes-- > 0) {
            sb.append('/');
        }
        for (Iterator it = clean.iterator() ; it.hasNext() ; ) {
            sb.append(it.next());
            if (it.hasNext()) {
                sb.append('/');
            }
        }
        if (isDir && sb.length() > 0 && sb.charAt(sb.length() - 1) != '/') {
            sb.append('/');
        }
        return sb.toString();
    }

    /**
     * Remove parameters from a uri.
     * Resulting Map will have either String for single value attributes,
     * or String arrays for multivalue attributes.
     * 
     * @param uri The uri path to deparameterize.
     * @param parameters The map that collects parameters.
     * @return The cleaned uri
     */
    public static String deparameterize(String uri, Map parameters) {
        int i = uri.lastIndexOf('?');
        if (i == -1) {
            return uri;
        }

        String[] params = StringUtils.split(uri.substring(i + 1), '&');
        for (int j = 0; j < params.length; j++) {
            String p = params[j];
            int k = p.indexOf('=');
            if (k == -1) {
                break;
            }
            String name = p.substring(0, k);
            String value = p.substring(k + 1);
            Object values = parameters.get(name);
            if (values == null) {
                parameters.put(name, value);
            } else if (values.getClass().isArray()) {
                String[] v1 = (String[])values;
                String[] v2 = new String[v1.length + 1];
                System.arraycopy(v1, 0, v2, 0, v1.length);
                v2[v1.length] = value;
                parameters.put(name, v2);
            } else {
                parameters.put(name, new String[]{values.toString(), value});
            }
        }
        return uri.substring(0, i);
    }

    /**
     * Add parameters stored in the Map to the uri string.
     * Map can contain Object values which will be converted to the string,
     * or Object arrays, which will be treated as multivalue attributes.
     * 
     * @param uri The uri to add parameters into
     * @param parameters The map containing parameters to be added
     * @return The uri with added parameters
     */
    public static String parameterize(String uri, Map parameters) {
        if (parameters.size() == 0) {
            return uri;
        }
        
        StringBuffer buffer = new StringBuffer(uri);
        if (uri.indexOf('?') == -1) {
            buffer.append('?');
        } else {
            buffer.append('&');
        }
        
        for (Iterator i = parameters.entrySet().iterator(); i.hasNext();) {
            Map.Entry entry = (Map.Entry)i.next();
            if (entry.getValue().getClass().isArray()) {
                Object[] value = (Object[])entry.getValue();
                for (int j = 0; j < value.length; j++) {
                    if (j > 0) {
                        buffer.append('&');
                    }
                    buffer.append(entry.getKey());
                    buffer.append('=');
                    buffer.append(value[j]);
                }
            } else {
                buffer.append(entry.getKey());
                buffer.append('=');
                buffer.append(entry.getValue());
            }
            if (i.hasNext()) {
                buffer.append('&');
            }
        }
        return buffer.toString();
    }

    /**
     * Remove any authorisation details from a URI
     */
    public static String removeAuthorisation(String uri) {
        if (uri.indexOf("@")!=-1 && (uri.startsWith("ftp://") || uri.startsWith("http://"))) {
            return uri.substring(0, uri.indexOf(":")+2)+uri.substring(uri.indexOf("@")+1);
        }
        return uri;
    }

    /**
     * Pass through to the {@link java.net.URLEncoder}. If running under JDK < 1.4,
     * default encoding will always be used.
     */
    public static String encode(String s, String enc) throws UnsupportedEncodingException {
        return URLEncoder.encode(s, enc);
    }

    /**
     * Pass through to the {@link java.net.URLDecoder}. If running under JDK < 1.4,
     * default encoding will always be used.
     */
    public static String decode(String s, String enc) throws UnsupportedEncodingException {
        return URLDecoder.decode(s, enc);
    }
}