// org.osgi.service.dmt.Uri Maven / Gradle / Ivy
/*
* Copyright (c) OSGi Alliance (2004, 2011). All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.osgi.service.dmt;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
/**
 * This class contains static utility methods to manipulate DMT URIs.
 * <p>
 * Syntax of valid DMT URIs:
 * <ul>
 * <li>A <em>slash</em> ({@code '/'} &#92;u002F) is the separator of the node
 * names. Slashes used in a node name must therefore be escaped using a
 * backslash slash ({@code "\/"}). The backslash must be escaped with a double
 * backslash sequence. A backslash found must be ignored when it is not
 * followed by a slash or backslash.</li>
 * <li>The node name can be constructed using the full Unicode character set
 * (except the Supplementary code, not being supported by CLDC/CDC). However,
 * using the full Unicode character set for node names is discouraged because
 * the encoding in the underlying storage as well as the encoding needed in
 * communications can create significant performance and memory usage
 * overhead. Names that are restricted to the URI set
 * {@code [-a-zA-Z0-9_.!~*'()]} are most efficient.</li>
 * <li>URIs used in the DMT must be treated and interpreted as case
 * sensitive.</li>
 * <li><em>No End Slash</em>: a URI must not end with the delimiter slash (
 * {@code '/'} &#92;u002F). This implies that the root node must be denoted as
 * {@code "."} and not {@code "./"}.</li>
 * <li><em>No parent denotation</em>: a URI must not be constructed using the
 * character sequence {@code "../"} to traverse the tree upwards.</li>
 * <li><em>Single Root</em>: the character sequence {@code "./"} must not be
 * used anywhere else but in the beginning of a URI.</li>
 * </ul>
 *
 * @version $Id: 7e9381120a81c14e450b4571cc30ca5aed5b082e $
 */
public final class Uri {
    /**
     * This constant stands for a string identifying the root of the DmTree
     * (".").
     *
     * @since 2.0
     */
    public static final String ROOT_NODE = ".";

    /**
     * This constant stands for a char identifying the root of the DmTree ('.').
     *
     * @since 2.0
     */
    public static final char ROOT_NODE_CHAR = '.';

    /**
     * This constant stands for a string identifying the path separator in the
     * DmTree ("/").
     *
     * @since 2.0
     */
    public static final String PATH_SEPARATOR = "/";

    /**
     * This constant stands for a char identifying the path separator in the
     * DmTree ('/').
     *
     * @since 2.0
     */
    public static final char PATH_SEPARATOR_CHAR = '/';

    // Base64 alphabet used by getHash() for node name mangling. It differs
    // from standard base64 only in its last entry: '_' is used instead of
    // '/', because '/' is the DMT path separator.
    private static final char BASE_64_TABLE[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '_'
    };

    /**
     * A private constructor to suppress the default public constructor.
     */
    private Uri() {
        // static utility class, never instantiated
    }

    /**
     * Returns the node name to be used for the tree operation methods, based
     * on the given node name.
     * <p>
     * The mangling scheme this method was specified for works as follows:
     * <ul>
     * <li>if the name contains '/' or '\' characters, these are escaped by
     * inserting an additional '\' before each occurrence;</li>
     * <li>if the length of the name exceeds the limit, the name is replaced
     * by a hash:
     * <ul>
     * <li>the SHA 1 digest of the name is calculated</li>
     * <li>the digest is encoded with the base 64 algorithm</li>
     * <li>all '/' characters in the encoded digest are replaced with '_'</li>
     * <li>trailing '=' signs are removed</li>
     * </ul>
     * </li>
     * </ul>
     * That scheme is implemented by the package private
     * {@code mangle(String, int)} overload.
     * <p>
     * <b>Implementation note:</b> this public method currently returns
     * {@code nodeName} unchanged. It performs no escaping and no hashing, and
     * it does not reject {@code null} or empty names. Callers that need '/'
     * and '\' escaped should use {@link #encode(String)}.
     *
     * @param nodeName
     *            the node name to be mangled
     * @return {@code nodeName}, unchanged
     */
    public static String mangle(String nodeName) {
        // NOTE(review): this is a no-op although the mangling scheme above
        // (escaping, null/empty rejection) is fully implemented in
        // mangle(String, int). Left as is because changing it would alter
        // the observable behaviour of a public API -- confirm whether the
        // no-op is intentional.
        return nodeName;
    }

    /**
     * Construct a URI from the specified URI segments by joining them with
     * the path separator '/'. The segments must already be mangled (escaped);
     * they are appended verbatim.
     * <p>
     * If the specified path is an empty array then an empty URI ({@code ""}) is
     * returned.
     * <p>
     * <b>Implementation note:</b> the segments are not validated here; only
     * {@code null} segments are rejected. Use {@link #isValidUri(String)} on
     * the result if validation is required.
     *
     * @param path
     *            a possibly empty array of URI segments, must not be
     *            {@code null}
     * @return the URI created from the specified segments
     * @throws NullPointerException
     *             if the specified path or any of its segments are {@code null}
     */
    public static String toUri(String[] path) {
        // NOTE(review): the original contract also announced an
        // IllegalArgumentException for malformed segments; this
        // implementation never throws it.
        if (0 == path.length) {
            return "";
        }
        StringBuffer uri = new StringBuffer();
        for (int i = 0; i < path.length; ++i) {
            if (i > 0) {
                uri.append(PATH_SEPARATOR_CHAR);
            }
            if (path[i] == null) {
                throw new NullPointerException("One of its segments is null.");
            }
            uri.append(path[i]);
        }
        return uri.toString();
    }

    /**
     * This method returns the length of a URI segment. The length of the URI
     * segment is defined as the number of bytes in the unescaped, UTF-8 encoded
     * representation of the segment.
     * <p>
     * The method verifies that the URI segment is well-formed: it must not be
     * empty, must not end with an unescaped '\', must not contain an
     * unescaped '/', and must not be ".." once unescaped.
     *
     * @param segment
     *            the URI segment
     * @return URI segment length
     * @throws NullPointerException
     *             if the specified segment is {@code null}
     * @throws IllegalArgumentException
     *             if the specified URI segment is malformed
     */
    private static int getSegmentLength(String segment) {
        if (segment.length() == 0) {
            throw new IllegalArgumentException("URI segment is empty.");
        }
        StringBuffer unescaped = new StringBuffer(segment);
        int i = 0;
        // The buffer shrinks while it is scanned: every escaping '\' is
        // deleted, and the i++ below then steps over the escaped character.
        while (i < unescaped.length()) {
            char ch = unescaped.charAt(i);
            if (ch == '\\') {
                if (i == unescaped.length() - 1) {
                    // an escape character with nothing left to escape
                    throw new IllegalArgumentException(
                            "URI segment ends with the escape character.");
                }
                unescaped.deleteCharAt(i);
            } else if (ch == PATH_SEPARATOR_CHAR) {
                throw new IllegalArgumentException(
                        "URI segment contains an unescaped '/' character.");
            }
            i++;
        }
        if (unescaped.toString().equals("..")) {
            throw new IllegalArgumentException(
                    "URI segment must not be \"..\".");
        }
        try {
            return unescaped.toString().getBytes("UTF-8").length;
        } catch (UnsupportedEncodingException e) {
            // This should never happen. All implementations must support
            // UTF-8 encoding;
            throw new RuntimeException(e.toString());
        }
    }

    /**
     * Split the specified URI along the path separator '/' characters and
     * return an array of URI segments. Special characters in the returned
     * segments are escaped: an escaped '/' or '\' keeps its escaping
     * backslash, while a backslash in front of any other character is
     * dropped. The returned array may be empty if the specified URI was
     * empty.
     *
     * @param uri
     *            the URI to be split, must not be {@code null}
     * @return an array of URI segments created by splitting the specified URI
     * @throws NullPointerException
     *             if the specified URI is {@code null}
     * @throws IllegalArgumentException
     *             if the specified URI is malformed
     */
    public static String[] toPath(String uri) {
        if (uri == null) {
            throw new NullPointerException("'uri' parameter is null.");
        }
        if (!isValidUri(uri)) {
            throw new IllegalArgumentException("Malformed URI: " + uri);
        }
        if (uri.length() == 0) {
            return new String[] {};
        }
        // Raw collection type kept on purpose: this file does not use
        // generics anywhere.
        List segments = new ArrayList();
        StringBuffer segment = new StringBuffer();
        boolean escape = false;
        for (int i = 0; i < uri.length(); i++) {
            char ch = uri.charAt(i);
            if (escape) {
                // keep the escaping backslash only where it is significant
                if (ch == PATH_SEPARATOR_CHAR || ch == '\\') {
                    segment.append('\\');
                }
                segment.append(ch);
                escape = false;
            } else if (ch == PATH_SEPARATOR_CHAR) {
                segments.add(segment.toString());
                segment = new StringBuffer();
            } else if (ch == '\\') {
                escape = true;
            } else {
                segment.append(ch);
            }
        }
        if (segment.length() > 0) {
            segments.add(segment.toString());
        }
        return (String[]) segments.toArray(new String[segments.size()]);
    }

    /**
     * Checks whether the specified URI is an absolute URI. An absolute URI
     * contains the complete path to a node in the DMT starting from the DMT
     * root ("."). The root may also be written with a leading backslash
     * ({@code "\."}).
     *
     * @param uri
     *            the URI to be checked, must not be {@code null} and must
     *            contain a valid URI
     * @return whether the specified URI is absolute
     * @throws NullPointerException
     *             if the specified URI is {@code null}
     * @throws IllegalArgumentException
     *             if the specified URI is malformed
     */
    public static boolean isAbsoluteUri(String uri) {
        if (null == uri) {
            throw new NullPointerException("'uri' parameter is null.");
        }
        if (!isValidUri(uri)) {
            throw new IllegalArgumentException("Malformed URI: " + uri);
        }
        return uri.equals(".") || uri.equals("\\.") || uri.startsWith("./")
                || uri.startsWith("\\./");
    }

    /**
     * Encode the node name so that back slash and forward slash are escaped
     * with a back slash. This method is the reverse of {@link #decode(String)}.
     *
     * @param nodeName
     *            the node name to be encoded, must not be {@code null}
     * @return the encoded node name; the same instance as {@code nodeName}
     *         if it contains nothing to escape
     * @throws NullPointerException
     *             if {@code nodeName} is {@code null}
     * @since 2.0
     */
    public static String encode(String nodeName) {
        // Try not to create an object when it is not necessary
        for (int i = 0; i < nodeName.length(); i++) {
            char c = nodeName.charAt(i);
            if (c == '\\' || c == PATH_SEPARATOR_CHAR) {
                // First character to escape found: switch to a buffer and
                // continue from the same position.
                StringBuffer sb = new StringBuffer(nodeName);
                for (; i < sb.length(); i++) {
                    c = sb.charAt(i);
                    if (c == '\\' || c == PATH_SEPARATOR_CHAR) {
                        // after the insert, i++ moves back onto the
                        // character that has just been escaped
                        sb.insert(i++, '\\');
                    }
                }
                return sb.toString();
            }
        }
        return nodeName;
    }

    /**
     * Decode the node name so that back slash and forward slash are un-escaped
     * from a back slash. Every escaping backslash is removed and the
     * character following it is kept as is.
     *
     * @param nodeName
     *            the node name to be decoded, must not be {@code null}
     * @return the decoded node name; the same instance as {@code nodeName}
     *         if it contains no backslash
     * @throws NullPointerException
     *             if {@code nodeName} is {@code null}
     * @since 2.0
     */
    public static String decode(String nodeName) {
        // Try not to create an object when it is not necessary
        int n = nodeName.indexOf('\\');
        if (n < 0) {
            return nodeName;
        }
        StringBuffer sb = new StringBuffer(nodeName);
        while (n >= 0 && n < sb.length()) {
            // drop the escape character, then step over the escaped one
            sb.deleteCharAt(n);
            n++;
            // advance to the next escape character, if any
            while (n < sb.length() && sb.charAt(n) != '\\') {
                n++;
            }
        }
        return sb.toString();
    }

    /**
     * Checks whether the specified URI is valid. A URI is considered valid if
     * it meets the following constraints:
     * <ul>
     * <li>the URI is not {@code null};</li>
     * <li>the URI follows the syntax defined for valid DMT URIs.</li>
     * </ul>
     * The empty string is a valid URI. A non-empty URI is split on unescaped
     * '/' characters, and every resulting segment must be well-formed: not
     * empty (so no leading, trailing or doubled '/'), no dangling escape
     * character, not ".." and, except for the first segment, not ".".
     * <p>
     * Escape sequences are tracked from left to right, so a '/' that follows
     * an <em>escaped</em> backslash ({@code "a\\/b"}) is a separator, and a
     * URI may end with an escaped backslash ({@code "a\\"}). This is the same
     * interpretation that {@link #toPath(String)} and {@link #encode(String)}
     * use.
     *
     * @param uri
     *            the URI to be validated
     * @return whether the specified URI is valid
     */
    public static boolean isValidUri(String uri) {
        if (null == uri) {
            return false;
        }
        int length = uri.length();
        if (length == 0) {
            return true;
        }
        int segmentNumber = 0;
        int start = 0;
        boolean escaped = false;
        // i == length stands for a virtual '/' terminating the last segment.
        for (int i = 0; i <= length; i++) {
            if (i < length) {
                char ch = uri.charAt(i);
                if (escaped) {
                    // this character is escaped, whatever it is
                    escaped = false;
                    continue;
                }
                if (ch == '\\') {
                    escaped = true;
                    continue;
                }
                if (ch != PATH_SEPARATOR_CHAR) {
                    continue;
                }
            } else if (escaped) {
                // the URI ends with an escape character that escapes nothing
                return false;
            }
            // uri[start, i) is a complete segment
            segmentNumber++;
            String segment = uri.substring(start, i);
            if (segmentNumber > 1 && segment.equals(ROOT_NODE)) {
                // "." is only allowed as the very first segment
                return false;
            }
            try {
                // used for its validation side: throws on malformed segments
                getSegmentLength(segment);
            } catch (IllegalArgumentException e) {
                return false;
            }
            start = i + 1;
        }
        return true;
    }

    // Non-public fields and methods

    /**
     * Mangles a node name against an explicit length limit. Package private
     * for testing purposes.
     * <p>
     * A name longer than {@code limit} characters is replaced by its hash
     * (see {@code getHash}); otherwise every '/' and '\' in the name is
     * escaped with a preceding '\'.
     *
     * @param nodeName
     *            the node name to be mangled, must not be {@code null} or
     *            empty
     * @param limit
     *            the maximum number of characters a name may have before it
     *            is replaced by its hash
     * @return the mangled node name
     * @throws NullPointerException
     *             if {@code nodeName} is {@code null}
     * @throws IllegalArgumentException
     *             if {@code nodeName} is empty
     */
    static String mangle(String nodeName, int limit) {
        if (nodeName == null) {
            throw new NullPointerException(
                    "The 'nodeName' parameter must not be null.");
        }
        if (nodeName.equals("")) {
            throw new IllegalArgumentException(
                    "The 'nodeName' parameter must not be empty.");
        }
        if (nodeName.length() > limit) {
            // create node name hash
            return getHash(nodeName);
        }
        // escape any '/' and '\' characters in the node name
        StringBuffer nameBuffer = new StringBuffer(nodeName);
        for (int i = 0; i < nameBuffer.length(); i++) {
            char ch = nameBuffer.charAt(i);
            if (ch == '\\' || ch == PATH_SEPARATOR_CHAR) {
                // the extra i++ skips the character that was just escaped
                nameBuffer.insert(i++, '\\');
            }
        }
        return nameBuffer.toString();
    }

    /**
     * Computes the hash used as replacement for over-long node names: the
     * "SHA" message digest of the UTF-8 bytes of the name, encoded with
     * {@code BASE_64_TABLE} on a single line and without trailing '=' padding.
     * The encoder hard-codes a digest length of 20 bytes, which yields 27
     * characters.
     *
     * @param from
     *            the string to hash
     * @return the encoded digest
     * @throws IllegalStateException
     *             if the UTF-8 encoding or the "SHA" digest is not available
     */
    private static String getHash(String from) {
        byte[] bytes;
        try {
            bytes = from.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // There's no way UTF-8 encoding is not implemented...
            throw new IllegalStateException("there's no UTF-8 encoder here!");
        }
        MessageDigest md = null;
        try {
            md = MessageDigest.getInstance("SHA");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException(
                    "Can't get an instance of a SHA MessageDigest provider!");
        }
        byte[] digest = md.digest(bytes);
        // Minimal base64 encoder: no line breaks, no trailing '='.
        StringBuffer sb = new StringBuffer(digest.length * 2);
        // bytes 0..17: six full groups of 3 bytes -> 4 characters each
        for (int i = 0; i < 6; i++) {
            int d0 = digest[i * 3] & 0xff;
            int d1 = digest[i * 3 + 1] & 0xff;
            int d2 = digest[i * 3 + 2] & 0xff;
            sb.append(BASE_64_TABLE[d0 >> 2]);
            sb.append(BASE_64_TABLE[(d0 << 4 | d1 >> 4) & 63]);
            sb.append(BASE_64_TABLE[(d1 << 2 | d2 >> 6) & 63]);
            sb.append(BASE_64_TABLE[d2 & 63]);
        }
        // bytes 18..19: the remaining 2 bytes -> 3 characters
        int d0 = digest[18] & 0xff;
        int d1 = digest[19] & 0xff;
        sb.append(BASE_64_TABLE[d0 >> 2]);
        sb.append(BASE_64_TABLE[(d0 << 4 | d1 >> 4) & 63]);
        sb.append(BASE_64_TABLE[(d1 << 2) & 63]);
        return sb.toString();
    }
}