// org.ditang.relaxng.defaults.Util (Maven / Gradle / Ivy)
/*
* This file is part of the DITA Open Toolkit project.
*
* Copyright 2011 George Bina
*
* See the accompanying LICENSE file for applicable license.
*/
package org.ditang.relaxng.defaults;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.StringTokenizer;
/**
 * Utility methods that "correct" URLs by percent-escaping, as UTF-8, the
 * characters that are not allowed to appear literally in them.
 *
 * <p>The only public entry point is {@link #correct(URL)}.
 */
public class Util {

  /** For every ASCII code (0-127): whether it must be written as {@code %HH}. */
  private static final boolean[] NEEDS_ESCAPING = new boolean[128];

  /** Upper-case hexadecimal digits used to build {@code %HH} escapes. */
  private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

  // Fill the escape table: all control characters, DEL and a fixed set of
  // printable ASCII characters.
  static {
    for (int i = 0; i <= 0x1f; i++) {
      NEEDS_ESCAPING[i] = true;
    }
    NEEDS_ESCAPING[0x7f] = true;
    char[] special = {
      ' ', '<', '>', '#', '%', '"', '{', '}', '?', '|', '\\', '^', '~', '[', ']', '`', '\'', '&'
    };
    for (char c : special) {
      NEEDS_ESCAPING[c] = true;
    }
  }

  /**
   * Checks for a Windows platform.
   *
   * <p>The comparison is case-insensitive without going through the default
   * locale, so it also works when the default locale has special casing
   * rules for 'i' (for example Turkish).
   *
   * @return True if the {@code os.name} system property starts with "win".
   */
  private static boolean isWindows() {
    String osName = System.getProperty("os.name");
    return osName != null && osName.regionMatches(true, 0, "WIN", 0, 3);
  }

  /**
   * Appends the {@code %HH} escape of a byte value.
   *
   * @param buffer The buffer to append to.
   * @param value The value to escape, in the range 0-255.
   */
  private static void appendPercentEncoded(StringBuilder buffer, int value) {
    buffer.append('%');
    buffer.append(HEX_DIGITS[value >> 4]);
    buffer.append(HEX_DIGITS[value & 0xf]);
  }

  /**
   * Appends the escaped form of a text to a buffer. The text is converted to
   * UTF-8; every byte that is not ASCII, or that is flagged in
   * {@link #NEEDS_ESCAPING}, is written as {@code %HH}, all others literally.
   *
   * @param buffer The buffer to append to.
   * @param text The text to escape.
   */
  private static void appendEscaped(StringBuilder buffer, String text) {
    // ASCII characters are encoded by UTF-8 as the identical single byte, so
    // the whole text can be processed byte by byte.
    for (byte b : text.getBytes(StandardCharsets.UTF_8)) {
      int value = b & 0xff;
      if (value >= 128 || NEEDS_ESCAPING[value]) {
        appendPercentEncoded(buffer, value);
      } else {
        buffer.append((char) value);
      }
    }
  }

  /**
   * To escape a file path to a URI, by using %HH to represent special ASCII
   * characters (0x00~0x1F, 0x7F, space and
   * {@code < > # % " { } ? | \ ^ ~ [ ] ` ' &}) and non-ASCII characters
   * (as UTF-8 bytes).
   *
   * <p>A path that starts with a drive letter gets a leading '/', for example
   * {@code C:/blah} becomes {@code /C:/blah}.
   *
   * @param path The path to be escaped.
   * @return The escaped uri, or null if the path is null.
   */
  private static String filepath2URI(String path) {
    if (path == null) {
      return null;
    }
    StringBuilder buffer = new StringBuilder(path.length() * 3);
    // change C:/blah to /C:/blah
    if (path.length() >= 2 && path.charAt(1) == ':') {
      char drive = Character.toUpperCase(path.charAt(0));
      if (drive >= 'A' && drive <= 'Z') {
        buffer.append('/');
      }
    }
    appendEscaped(buffer, path);
    return buffer.toString();
  }

  /**
   * Escapes a URL component (query token, fragment, user name, password) by
   * using %HH to represent special ASCII characters and non-ASCII characters
   * (as UTF-8 bytes).
   *
   * <p>Unlike {@link #filepath2URI(String)} no drive letter handling is done:
   * a component such as {@code a:b} is not a file path and must not get a
   * leading '/'.
   *
   * @param path The text to be escaped. Must not be null.
   * @return The escaped text.
   */
  private static String escapeSpecialAsciiAndNonAscii(String path) {
    StringBuilder buffer = new StringBuilder(path.length() * 3);
    appendEscaped(buffer, path);
    return buffer.toString();
  }

  /**
   * Corrects an URL.
   *
   * @param url
   *          The url to be corrected. If null will throw MalformedURLException.
   * @return a corrected URL. Never null.
   * @exception MalformedURLException
   *              when the argument is null, or when the corrected text cannot
   *              be parsed back into an URL.
   */
  public static URL correct(URL url) throws MalformedURLException {
    if (url == null) {
      throw new MalformedURLException("The url is null");
    }
    return new URL(correct(url.toString()));
  }

  /**
   * Corrects the string form of an URL by escaping, as UTF-8 based %HH
   * sequences, the characters that are not allowed in it. User info, query
   * and fragment are processed separately from the main part so that their
   * delimiters ('@', '?', '&amp;', '#') are preserved.
   *
   * @param url
   *          The URL to be corrected. If it contains a % char (outside the
   *          user info), it means it already was corrected, so it will be
   *          returned. Take care at composing URLs from a corrected part and
   *          an uncorrected part. Correcting the result will not work. Try to
   *          correct first the relative part.
   * @return The corrected URL.
   */
  private static String correct(String url) {
    // Fix for bad URLs containing UNC paths.
    // If the url is a UNC file url it must be specified like: file:////...
    // A url like file:/// refers to a local file so it must not be modified.
    if (url.startsWith("file://") && !url.startsWith("file:///") && isWindows()) {
      url = "file:////" + url.substring("file://".length());
    }

    String userInfo = getUserInfo(url);
    String user = extractUser(userInfo);
    String pass = extractPassword(userInfo);
    boolean hasUserInfo = user != null || pass != null;

    String initialUrl = url;
    // See if the url contains user and password. If so we remove them and
    // attach them back after the correction is performed.
    if (hasUserInfo) {
      URL urlWithoutUserInfo = clearUserInfo(url);
      // A null result means the text could not be parsed as an URL; in that
      // case continue with the text as it is.
      if (urlWithoutUserInfo != null) {
        url = urlWithoutUserInfo.toString();
      }
    }

    // If there is a % that means the url was already corrected.
    if (url.contains("%")) {
      return initialUrl;
    }

    // Extract the reference (anchor) part from the url. The '#' char
    // identifying the anchor must not be corrected.
    String reference = null;
    int refIndex = url.lastIndexOf('#');
    if (refIndex != -1) {
      reference = escapeSpecialAsciiAndNonAscii(url.substring(refIndex + 1));
      url = url.substring(0, refIndex);
    }

    // Extract the query. Each token is corrected on its own so that the '&'
    // separators are preserved.
    StringBuilder queryBuffer = null;
    int queryIndex = url.indexOf('?');
    if (queryIndex != -1) {
      String query = url.substring(queryIndex + 1);
      url = url.substring(0, queryIndex);
      queryBuffer = new StringBuilder(query.length());
      StringTokenizer tokens = new StringTokenizer(query, "&");
      while (tokens.hasMoreTokens()) {
        queryBuffer.append(escapeSpecialAsciiAndNonAscii(tokens.nextToken()));
        if (tokens.hasMoreTokens()) {
          queryBuffer.append('&');
        }
      }
    }

    StringBuilder corrected = new StringBuilder(filepath2URI(url));
    if (queryBuffer != null) {
      corrected.append('?').append(queryBuffer);
    }
    if (reference != null) {
      corrected.append('#').append(reference);
    }
    String toReturn = corrected.toString();

    // Re-attach the user and password.
    if (hasUserInfo) {
      try {
        String userToAttach = user == null ? "" : user;
        String passToAttach = pass == null ? "" : pass;
        toReturn = attachUserInfo(new URL(toReturn), userToAttach, passToAttach.toCharArray()).toString();
      } catch (MalformedURLException ignored) {
        // Best effort: if the corrected text cannot be parsed back, it is
        // returned without the user info.
      }
    }
    return toReturn;
  }

  /**
   * Extract the user info from an URL.
   *
   * @param url
   *          The string representing the URL.
   * @return The userinfo or null if cannot extract it.
   */
  private static String getUserInfo(String url) {
    // The user info starts right after the first "//".
    int doubleSlashIndex = url.indexOf("//");
    if (doubleSlashIndex == -1) {
      return null;
    }
    int startIndex = doubleSlashIndex + 2;
    // The user info should be found before the next '/'.
    int nextSlashIndex = url.indexOf('/', startIndex);
    if (nextSlashIndex == -1) {
      nextSlashIndex = url.length();
    }
    // The user info ends at the last '@' of that subsequence.
    int endIndex = url.substring(startIndex, nextSlashIndex).lastIndexOf('@');
    if (endIndex == -1) {
      return null;
    }
    return url.substring(startIndex, startIndex + endIndex);
  }

  /**
   * Gets the user from an userInfo string obtained from the starting URL.
   *
   * @param userInfo
   *          userInfo, taken from the URL.
   * @return The user (the text before the last ':', or the whole user info if
   *         there is no ':'), or null if the user info is null.
   */
  private static String extractUser(String userInfo) {
    if (userInfo == null) {
      return null;
    }
    int index = userInfo.lastIndexOf(':');
    return index == -1 ? userInfo : userInfo.substring(0, index);
  }

  /**
   * Gets the password from an user info string obtained from the starting URL.
   *
   * @param userInfo
   *          userInfo, taken from the URL. If no user info specified returning
   *          null. If user info not null but no password after ":" then
   *          returning empty, (we have a user so the default pass for him is
   *          empty string). See bug 6086.
   *
   * @return The password.
   */
  private static String extractPassword(String userInfo) {
    if (userInfo == null) {
      return null;
    }
    int index = userInfo.lastIndexOf(':');
    if (index != -1 && index < userInfo.length() - 1) {
      return userInfo.substring(index + 1);
    }
    return "";
  }

  /**
   * Clears the user info from an url.
   *
   * @param systemID
   *          the url to be cleaned.
   * @return the cleaned url, or null if the argument is not an URL.
   */
  private static URL clearUserInfo(String systemID) {
    try {
      URL url = new URL(systemID);
      // Do not clear user info on "file" urls 'cause on Windows the drive will
      // have no ":"...
      if (!"file".equals(url.getProtocol())) {
        return attachUserInfo(url, null, null);
      }
      return url;
    } catch (MalformedURLException e) {
      return null;
    }
  }

  /**
   * Rebuilds an URL with the given user info. Passing a null or blank user
   * produces an URL without user info.
   *
   * @param url
   *          The URL to be transformed.
   * @param user
   *          The user name.
   * @param password
   *          The password.
   * @return The URL with userInfo. Null if the url is null; the url itself if
   *         it has no authority and is not a "jar" url.
   * @exception MalformedURLException
   *              Exception thrown if the URL is malformed.
   */
  private static URL attachUserInfo(URL url, String user, char[] password) throws MalformedURLException {
    if (url == null) {
      return null;
    }
    String protocol = url.getProtocol();
    boolean isJar = "jar".equals(protocol);
    String authority = url.getAuthority();
    if ((authority == null || authority.isEmpty()) && !isJar) {
      return url;
    }

    StringBuilder buf = new StringBuilder();
    if (isJar) {
      // The path of a "jar" url is itself an URL; the user info lives there.
      URL nested = attachUserInfo(new URL(url.getPath()), user, password);
      buf.append("jar:");
      buf.append(nested.toString());
    } else {
      char[] escapedPassword = correctPassword(password);
      String escapedUser = correctUser(user);
      buf.append(protocol);
      buf.append("://");
      if (!"file".equals(protocol) && escapedUser != null && escapedUser.trim().length() > 0) {
        buf.append(escapedUser);
        if (escapedPassword != null && escapedPassword.length > 0) {
          buf.append(":");
          buf.append(escapedPassword);
        }
        buf.append("@");
      }
      buf.append(url.getHost());
      if (url.getPort() > 0) {
        buf.append(":");
        buf.append(url.getPort());
      }
      buf.append(url.getPath());
    }

    // Query and fragment are not part of getPath(), so they must be copied
    // over for "jar" urls as well, otherwise they would be lost.
    String query = url.getQuery();
    if (query != null && query.trim().length() > 0) {
      buf.append("?").append(query);
    }
    String ref = url.getRef();
    if (ref != null && ref.trim().length() > 0) {
      buf.append("#").append(ref);
    }
    return new URL(buf.toString());
  }

  /**
   * Escapes one component of the user info. On top of the regular escaping,
   * the characters that delimit the user info inside an URL ('@', '/', ':')
   * are escaped too.
   *
   * @param value The user name or password. Must not be null.
   * @return The escaped value.
   */
  private static String escapeUserInfoComponent(String value) {
    String escaped = escapeSpecialAsciiAndNonAscii(value);
    StringBuilder totalEscaped = new StringBuilder(escaped.length());
    for (int i = 0; i < escaped.length(); i++) {
      char ch = escaped.charAt(i);
      if (ch == '@' || ch == '/' || ch == ':') {
        appendPercentEncoded(totalEscaped, ch);
      } else {
        totalEscaped.append(ch);
      }
    }
    return totalEscaped.toString();
  }

  /**
   * Escape the specified user.
   *
   * @param user
   *          The user name to correct.
   * @return The escaped user. The argument is returned unchanged if it is
   *         null, blank, or contains a '%' (considered already escaped).
   */
  private static String correctUser(String user) {
    if (user != null && user.trim().length() > 0 && user.indexOf('%') == -1) {
      return escapeUserInfoComponent(user);
    }
    return user;
  }

  /**
   * Escape the specified password.
   *
   * @param password
   *          The password to be corrected.
   *
   * @return The escaped password. The argument is returned unchanged if it is
   *         null or contains a '%' (considered already escaped).
   */
  private static char[] correctPassword(char[] password) {
    if (password == null) {
      return null;
    }
    String text = new String(password);
    if (text.indexOf('%') != -1) {
      return password;
    }
    return escapeUserInfoComponent(text).toCharArray();
  }
}