// com.couchbase.lite.util.URIUtils (source listing retrieved from a Maven / Gradle / Ivy artifact browser)
/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.couchbase.lite.util;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
// COPY: Partially copied from android.net.Uri
// COPY: Partially copied from libcore.net.UriCodec
/**
 * Static helpers for percent-encoding and percent-decoding URI components.
 *
 * <p>Encoding always uses UTF-8. Decoding is available either through
 * {@link java.net.URLDecoder} ({@link #decode(String)}) or through a
 * stand-alone decoder with an explicit charset and optional '+' handling
 * ({@link #decode(String, boolean, Charset)}).
 */
public class URIUtils {
    /**
     * Index of a component which was not found.
     */
    private final static int NOT_FOUND = -1;

    /**
     * Default encoding.
     */
    private static final String UTF_8_ENCODING = "UTF-8";

    /**
     * Charset instance for {@link #UTF_8_ENCODING}; resolved once so that
     * encoding does not need a by-name lookup (and its checked exception).
     */
    private static final Charset UTF_8 = Charset.forName(UTF_8_ENCODING);

    /**
     * Error message presented when a user tries to treat an opaque URI as
     * hierarchical.
     */
    private static final String NOT_HIERARCHICAL = "This isn't a hierarchical URI.";

    /**
     * Upper-case hexadecimal digits, indexed by nibble value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Decodes '%'-escaped octets in the given string using UTF-8.
     *
     * <p>Delegates to {@link URLDecoder#decode(String, String)}, so a '+' in
     * the input is decoded as a space.
     *
     * @param s encoded string to decode
     * @return the decoded string, or null if s is null
     * @throws IllegalArgumentException if {@link URLDecoder} rejects the input
     *                                  (for example an incomplete escape)
     */
    public static String decode(String s) {
        if (s == null) {
            return null;
        }
        try {
            return URLDecoder.decode(s, UTF_8_ENCODING);
        } catch (UnsupportedEncodingException e) {
            // This is highly unlikely since we always use UTF-8 encoding.
            throw new RuntimeException(e);
        }
    }

    /**
     * Encodes characters in the given string as '%'-escaped octets
     * using the UTF-8 scheme. Leaves letters ("A-Z", "a-z"), numbers
     * ("0-9"), and unreserved characters ("_-!.~'()*") intact. Encodes
     * all other characters.
     *
     * @param s string to encode
     * @return an encoded version of s suitable for use as a URI component,
     * or null if s is null
     */
    public static String encode(String s) {
        return encode(s, null);
    }

    /**
     * Encodes characters in the given string as '%'-escaped octets
     * using the UTF-8 scheme. Leaves letters ("A-Z", "a-z"), numbers
     * ("0-9"), and unreserved characters ("_-!.~'()*") intact. Encodes
     * all other characters with the exception of those specified in the
     * allow argument.
     *
     * @param s     string to encode
     * @param allow set of additional characters to allow in the encoded form,
     *              null if no characters should be skipped
     * @return an encoded version of s suitable for use as a URI component,
     * or null if s is null; s itself is returned when nothing needs encoding
     */
    public static String encode(String s, String allow) {
        if (s == null) {
            return null;
        }

        // Lazily-initialized buffer: stays null until something must be escaped.
        StringBuilder encoded = null;
        int oldLength = s.length();

        // This loop alternates between copying over allowed characters and
        // encoding in chunks. This results in fewer method calls and
        // allocations than encoding one character at a time.
        int current = 0;
        while (current < oldLength) {
            // "Copying" mode: find the next character which needs to be encoded.
            int nextToEncode = current;
            while (nextToEncode < oldLength
                    && isAllowed(s.charAt(nextToEncode), allow)) {
                nextToEncode++;
            }

            // If there's nothing more to encode...
            if (nextToEncode == oldLength) {
                if (current == 0) {
                    // We didn't need to encode anything!
                    return s;
                }
                // current > 0 means an earlier iteration already created the buffer.
                encoded.append(s, current, oldLength);
                return encoded.toString();
            }

            if (encoded == null) {
                encoded = new StringBuilder();
            }

            if (nextToEncode > current) {
                // Append allowed characters leading up to this point.
                encoded.append(s, current, nextToEncode);
            }

            // "Encoding" mode: find the next allowed character. The whole
            // disallowed run is converted at once so that surrogate pairs
            // are never split before the UTF-8 conversion.
            current = nextToEncode;
            int nextAllowed = current + 1;
            while (nextAllowed < oldLength
                    && !isAllowed(s.charAt(nextAllowed), allow)) {
                nextAllowed++;
            }

            // Convert the substring to bytes and encode the bytes as
            // '%'-escaped octets.
            byte[] bytes = s.substring(current, nextAllowed).getBytes(UTF_8);
            for (int i = 0; i < bytes.length; i++) {
                encoded.append('%');
                encoded.append(HEX_DIGITS[(bytes[i] & 0xf0) >> 4]);
                encoded.append(HEX_DIGITS[bytes[i] & 0xf]);
            }

            current = nextAllowed;
        }

        // Encoded could still be null at this point if s is empty.
        return encoded == null ? s : encoded.toString();
    }

    /**
     * Returns true if the given character is allowed.
     *
     * @param c     character to check
     * @param allow characters to allow
     * @return true if the character is allowed or false if it should be
     * encoded
     */
    private static boolean isAllowed(char c, String allow) {
        return (c >= 'A' && c <= 'Z')
                || (c >= 'a' && c <= 'z')
                || (c >= '0' && c <= '9')
                || "_-!.~'()*".indexOf(c) != NOT_FOUND
                || (allow != null && allow.indexOf(c) != NOT_FOUND);
    }

    // COPY: Copied from libcore.net.UriCodec

    /**
     * Decodes '%'-escaped octets in the given string using the supplied
     * charset. Consecutive escapes are collected and converted together so
     * that multi-byte sequences decode to a single character.
     *
     * @param s           encoded string to decode
     * @param convertPlus true to convert '+' to ' '.
     * @param charset     charset used to turn the decoded octets into characters
     * @return the decoded string, or null if s is null; s itself is returned
     * when it contains nothing to decode
     * @throws IllegalArgumentException if a '%' is not followed by two
     *                                  hexadecimal digits
     */
    public static String decode(String s, boolean convertPlus, Charset charset) {
        if (s == null) {
            // Consistent with decode(String) and encode(...): null in, null out.
            return null;
        }
        if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) {
            return s;
        }

        StringBuilder result = new StringBuilder(s.length());
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; i < s.length(); ) {
            char c = s.charAt(i);
            if (c == '%') {
                // Gather the full run of adjacent %XX escapes before decoding.
                do {
                    if (i + 2 >= s.length()) {
                        throw new IllegalArgumentException("Incomplete % sequence at: " + i);
                    }
                    int d1 = hexToInt(s.charAt(i + 1));
                    int d2 = hexToInt(s.charAt(i + 2));
                    if (d1 == -1 || d2 == -1) {
                        throw new IllegalArgumentException("Invalid % sequence " +
                                s.substring(i, i + 3) + " at " + i);
                    }
                    out.write((d1 << 4) + d2);
                    i += 3;
                } while (i < s.length() && s.charAt(i) == '%');
                result.append(new String(out.toByteArray(), charset));
                out.reset();
            } else {
                if (convertPlus && c == '+') {
                    c = ' ';
                }
                result.append(c);
                i++;
            }
        }
        return result.toString();
    }

    // COPY: Copied from libcore.net.UriCodec

    /**
     * Like {@link Character#digit}, but without support for non-ASCII
     * characters.
     *
     * @param c character to interpret as a hexadecimal digit
     * @return the digit's value (0-15), or -1 if c is not an ASCII hex digit
     */
    private static int hexToInt(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        } else if ('a' <= c && c <= 'f') {
            return 10 + (c - 'a');
        } else if ('A' <= c && c <= 'F') {
            return 10 + (c - 'A');
        } else {
            return -1;
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy