// com.sangupta.jerry.util.UriUtils (Maven / Gradle / Ivy)
/**
*
* jerry - Common Java Functionality
* Copyright (c) 2012-2015, Sandeep Gupta
*
* http://sangupta.com/projects/jerry
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.sangupta.jerry.util;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility functions around the URI.
*
* @author sangupta
*
*/
public class UriUtils {
/**
 * SLF4J logger for this class, obtained once from {@link LoggerFactory}.
 */
private static final Logger LOGGER = LoggerFactory.getLogger(UriUtils.class);
/**
 * Characters that {@link #encodeURIComponent(String, boolean)} copies to
 * the output unchanged: ASCII letters, digits and the marks
 * <code>- _ . ! ~ * ' ( )</code>. Every other character is percent-encoded.
 */
private static final String ALLOWED_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.!~*'()";

/**
 * Encode the given string as a URI component. A URI component is part of
 * the URI like a query parameter value, or the fragment name.
 *
 * Percent-escapes are written with upper-case hex digits. This is what the
 * method has always produced; use
 * {@link #encodeURIComponent(String, boolean)} to request lower-case
 * digits explicitly.
 *
 * The method is null-safe.
 *
 * @param input
 *            the string that needs to be encoded
 *
 * @return the encoded representation, or the input itself when it is
 *         <code>null</code> or empty
 */
public static String encodeURIComponent(String input) {
    return encodeURIComponent(input, true);
}

/**
 * Function to convert a given string into URI encoded format. Characters
 * outside {@link #ALLOWED_CHARS} are converted to UTF-8 and every
 * resulting byte is written as <code>%XX</code>.
 *
 * The method is null-safe.
 *
 * @param input
 *            the source string
 *
 * @param upperCase
 *            <code>true</code> to write the hex digits of each escape in
 *            upper-case (<code>%2F</code>), <code>false</code> to write
 *            them in lower-case (<code>%2f</code>)
 *
 * @return the encoded string, or the input itself when it is
 *         <code>null</code> or empty
 */
public static String encodeURIComponent(String input, boolean upperCase) {
    if (input == null || input.isEmpty()) {
        return input;
    }

    // Looking the charset up as an object (instead of by name inside
    // getBytes) removes the checked exception and with it the old fallback
    // path that silently returned the input unencoded.
    final Charset utf8 = Charset.forName("UTF-8");
    final int length = input.length();
    final StringBuilder output = new StringBuilder(length * 3);

    int index = 0;
    while (index < length) {
        // Walk by code point, not by char: a surrogate pair must be encoded
        // as one 4-byte UTF-8 sequence. Encoding each half on its own
        // yields the '?' replacement byte twice.
        final int codePoint = input.codePointAt(index);
        final int next = index + Character.charCount(codePoint);

        if (ALLOWED_CHARS.indexOf(codePoint) != -1) {
            output.append((char) codePoint);
        } else {
            for (byte b : input.substring(index, next).getBytes(utf8)) {
                output.append('%');
                output.append(hexDigit((b >> 4) & 0x0F, upperCase));
                output.append(hexDigit(b & 0x0F, upperCase));
            }
        }
        index = next;
    }

    return output.toString();
}

/**
 * Convert a value in the range 0-15 to its hexadecimal digit.
 *
 * @param nibble
 *            the value to convert
 *
 * @param upperCase
 *            whether the letters A-F are returned in upper-case
 *
 * @return the hexadecimal digit
 */
private static char hexDigit(int nibble, boolean upperCase) {
    final char digit = Character.forDigit(nibble, 16);
    return upperCase ? Character.toUpperCase(digit) : digit;
}
/**
 * Function to decode a given string from URI encoded format.
 *
 * Runs of <code>%XX</code> escapes are collected into bytes and decoded
 * as UTF-8, so sequences of any valid length (including 4-byte sequences
 * for supplementary characters) are supported. A <code>+</code> is decoded
 * as a space. All other characters, including non-ASCII ones, are copied
 * unchanged.
 *
 * The decoder is lenient: a <code>%</code> that is not followed by two
 * hexadecimal digits is copied to the output as-is instead of failing, and
 * escaped bytes that do not form valid UTF-8 are replaced by U+FFFD.
 *
 * @param encodedURI
 *            the encoded string component
 *
 * @return the decoded string, or the input itself when it is
 *         <code>null</code> or empty
 */
public static String decodeURIComponent(String encodedURI) {
    if (encodedURI == null || encodedURI.isEmpty()) {
        return encodedURI;
    }

    final Charset utf8 = Charset.forName("UTF-8");
    final int length = encodedURI.length();
    final StringBuilder decoded = new StringBuilder(length);

    // Scratch buffer for one run of escapes. Allocated on the first '%'
    // with room for every escape that could still follow, so it is large
    // enough for all later (shorter) runs too.
    byte[] escaped = null;

    int index = 0;
    while (index < length) {
        final char current = encodedURI.charAt(index);

        if (current == '+') {
            decoded.append(' ');
            index++;
            continue;
        }

        if (current != '%') {
            decoded.append(current);
            index++;
            continue;
        }

        if (escaped == null) {
            escaped = new byte[(length - index) / 3];
        }

        // consume as many consecutive, well-formed %XX triplets as possible
        int count = 0;
        while (index + 2 < length && encodedURI.charAt(index) == '%') {
            final int high = hexValue(encodedURI.charAt(index + 1));
            final int low = hexValue(encodedURI.charAt(index + 2));
            if (high < 0 || low < 0) {
                break;
            }

            escaped[count++] = (byte) ((high << 4) | low);
            index += 3;
        }

        if (count > 0) {
            decoded.append(new String(escaped, 0, count, utf8));
        } else {
            // truncated or non-hex escape: keep the '%' literally
            decoded.append('%');
            index++;
        }
    }

    return decoded.toString();
}

/**
 * Return the numeric value of an ASCII hexadecimal digit.
 *
 * @param c
 *            the character to examine
 *
 * @return a value between 0 and 15, or -1 if the character is not one of
 *         <code>0-9</code>, <code>a-f</code> or <code>A-F</code>
 */
private static int hexValue(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}
/**
 * Return the last segment of the given URL. Everything from the first
 * <code>?</code> or <code>#</code> onwards is discarded, and of what
 * remains only the text after the last forward or backward slash is
 * returned.
 *
 * @param url
 *            the URL to be used, must not be <code>null</code>
 *
 * @return the text following the last slash or backslash; the whole
 *         remaining string when it contains neither
 */
public static String extractFileName(String url) {
    final int queryAt = url.indexOf('?');
    final int fragmentAt = url.indexOf('#');

    // position of whichever of '?' / '#' comes first, or -1 if neither exists
    final int cutAt;
    if (queryAt == -1) {
        cutAt = fragmentAt;
    } else if (fragmentAt == -1) {
        cutAt = queryAt;
    } else {
        cutAt = Math.min(queryAt, fragmentAt);
    }

    final String resource = (cutAt == -1) ? url : url.substring(0, cutAt);

    final int lastSeparator = Math.max(resource.lastIndexOf('/'), resource.lastIndexOf('\\'));
    return resource.substring(lastSeparator + 1);
}
/**
 * Extract the extension from the given URL. The query string and the
 * fragment are removed first, so dots and slashes that appear inside them
 * never influence the result. The extension is then the text after the
 * last dot of the last path segment.
 *
 * @param url
 *            the url from which the extension needs to be extracted.
 *
 * @return the extracted extension without the leading dot, or
 *         <code>null</code> if the last path segment contains no dot
 *
 * @throws NullPointerException
 *             if the URL presented is null
 */
public static String extractExtension(String url) {
    // cut at whichever of '?' / '#' comes first
    int end = url.length();

    final int query = url.indexOf('?');
    if (query != -1) {
        end = query;
    }

    final int fragment = url.indexOf('#');
    if (fragment != -1 && fragment < end) {
        end = fragment;
    }

    String resource = url.substring(0, end);

    // keep only the last path segment
    final int slash = resource.lastIndexOf('/');
    if (slash != -1) {
        resource = resource.substring(slash + 1);
    }

    final int dot = resource.lastIndexOf('.');
    if (dot == -1) {
        return null;
    }

    return resource.substring(dot + 1);
}
/**
 * Join the given parameters into a <code>key=value&amp;key=value</code>
 * string, considering that the parameter values are already encoded. This
 * is the same as calling {@link #urlEncode(Map, boolean)} with
 * <code>false</code>.
 *
 * @param params
 *            the url parameters that need to be joined
 *
 * @return string representation of the parameters
 *
 */
public static String urlEncode(Map<String, String> params) {
    return urlEncode(params, false);
}

/**
 * Join the given parameters into a <code>key=value&amp;key=value</code>
 * string, in the iteration order of the map. Keys are always written
 * as-is; values are passed through {@link #encodeURIComponent(String)}
 * only when requested.
 *
 * @param params
 *            the key-value pair of params that need to be joined
 *
 * @param encodeValues
 *            whether the value portions need to be URL-encoded or not
 *
 * @return a string representation of the URL parameters, or
 *         {@link StringUtils#EMPTY_STRING} when
 *         <code>AssertUtils.isEmpty</code> reports the map as empty
 *
 */
public static String urlEncode(Map<String, String> params, boolean encodeValues) {
    if (AssertUtils.isEmpty(params)) {
        return StringUtils.EMPTY_STRING;
    }

    final StringBuilder builder = new StringBuilder();
    boolean first = true;
    for (Entry<String, String> entry : params.entrySet()) {
        if (!first) {
            builder.append('&');
        }
        first = false;

        builder.append(entry.getKey());
        builder.append('=');

        final String value = entry.getValue();
        builder.append(encodeValues ? encodeURIComponent(value) : value);
    }

    return builder.toString();
}
/**
 * Normalizes a given URL so that equivalent spellings compare equal.
 *
 * The following steps are applied:
 * <ul>
 * <li><code>http://</code> is prepended when the value contains no
 * <code>://</code> separator</li>
 * <li>a single trailing slash is removed from the path</li>
 * <li>the query parameters are decoded, sorted by name and re-encoded</li>
 * <li>the port is dropped when it is not specified or is 80</li>
 * <li>the fragment is dropped, as only protocol, host, port, path and
 * query are copied to the result</li>
 * </ul>
 *
 * The protocol and the host are copied as reported by {@link URL}; no
 * rewriting from https to http is performed.
 *
 * @param taintedURL
 *            the URL that may be tainted
 *
 * @return the normalized URL, or the input itself when
 *         <code>AssertUtils.isEmpty</code> reports it as empty
 *
 * @throws RuntimeException
 *             if the value cannot be parsed as a URL; the underlying
 *             {@link MalformedURLException} is attached as the cause
 *
 * @deprecated {@link UrlCanonicalizer} or {@link UrlManipulator} classes should be used
 * instead of this
 */
@Deprecated
public static String normalizeUrl(String taintedURL) {
    if (AssertUtils.isEmpty(taintedURL)) {
        return taintedURL;
    }

    // no protocol found - assume HTTP
    final String candidate = (taintedURL.indexOf("://") == -1) ? "http://" + taintedURL : taintedURL;

    final URL url;
    try {
        url = new URL(candidate);
    } catch (MalformedURLException e) {
        throw new RuntimeException("Invalid URL: " + candidate, e);
    }

    // String.replace() is literal, so the former replace("/$", "") removed
    // the two characters "/$" wherever they occurred instead of stripping
    // the trailing slash it was evidently meant to match.
    String path = url.getPath();
    if (path.endsWith("/")) {
        path = path.substring(0, path.length() - 1);
    }

    final SortedMap<String, String> params = createParameterMap(url.getQuery());
    final int port = url.getPort();

    final StringBuilder sb = new StringBuilder();
    sb.append(url.getProtocol());
    sb.append("://");
    sb.append(url.getHost());
    if (port != -1 && port != 80) {
        sb.append(':');
        sb.append(port);
    }
    sb.append(path);

    // a query made only of separators ("?&") parses to an empty map; do not
    // leave a dangling '?' behind in that case
    if (params != null && !params.isEmpty()) {
        sb.append('?');
        sb.append(canonicalize(params));
    }

    return sb.toString();
}
/**
 * Takes a query string, separates the constituent name-value pairs, and
 * stores them in a SortedMap ordered by lexicographical order. Names and
 * values are URL-decoded as UTF-8. A pair without <code>=</code> is stored
 * with an empty value, and when a name occurs more than once the last
 * occurrence wins.
 *
 * @param queryString
 *            the query string that needs to be parsed
 *
 * @return a {@link SortedMap} instance of the parameters, or
 *         <code>null</code> when <code>AssertUtils.isEmpty</code> reports
 *         the query string as empty
 */
private static SortedMap<String, String> createParameterMap(final String queryString) {
    if (AssertUtils.isEmpty(queryString)) {
        return null;
    }

    final SortedMap<String, String> params = new TreeMap<String, String>();
    for (final String pair : queryString.split("&")) {
        if (pair.isEmpty()) {
            continue;
        }

        // limit 2: only the first '=' separates name from value, and a
        // trailing '=' still yields an (empty) second token
        final String[] tokens = pair.split("=", 2);
        for (int j = 0; j < tokens.length; j++) {
            try {
                tokens[j] = URLDecoder.decode(tokens[j], "UTF-8");
            } catch (UnsupportedEncodingException ex) {
                // keep the token undecoded, as before, but report through
                // the logger instead of printing to stderr
                LOGGER.error("Unable to decode query parameter as UTF-8", ex);
            }
        }

        params.put(tokens[0], tokens.length > 1 ? tokens[1] : "");
    }

    return params;
}
/**
* Canonicalize the query string.
*
* @param sortedParamMap
* Parameter name-value pairs in lexicographical order.
*
* @return canonical form of query string.
*/
private static String canonicalize(final SortedMap sortedParamMap) {
if (sortedParamMap == null || sortedParamMap.isEmpty()) {
return "";
}
final StringBuffer sb = new StringBuffer(350);
final Iterator> iter = sortedParamMap.entrySet().iterator();
while (iter.hasNext()) {
final Map.Entry pair = iter.next();
sb.append(percentEncodeRfc3986(pair.getKey()));
sb.append('=');
sb.append(percentEncodeRfc3986(pair.getValue()));
if (iter.hasNext()) {
sb.append('&');
}
}
return sb.toString();
}
/**
 * Percent-encode a value in the RFC 3986 flavour. The value is first
 * form-encoded with {@link URLEncoder} as UTF-8 and the result is then
 * adjusted: <code>+</code> becomes <code>%20</code>, <code>*</code>
 * becomes <code>%2A</code> and <code>%7E</code> is turned back into
 * <code>~</code>.
 *
 * @param string
 *            Decoded string.
 *
 * @return Encoded string per RFC 3986; the input is returned unchanged if
 *         the UTF-8 encoding is reported as unsupported.
 */
private static String percentEncodeRfc3986(final String string) {
    final String formEncoded;
    try {
        formEncoded = URLEncoder.encode(string, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        return string;
    }

    final String spacesFixed = formEncoded.replace("+", "%20");
    final String asterisksFixed = spacesFixed.replace("*", "%2A");
    return asterisksFixed.replace("%7E", "~");
}
/**
 * Extract the host value from the URL. The host starts right after the
 * scheme separator <code>://</code>, or after a leading <code>//</code>
 * for scheme-relative URLs, and runs up to the first <code>/</code>,
 * <code>?</code> or <code>#</code>, or to the end of the string. If
 * neither a scheme separator nor a leading <code>//</code> is present,
 * <code>null</code> is returned.
 *
 * No checks for the sanity of the host value are made and the value is
 * NOT canonicalized before returning; in particular a port or user-info,
 * if present, is returned as part of the value.
 *
 * @param url
 *            the URL from which host/domain needs to be extracted
 *
 * @return the extracted domain/host, or <code>null</code> if the URL is
 *         <code>null</code>, empty, or has no recognizable host start
 *
 */
public static String extractHost(String url) {
    if (url == null || url.isEmpty()) {
        return null;
    }

    final int start;
    if (url.startsWith("//")) {
        // scheme-relative URL; checked first so that a "://" occurring
        // later (for example inside a query value) is not mistaken for
        // the scheme separator
        start = 2;
    } else {
        final int separator = url.indexOf("://");
        if (separator == -1) {
            return null;
        }
        start = separator + 3;
    }

    // the host ends at the path, the query or the fragment, whichever
    // comes first
    int end = url.length();
    for (int i = start; i < url.length(); i++) {
        final char c = url.charAt(i);
        if (c == '/' || c == '?' || c == '#') {
            end = i;
            break;
        }
    }

    return url.substring(start, end);
}
/**
 * Extract the path value from the URL. The path in a URL is considered from
 * the first slash that follows the host to the end before the query or the
 * fragment separator.
 *
 * The host is whatever follows the scheme separator <code>://</code> or a
 * leading <code>//</code>. A value with neither is scanned from its first
 * character, so <code>/a/b?x</code> yields <code>/a/b</code> and
 * <code>example.com/a</code> yields <code>/a</code>. Slashes that appear
 * only inside the query or the fragment are never taken as the start of
 * the path.
 *
 * @param url
 *            the URL from which the path needs to be extracted
 *
 * @return the extracted path value, or <code>null</code> if url is
 *         <code>null</code> or empty, or empty string if path is not found
 */
public static String extractPath(String url) {
    if (url == null || url.isEmpty()) {
        return null;
    }

    // find where the host begins, if there is one
    int hostStart = 0;
    if (url.startsWith("//")) {
        hostStart = 2;
    } else {
        final int separator = url.indexOf("://");
        // "://" only separates the scheme when nothing that ends a scheme
        // precedes it; otherwise it belongs to the path, query or fragment
        // (for example "/redirect?u=http://x")
        if (separator >= 0 && indexOfAny(url, "/?#", 0) == separator + 1) {
            hostStart = separator + 3;
        }
    }

    // the host ends at the first '/', '?' or '#'; only a '/' starts a path
    final int pathStart = indexOfAny(url, "/?#", hostStart);
    if (pathStart == -1 || url.charAt(pathStart) != '/') {
        // no path present - return an empty string
        return "";
    }

    final int pathEnd = indexOfAny(url, "?#", pathStart);
    if (pathEnd == -1) {
        return url.substring(pathStart);
    }

    return url.substring(pathStart, pathEnd);
}

/**
 * Find the first position at or after <code>from</code> that holds any of
 * the candidate characters.
 *
 * @param text
 *            the text to scan
 *
 * @param candidates
 *            the characters to look for
 *
 * @param from
 *            the index to start scanning at
 *
 * @return the index of the first match, or -1 if there is none
 */
private static int indexOfAny(String text, String candidates, int from) {
    for (int i = Math.max(from, 0); i < text.length(); i++) {
        if (candidates.indexOf(text.charAt(i)) != -1) {
            return i;
        }
    }
    return -1;
}
/**
 * Extract the protocol or the scheme from the given URL. For example, in
 * the URL http://www.sangupta.com, the protocol is http. No check for the
 * validity of the scheme is made. The value is returned in lower-case,
 * converted independently of the default locale.
 *
 * @param url
 *            the URL from which the scheme/protocol needs to be extracted.
 *
 * @return the lower-cased scheme/protocol if found, or <code>null</code>
 *         if the URL is <code>null</code>, empty or contains no
 *         <code>://</code> separator.
 */
public static String extractProtocol(String url) {
    if (url == null || url.isEmpty()) {
        return null;
    }

    final int index = url.indexOf("://");
    if (index == -1) {
        return null;
    }

    // Locale.ROOT: with a Turkish default locale a plain toLowerCase()
    // turns "FILE" into "f\u0131le" (dotless i)
    return url.substring(0, index).toLowerCase(Locale.ROOT);
}
/**
 * Extract the base url (scheme + domain) from the given URL, i.e. the
 * part of the URL that precedes the first slash found after the
 * <code>://</code> separator.
 *
 * NOTE(review): the slash is searched from three characters past the
 * position of <code>://</code>. When the separator is missing that
 * position is -1, so the search starts at offset 2 of the value. A URL
 * with no slash after the host, such as <code>http://example.com</code>,
 * yields <code>null</code>.
 *
 * @param url
 *            the url from which the information needs to be extracted
 *
 * @return the base URL as extracted, or <code>null</code> in case the URL
 *         is reported empty by <code>AssertUtils.isEmpty</code> or no
 *         slash is found.
 */
public static String getBaseUrl(String url) {
    if (!AssertUtils.isEmpty(url)) {
        final int searchFrom = url.indexOf("://") + 3;
        final int firstSlash = url.indexOf('/', searchFrom);
        if (firstSlash != -1) {
            return url.substring(0, firstSlash);
        }
    }

    return null;
}
/**
 * Remove the scheme and domain name from the url and return whatever
 * follows the first slash found after the <code>://</code> separator:
 * the path, query string and named anchor, if present. The slash itself
 * is not part of the result.
 *
 * NOTE(review): the slash is searched from three characters past the
 * position of <code>://</code>. When the separator is missing that
 * position is -1, so the search starts at offset 2 of the value.
 *
 * @param url
 *            the URL from which the information that needs to be stripped
 *            off
 *
 * @return the url without scheme and domain, or <code>null</code> in case
 *         the URL is reported empty by <code>AssertUtils.isEmpty</code>
 *         or no slash is found.
 *
 */
public static String removeSchemeAndDomain(String url) {
    if (AssertUtils.isEmpty(url)) {
        return null;
    }

    final int schemeAt = url.indexOf("://");
    final int slashAt = url.indexOf('/', schemeAt + 3);
    return (slashAt == -1) ? null : url.substring(slashAt + 1);
}
/**
 * Join two web paths with exactly one forward slash at the seam: a
 * duplicate slash is dropped when both sides supply one, and a slash is
 * inserted when neither does.
 *
 * @param base
 *            the base path, must not be <code>null</code>
 *
 * @param suffix
 *            the suffix to add, must not be <code>null</code>
 *
 * @return the combined URL safe string
 */
public static String addWebPaths(String base, String suffix) {
    final boolean baseEndsWithSlash = base.endsWith("/");
    final boolean suffixStartsWithSlash = suffix.startsWith("/");

    if (baseEndsWithSlash && suffixStartsWithSlash) {
        // both sides bring a slash - keep only the one from the base
        return base + suffix.substring(1);
    }

    if (baseEndsWithSlash || suffixStartsWithSlash) {
        // exactly one slash is already there
        return base + suffix;
    }

    return base + '/' + suffix;
}
/**
 * Add multiple web-path components given in the order, making sure that
 * exactly one forward slash separates two neighbouring components.
 * Components after the first that are <code>null</code> or empty are
 * skipped.
 *
 * @param components
 *            the components that need to be added
 *
 * @return the complete added web-paths; an empty string when no
 *         components are given, or the component itself when there is
 *         only one
 *
 */
public static String addWebPaths(String... components) {
    if (components == null || components.length == 0) {
        return "";
    }

    if (components.length == 1) {
        return components[0];
    }

    final StringBuilder builder = new StringBuilder(components[0]);
    for (int index = 1; index < components.length; index++) {
        final String suffix = components[index];
        if (suffix == null || suffix.isEmpty()) {
            continue;
        }

        // The length check only guards charAt() on an empty builder. It
        // used to require more than two characters, so a short prefix such
        // as "/" or "a/" was never recognised as ending in a slash and the
        // result got a doubled slash.
        final boolean endsWithSlash = builder.length() > 0 && builder.charAt(builder.length() - 1) == '/';
        final boolean startsWithSlash = suffix.charAt(0) == '/';

        if (endsWithSlash && startsWithSlash) {
            // both sides bring a slash - drop the one from the suffix
            builder.append(suffix, 1, suffix.length());
            continue;
        }

        if (!endsWithSlash && !startsWithSlash) {
            builder.append('/');
        }
        builder.append(suffix);
    }

    return builder.toString();
}
/**
 * Simple function to see if a string resembles a URL or not. The value is
 * handed to {@link UrlManipulator}; it is rejected when that constructor
 * or the host lookup throws an {@link IllegalArgumentException}, or when
 * the reported host contains no dot. A <code>null</code> host is
 * accepted.
 *
 * @param url
 *            the string to be tested
 *
 * @return <code>true</code> if the string appears to be a valid url,
 *         <code>false</code> otherwise
 */
public static boolean appearsValidUrl(String url) {
    final String host;
    try {
        host = new UrlManipulator(url, true).getHost();
    } catch (IllegalArgumentException e) {
        return false;
    }

    return host == null || host.indexOf('.') != -1;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy