All downloads are free. The search and download features use the official Maven repository.

org.conqat.lib.commons.net.UrlUtils Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.conqat.lib.commons.net;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.conqat.lib.commons.filesystem.FileSystemUtils;

import com.google.common.net.UrlEscapers;

/**
 * Utils for URL handling.
 */
public class UrlUtils {

	/** "file://" protocol prefix in URLs */
	public static final String FILE_PROTOCOL = "file://";

	/**
	 * Pattern that matches web URLs of the form {@code ://}.
	 * Allowed characters and prefix are according to RFC
	 * 3986 except that the string_of_allowed_characters can be empty too so URLs of the form
	 * {@code ://} are matched as well. Note that this pattern cannot be used to validate URLs,
	 * but it is suitable as heuristic to detect URLs that may or may not be well-formed.
	 */
	public static final Pattern URL_PATTERN = Pattern
			.compile("\\p{Alpha}[\\p{Alnum}+\\-.]*://[\\p{Alnum}:?#/@%_\\-~.\\]\\[!$&')(*+,;=]*");

	/**
	 * Pattern to match Windows paths. WARNING: There is no guarantee that this catches all Windows
	 * paths, and using this should be avoided as much as possible. Should be removed with TS-31850.
	 */
	private static final Pattern ABSOLUTE_WINDOWS_PATH_PATTERN = Pattern.compile("[a-zA-Z]:([\\\\/](.+[\\\\/])*.*)?");

	/**
	 * Converts the given uri to a {@link URI}. Has special handling for "file://" paths since
	 * {@link URI#URI(String)} does not handle windows paths correctly.
	 */
	public static URI parseUri(String uri) throws URISyntaxException {
		if (uri.startsWith(FILE_PROTOCOL)) {
			// TS-32847: Replacing '+' with its hex code '%2B'. DecodeFromUtf8 below uses
			// URLDecoder.decode() which comes with special treatment specifically for '+'
			// that turns it into a whitespace as it assumes an
			// application/x-www-form-urlencoded string. Other special characters (e.g. $)
			// receive no special treatment, but seem to be decoded as expected.
			uri = uri.replace("+", "%2B");
			// Decodes possible already encoded parts of uri path
			uri = decodeFromUtf8(uri.substring(FILE_PROTOCOL.length()));
			return new File(uri).toURI();
		}
		return new URI(uri);
	}

	/**
	 * If the given URL matches a Windows path, ensure it is parseable by {@link URI}.
	 * 

* Note: this is a workaround and a bad idea, we should instead migrate Windows paths at some point * and only allow actual URIs in the configuration (TS-31850). */ public static String ensureParseable(String url) { if (ABSOLUTE_WINDOWS_PATH_PATTERN.matcher(url).matches()) { url = "/" + url; } return url.replace("\\", "/"); } /** * Splits the segments at slashes and encodes them using {@link #encodePathSegment(String)}. Returns * the encoded segments joined with slashes. */ public static String encodeMultiplePathSegments(String pathSegments) { return Arrays.stream(pathSegments.split("/")).map(UrlUtils::encodePathSegment).collect(Collectors.joining("/")); } /** * Encodes the given url path segment. Slashes will be escaped. If you want to keep them you need to * split the input before and supply them one by one. */ public static String encodePathSegment(String pathSegment) { return UrlEscapers.urlPathSegmentEscaper().escape(pathSegment); } /** * Encodes the given value as query parameter (includes slashes and + symbols). */ public static String encodeQueryParameter(String queryParameter) { return UrlEscapers.urlFormParameterEscaper().escape(queryParameter); } /** * Decodes the given urlString using UTF-8 encoding. */ public static String decodeFromUtf8(String urlString) { try { return URLDecoder.decode(urlString, FileSystemUtils.UTF8_ENCODING); } catch (UnsupportedEncodingException e) { // Extremely unlikely--It could only happen if UTF-8 became unsupported. throw new AssertionError("Decoding " + urlString + " from UTF-8 failed: " + e.getMessage(), e); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy