ca.uhn.fhir.util.UrlUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hapi-fhir-base Show documentation
There is a newer version: 7.4.5
/*
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package ca.uhn.fhir.util;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;
import jakarta.annotation.Nonnull;
import jakarta.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import org.hl7.fhir.instance.model.api.IPrimitiveType;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.endsWith;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

@SuppressWarnings("JavadocLinkAsPlainText")
public class UrlUtil {
	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

	// Punctuation characters that are passed through unescaped by PARAMETER_ESCAPER.
	private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
	// Shared escaper used by escapeUrlParam/escapeUrlParams. The second argument is Guava's
	// "plusForSpace" flag; passing false presumably means a space is percent-encoded rather
	// than written as "+" - confirm against the Guava PercentEscaper documentation.
	private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

	/**
	 * Non instantiable: this class only exposes static helper methods.
	 */
	private UrlUtil() {}

	/**
	 * Prepares a value for use as an HTTP header by removing every carriage return
	 * ({@code \r}) and line feed ({@code \n}) character from it. All other characters
	 * are kept as-is.
	 *
	 * @param theHeader the raw header value (must not be {@code null})
	 * @return the header value with all CR and LF characters removed
	 * @since 6.2.0
	 */
	public static String sanitizeHeaderValue(String theHeader) {
		StringBuilder cleaned = new StringBuilder(theHeader.length());
		for (int idx = 0; idx < theHeader.length(); idx++) {
			char next = theHeader.charAt(idx);
			boolean lineBreak = next == '\n' || next == '\r';
			if (!lineBreak) {
				cleaned.append(next);
			}
		}
		return cleaned.toString();
	}

	/**
	 * Removes every character from a base URL that is not an ASCII letter, an ASCII digit,
	 * or one of {@code : / . _ -}.
	 *
	 * @param theBaseUrl the base URL to clean (must not be {@code null})
	 * @return the input with all other characters dropped
	 */
	public static String sanitizeBaseUrl(String theBaseUrl) {
		StringBuilder kept = new StringBuilder(theBaseUrl.length());
		for (int idx = 0; idx < theBaseUrl.length(); idx++) {
			char next = theBaseUrl.charAt(idx);
			boolean asciiLetter = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
			boolean asciiDigit = next >= '0' && next <= '9';
			boolean allowedPunctuation = next == ':' || next == '/' || next == '.' || next == '_' || next == '-';
			if (asciiLetter || asciiDigit || allowedPunctuation) {
				kept.append(next);
			}
		}
		return kept.toString();
	}

	/**
	 * Resolves a possibly-relative URL against a base URL. This method never throws for
	 * malformed input: if resolution fails, a warning is logged and the endpoint is
	 * returned unchanged.
	 *
	 * @param theBase     the absolute base URL, may be {@code null}
	 * @param theEndpoint the URL to resolve, may be {@code null}
	 * @return {@code theEndpoint} itself when it is {@code null}, already absolute, or when
	 *         there is no base or resolution fails; otherwise the resolved absolute URL
	 */
	public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
		// Nothing to resolve in any of these cases
		if (theEndpoint == null || isAbsolute(theEndpoint) || theBase == null) {
			return theEndpoint;
		}

		String resolved;
		try {
			URL baseUrl = new URL(theBase);
			resolved = new URL(baseUrl, theEndpoint).toString();
		} catch (MalformedURLException e) {
			String message =
					"Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]";
			ourLog.warn(message, e);
			resolved = theEndpoint;
		}
		return resolved;
	}

	/**
	 * Shortens an extension URL relative to its parent extension URL. When both URLs share
	 * exactly the same text up to their final {@code /} (and that slash sits at the same
	 * index in both), only the part of the child after the slash is returned. In every
	 * other case the child URL is returned unchanged.
	 *
	 * @param theParentExtensionUrl the parent URL, may be {@code null}
	 * @param theExtensionUrl       the child URL, may be {@code null}
	 * @return the relative name, or {@code theExtensionUrl} if it cannot be shortened
	 */
	public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
		if (theParentExtensionUrl == null || theExtensionUrl == null) {
			return theExtensionUrl;
		}

		int parentSlash = theParentExtensionUrl.lastIndexOf('/');
		int childSlash = theExtensionUrl.lastIndexOf('/');

		// Both must contain a slash, and the final slash must be at the same position
		boolean slashesLineUp = parentSlash != -1 && parentSlash == childSlash;
		if (slashesLineUp && theExtensionUrl.regionMatches(0, theParentExtensionUrl, 0, parentSlash)) {
			return theExtensionUrl.substring(parentSlash + 1);
		}

		return theExtensionUrl;
	}

	/**
	 * Given a FHIR resource URL, extracts the associated resource type. Supported formats
	 * include the following inputs, all of which will return {@literal Patient}. If no
	 * resource type can be determined, {@literal null} will be returned.
	 * <ul>
	 * <li>Patient</li>
	 * <li>Patient?</li>
	 * <li>Patient?identifier=foo</li>
	 * <li>/Patient</li>
	 * <li>/Patient?</li>
	 * <li>/Patient?identifier=foo</li>
	 * <li>http://foo/base/Patient?identifier=foo</li>
	 * <li>http://foo/base/Patient/1</li>
	 * <li>http://foo/base/Patient/1/_history/2</li>
	 * <li>Patient/1</li>
	 * <li>Patient/1/_history/2</li>
	 * <li>/Patient/1</li>
	 * <li>/Patient/1/_history/2</li>
	 * </ul>
	 *
	 * @param theFhirContext the context used to check that the candidate is a known resource type
	 * @param theUrl         the URL to inspect, may be {@code null}
	 * @return the resource type, or {@code null} for a {@code null} or {@code urn:} input or
	 *         when the context does not recognise the candidate type
	 */
	@Nullable
	public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) {
		if (theUrl == null || theUrl.startsWith("urn:")) {
			return null;
		}

		String candidate;
		int queryStart = theUrl.indexOf("?");
		if (queryStart > 0) {
			// Search-style URL: the type is the last path segment before the '?'
			String beforeQuery = theUrl.substring(0, queryStart);
			String lastSegment = beforeQuery.substring(beforeQuery.lastIndexOf('/') + 1);
			candidate = isNotBlank(lastSegment) ? lastSegment : null;
		} else {
			// Bare type or ID-style URL: drop one leading slash, then let IdDt pick the
			// type out of anything that still contains a path separator
			String path = theUrl.startsWith("/") ? theUrl.substring(1) : theUrl;
			candidate = path.contains("/") ? new IdDt(path).getResourceType() : path;
		}

		if (isNotBlank(candidate)) {
			try {
				// Called only for validation: throws if the type is unknown
				theFhirContext.getResourceDefinition(candidate);
			} catch (DataFormatException e) {
				return null;
			}
		}

		return candidate;
	}

	/**
	 * Percent-encodes a single URL parameter name or value.
	 * <p>
	 * This method is intended to be applied to an individual parameter
	 * name or value. For example, if you are creating the URL
	 * <code>http://example.com/fhir/Patient?key=føø</code>
	 * it would be appropriate to pass the string "føø" to this method,
	 * but not appropriate to pass the entire URL since characters
	 * such as "/" and "?" would also be escaped.
	 * </p>
	 *
	 * @param theUnescaped the raw value, may be {@code null}
	 * @return the escaped value, or {@code null} if the input was {@code null}
	 */
	public static String escapeUrlParam(String theUnescaped) {
		return theUnescaped == null ? null : PARAMETER_ESCAPER.escape(theUnescaped);
	}

	/**
	 * Applies the same encoding as {@link #escapeUrlParam(String)} but against all
	 * values in a collection.
	 *
	 * @param theUnescaped the raw values; must not be {@code null}
	 * @return a new list holding the escaped form of each input value, in iteration order
	 */
	public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
		return theUnescaped.stream().map(PARAMETER_ESCAPER::escape).collect(Collectors.toList());
	}

	/**
	 * Tests whether a URL is absolute, meaning that it starts with {@code http://} or
	 * {@code https://} (compared case-insensitively).
	 *
	 * @param theValue the URL to test, may be {@code null}
	 * @return {@code true} if the value starts with an HTTP(S) scheme; {@code false}
	 *         otherwise, including for {@code null}
	 */
	public static boolean isAbsolute(String theValue) {
		if (theValue == null) {
			return false;
		}
		// Locale.ROOT keeps the comparison independent of the JVM's default locale
		String value = theValue.toLowerCase(java.util.Locale.ROOT);
		return value.startsWith("http://") || value.startsWith("https://");
	}

	/**
	 * Checks whether a string contains any character that {@code sanitizeUrlPart} would
	 * escape or drop: a single quote, a double quote, {@code <}, {@code >}, or any
	 * character below the space character (which includes CR and LF).
	 *
	 * @param theString the text to inspect, may be {@code null}
	 * @return {@code true} if at least one such character is present; {@code false}
	 *         otherwise, including for {@code null}
	 */
	public static boolean isNeedsSanitization(CharSequence theString) {
		if (theString == null) {
			return false;
		}
		int length = theString.length();
		for (int idx = 0; idx < length; idx++) {
			char c = theString.charAt(idx);
			boolean markupChar = c == '\'' || c == '"' || c == '<' || c == '>';
			// Control characters, including '\n' and '\r', all sort below the space
			if (markupChar || c < ' ') {
				return true;
			}
		}
		return false;
	}

	/**
	 * Performs a lightweight check that a string looks like an HTTP(S) URL: it must be at
	 * least 8 characters long and start with {@code http://} or {@code https://}
	 * (compared case-insensitively). Nothing after the scheme is inspected.
	 *
	 * @param theUrl the URL to test, may be {@code null}
	 * @return {@code true} if the prefix and minimum-length checks pass
	 */
	public static boolean isValid(String theUrl) {
		// Anything shorter than 8 characters is rejected outright
		if (theUrl == null || theUrl.length() < 8) {
			return false;
		}

		String lowerCased = theUrl.toLowerCase();
		return lowerCased.startsWith("http://") || lowerCased.startsWith("https://");
	}

	/**
	 * Looks up the resource definition for the resource type named in a URL. The type is
	 * taken from the last path segment before the query string, so
	 * {@code Observation?param=foo}, {@code Observation/?param=foo} and
	 * {@code http://base/Observation?param=foo} all resolve to {@code Observation}.
	 * A URL without a query string is treated as consisting of the path only.
	 *
	 * @param theCtx the context used to look up the resource definition
	 * @param theUrl the URL to inspect; must not be {@code null}
	 * @return the definition of the resource type found in the URL
	 * @throws DataFormatException if the context rejects the extracted resource name
	 */
	public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl)
			throws DataFormatException {
		int paramIndex = theUrl.indexOf('?');

		// Without a '?' the whole URL is the path; previously this case failed with a
		// StringIndexOutOfBoundsException from substring(0, -1)
		String path = (paramIndex == -1) ? theUrl : theUrl.substring(0, paramIndex);

		// Change pattern of "Observation/?param=foo" into "Observation?param=foo"
		if (path.endsWith("/")) {
			path = path.substring(0, path.length() - 1);
		}

		// lastIndexOf returns -1 when there is no slash, which keeps the whole path
		String resourceName = path.substring(path.lastIndexOf('/') + 1);
		return theCtx.getResourceDefinition(resourceName);
	}

	/**
	 * Parses a URL query string into a map of parameter name to parameter values.
	 * A leading {@code ?} is optional, and names and values are URL-decoded.
	 *
	 * @param theQueryString the query string, may be {@code null} (treated as empty)
	 * @return a map with one entry per distinct parameter name, never {@code null};
	 *         each array holds that parameter's values in order of appearance
	 */
	@Nonnull
	public static Map<String, String[]> parseQueryString(String theQueryString) {
		HashMap<String, List<String>> map = new HashMap<>();
		parseQueryString(theQueryString, map);
		return toQueryStringMap(map);
	}

	private static void parseQueryString(String theQueryString, HashMap> map) {
		String query = defaultString(theQueryString);
		if (query.startsWith("?")) {
			query = query.substring(1);
		}

		StringTokenizer tok = new StringTokenizer(query, "&");
		while (tok.hasMoreTokens()) {
			String nextToken = tok.nextToken();
			if (isBlank(nextToken)) {
				continue;
			}

			int equalsIndex = nextToken.indexOf('=');
			String nextValue;
			String nextKey;
			if (equalsIndex == -1) {
				nextKey = nextToken;
				nextValue = "";
			} else {
				nextKey = nextToken.substring(0, equalsIndex);
				nextValue = nextToken.substring(equalsIndex + 1);
			}

			nextKey = unescape(nextKey);
			nextValue = unescape(nextValue);

			List list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
			list.add(nextValue);
		}
	}

	/**
	 * Parses several query strings and merges their parameters into a single map. Values
	 * for a parameter name that occurs in more than one query string are collected into
	 * the same array, in the order the query strings were supplied.
	 *
	 * @param theQueryString the query strings to parse; individual elements may be {@code null}
	 * @return a map of parameter name to all values found for that name
	 */
	public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
		HashMap<String, List<String>> map = new HashMap<>();
		for (String next : theQueryString) {
			parseQueryString(next, map);
		}
		return toQueryStringMap(map);
	}

	/**
	 * Normalizes canonical URLs for comparison. Any version identifier ({@code |version})
	 * or fragment ({@code #fragment}) is removed, the remainder is URI-normalized where it
	 * parses as a URI, and trailing "/" characters are stripped.
	 *
	 * @param theUrl the canonical URL, may be {@code null}
	 * @return the normalized URL, or {@code null} if the input was {@code null}
	 */
	public static String normalizeCanonicalUrlForComparison(String theUrl) {
		if (theUrl == null) {
			return null;
		}

		// Cut the version/fragment first: '|' is not a legal URI character, so leaving
		// it in place would make URI parsing fail and skip the path normalization
		String retVal = theUrl;
		int hashIndex = retVal.indexOf('#');
		int pipeIndex = retVal.indexOf('|');
		int cutIndex;
		if (hashIndex == -1) {
			cutIndex = pipeIndex;
		} else if (pipeIndex == -1) {
			cutIndex = hashIndex;
		} else {
			cutIndex = Math.min(hashIndex, pipeIndex);
		}
		if (cutIndex != -1) {
			retVal = retVal.substring(0, cutIndex);
		}

		try {
			retVal = new URI(retVal).normalize().toString();
		} catch (URISyntaxException e) {
			// Not a parseable URI: fall through and compare on the raw text
		}

		// Strip trailing slashes last, so "http://foo/#x" and "http://foo" compare equal
		while (retVal.endsWith("/")) {
			retVal = retVal.substring(0, retVal.length() - 1);
		}
		return retVal;
	}

	/**
	 * Parse a URL in one of the following forms:
	 * <ul>
	 * <li>[Resource Type]?[Search Params]</li>
	 * <li>[Resource Type]/[Resource ID]</li>
	 * <li>[Resource Type]/[Resource ID]/_history/[Version ID]</li>
	 * </ul>
	 * URLs starting with "http" have their query string split off and the remainder
	 * handed to {@link IdDt} for parsing. All other URLs are scanned character by
	 * character, splitting on '/' and '?'.
	 *
	 * @param theUrl the URL to parse; must not be null
	 * @return the parts that were found; parts that are not present are left null
	 * @throws InvalidRequestException if the segment following the resource ID is not
	 *                                 the history token
	 */
	public static UrlParts parseUrl(String theUrl) {
		String url = theUrl;
		UrlParts retVal = new UrlParts();
		if (url.startsWith("http")) {
			// Absolute form: split off the query string (a blank one is stored as null)
			int qmIdx = url.indexOf('?');
			if (qmIdx != -1) {
				retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
				url = url.substring(0, qmIdx);
			}

			IdDt id = new IdDt(url);
			retVal.setResourceType(id.getResourceType());
			retVal.setResourceId(id.getIdPart());
			retVal.setVersionId(id.getVersionIdPart());
			return retVal;
		}

		// Skip a single leading '/' when it is directly followed by a letter
		int parsingStart = 0;
		if (url.length() > 2) {
			if (url.charAt(0) == '/') {
				if (Character.isLetter(url.charAt(1))) {
					parsingStart = 1;
				}
			}
		}

		int nextStart = parsingStart;
		boolean nextIsHistory = false;

		for (int idx = parsingStart; idx < url.length(); idx++) {
			char nextChar = url.charAt(idx);
			boolean atEnd = (idx + 1) == url.length();
			if (nextChar == '?' || nextChar == '/' || atEnd) {
				// At the end of the string the current character belongs to the segment,
				// unless it is the '?' separator.
				// NOTE(review): a URL ending in '/' therefore keeps that slash in its
				// final segment (e.g. "Patient/" yields the type "Patient/") - confirm
				// whether that is intended.
				int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
				String nextSubstring = url.substring(nextStart, endIdx);
				// Segments are assigned in order: type, ID, history token, version
				if (retVal.getResourceType() == null) {
					retVal.setResourceType(nextSubstring);
				} else if (retVal.getResourceId() == null) {
					retVal.setResourceId(nextSubstring);
				} else if (nextIsHistory) {
					retVal.setVersionId(nextSubstring);
				} else {
					if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
						nextIsHistory = true;
					} else {
						throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
					}
				}
				if (nextChar == '?') {
					// Everything after the '?' is the raw parameter string; stop scanning
					if (url.length() > idx + 1) {
						retVal.setParams(url.substring(idx + 1));
					}
					break;
				}
				nextStart = idx + 1;
			}
		}

		return retVal;
	}

	/**
	 * Sanitizes the string value of a primitive so that it can be embedded safely in a
	 * URL. This is a convenience overload that delegates to the {@code CharSequence}
	 * variant of this method using the primitive's {@code getValueAsString()} value.
	 *
	 * @param theString the primitive to sanitize, may be {@code null}
	 * @return the sanitized string value, or {@code null} if the input was {@code null}
	 */
	public static String sanitizeUrlPart(IPrimitiveType theString) {
		if (theString == null) {
			return null;
		}
		return sanitizeUrlPart(theString.getValueAsString());
	}

	/**
	 * This method specifically HTML-encodes the " and
	 * < characters in order to prevent injection attacks.
	 * 
	 * The following characters are escaped:
	 * 

	 *    '
	 *    "
	 *    <
	 *    >
	 *    \n (newline)
	 * 
	 */
	public static String sanitizeUrlPart(CharSequence theString) {
		if (theString == null) {
			return null;
		}

		boolean needsSanitization = isNeedsSanitization(theString);

		if (needsSanitization) {
			// Ok, we're sanitizing
			StringBuilder buffer = new StringBuilder(theString.length() + 10);
			for (int j = 0; j < theString.length(); j++) {

				char nextChar = theString.charAt(j);
				switch (nextChar) {
						/*
						 * NB: If you add a constant here, you also need to add it
						 * to isNeedsSanitization()!!
						 */
					case '\'':
						buffer.append("'");
						break;
					case '"':
						buffer.append(""");
						break;
					case '<':
						buffer.append("<");
						break;
					case '>':
						buffer.append(">");
						break;
					case '\n':
						buffer.append("
");
						break;
					case '\r':
						buffer.append("");
						break;
					default:
						if (nextChar >= ' ') {
							buffer.append(nextChar);
						}
						break;
				}
			} // for build escaped string

			return buffer.toString();
		}

		return theString.toString();
	}

	/**
	 * Array variant of {@link #sanitizeUrlPart(CharSequence)}: sanitizes every element
	 * and returns the results in a new array of the same length and order.
	 *
	 * @param theParameterValues the values to sanitize, may be {@code null}
	 * @return a new array of sanitized values, or {@code null} if the input was {@code null}
	 */
	public static String[] sanitizeUrlPart(String[] theParameterValues) {
		if (theParameterValues == null) {
			return null;
		}

		String[] sanitized = new String[theParameterValues.length];
		int target = 0;
		for (String nextValue : theParameterValues) {
			sanitized[target++] = sanitizeUrlPart(nextValue);
		}
		return sanitized;
	}

	private static Map toQueryStringMap(HashMap> map) {
		HashMap retVal = new HashMap<>();
		for (Entry> nextEntry : map.entrySet()) {
			retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
		}
		return retVal;
	}

	/**
	 * URL-decodes a string using UTF-8. The string is returned unchanged when it contains
	 * nothing that needs decoding.
	 * <p>
	 * Values starting with {@code application/} keep their literal {@code +} characters,
	 * so that a content type such as {@code application/fhir+json} passed directly in a
	 * URL is not turned into {@code application/fhir json}.
	 * </p>
	 *
	 * @param theString the escaped text, may be {@code null}
	 * @return the decoded text, or {@code null} if the input was {@code null}
	 * @throws IllegalArgumentException if the string contains a malformed percent escape
	 */
	public static String unescape(String theString) {
		if (theString == null) {
			return null;
		}
		// If the user passes "_outputFormat" as a GET request parameter directly in the URL:
		final boolean preservePlus = theString.startsWith("application/");
		final boolean hasPercent = theString.indexOf('%') != -1;
		final boolean hasPlus = theString.indexOf('+') != -1;

		if (!hasPercent && (preservePlus || !hasPlus)) {
			return theString;
		}

		// URLDecoder always maps '+' to a space, so a '+' that must survive is
		// percent-encoded before decoding
		String toDecode = (preservePlus && hasPlus) ? theString.replace("+", "%2B") : theString;
		try {
			// Yes it would be nice to not use a string "UTF-8" but the equivalent
			// method that takes Charset is JDK10+ only... sigh....
			return URLDecoder.decode(toDecode, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
		}
	}

	/**
	 * Parses the query portion of a match URL (e.g. {@code Patient?identifier=foo|bar})
	 * into a list of name/value pairs. Everything up to and including the first {@code ?}
	 * is ignored. The {@code |} character and the comparator sequences {@code =>=},
	 * {@code =<=}, {@code =>} and {@code =<} are percent-encoded before parsing.
	 *
	 * @param theMatchUrl the match URL; must not be {@code null}
	 * @return the decoded parameters in order of appearance
	 * @throws InvalidRequestException if the URL contains a space
	 */
	public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
		String matchUrl = theMatchUrl;
		int questionMarkIndex = matchUrl.indexOf('?');
		if (questionMarkIndex != -1) {
			matchUrl = matchUrl.substring(questionMarkIndex + 1);
		}

		final String[] searchList = new String[] {"|", "=>=", "=<=", "=>", "=<"};
		final String[] replacementList = new String[] {"%7C", "=%3E%3D", "=%3C%3D", "=%3E", "=%3C"};
		matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList);
		if (matchUrl.contains(" ")) {
			throw new InvalidRequestException(Msg.code(1744) + "Failed to parse match URL[" + theMatchUrl
					+ "] - URL is invalid (must not contain spaces)");
		}

		List<NameValuePair> parameters = URLEncodedUtils.parse(matchUrl, Constants.CHARSET_UTF8, '&');

		// One issue that has happened before is people putting a "+" sign into an email address in a match URL
		// and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just
		// assume they really meant "+".
		for (int i = 0; i < parameters.size(); i++) {
			NameValuePair next = parameters.get(i);
			// A parameter written without "=" (e.g. "?email") has no value to inspect
			String value = next.getValue();
			if ("email".equals(next.getName()) && value != null && value.contains(" ")) {
				parameters.set(i, new BasicNameValuePair(next.getName(), value.replace(' ', '+')));
			}
		}

		return parameters;
	}

	/**
	 * Creates the list of URI candidates for a search with the :above modifier: the URI
	 * itself followed by each of its ancestors, down to scheme and host.
	 * <p>
	 * Example input: http://[host]/[pathPart1]/[pathPart2]<br>
	 * Example output: http://[host]/[pathPart1]/[pathPart2], http://[host]/[pathPart1], http://[host]
	 * </p>
	 * Repeated slashes after the scheme are collapsed and a trailing slash is ignored.
	 *
	 * @param theUri String URI parameter
	 * @return List of URI candidates, longest first
	 * @throws InvalidRequestException if the URI cannot be parsed or has no scheme or host
	 */
	public static List<String> getAboveUriCandidates(String theUri) {
		try {
			URI uri = new URI(theUri);
			if (uri.getScheme() == null || uri.getHost() == null) {
				throwInvalidRequestExceptionForNotValidUri(theUri, null);
			}
		} catch (URISyntaxException theCause) {
			throwInvalidRequestExceptionForNotValidUri(theUri, theCause);
		}

		// Plain string handling is used rather than java.nio.file.Path: a URI is not a
		// file system path, and path parsing rules differ between platforms.
		// A URI with both a scheme and a host always contains "://".
		int authorityStart = theUri.indexOf("://") + "://".length();
		String prefix = theUri.substring(0, authorityStart);
		String remainder = theUri.substring(authorityStart).replaceAll("/{2,}", "/");
		if (remainder.endsWith("/")) {
			remainder = remainder.substring(0, remainder.length() - 1);
		}

		List<String> candidates = new ArrayList<>();
		candidates.add(prefix + remainder);
		// Drop one trailing segment at a time until only the host is left
		int slashIdx = remainder.lastIndexOf('/');
		while (slashIdx != -1) {
			remainder = remainder.substring(0, slashIdx);
			candidates.add(prefix + remainder);
			slashIdx = remainder.lastIndexOf('/');
		}
		return candidates;
	}

	/**
	 * Always throws an {@link InvalidRequestException} (message code 2419) reporting that
	 * the supplied URI is not valid.
	 *
	 * @param theUri   the rejected URI, included in the message
	 * @param theCause the underlying parse failure, or {@code null} if there is none
	 */
	private static void throwInvalidRequestExceptionForNotValidUri(String theUri, Exception theCause) {
		String details = String.format("Provided URI is not valid: %s", theUri);
		String message = Msg.code(2419) + details;
		throw new InvalidRequestException(message, theCause);
	}

	/**
	 * Mutable holder for the components of a FHIR resource URL. Every component is
	 * {@code null} until it has been set.
	 */
	public static class UrlParts {
		/** Raw query string, without the leading '?' */
		private String myParams;
		/** Logical ID of the resource */
		private String myResourceId;
		/** Resource type name, e.g. "Patient" */
		private String myResourceType;
		/** Version ID taken from a "_history" URL */
		private String myVersionId;

		/** @return the raw query string, or {@code null} if none was set */
		public String getParams() {
			return this.myParams;
		}

		/** @return the resource ID, or {@code null} if none was set */
		public String getResourceId() {
			return this.myResourceId;
		}

		/** @return the resource type, or {@code null} if none was set */
		public String getResourceType() {
			return this.myResourceType;
		}

		/** @return the version ID, or {@code null} if none was set */
		public String getVersionId() {
			return this.myVersionId;
		}

		/** @param theParams the raw query string */
		public void setParams(String theParams) {
			this.myParams = theParams;
		}

		/** @param theResourceId the resource ID */
		public void setResourceId(String theResourceId) {
			this.myResourceId = theResourceId;
		}

		/** @param theResourceType the resource type */
		public void setResourceType(String theResourceType) {
			this.myResourceType = theResourceType;
		}

		/** @param theVersionId the version ID */
		public void setVersionId(String theVersionId) {
			this.myVersionId = theVersionId;
		}
	}
}