com.palominolabs.http.url.UrlBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of url-builder Show documentation
Show all versions of url-builder Show documentation
Create properly-encoded URLs with a builder-style API.
The newest version!
/*
* Copyright (c) 2012 Palomino Labs, Inc.
*/
package com.palominolabs.http.url;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
import static com.palominolabs.http.url.UrlPercentEncoders.getFragmentEncoder;
import static com.palominolabs.http.url.UrlPercentEncoders.getMatrixEncoder;
import static com.palominolabs.http.url.UrlPercentEncoders.getPathEncoder;
import static com.palominolabs.http.url.UrlPercentEncoders.getQueryParamEncoder;
import static com.palominolabs.http.url.UrlPercentEncoders.getRegNameEncoder;
import static com.palominolabs.http.url.UrlPercentEncoders.getUnstructuredQueryEncoder;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Builder for urls with url-encoding applied to path, query param, etc.
*
* Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec (http://www.w3.org/TR/html401/interact/forms.html#form-content-type).
* This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
* HTTP-useful URLs.
*/
@NotThreadSafe
public final class UrlBuilder {
/**
* IPv6 address, cribbed from http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings
*/
private static final Pattern IPV6_PATTERN = Pattern
.compile(
"\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z");
/**
* IPv4 dotted quad
*/
private static final Pattern IPV4_PATTERN = Pattern
.compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");
@Nonnull
private final String scheme;
@Nonnull
private final String host;
@Nullable
private final Integer port;
private final List> queryParams = new ArrayList<>();
/**
* If this is non-null, queryParams must be empty, and vice versa.
*/
@Nullable
private String unstructuredQuery;
private final List pathSegments = new ArrayList<>();
private final PercentEncoder pathEncoder = getPathEncoder();
private final PercentEncoder regNameEncoder = getRegNameEncoder();
private final PercentEncoder matrixEncoder = getMatrixEncoder();
private final PercentEncoder queryParamEncoder = getQueryParamEncoder();
private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();
private final PercentEncoder fragmentEncoder = getFragmentEncoder();
@Nullable
private String fragment;
private boolean forceTrailingSlash = false;
/**
* Create a URL with UTF-8 encoding.
*
* @param scheme scheme (e.g. http)
* @param host host (e.g. foo.com or 1.2.3.4 or [::1])
* @param port null or a positive integer
*/
private UrlBuilder(@Nonnull String scheme, @Nonnull String host, @Nullable Integer port) {
this.host = host;
this.scheme = scheme;
this.port = port;
}
/**
* Create a URL with an null port and UTF-8 encoding.
*
* @param scheme scheme (e.g. http)
* @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
* ([::1]), excluding IPvFuture since no one uses that in practice
* @return a url builder
* @see UrlBuilder#forHost(String scheme, String host, int port)
*/
public static UrlBuilder forHost(@Nonnull String scheme, @Nonnull String host) {
return new UrlBuilder(scheme, host, null);
}
/**
* @param scheme scheme (e.g. http)
* @param host host in any of the valid syntaxes: reg-name ( a dns name), ipv4 literal (1.2.3.4), ipv6 literal
* ([::1]), excluding IPvFuture since no one uses that in practice
* @param port port
* @return a url builder
*/
public static UrlBuilder forHost(@Nonnull String scheme, @Nonnull String host, int port) {
return new UrlBuilder(scheme, host, port);
}
/**
* Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the
* query string apply.
*
* @param url url to initialize builder with
* @return a UrlBuilder containing the host, path, etc. from the url
* @throws CharacterCodingException if char decoding fails
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
*/
@Nonnull
public static UrlBuilder fromUrl(@Nonnull URL url) throws CharacterCodingException {
return fromUrl(url, UTF_8.newDecoder());
}
/**
* Create a UrlBuilder initialized with the contents of a {@link URL}.
*
* The query string will be parsed into HTML4 query params if it can be separated into a
* &
-separated sequence of key=value
pairs. The sequence of query params can then be
* appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is
* only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that
* is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query
* string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link
* UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls
* to {@link UrlBuilder#unstructuredQuery(String)}}, which replaces the entire query string, are allowed.
*
* @param url url to initialize builder with
* @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
* @return a UrlBuilder containing the host, path, etc. from the url
* @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
* report errors
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
*/
@Nonnull
public static UrlBuilder fromUrl(@Nonnull URL url, @Nonnull CharsetDecoder charsetDecoder) throws
CharacterCodingException {
PercentDecoder decoder = new PercentDecoder(charsetDecoder);
// reg names must be encoded UTF-8
PercentDecoder regNameDecoder;
if (charsetDecoder.charset().equals(UTF_8)) {
regNameDecoder = decoder;
} else {
regNameDecoder = new PercentDecoder(UTF_8.newDecoder());
}
Integer port = url.getPort();
if (port == -1) {
port = null;
}
UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
buildFromPath(builder, decoder, url);
buildFromQuery(builder, decoder, url);
if (url.getRef() != null) {
builder.fragment(decoder.decode(url.getRef()));
}
return builder;
}
/**
* Add a path segment.
*
* @param segment a path segment
* @return this
*/
@Nonnull
public UrlBuilder pathSegment(@Nonnull String segment) {
pathSegments.add(new PathSegment(segment));
return this;
}
/**
* Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}.
*
* @param segments path segments
* @return this
*/
@Nonnull
public UrlBuilder pathSegments(String... segments) {
for (String segment : segments) {
pathSegment(segment);
}
return this;
}
/**
* Add an HTML query parameter. Query parameters will be encoded in the order added.
*
* Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
* http://www.w3.org/TR/html401/interact/forms.html#form-content-type.
*
* If you use this method to build a query string, or created this builder from a url with a query string that can
* successfully be parsed into query param pairs, you cannot subsequently use {@link
* UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}.
*
* @param name param name
* @param value param value
* @return this
*/
@Nonnull
public UrlBuilder queryParam(@Nonnull String name, @Nonnull String value) {
if (unstructuredQuery != null) {
throw new IllegalStateException(
"Cannot call queryParam() when this already has an unstructured query specified");
}
queryParams.add(Pair.of(name, value));
return this;
}
/**
* Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
* is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
* that query.
*
* If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you
* cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL,
* CharsetDecoder)}.
*
* @param query Complete URI query, as specified by https://tools.ietf.org/html/rfc3986#section-3.4
* @return this
*/
@Nonnull
public UrlBuilder unstructuredQuery(@Nonnull String query) {
if (!queryParams.isEmpty()) {
throw new IllegalStateException(
"Cannot call unstructuredQuery() when this already has queryParam pairs specified");
}
unstructuredQuery = query;
return this;
}
/**
* Clear the unstructured query and any query params.
*
* Since the query / query param situation is a little complicated, this method will let you remove all query
* information and start again from scratch. This may be useful when taking an existing url, parsing it into a
* builder, and then re-doing its query params, for instance.
*
* @return this
*/
@Nonnull
public UrlBuilder clearQuery() {
queryParams.clear();
unstructuredQuery = null;
return this;
}
/**
* Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the
* root. Matrix params will be encoded in the order added.
*
* @param name param name
* @param value param value
* @return this
*/
@Nonnull
public UrlBuilder matrixParam(@Nonnull String name, @Nonnull String value) {
if (pathSegments.isEmpty()) {
// create an empty path segment to represent a matrix param applied to the root
pathSegment("");
}
PathSegment seg = pathSegments.get(pathSegments.size() - 1);
seg.matrixParams.add(Pair.of(name, value));
return this;
}
/**
* Set the fragment.
*
* @param fragment fragment string
* @return this
*/
@Nonnull
public UrlBuilder fragment(@Nonnull String fragment) {
this.fragment = fragment;
return this;
}
/**
* Force the generated URL to have a trailing slash at the end of the path.
*
* @return this
*/
@Nonnull
public UrlBuilder forceTrailingSlash() {
forceTrailingSlash = true;
return this;
}
/**
* Encode the current builder state into a URL string.
*
* @return a well-formed URL string
* @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
*/
public String toUrlString() throws CharacterCodingException {
StringBuilder buf = new StringBuilder();
buf.append(scheme);
buf.append("://");
buf.append(encodeHost(host));
if (port != null) {
buf.append(':');
buf.append(port);
}
for (PathSegment pathSegment : pathSegments) {
buf.append('/');
buf.append(pathEncoder.encode(pathSegment.segment));
for (Pair matrixParam : pathSegment.matrixParams) {
buf.append(';');
buf.append(matrixEncoder.encode(matrixParam.getKey()));
buf.append('=');
buf.append(matrixEncoder.encode(matrixParam.getValue()));
}
}
if (forceTrailingSlash) {
buf.append('/');
}
if (!queryParams.isEmpty()) {
buf.append("?");
Iterator> qpIter = queryParams.iterator();
while (qpIter.hasNext()) {
Pair queryParam = qpIter.next();
buf.append(queryParamEncoder.encode(queryParam.getKey()));
buf.append('=');
buf.append(queryParamEncoder.encode(queryParam.getValue()));
if (qpIter.hasNext()) {
buf.append('&');
}
}
} else if (unstructuredQuery != null) {
buf.append("?");
buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
}
if (fragment != null) {
buf.append('#');
buf.append(fragmentEncoder.encode(fragment));
}
return buf.toString();
}
/**
* Populate a url builder based on the query of a url
*
* @param builder builder
* @param decoder decoder
* @param url url
* @throws CharacterCodingException
*/
private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
CharacterCodingException {
if (url.getQuery() != null) {
String q = url.getQuery();
// try to parse into &-separated key=value pairs
List> pairs = new ArrayList<>();
boolean parseOk = true;
for (String queryChunk : q.split("&")) {
String[] queryParamChunks = queryChunk.split("=");
if (queryParamChunks.length != 2) {
parseOk = false;
break;
}
pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
decoder.decode(queryParamChunks[1])));
}
if (parseOk) {
for (Pair pair : pairs) {
builder.queryParam(pair.getKey(), pair.getValue());
}
} else {
builder.unstructuredQuery(decoder.decode(q));
}
}
}
/**
* Populate the path segments of a url builder from a url
*
* @param builder builder
* @param decoder decoder
* @param url url
* @throws CharacterCodingException
*/
private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
CharacterCodingException {
for (String pathChunk : url.getPath().split("/")) {
if (pathChunk.equals("")) {
continue;
}
if (pathChunk.charAt(0) == ';') {
builder.pathSegment("");
// empty path segment, but matrix params
for (String matrixChunk : pathChunk.substring(1).split(";")) {
buildFromMatrixParamChunk(decoder, builder, matrixChunk);
}
continue;
}
// otherwise, path chunk is non empty and does not start with a ';'
String[] matrixChunks = pathChunk.split(";");
// first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will
// not be included in the final url.
builder.pathSegment(decoder.decode(matrixChunks[0]));
// if there any other chunks, they're matrix param pairs
for (int i = 1; i < matrixChunks.length; i++) {
buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
}
}
}
private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
CharacterCodingException {
String[] mtxPair = pathMatrixChunk.split("=");
if (mtxPair.length != 2) {
throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
}
String mtxName = mtxPair[0];
String mtxVal = mtxPair[1];
ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
}
/**
* @param host original host string
* @return host encoded as in RFC 3986 section 3.2.2
*/
@Nonnull
private String encodeHost(String host) throws CharacterCodingException {
// matching order: IP-literal, IPv4, reg-name
if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) {
return host;
}
// it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
return regNameEncoder.encode(host);
}
/**
* Bundle of a path segment name and any associated matrix params.
*/
private static class PathSegment {
private final String segment;
private final List> matrixParams = new ArrayList<>();
PathSegment(String segment) {
this.segment = segment;
}
}
private static class Pair {
private final K key;
private final V value;
private Pair(K key, V value) {
this.key = key;
this.value = value;
}
K getKey() {
return key;
}
V getValue() {
return value;
}
static Pair of(K key, V value) {
return new Pair<>(key, value);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy