org.htmlunit.protocol.data.DataUrlDecoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xlt Show documentation
Show all versions of xlt Show documentation
XLT (Xceptance LoadTest) is an extensive load and performance test tool developed and maintained by Xceptance.
/*
* Copyright (c) 2002-2024 Gargoyle Software Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.htmlunit.protocol.data;
import static java.nio.charset.StandardCharsets.US_ASCII;
import static org.htmlunit.protocol.data.DataURLConnection.DATA_PREFIX;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.htmlunit.util.MimeType;
/**
 * Helper to work with data URLs of the form
 * {@code data:[<mediatype>][;<parameter>...][;base64],<data>}.
 *
 * <p>An instance holds the decoded payload bytes together with the media type and the
 * charset taken from the URL header (or the defaults when the header does not provide them).
 *
 * @see <a href="https://www.rfc-editor.org/rfc/rfc2397">RFC 2397</a>
 * @author Marc Guillemot
 * @author Ronald Brill
 * @author Carsten Steul
 */
public class DataUrlDecoder {
    private static final Charset DEFAULT_CHARSET = US_ASCII;
    private static final String DEFAULT_MEDIA_TYPE = MimeType.TEXT_PLAIN;

    /** Trailing header token announcing a base64 encoded payload. */
    private static final String BASE64_MARKER = ";base64";
    /** Prefix of the header parameter that names the charset. */
    private static final String CHARSET_PARAM = "charset=";

    private final String mediaType_;
    private final Charset charset_;
    private final byte[] content_;

    /**
     * C'tor.
     * @param data the decoded payload; the array is stored as is (no copy is made)
     * @param mediaType the media type
     * @param charset the charset
     */
    protected DataUrlDecoder(final byte[] data, final String mediaType, final Charset charset) {
        content_ = data;
        mediaType_ = mediaType;
        charset_ = charset;
    }

    /**
     * Decodes a data URL providing simple access to the information contained by the URL.
     * @param url the URL to decode
     * @return the {@link DataUrlDecoder} holding decoded information
     * @throws UnsupportedEncodingException if the URL does not start with the data prefix,
     *         contains no comma separating header and data, or if the data part contains an
     *         invalid percent escape
     */
    public static DataUrlDecoder decode(final URL url) throws UnsupportedEncodingException {
        return decodeDataURL(url.toExternalForm());
    }

    /**
     * Decodes a data URL providing simple access to the information contained by the URL.
     * @param url the string representation of the URL to decode
     * @return the {@link DataUrlDecoder} holding decoded information
     * @throws UnsupportedEncodingException if the URL does not start with the data prefix,
     *         contains no comma separating header and data, or if the data part contains an
     *         invalid percent escape
     */
    public static DataUrlDecoder decodeDataURL(final String url) throws UnsupportedEncodingException {
        if (!url.startsWith(DATA_PREFIX)) {
            throw new UnsupportedEncodingException("Invalid data url: '" + url + "' (wrong prefix)");
        }
        final int comma = url.indexOf(',');
        if (comma < 0) {
            throw new UnsupportedEncodingException("Invalid data url: '" + url + "' (no data)");
        }

        // everything between the prefix and the first comma describes the payload
        String beforeData = url.substring(DATA_PREFIX.length(), comma);
        final boolean base64 = beforeData.endsWith(BASE64_MARKER);
        if (base64) {
            beforeData = beforeData.substring(0, beforeData.length() - BASE64_MARKER.length());
        }
        final String mediaType = extractMediaType(beforeData);
        final Charset charset = extractCharset(beforeData);

        try {
            // literal characters of the data part are encoded using the declared charset,
            // percent escapes are resolved afterwards on the byte level
            byte[] data = url.substring(comma + 1).getBytes(charset);
            data = decodeUrl(data);
            if (base64) {
                data = Base64.decodeBase64(data);
            }
            return new DataUrlDecoder(data, mediaType, charset);
        }
        catch (final DecoderException e) {
            final UnsupportedEncodingException ex =
                    new UnsupportedEncodingException("Invalid data url: '" + url + "' (data decoding failed)");
            ex.initCause(e);
            throw ex;
        }
    }

    /**
     * Determines the charset from the header part of a data URL.
     * The first {@code charset=} parameter (matched case-insensitively) wins, regardless of
     * its position among other parameters. If there is no such parameter, the first
     * parameter is tried as a bare charset name to stay compatible with the lenient
     * handling of headers like {@code text/plain;utf-8}.
     *
     * @param beforeData the header part (without a trailing {@code ;base64})
     * @return the detected charset or the default charset if none could be resolved
     */
    private static Charset extractCharset(final String beforeData) {
        final int firstSemicolon = beforeData.indexOf(';');
        if (firstSemicolon < 0) {
            return DEFAULT_CHARSET;
        }

        // limit -1 keeps empty tokens, so the array always has at least one element
        final String[] parameters = beforeData.substring(firstSemicolon + 1).split(";", -1);
        for (final String parameter : parameters) {
            final String candidate = parameter.trim();
            if (candidate.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                return charsetForName(candidate.substring(CHARSET_PARAM.length()).trim());
            }
        }
        return charsetForName(parameters[0].trim());
    }

    /**
     * Looks up a charset by name without ever failing.
     * @param name the charset name to resolve
     * @return the matching charset or the default charset if the name is empty, illegal or
     *         not supported
     */
    private static Charset charsetForName(final String name) {
        if (name.isEmpty()) {
            return DEFAULT_CHARSET;
        }
        try {
            return Charset.forName(name);
        }
        catch (final UnsupportedCharsetException | IllegalCharsetNameException e) {
            // deliberately ignored: unknown or malformed names fall back to the default
            return DEFAULT_CHARSET;
        }
    }

    /**
     * Determines the media type from the header part of a data URL.
     * @param beforeData the header part (without a trailing {@code ;base64})
     * @return the text in front of the first semicolon if the header contains a slash,
     *         the default media type otherwise
     */
    private static String extractMediaType(final String beforeData) {
        if (beforeData.contains("/")) {
            // substringBefore returns the whole input if there is no semicolon
            return StringUtils.substringBefore(beforeData, ";");
        }
        return DEFAULT_MEDIA_TYPE;
    }

    /**
     * Gets the media type information contained in the data URL.
     * @return the media type, {@link MimeType#TEXT_PLAIN} if the URL didn't contain
     *         any media type information
     */
    public String getMediaType() {
        return mediaType_;
    }

    /**
     * Gets the charset information specified in the data URL.
     * @return the name of the charset, the name of US-ASCII if the URL didn't contain
     *         any (resolvable) charset information
     */
    public String getCharset() {
        return charset_.name();
    }

    /**
     * Gets the bytes contained in the data URL.
     * @return the content; this is the internal array, not a copy
     */
    public byte[] getBytes() {
        return content_;
    }

    /**
     * Gets the text content of the data URL. This makes sense only for data URL that
     * represents some text.
     * @return the content bytes converted to a string using the charset of this decoder
     * @throws UnsupportedEncodingException declared for backward compatibility only; the
     *         conversion is done with an already resolved {@link Charset}
     */
    public String getDataAsString() throws UnsupportedEncodingException {
        return new String(content_, charset_);
    }

    /**
     * Resolves percent escapes ({@code %XX}) in the given bytes; all other bytes are copied
     * unchanged. Adapted from apache commons codec.
     *
     * @param bytes the bytes to decode, may be {@code null}
     * @return the decoded bytes, {@code null} if the input was {@code null}
     * @throws DecoderException if a percent sign is not followed by two hex digits
     */
    private static byte[] decodeUrl(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }

        // the result is never longer than the input
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length);
        for (int i = 0; i < bytes.length; i++) {
            final int b = bytes[i];
            if (b == '%') {
                // both hex digits have to be available
                if (i + 2 >= bytes.length) {
                    throw new DecoderException("Invalid URL encoding: incomplete escape sequence at index " + i);
                }
                final int u = digit16(bytes[++i]);
                final int l = digit16(bytes[++i]);
                buffer.write((u << 4) + l);
            }
            else {
                buffer.write(b);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Converts a single byte holding a hex digit character into its numeric value.
     * @param b the byte to convert
     * @return the value of the digit (0 - 15)
     * @throws DecoderException if the byte is not a valid digit for radix 16
     */
    private static int digit16(final byte b) throws DecoderException {
        final int i = Character.digit((char) b, 16);
        if (i == -1) {
            throw new DecoderException("Invalid URL encoding: not a valid digit (radix 16): " + b);
        }
        return i;
    }
}