nu.validator.io.DataUri Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of validator Show documentation
Show all versions of validator Show documentation
An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)
/*
* Copyright (c) 2007-2015 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.io;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import io.mola.galimatias.URL;
import io.mola.galimatias.GalimatiasParseException;
/**
 * Parses the header of a {@code data:} URI (the optional media type, its
 * parameters and the optional {@code ;base64} marker) and exposes the
 * resulting content type together with a stream positioned at the start of
 * the payload.
 *
 * <p>The header is read through a {@code PercentDecodingReaderInputStream}
 * wrapped around the URL's scheme data, so the state machine below operates
 * on the values that stream returns (presumably the percent-decoded bytes;
 * confirm against that class).
 */
public class DataUri {

    /**
     * Tells whether the given string starts with {@code data:}, matching the
     * four letters of the scheme name in either upper or lower case.
     *
     * @param uri the string to inspect; may be {@code null}
     * @return {@code true} if {@code uri} is non-null, at least five
     *         characters long and begins with {@code data:}
     */
    public static boolean startsWithData(String uri) {
        return uri != null && uri.length() >= 5
                && (uri.charAt(0) == 'd' || uri.charAt(0) == 'D')
                && (uri.charAt(1) == 'a' || uri.charAt(1) == 'A')
                && (uri.charAt(2) == 't' || uri.charAt(2) == 'T')
                && (uri.charAt(3) == 'a' || uri.charAt(3) == 'A')
                && (uri.charAt(4) == ':');
    }

    /** States of the header parser; each names what was consumed last. */
    private enum State {
        AT_START,
        IN_SUPERTYPE,
        AT_SUBTYPE_START,
        IN_SUBTYPE,
        SEMICOLON_SEEN,
        WS_BEFORE_SEMICOLON,
        IN_PARAM_NAME,
        EQUALS_SEEN,
        IN_QUOTED_STRING,
        IN_UNQUOTED_STRING,
        IN_QUOTED_PAIR,
        CLOSE_QUOTE_SEEN
    }

    /** Content type taken from the header, without the trailing comma. */
    private String contentType;

    /** Stream over the payload that follows the header's comma. */
    private InputStream inputStream;

    /**
     * Validates the URL and parses its header, populating the content type
     * and the payload stream.
     *
     * <p>If parsing fails, the stream opened over the scheme data is closed
     * before the exception propagates.
     *
     * @param url the URL to parse; its scheme must be {@code data}
     * @throws IllegalArgumentException if the scheme is not {@code data}
     * @throws MalformedURLException if the URL has a fragment, ends before
     *         the header's comma, or has a non-ASCII character in the header
     * @throws IOException if the header is syntactically invalid or reading
     *         from the underlying stream fails
     */
    protected void init(URL url) throws IOException, MalformedURLException {
        if (!url.scheme().equals("data")) {
            throw new IllegalArgumentException("The input did not start with data:.");
        }
        if (url.fragment() != null) {
            throw new MalformedURLException(
                    "Fragment is not allowed for data: URIs according to RFC 2397.");
        }
        InputStream is = new PercentDecodingReaderInputStream(
                new StringReader(url.schemeData()));
        boolean parsed = false;
        try {
            parseHeader(is);
            parsed = true;
        } finally {
            // On success the stream has been handed to the caller via
            // getInputStream(); on any failure nobody else will close it.
            if (!parsed) {
                closeQuietly(is);
            }
        }
    }

    /**
     * Runs the header state machine over {@code is}, consuming everything up
     * to and including the comma that separates the header from the payload,
     * then stores the content type and payload stream in this object.
     *
     * @param is the stream to read the header from
     * @throws IOException if the header is malformed or reading fails
     */
    private void parseHeader(InputStream is) throws IOException {
        StringBuilder sb = new StringBuilder();
        State state = State.AT_START;
        // i counts the characters read so far; it is passed to the exception
        // so the error can point at the offending position.
        for (int i = 0;; i++) {
            int b = is.read();
            if (b == -1) {
                throw new MalformedURLException("Premature end of URI.");
            }
            if (b >= 0x80) {
                throw new MalformedURLException(
                        "Non-ASCII character in MIME type part of the data URI.");
            }
            char c = (char) b;
            sb.append(c);
            // Every case ends in break, return or throw so that no state can
            // fall through into the next one.
            switch (state) {
                case AT_START:
                    if (isTokenChar(c)) {
                        state = State.IN_SUPERTYPE;
                    } else if (c == ';') {
                        // No media type given: substitute the default type
                        // and continue with the parameters.
                        sb.setLength(0);
                        sb.append("text/plain;");
                        state = State.SEMICOLON_SEEN;
                    } else if (c == ',') {
                        // Completely empty header: use the full default.
                        contentType = "text/plain;charset=US-ASCII";
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character or a semicolon but saw ",
                                c, " instead.");
                    }
                    break;
                case IN_SUPERTYPE:
                    if (isTokenChar(c)) {
                        // stay in the supertype
                    } else if (c == '/') {
                        state = State.AT_SUBTYPE_START;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character or \u201C/\u201D but saw ",
                                c, " instead.");
                    }
                    break;
                case AT_SUBTYPE_START:
                    if (isTokenChar(c)) {
                        state = State.IN_SUBTYPE;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character but saw ",
                                c, " instead.");
                    }
                    break;
                case IN_SUBTYPE:
                    if (isTokenChar(c)) {
                        // stay in the subtype
                    } else if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                    } else if (isWhitespace(c)) {
                        state = State.WS_BEFORE_SEMICOLON;
                    } else if (c == ',') {
                        // Drop the comma that was just appended.
                        contentType = sb.substring(0, sb.length() - 1);
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character, whitespace, a semicolon or a comma but saw ",
                                c, " instead.");
                    }
                    break;
                case WS_BEFORE_SEMICOLON:
                    if (isWhitespace(c)) {
                        // skip further whitespace
                    } else if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                    } else {
                        throw newDatatypeException(i,
                                "Expected whitespace or a semicolon but saw ",
                                c, " instead.");
                    }
                    break;
                case SEMICOLON_SEEN:
                    if (isWhitespace(c)) {
                        // skip whitespace after the semicolon
                    } else if (isTokenChar(c)) {
                        state = State.IN_PARAM_NAME;
                    } else {
                        throw newDatatypeException(i,
                                "Expected whitespace or a token character but saw ",
                                c, " instead.");
                    }
                    break;
                case IN_PARAM_NAME:
                    if (isTokenChar(c)) {
                        // stay in the parameter name
                    } else if (c == '=') {
                        state = State.EQUALS_SEEN;
                    } else if (c == ',') {
                        // A comma may end a parameter name only when the
                        // header ends in ";base64,".
                        int baseFirst = sb.length() - 8;
                        if (baseFirst >= 0
                                && ";base64,".equals(sb.substring(baseFirst, sb.length()))) {
                            contentType = sb.substring(0, baseFirst);
                            inputStream = new Base64InputStream(is);
                            return;
                        }
                        // Previously this path fell through into EQUALS_SEEN
                        // and reported that state's unrelated message.
                        throw newDatatypeException(i,
                                "Expected an equals sign or a token character (a comma is only allowed here after \u201C;base64\u201D) but saw ",
                                c, " instead.");
                    } else {
                        throw newDatatypeException(i,
                                "Expected an equals sign, a comma or a token character but saw ",
                                c, " instead.");
                    }
                    break;
                case EQUALS_SEEN:
                    if (c == '\"') {
                        state = State.IN_QUOTED_STRING;
                    } else if (isTokenChar(c)) {
                        state = State.IN_UNQUOTED_STRING;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a double quote or a token character but saw ",
                                c, " instead.");
                    }
                    break;
                case IN_QUOTED_STRING:
                    if (c == '\\') {
                        state = State.IN_QUOTED_PAIR;
                    } else if (c == '\"') {
                        state = State.CLOSE_QUOTE_SEEN;
                    } else if (isQDTextChar(c)) {
                        // stay in the quoted string
                    } else {
                        throw newDatatypeException(i,
                                "Expected a non-control ASCII character but saw ",
                                c, " instead.");
                    }
                    break;
                case IN_QUOTED_PAIR:
                    // Values of 0x80 and above were rejected before the
                    // switch, so this condition holds for every c that gets
                    // here; the else branch is kept as a safeguard.
                    if (c <= 127) {
                        state = State.IN_QUOTED_STRING;
                    } else {
                        throw newDatatypeException(i,
                                "Expected an ASCII character but saw ",
                                c, " instead.");
                    }
                    break;
                case CLOSE_QUOTE_SEEN:
                    if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                    } else if (isWhitespace(c)) {
                        state = State.WS_BEFORE_SEMICOLON;
                    } else if (c == ',') {
                        contentType = sb.substring(0, sb.length() - 1);
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a semicolon, whitespace or a comma but saw ",
                                c, " instead.");
                    }
                    break;
                case IN_UNQUOTED_STRING:
                    if (isTokenChar(c)) {
                        // stay in the unquoted value
                    } else if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                    } else if (isWhitespace(c)) {
                        state = State.WS_BEFORE_SEMICOLON;
                    } else if (c == ',') {
                        contentType = sb.substring(0, sb.length() - 1);
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character, whitespace, a semicolon, or a comma but saw ",
                                c, " instead.");
                    }
                    break;
            }
        }
    }

    /**
     * Closes a stream, discarding any failure to do so. Used only while a
     * parse error is already propagating, so that the parse error is the one
     * the caller sees.
     *
     * @param is the stream to close
     */
    private static void closeQuietly(InputStream is) {
        try {
            is.close();
        } catch (IOException ignored) {
            // The parse failure being reported is the relevant error.
        }
    }

    /**
     * Parses the given string as a URL and then as a {@code data:} URI.
     *
     * @param url the textual URL
     * @throws MalformedURLException if the string cannot be parsed as a URL
     *         (the parser's exception is attached as the cause) or the URL
     *         is rejected for one of the reasons listed on
     *         {@link #init(URL)}
     * @throws IOException if the header is syntactically invalid or reading
     *         fails
     */
    public DataUri(String url) throws IOException {
        try {
            init(URL.parse(url));
        } catch (GalimatiasParseException e) {
            // MalformedURLException has no cause-taking constructor, so the
            // original exception is attached through initCause.
            MalformedURLException malformed = new MalformedURLException(e.getMessage());
            malformed.initCause(e);
            throw malformed;
        }
    }

    /**
     * Parses an already-parsed URL as a {@code data:} URI.
     *
     * @param url the URL to parse; its scheme must be {@code data}
     * @throws MalformedURLException if the URL is rejected for one of the
     *         reasons listed on {@link #init(URL)}
     * @throws IOException if the header is syntactically invalid or reading
     *         fails
     */
    public DataUri(URL url) throws IOException, MalformedURLException {
        init(url);
    }

    /**
     * Builds the exception used to report a header syntax error.
     *
     * @param i index of the offending character, counted from the start of
     *        the header
     * @param head message text preceding the offending character
     * @param c the offending character
     * @param tail message text following the offending character
     * @return the exception to throw
     */
    private IOException newDatatypeException(int i, String head, char c, String tail) {
        return new DataUriException(i, head, c, tail);
    }

    /**
     * Tells whether a character may appear unescaped inside a quoted
     * parameter value: U+0020 through U+007E, or a line feed, carriage
     * return or tab. The double quote and backslash are in that range too,
     * but the parser handles them before consulting this method.
     *
     * @param c the character to test
     * @return {@code true} if the character is allowed
     */
    private boolean isQDTextChar(char c) {
        return (c >= ' ' && c <= 126) || (c == '\n') || (c == '\r')
                || (c == '\t');
    }

    /**
     * Tells whether a character may be part of a token: U+0021 through
     * U+007E, excluding the separator characters
     * {@code ( ) < > @ , ; : \ " / [ ] ? = { }}.
     *
     * @param c the character to test
     * @return {@code true} if the character is a token character
     */
    private boolean isTokenChar(char c) {
        return (c >= 33 && c <= 126)
                && !(c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
                        || c == ',' || c == ';' || c == ':' || c == '\\'
                        || c == '\"' || c == '/' || c == '[' || c == ']'
                        || c == '?' || c == '=' || c == '{' || c == '}');
    }

    /**
     * Checks if a UTF-16 code unit represents a whitespace character (U+0020,
     * U+0009, U+000D or U+000A).
     *
     * @param c the code unit
     * @return {@code true} if whitespace, {@code false} otherwise
     */
    private boolean isWhitespace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /**
     * Returns the content type parsed from the header.
     *
     * @return the content type
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * Returns the stream over the payload that follows the header.
     *
     * @return the payload stream
     */
    public InputStream getInputStream() {
        return inputStream;
    }
}