All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.lockss.util.DataUri Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2000-2016 Board of Trustees of Leland Stanford Jr. University,
 * all rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * STANFORD UNIVERSITY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name of Stanford University shall not
 * be used in advertising or otherwise to promote the sale, use or other dealings
 * in this Software without prior written authorization from Stanford University.
 *
 */

package org.lockss.util;

import org.lockss.daemon.PluginException;
import org.lockss.extractor.LinkExtractor;
import org.lockss.util.io.FileUtil;
import org.lockss.plugin.ArchivalUnit;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.bind.DatatypeConverter;

import static org.lockss.util.StringUtil.isNullString;

/**
 * A Wrapper around the Data Uri as defined by
 * RFC2397.
 * The schema as defined:
 * 
 *  dataurl    := "data:" [ mediatype ] [ ";base64" ] "," data
 *  mediatype  := [ type "/" subtype ] *( ";" parameter )
 *  data       := *urlchar
 *  parameter  := attribute "=" value
 * 
* * A data uri may be found in various media examples from html: *
 *   <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA
 *   AAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO
 *   9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Red dot" />
 * 
* Other varients include *
 *  data:,Hello%2C%20World!
 *  data:text/plain;base64,SGVsbG8sIFdvcmxkIQ%3D%3D
 *  data:text/html,<script>alert('hi');</script>
 * 
* * from css: *
 *  body {
 *    background-image:url('data:image/png;base64, SGVsbG8sIFdvcmxkIQ%3D%3D...')
 *  }
 * 
 *  font-face {
 *  font-family: 'customFont';
 *  src: url(data:font/svg;charset=utf-8;base64,PD94bWwgdmV...)
 * 
* * from javascript: *
 *  window.open('data:text/html;charset=utf-8,' +
 *    encodeURIComponent( // Escape for URL formatting
 *      '<!DOCTYPE html>'+
 *      '<html lang="en">'+
 *      '<head><title>Embedded Window</title></head>'+
 *      '<body><h1>42</h1></body>'+
 *      '</html>'
 *     )
 *   );
 *  
* @author: claire griffin date: 2016-04-06. */ public class DataUri { /** * The default media value when none is given */ public static final String DEFAULT_MEDIA = "text/plain;charset=US-ASCII"; /** * The default value if no mime type is found as defined by RFC2397. */ public static final String DEFAULT_MIMETYPE = "text/plain"; /** * The default value if no charset is given for text media as defined by RFC2397. */ public static final String DEFAULT_CHARSET = "US-ASCII"; /** * Regular expression to match a Data URI */ public static final String DATA_URI_RE = "^data:(.*?);?(base64)?,(.*)"; /** * Regular expression to match the mime type within a data url */ public static final String MIME_TYPE_RE = "([-\\w.+]+/[-\\w.+]*)"; /** * Regular expression to match the array of parameters in form of param=value */ public static final String MIME_PARAM_RE = ";([-\\w.+]+)(=)?([^;,]+)?"; protected static Logger log = Logger.getLogger(); private static final Pattern DATA_URI_PATTERN = Pattern.compile(DATA_URI_RE, Pattern.CASE_INSENSITIVE); private static final Pattern MIME_TYPE_PATTERN = Pattern.compile(MIME_TYPE_RE); private static final Pattern MIME_PARAM_PATTERN = Pattern.compile(MIME_PARAM_RE); private static final Pattern IS_DATA_URI_PATTERN = Pattern.compile("^data:.*", Pattern.CASE_INSENSITIVE); private String mimeType; private String charsetName; private Properties mediaParams = new Properties(); private String data; private boolean useBase64; private Charset charset; private DataUri(Builder builder) { mimeType = builder.mimeType; charsetName = builder.charsetName; mediaParams = builder.mediaParams; data = builder.data; useBase64 = builder.useBase64; charset = Charset.forName(charsetName); } public static Builder newBuilder() { return new Builder(); } public String getMimeType() { return mimeType; } public String getCharset() { return charsetName; } public Properties getMediaParams() { return mediaParams; } public String getData() { return data; } public boolean usesBase64() { return useBase64; } /** * Returns true iff the string begins with data: * * @param uri the string to test. * @return true if this a data uri. */ public static boolean isDataUri(String uri) { Matcher matcher = IS_DATA_URI_PATTERN.matcher(uri); return matcher.matches(); } /** * Returns true iff the string conforms to format of a data url * * @param uri the string to test. * @return true if this a data uri. */ public static boolean isValidDataUri(String uri) { Matcher matcher = DATA_URI_PATTERN.matcher(uri); return matcher.matches(); } /** * Turn a data uri string int a DataUri object which can be queried for the component parts. * * @param uri the data uri * @return a newly created DataUri */ public static DataUri makeDataUri(String uri) { if (isNullString(uri) && !isDataUri(uri)) { return null; } String mime_type = null; boolean usesBase64; Properties props = new Properties(); String media; Builder builder = new Builder(); Matcher urimatcher = DATA_URI_PATTERN.matcher(uri); if(urimatcher.matches()) { // group 1 = media (mime-type and parameters) media = urimatcher.group(1); if(isNullString(media)) { media = DEFAULT_MEDIA; } Matcher mimematcher = MIME_TYPE_PATTERN.matcher(media); if(mimematcher.find()) { mime_type = mimematcher.group(); } Matcher param_matcher = MIME_PARAM_PATTERN.matcher(media); String key; String value = null; while (param_matcher.find()) { key = param_matcher.group(1).toLowerCase(); if(param_matcher.group(2) != null) value = param_matcher.group(3); else if(key.equalsIgnoreCase("utf8")) {// used for font/svg key = "charset"; value = Constants.ENCODING_UTF_8; } else { value = ""; } if(key != null && value != null) { props.setProperty(key,value); log.debug3("property: " + key + "=" + value); } } builder.mimeType(mime_type).mediaParams(props).charset(props.getProperty("charset")); // group 2 = base64 usesBase64 = urimatcher.group(2) != null; builder.useBase64(usesBase64); // group 3 = data builder.data(urimatcher.group(3)); if(log.isDebug3()) { log.debug3("mime-type:" + mime_type); log.debug3("usesBase64:" + usesBase64); log.debug3("data: " + urimatcher.group(3)); } } return builder.build(); } /** * makeDataUri a data uri to a stream * @param uri the data uri to makeDataUri * @param os the stream to wrtie to * @throws IOException if stream is unwriteable */ public static void decodeToStream(String uri, OutputStream os) throws IOException { DataUri d_uri = makeDataUri(uri); d_uri.decodeToStream(os); } /** * makeDataUri a data uri into a file * @param uri the data uri to makeDataUri * @param file the file to write the decoded * @throws IOException if file is unwrittable */ public static void decodeToFile(String uri, File file) throws IOException { DataUri d_uri = makeDataUri(uri); d_uri.decodeToStream(new BufferedOutputStream(new FileOutputStream(file))); } /** * Call the appropriate link extractor defined by the plugin for the given * mime type. * @param uri the data uri to extract from * @param baseUri the base uri for resolving relative found links * @param au the AU to use for searching for an extractor * @param cb the callback to pass to the extractor */ public static void dispatchToLinkExtractor(String uri, final String baseUri, final ArchivalUnit au, final LinkExtractor.Callback cb) { if(isValidDataUri(uri)) { DataUri d_uri = makeDataUri(uri); InputStream in = null; OutputStream out = null; File tmpfile = null; try { // check to see if we have a link extractor for this mime-type. LinkExtractor extractor = au.getLinkExtractor(d_uri.getMimeType()); if (extractor != null) { tmpfile = FileUtil.createTempFile("d_uri", ".buf"); // now we unpack the data out = new BufferedOutputStream(new FileOutputStream(tmpfile)); decodeToStream(uri, out); out.flush(); out.close(); in = new BufferedInputStream(new FileInputStream(tmpfile)); // pass it to the appropriate extractor extractor.extractUrls(au, in, d_uri.getCharset(), baseUri, cb); } } catch (IOException e) { log.debug3("IOException in extractor", e); } catch (PluginException e) { log.debug3("PluginException in extractor", e); } catch (IllegalStateException ise) { log.debug("Attempt to call data uri handler for non data uri"); } finally { IOUtil.safeClose(in); IOUtil.safeClose(out); FileUtil.safeDeleteFile(tmpfile); } } } /** * decode to the stream passes in * @param os * @throws IOException */ protected void decodeToStream(OutputStream os) throws IOException { DataOutputStream dos = new DataOutputStream(os); if(useBase64) { dos.write(DatatypeConverter.parseBase64Binary(data)); } else { dos.writeBytes(URLDecoder.decode(data,charsetName)); } } protected void decodeToFile(File file) throws IOException { OutputStream os = null; try { os = new BufferedOutputStream(new FileOutputStream(file)); decodeToStream(os); } finally { if(os != null) os.close(); } } /** * {@code DataUri} builder static inner class. */ public static final class Builder { private String mimeType; private String charsetName; private Properties mediaParams; private String data; private boolean useBase64; private Builder() { } /** * Sets the {@code mimeType} and returns a reference to this Builder so that the methods can be * chained together. * * @param val the {@code mimeType} to set * @return a reference to this Builder */ public Builder mimeType(String val) { mimeType = val; return this; } /** * Sets the {@code charset} and returns a reference to this Builder so that the methods can be * chained together. * * @param val the {@code charset} to set * @return a reference to this Builder */ public Builder charset(String val) { charsetName = val; return this; } /** * Sets the {@code mediaParams} and returns a reference to this Builder so that the methods can * be chained together. * * @param val the {@code mediaParams} to set * @return a reference to this Builder */ public Builder mediaParams(Properties val) { mediaParams = val; return this; } /** * Sets the {@code data} and returns a reference to this Builder so that the methods can be * chained together. * * @param val the {@code data} to set * @return a reference to this Builder */ public Builder data(String val) { data = val; return this; } /** * Sets the {@code useBase64} and returns a reference to this Builder so that the methods can be * chained together. * * @param val the {@code useBase64} to set * @return a reference to this Builder */ public Builder useBase64(boolean val) { useBase64 = val; return this; } /** * Returns a {@code DataUri} built from the parameters previously set. * * @return a {@code DataUri} built with parameters of this {@code DataUri.Builder} * @throws java.lang.IllegalStateException if we unable to make the class */ public DataUri build() throws java.lang.IllegalStateException { if(data == null) throw new java.lang.IllegalStateException("No uri data to build"); if(isNullString(charsetName) || !Charset.isSupported(charsetName)) { // unknown charsetName = DEFAULT charset charsetName = DEFAULT_CHARSET; } if(isNullString(mimeType)) { // unknown mime type is DEFAULT mime type mimeType = DEFAULT_MIMETYPE; } return new DataUri(this); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy