All downloads are free. The search and download functionality uses the official Maven repository.

com.openhtmltopdf.swing.NaiveUserAgent Maven / Gradle / Ivy

Go to download

Open HTML to PDF is a CSS 2.1 renderer written in Java. This artifact contains the core rendering and layout code.

The newest version!
/*
 * NaiveUserAgent.java
 * Copyright (c) 2004, 2005 Torbjoern Gannholm
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */
package com.openhtmltopdf.swing;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.BiPredicate;
import java.util.logging.Level;
import java.util.regex.Pattern;

import com.openhtmltopdf.event.DocumentListener;
import com.openhtmltopdf.extend.FSUriResolver;
import com.openhtmltopdf.extend.FSStreamFactory;
import com.openhtmltopdf.extend.FSStream;
import com.openhtmltopdf.extend.UserAgentCallback;
import com.openhtmltopdf.outputdevice.helper.ExternalResourceControlPriority;
import com.openhtmltopdf.outputdevice.helper.ExternalResourceType;
import com.openhtmltopdf.resource.CSSResource;
import com.openhtmltopdf.resource.ImageResource;
import com.openhtmltopdf.resource.XMLResource;
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.OpenUtil;
import com.openhtmltopdf.util.XRLog;

/**
 * 

NaiveUserAgent is a simple implementation of {@link UserAgentCallback} which places no restrictions on what * XML, CSS or images are loaded.

* *

The NaiveUserAgent has a simple per-run cache for images so that the same image is not embedded in a document * multiple times.

* * @author Torbjoern Gannholm */ public abstract class NaiveUserAgent implements UserAgentCallback, DocumentListener { /** * a (simple) cache * This is only useful for the one run. */ protected final LinkedHashMap _imageCache = new LinkedHashMap<>(); protected final FSUriResolver DEFAULT_URI_RESOLVER = new DefaultUriResolver(); protected final Map> _accessControllers = new EnumMap<>(ExternalResourceControlPriority.class); protected FSUriResolver _resolver = DEFAULT_URI_RESOLVER; protected String _baseUri; protected Map _protocolsStreamFactory = new HashMap<>(2); public static class DefaultHttpStream implements FSStream { private InputStream strm; public DefaultHttpStream(InputStream strm) { this.strm = strm; } @Override public InputStream getStream() { return this.strm; } @Override public Reader getReader() { if (this.strm != null) { return new InputStreamReader(this.strm, StandardCharsets.UTF_8); } return null; } } public static class DefaultHttpStreamFactory implements FSStreamFactory { final static int CONNECTION_TIMEOUT = 10_000; final static int READ_TIMEOUT = 30_000; final int connectTimeout; final int readTimeout; /** * Create a FSStreamFactory for http, https with specified timeouts. * Uses URLConnection to perform requests. * Zero value for timeout specifies no timeout. */ public DefaultHttpStreamFactory(int connectTimeout, int readTimeout) { this.connectTimeout = connectTimeout; this.readTimeout = readTimeout; } /** * Create a FSStreamFactory with 10 second connect timeout and * 30 second read timeout. 
*/ public DefaultHttpStreamFactory() { this(CONNECTION_TIMEOUT, READ_TIMEOUT); } @SuppressWarnings("resource") @Override public FSStream getUrl(String uri) { InputStream is = null; try { URLConnection conn = new URL(uri).openConnection(); conn.setConnectTimeout(this.connectTimeout); conn.setReadTimeout(this.readTimeout); conn.connect(); is = conn.getInputStream(); } catch (java.net.MalformedURLException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_MALFORMED_URL, uri, e); } catch (java.io.FileNotFoundException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_ITEM_AT_URI_NOT_FOUND, uri, e); } catch (java.io.IOException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_IO_PROBLEM_FOR_URI, uri, e); } // Ownership is transferred to DefaultHttpStream which implements Closeable. return new DefaultHttpStream(is); } } private static class ByteStream implements FSStream { ByteStream(byte[] input) { this.input = input; } private final byte[] input; @Override public InputStream getStream() { return new ByteArrayInputStream(input); } @Override public Reader getReader() { return new InputStreamReader(getStream(), StandardCharsets.UTF_8); } } public static class DataUriFactory implements FSStreamFactory { @Override public FSStream getUrl(String url) { int idxSeparator; if (url != null && url.startsWith("data:") && (idxSeparator = url.indexOf(',')) > 0) { String data = url.substring(idxSeparator+1); byte[] res; if (url.indexOf("base64,") == idxSeparator - 6 /* 6 = "base64,".length */) { res = fromBase64Encoded(data); } else { res = data.getBytes(StandardCharsets.UTF_8); } return new ByteStream(res); } return null; } static final Pattern WHITE_SPACE = Pattern.compile("\\s+"); static byte[] fromBase64Encoded(String b64encoded) { return Base64.getMimeDecoder().decode(WHITE_SPACE.matcher(b64encoded).replaceAll("")); } } /** * Get the binary content of an embedded base 64 image. 
* * @param imageDataUri URI of the embedded image * @return The binary content */ public static byte[] getEmbeddedBase64Image(String imageDataUri) { int b64Index = imageDataUri.indexOf("base64,"); if (b64Index != -1) { String b64encoded = imageDataUri.substring(b64Index + "base64,".length()); return DataUriFactory.fromBase64Encoded(b64encoded); } else { XRLog.log(Level.SEVERE, LogMessageId.LogMessageId0Param.LOAD_EMBEDDED_DATA_URI_MUST_BE_ENCODED_IN_BASE64); } return null; } public NaiveUserAgent() { FSStreamFactory factory = new DefaultHttpStreamFactory(); this._protocolsStreamFactory.put("http", factory); this._protocolsStreamFactory.put("https", factory); this._protocolsStreamFactory.put("data", new DataUriFactory()); } public void setProtocolsStreamFactory(Map protocolsStreamFactory) { this._protocolsStreamFactory = protocolsStreamFactory; } public void setUriResolver(FSUriResolver resolver) { this._resolver = resolver; } public FSUriResolver getDefaultUriResolver() { return DEFAULT_URI_RESOLVER; } /** * Empties the image cache entirely. */ @Deprecated public void clearImageCache() { _imageCache.clear(); } protected FSStreamFactory getProtocolFactory(String protocol) { return _protocolsStreamFactory.get(protocol); } protected boolean hasProtocolFactory(String protocol) { return _protocolsStreamFactory.containsKey(protocol); } protected String extractProtocol(String uri) throws URISyntaxException { int idxSeparator; if (uri != null && (idxSeparator = uri.indexOf(':')) > 0) { return uri.substring(0, idxSeparator); } else { throw new URISyntaxException(uri, "missing protocol for URI"); } } /** * Gets a InputStream for the resource identified by a resolved URI. 
*/ protected InputStream openStream(String uri) { java.io.InputStream is = null; try { String protocol = extractProtocol(uri); if (hasProtocolFactory(protocol)) { return getProtocolFactory(protocol).getUrl(uri).getStream(); } else { try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_MALFORMED_URL, uri, e); } catch (java.io.FileNotFoundException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_ITEM_AT_URI_NOT_FOUND, uri, e); } catch (java.io.IOException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_IO_PROBLEM_FOR_URI, uri, e); } } } catch (URISyntaxException e1) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_MALFORMED_URL, uri, e1); } return is; } /** * Gets a reader for the identified resource by a resolved URI. */ protected Reader openReader(String uri) { InputStream is = null; try { String protocol = extractProtocol(uri); if (hasProtocolFactory(protocol)) { return getProtocolFactory(protocol).getUrl(uri).getReader(); } else { try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_MALFORMED_URL, uri, e); } catch (java.io.FileNotFoundException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_ITEM_AT_URI_NOT_FOUND, uri, e); } catch (java.io.IOException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_IO_PROBLEM_FOR_URI, uri, e); } } } catch (URISyntaxException e1) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_MALFORMED_URL, uri, e1); } return is == null ? null : new InputStreamReader(is, StandardCharsets.UTF_8); } protected String readAll(Reader reader) throws IOException { return OpenUtil.readAll(reader); } /** * Retrieves the CSS located at the given URI. 
It's assumed the URI does point to a CSS file--the URI will * be resolved, accessed (using the set FSStreamFactory or URL::openStream), opened, read and then passed into the CSS parser. * The result is packed up into an CSSResource for later consumption. * * @param uri Location of the CSS source. * @return A CSSResource containing the CSS reader or null if not available. */ @SuppressWarnings("resource") @Override public CSSResource getCSSResource(String uri, ExternalResourceType type) { if (!checkAccessAllowed(uri, type, ExternalResourceControlPriority.RUN_BEFORE_RESOLVING_URI)) { return null; } String resolved = _resolver.resolveURI(this._baseUri, uri); if (!checkAccessAllowed(resolved, type, ExternalResourceControlPriority.RUN_AFTER_RESOLVING_URI)) { return null; } if (resolved == null) { XRLog.log(Level.INFO, LogMessageId.LogMessageId2Param.LOAD_URI_RESOLVER_REJECTED_LOADING_AT_URI, "CSS resource", uri); return null; } // Ownership is transferred to CSSResource which implements Closeable. return new CSSResource(openReader(resolved)); } public abstract ImageResource getImageResource(String uri, ExternalResourceType type); /** * Retrieves the XML located at the given URI. It's assumed the URI does point to a XML--the URI will * be accessed (using the set HttpStreamFactory or URL::openStream), opened, read and then passed into the XML parser (XMLReader) * configured for Flying Saucer. The result is packed up into an XMLResource for later consumption. * * @param uri Location of the XML source. * @return An XMLResource containing the image. 
*/ @Override public XMLResource getXMLResource(String uri, ExternalResourceType type) { if (!checkAccessAllowed(uri, type, ExternalResourceControlPriority.RUN_BEFORE_RESOLVING_URI)) { return null; } String resolved = _resolver.resolveURI(this._baseUri, uri); if (!checkAccessAllowed(resolved, type, ExternalResourceControlPriority.RUN_AFTER_RESOLVING_URI)) { return null; } if (resolved == null) { XRLog.log(Level.INFO, LogMessageId.LogMessageId2Param.LOAD_URI_RESOLVER_REJECTED_LOADING_AT_URI, "XML resource", uri); return null; } try (Reader inputReader = openReader(resolved)) { return inputReader == null ? null : XMLResource.load(inputReader); } catch (IOException e) { // On auto close, swallow. return null; } } @Override public byte[] getBinaryResource(String uri, ExternalResourceType type) { if (!checkAccessAllowed(uri, type, ExternalResourceControlPriority.RUN_BEFORE_RESOLVING_URI)) { return null; } String resolved = _resolver.resolveURI(this._baseUri, uri); if (!checkAccessAllowed(resolved, type, ExternalResourceControlPriority.RUN_AFTER_RESOLVING_URI)) { return null; } if (resolved == null) { XRLog.log(Level.INFO, LogMessageId.LogMessageId2Param.LOAD_URI_RESOLVER_REJECTED_LOADING_AT_URI, "binary resource", uri); return null; } try (InputStream is = openStream(resolved)) { if (is == null) { return null; } return OpenUtil.readAll(is); } catch (IOException e) { return null; } } /** * Returns true if the given URI was visited, meaning it was requested at some point since initialization. * * @param uri A URI which might have been visited. * @return Always false; visits are not tracked in the NaiveUserAgent. */ @Override public boolean isVisited(String uri) { return false; } /** * URL relative to which URIs are resolved. * * @param uri A URI which anchors other, possibly relative URIs. 
*/ @Override public void setBaseURL(String uri) { _baseUri = uri; } public static class DefaultAccessController implements BiPredicate { public boolean test(String uri, ExternalResourceType resourceType) { if (resourceType == null) { return false; } switch (resourceType) { case BINARY: case CSS: case FONT: case IMAGE_RASTER: case XML_XHTML: case XML_SVG: case PDF: case SVG_BINARY: return true; case FILE_EMBED: return false; } return false; } } public void setAccessController( ExternalResourceControlPriority prio, BiPredicate controller) { this._accessControllers.put(prio, controller); } public boolean checkAccessAllowed( String uriOrResolved, ExternalResourceType type, ExternalResourceControlPriority priority) { BiPredicate controller = this._accessControllers.get(priority); if (uriOrResolved == null) { return false; } if (controller == null) { return true; } boolean passed = controller.test(uriOrResolved, type); if (!passed) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId2Param.LOAD_RESOURCE_ACCESS_REJECTED, uriOrResolved, type); } return passed; } public static class DefaultUriResolver implements FSUriResolver { /** * Resolves the URI; if absolute, leaves as is, if relative, returns an * absolute URI based on the baseUrl for the agent. * * @param uri * A URI, possibly relative. * * @return A URI as String, resolved, or null if there was an exception * (for example if the URI is malformed). */ @Override public String resolveURI(String baseUri, String uri) { if (uri == null || uri.isEmpty()) return null; if (uri.startsWith("data:")) { return uri; //bypass URI "formatting" check for data uri, as we may have whitespace in the base64 encoded data } try { URI possiblyRelative = new URI(uri); if (possiblyRelative.isAbsolute()) { return possiblyRelative.toString(); } else { if (baseUri == null) { // If user hasn't provided base URI, just reject resolving relative URIs. 
XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.LOAD_COULD_NOT_RESOLVE_RELATIVE_URI_BECAUSE_NO_BASE_URI_WAS_PROVIDED, uri); return null; } else if (baseUri.startsWith("jar")) { // Fix for OpenHTMLtoPDF issue-#125, URI class doesn't resolve jar: scheme urls and so returns only // the relative part on calling base.resolve(relative) so we use the URL class instead which does // understand jar: scheme urls. URL base = new URL(baseUri); URL absolute = new URL(base, uri); return absolute.toString(); } else { URI base = new URI(baseUri); URI absolute = base.resolve(uri); return absolute.toString(); } } } catch (URISyntaxException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId3Param.EXCEPTION_URI_WITH_BASE_URI_INVALID, uri, "", baseUri, e); return null; } catch (MalformedURLException e) { XRLog.log(Level.WARNING, LogMessageId.LogMessageId3Param.EXCEPTION_URI_WITH_BASE_URI_INVALID, uri, "jar scheme", baseUri, e); return null; } } } /** * Returns the current baseUrl for this class. */ @Override public String getBaseURL() { return _baseUri; } @Override @Deprecated public void documentStarted() { clearImageCache(); } @Override @Deprecated public void documentLoaded() { /* ignore*/ } @Override @Deprecated public void onLayoutException(Throwable t) { /* ignore*/ } @Override @Deprecated public void onRenderException(Throwable t) { /* ignore*/ } @Override public String resolveURI(String uri) { return _resolver.resolveURI(getBaseURL(), uri); } @Override public String resolveUri(String baseUri, String uri) { return _resolver.resolveURI(baseUri, uri); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy