All Downloads are FREE. The search and download features use the official Maven repository.

com.openhtmltopdf.swing.NaiveUserAgent Maven / Gradle / Ivy

Go to download

Open HTML to PDF is a CSS 2.1 renderer written in Java. This artifact contains the core rendering and layout code.

There is a newer version: 1.0.10
Show newest version
/*
 * NaiveUserAgent.java
 * Copyright (c) 2004, 2005 Torbjoern Gannholm
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */
package com.openhtmltopdf.swing;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;

import javax.imageio.ImageIO;

import com.openhtmltopdf.event.DocumentListener;
import com.openhtmltopdf.extend.FSUriResolver;
import com.openhtmltopdf.extend.FSStreamFactory;
import com.openhtmltopdf.extend.FSStream;
import com.openhtmltopdf.extend.UserAgentCallback;
import com.openhtmltopdf.resource.CSSResource;
import com.openhtmltopdf.resource.ImageResource;
import com.openhtmltopdf.resource.XMLResource;
import com.openhtmltopdf.util.ImageUtil;
import com.openhtmltopdf.util.XRLog;

/**
 * 

NaiveUserAgent is a simple implementation of {@link UserAgentCallback} which places no restrictions on what * XML, CSS or images are loaded.

* *

The NaiveUserAgent has a simple per-run cache for images so that the same image is not embedded in a document * multiple times.

* * @author Torbjoern Gannholm */ public class NaiveUserAgent implements UserAgentCallback, DocumentListener { /** * a (simple) cache * This is only useful for the one run. */ protected final LinkedHashMap _imageCache = new LinkedHashMap(); protected final FSUriResolver DEFAULT_URI_RESOLVER = new DefaultUriResolver(); protected FSUriResolver _resolver = DEFAULT_URI_RESOLVER; protected String _baseUri; protected Map _protocolsStreamFactory = new HashMap(2); public static class DefaultHttpStream implements FSStream { private InputStream strm; public DefaultHttpStream(InputStream strm) { this.strm = strm; } @Override public InputStream getStream() { return this.strm; } @Override public Reader getReader() { if (this.strm != null) { return new InputStreamReader(this.strm, StandardCharsets.UTF_8); } return null; } } public static class DefaultHttpStreamFactory implements FSStreamFactory { @Override public FSStream getUrl(String uri) { InputStream is = null; try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.exception("bad URL given: " + uri, e); } catch (java.io.FileNotFoundException e) { XRLog.exception("item at URI " + uri + " not found"); } catch (java.io.IOException e) { XRLog.exception("IO problem for " + uri, e); } return new DefaultHttpStream(is); } } public NaiveUserAgent() { FSStreamFactory factory = new DefaultHttpStreamFactory(); this._protocolsStreamFactory.put("http", factory); this._protocolsStreamFactory.put("https", factory); } public void setProtocolsStreamFactory(Map protocolsStreamFactory) { this._protocolsStreamFactory = protocolsStreamFactory; } public void setUriResolver(FSUriResolver resolver) { this._resolver = resolver; } public FSUriResolver getDefaultUriResolver() { return DEFAULT_URI_RESOLVER; } /** * Empties the image cache entirely. 
*/ @Deprecated public void clearImageCache() { _imageCache.clear(); } protected FSStreamFactory getProtocolFactory(String protocol) { return _protocolsStreamFactory.get(protocol); } protected boolean hasProtocolFactory(String protocol) { return _protocolsStreamFactory.containsKey(protocol); } /** * Gets a InputStream for the resource identified by a resolved URI. */ protected InputStream openStream(String uri) { java.io.InputStream is = null; try { URI urlObj = new URI(uri); String protocol = urlObj.getScheme(); if (hasProtocolFactory(protocol)) { return getProtocolFactory(protocol).getUrl(uri).getStream(); } else { try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.exception("bad URL given: " + uri, e); } catch (java.io.FileNotFoundException e) { XRLog.exception("item at URI " + uri + " not found", e); } catch (java.io.IOException e) { XRLog.exception("IO problem for " + uri, e); } } } catch (URISyntaxException e1) { XRLog.exception("bad URL given: " + uri, e1); } return is; } /** * Gets a reader for the identified resource by a resolved URI. */ protected Reader openReader(String uri) { InputStream is = null; try { URI urlObj = new URI(uri); String protocol = urlObj.getScheme(); if (hasProtocolFactory(protocol)) { return getProtocolFactory(protocol).getUrl(uri).getReader(); } else { try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.exception("bad URL given: " + uri, e); } catch (java.io.FileNotFoundException e) { XRLog.exception("item at URI " + uri + " not found"); } catch (java.io.IOException e) { XRLog.exception("IO problem for " + uri, e); } } } catch (URISyntaxException e1) { XRLog.exception("bad URL given: " + uri, e1); } return is == null ? 
null : new InputStreamReader(is, StandardCharsets.UTF_8); } protected String readAll(Reader reader) throws IOException { char[] arr = new char[8 * 1024]; StringBuilder buffer = new StringBuilder(); int numCharsRead; while ((numCharsRead = reader.read(arr, 0, arr.length)) != -1) { buffer.append(arr, 0, numCharsRead); } return buffer.toString(); } /** * Retrieves the CSS located at the given URI. It's assumed the URI does point to a CSS file--the URI will * be resolved, accessed (using the set FSStreamFactory or URL::openStream), opened, read and then passed into the CSS parser. * The result is packed up into an CSSResource for later consumption. * * @param uri Location of the CSS source. * @return A CSSResource containing the CSS reader or null if not available. */ @Override public CSSResource getCSSResource(String uri) { String resolved = _resolver.resolveURI(this._baseUri, uri); if (resolved == null) { XRLog.load(Level.INFO, "URI resolver rejected loading CSS resource at (" + uri + ")"); return null; } return new CSSResource(openReader(resolved)); } /** * Retrieves the image located at the given URI. It's assumed the URI does point to an image--the URI will * be accessed (using the set HttpStreamFactory or URL::openStream), opened, read and then passed into the JDK image-parsing routines. * The result is packed up into an ImageResource for later consumption. * * @param uri Location of the image source. * @return An ImageResource containing the image. */ @Override public ImageResource getImageResource(String uri) { ImageResource ir; if (ImageUtil.isEmbeddedBase64Image(uri)) { BufferedImage image = ImageUtil.loadEmbeddedBase64Image(uri); return new ImageResource(null, AWTFSImage.createImage(image)); } else { String resolved = _resolver.resolveURI(this._baseUri, uri); if (resolved == null) { XRLog.load(Level.INFO, "URI resolver rejected loading image resource at (" + uri + ")"); return null; } // First, we check the internal per run cache. 
ir = _imageCache.get(resolved); if (ir != null) { return ir; } // Finally we fetch from the network or file, etc. InputStream is = openStream(resolved); if (is != null) { try { BufferedImage img = ImageIO.read(is); if (img == null) { throw new IOException("ImageIO.read() returned null"); } AWTFSImage fsImage2 = (AWTFSImage) AWTFSImage.createImage(img); ir = new ImageResource(resolved, fsImage2); _imageCache.put(resolved, ir); return ir; } catch (FileNotFoundException e) { XRLog.exception("Can't read image file; image at URI '" + resolved + "' not found"); } catch (IOException e) { XRLog.exception("Can't read image file; unexpected problem for URI '" + resolved + "'", e); } finally { try { is.close(); } catch (IOException e) { // ignore } } } return new ImageResource(resolved, null); } } /** * Retrieves the XML located at the given URI. It's assumed the URI does point to a XML--the URI will * be accessed (using the set HttpStreamFactory or URL::openStream), opened, read and then passed into the XML parser (XMLReader) * configured for Flying Saucer. The result is packed up into an XMLResource for later consumption. * * @param uri Location of the XML source. * @return An XMLResource containing the image. */ @Override public XMLResource getXMLResource(String uri) { String resolved = _resolver.resolveURI(this._baseUri, uri); if (resolved == null) { XRLog.load(Level.INFO, "URI resolver rejected loading XML resource at (" + uri + ")"); return null; } try (Reader inputReader = openReader(resolved)) { return inputReader == null ? null : XMLResource.load(inputReader); } catch (IOException e) { // On auto close, swallow. 
return null; } } @Override public byte[] getBinaryResource(String uri) { if (ImageUtil.isDataUri(uri)) { return ImageUtil.getEmbeddedDataUri(uri); } String resolved = _resolver.resolveURI(this._baseUri, uri); if (resolved == null) { XRLog.load(Level.INFO, "URI resolver rejected loading binary resource at (" + uri + ")"); return null; } InputStream is = openStream(resolved); if (is == null) { return null; } try { ByteArrayOutputStream result = new ByteArrayOutputStream(); byte[] buf = new byte[10240]; int i; while ((i = is.read(buf)) != -1) { result.write(buf, 0, i); } is.close(); is = null; byte[] bytes2 = result.toByteArray(); return bytes2; } catch (IOException e) { return null; } finally { if (is != null) { try { is.close(); } catch (IOException e) { // ignore } } } } /** * Returns true if the given URI was visited, meaning it was requested at some point since initialization. * * @param uri A URI which might have been visited. * @return Always false; visits are not tracked in the NaiveUserAgent. */ @Override public boolean isVisited(String uri) { return false; } /** * URL relative to which URIs are resolved. * * @param uri A URI which anchors other, possibly relative URIs. */ @Override public void setBaseURL(String uri) { _baseUri = uri; } public static class DefaultUriResolver implements FSUriResolver { /** * Resolves the URI; if absolute, leaves as is, if relative, returns an * absolute URI based on the baseUrl for the agent. * * @param uri * A URI, possibly relative. * * @return A URI as String, resolved, or null if there was an exception * (for example if the URI is malformed). */ @Override public String resolveURI(String baseUri, String uri) { if (uri == null || uri.isEmpty()) return null; try { URI possiblyRelative = new URI(uri); if (possiblyRelative.isAbsolute()) { return possiblyRelative.toString(); } else { if (baseUri == null) { // If user hasn't provided base URI, just reject resolving relative URIs. 
XRLog.load(Level.WARNING, "Couldn't resolve relative URI(" + uri + ") because no base URI was provided."); return null; } else if (baseUri.startsWith("jar")) { // Fix for OpenHTMLtoPDF issue-#125, URI class doesn't resolve jar: scheme urls and so returns only // the relative part on calling base.resolve(relative) so we use the URL class instead which does // understand jar: scheme urls. URL base = new URL(baseUri); URL absolute = new URL(base, uri); return absolute.toString(); } else { URI base = new URI(baseUri); URI absolute = base.resolve(uri); return absolute.toString(); } } } catch (URISyntaxException e) { XRLog.exception("When trying to load uri(" + uri + ") with base URI(" + baseUri + "), one or both were invalid URIs.", e); return null; } catch (MalformedURLException e) { XRLog.exception("When trying to load uri(" + uri + ") with base jar scheme URI(" + baseUri + "), one or both were invalid URIs.", e); return null; } } } /** * Returns the current baseUrl for this class. */ @Override public String getBaseURL() { return _baseUri; } @Override @Deprecated public void documentStarted() { clearImageCache(); } @Override @Deprecated public void documentLoaded() { /* ignore*/ } @Override @Deprecated public void onLayoutException(Throwable t) { /* ignore*/ } @Override @Deprecated public void onRenderException(Throwable t) { /* ignore*/ } @Override public String resolveURI(String uri) { return _resolver.resolveURI(getBaseURL(), uri); } @Override public String resolveUri(String baseUri, String uri) { return _resolver.resolveURI(baseUri, uri); } } /* * $Id$ * * $Log$ * Revision 1.40 2009/05/15 16:20:10 pdoubleya * ImageResource now tracks the URI for the image that was created and handles mutable images. * * Revision 1.39 2009/04/12 11:16:51 pdoubleya * Remove proposed patch for URLs that are incorrectly handled on Windows; need a more reliable solution. 
* * Revision 1.38 2008/04/30 23:14:18 peterbrant * Do a better job of cleaning up open file streams (patch by Christophe Marchand) * * Revision 1.37 2007/11/23 07:03:30 pdoubleya * Applied patch from N. Barozzi to allow either toolkit or buffered images to be used, see https://xhtmlrenderer.dev.java.net/servlets/ReadMsg?list=dev&msgNo=3847 * * Revision 1.36 2007/10/31 23:14:43 peterbrant * Add rudimentary support for @font-face rules * * Revision 1.35 2007/06/20 12:24:31 pdoubleya * Fix bug in shrink cache, trying to modify iterator without using safe remove(). * * Revision 1.34 2007/06/19 21:25:41 pdoubleya * Cleanup for caching in NUA, making it more suitable to use as a reusable UAC. NUA is also now a document listener and uses this to try and trim its cache down. PanelManager and iTextUA are now NUA subclasses. * * Revision 1.33 2007/05/20 23:25:33 peterbrant * Various code cleanups (e.g. remove unused imports) * * Patch from Sean Bright * * Revision 1.32 2007/05/09 21:52:06 pdoubleya * Fix for rendering problems introduced by removing GraphicsUtil class. Use Image instead of BufferedImage in most cases, convert to AWT image if necessary. Not complete, requires cleanup. * * Revision 1.31 2007/05/05 21:08:27 pdoubleya * Changed image-related interfaces (FSImage, ImageUtil, scaling) to all use BufferedImage, since there were no Image-specific APIs we depended on, and we have more control over what we do with BIs as compared to Is. 
* * Revision 1.30 2007/05/05 18:05:21 pdoubleya * Remove references to GraphicsUtil and the class itself, no longer needed * * Revision 1.29 2007/04/10 20:46:02 pdoubleya * Fix, was not closing XML source stream when done * * Revision 1.28 2007/02/07 16:33:31 peterbrant * Initial commit of rewritten table support and associated refactorings * * Revision 1.27 2006/06/28 13:46:59 peterbrant * ImageIO.read() can apparently return sometimes null instead of throwing an exception when processing an invalid image * * Revision 1.26 2006/04/27 13:28:48 tobega * Handle situations without base url and no file access gracefully * * Revision 1.25 2006/04/25 00:23:20 peterbrant * Fixes from Mike Curtis * * Revision 1.23 2006/04/08 08:21:24 tobega * relative urls and linked stylesheets * * Revision 1.22 2006/02/02 02:47:33 peterbrant * Support non-AWT images * * Revision 1.21 2005/10/25 19:40:38 tobega * Suggestion from user to use File.toURI.toURL instead of File.toURL because the latter is buggy * * Revision 1.20 2005/10/09 09:40:27 tobega * Use current directory as default base URL * * Revision 1.19 2005/08/11 01:35:37 joshy * removed debugging * updated stylesheet to use right aligns * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.17 2005/06/25 19:27:47 tobega * UAC now supplies Resources * * Revision 1.16 2005/06/25 17:23:35 tobega * first refactoring of UAC: ImageResource * * Revision 1.15 2005/06/21 17:52:10 joshy * new hover code * removed some debug statements * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.14 2005/06/20 23:45:56 joshy * hack to fix the mangled background images on osx * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.13 2005/06/20 17:26:45 joshy * debugging for image issues * font scale stuff * * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.12 2005/06/15 11:57:18 tobega * Making Browser a better model application with 
UserAgentCallback * * Revision 1.11 2005/06/15 11:53:47 tobega * Changed UserAgentCallback to getInputStream instead of getReader. Fixed up some consequences of previous change. * * Revision 1.10 2005/06/13 06:50:16 tobega * Fixed a bug in table content resolution. * Various "tweaks" in other stuff. * * Revision 1.9 2005/06/03 00:29:49 tobega * fixed potential bug * * Revision 1.8 2005/06/01 21:36:44 tobega * Got image scaling working, and did some refactoring along the way * * Revision 1.7 2005/03/28 14:24:22 pdoubleya * Remove stack trace on loading images. * * Revision 1.6 2005/02/02 12:14:01 pdoubleya * Clean, format, buffer reader. * * */




© 2015 - 2024 Weber Informatics LLC | Privacy Policy