All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.openhtmltopdf.swing.NaiveUserAgent Maven / Gradle / Ivy

/*
 * NaiveUserAgent.java
 * Copyright (c) 2004, 2005 Torbjoern Gannholm
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */
package com.openhtmltopdf.swing;

import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Iterator;
import java.util.LinkedHashMap;

import javax.imageio.ImageIO;

import com.openhtmltopdf.event.DocumentListener;
import com.openhtmltopdf.extend.HttpStreamFactory;
import com.openhtmltopdf.extend.HttpStream;
import com.openhtmltopdf.extend.UserAgentCallback;
import com.openhtmltopdf.resource.CSSResource;
import com.openhtmltopdf.resource.ImageResource;
import com.openhtmltopdf.resource.XMLResource;
import com.openhtmltopdf.util.ImageUtil;
import com.openhtmltopdf.util.XRLog;

/**
 * 

NaiveUserAgent is a simple implementation of {@link UserAgentCallback} which places no restrictions on what * XML, CSS or images are loaded, and reports visited links without any filtering. The most straightforward process * available in the JDK is used to load the resources in question--either using java.io or java.net classes. * *

The NaiveUserAgent has a small cache for images, * the size of which (number of images) can be passed as a constructor argument. There is no automatic cleaning of * the cache; call {@link #shrinkImageCache()} to remove the least-accessed elements--for example, you might do this * when a new document is about to be loaded. The NaiveUserAgent is also a DocumentListener; if registered with a * source of document events (like the panel hierarchy), it will respond to the * {@link com.openhtmltopdf.event.DocumentListener#documentStarted()} call and attempt to shrink its cache. * *

This class is meant as a starting point--it will work out of the box, but you should really implement your * own, tuned to your application's needs. * * @author Torbjoern Gannholm */ public class NaiveUserAgent implements UserAgentCallback, DocumentListener { private static final int DEFAULT_IMAGE_CACHE_SIZE = 16; /** * a (simple) LRU cache */ protected LinkedHashMap _imageCache; private int _imageCacheCapacity; private String _baseURL; private HttpStreamFactory _streamFactory = new DefaultHttpStreamFactory(); public static class DefaultHttpStream implements HttpStream { private InputStream strm; public DefaultHttpStream(InputStream strm) { this.strm = strm; } @Override public InputStream getStream() { return this.strm; } @Override public Reader getReader() { try { return new InputStreamReader(this.strm, "UTF-8"); } catch (UnsupportedEncodingException e) { XRLog.exception("Exception when creating stream reader", e); } return null; } } public static class DefaultHttpStreamFactory implements HttpStreamFactory { @Override public HttpStream getUrl(String uri) { InputStream is = null; try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.exception("bad URL given: " + uri, e); } catch (java.io.FileNotFoundException e) { XRLog.exception("item at URI " + uri + " not found"); } catch (java.io.IOException e) { XRLog.exception("IO problem for " + uri, e); } return new DefaultHttpStream(is); } } /** * Creates a new instance of NaiveUserAgent with a max image cache of 16 images. */ public NaiveUserAgent() { this(DEFAULT_IMAGE_CACHE_SIZE); } /** * Creates a new NaiveUserAgent with a cache of a specific size. * * @param imgCacheSize Number of images to hold in cache before LRU images are released. */ public NaiveUserAgent(final int imgCacheSize) { this._imageCacheCapacity = imgCacheSize; // note we do *not* override removeEldestEntry() here--users of this class must call shrinkImageCache(). // that's because we don't know when is a good time to flush the cache this._imageCache = new java.util.LinkedHashMap(_imageCacheCapacity, 0.75f, true); } public void setHttpStreamFactory(HttpStreamFactory factory) { this._streamFactory = factory; } /** * If the image cache has more items than the limit specified for this class, the least-recently used will * be dropped from cache until it reaches the desired size. */ public void shrinkImageCache() { int ovr = _imageCache.size() - _imageCacheCapacity; Iterator it = _imageCache.keySet().iterator(); while (it.hasNext() && ovr-- > 0) { it.next(); it.remove(); } } /** * Empties the image cache entirely. */ public void clearImageCache() { _imageCache.clear(); } /** * Gets a InputStream for the resource identified */ protected InputStream resolveAndOpenStream(String uri) { java.io.InputStream is = null; uri = resolveURI(uri); try { URL urlObj = new URL(uri); if (urlObj.getProtocol().equalsIgnoreCase("http") || urlObj.getProtocol().equalsIgnoreCase("https")) { return _streamFactory.getUrl(uri).getStream(); } else { try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.exception("bad URL given: " + uri, e); } catch (java.io.FileNotFoundException e) { XRLog.exception("item at URI " + uri + " not found"); } catch (java.io.IOException e) { XRLog.exception("IO problem for " + uri, e); } } } catch (MalformedURLException e2) { XRLog.exception("bad URL given: " + uri, e2); } return is; } /** * Gets a reader for the identified resource. */ protected Reader resolveAndOpenReader(String uri) { InputStream is = null; uri = resolveURI(uri); try { URL urlObj = new URL(uri); if (urlObj.getProtocol().equalsIgnoreCase("http") || urlObj.getProtocol().equalsIgnoreCase("https")) { return _streamFactory.getUrl(uri).getReader(); } else { try { is = new URL(uri).openStream(); } catch (java.net.MalformedURLException e) { XRLog.exception("bad URL given: " + uri, e); } catch (java.io.FileNotFoundException e) { XRLog.exception("item at URI " + uri + " not found"); } catch (java.io.IOException e) { XRLog.exception("IO problem for " + uri, e); } } } catch (MalformedURLException e2) { XRLog.exception("bad URL given: " + uri, e2); } try { return is == null ? null : new InputStreamReader(is, "UTF-8"); } catch (UnsupportedEncodingException e) { XRLog.exception("Failed to create stream reader", e); } return null; } /** * Retrieves the CSS located at the given URI. It's assumed the URI does point to a CSS file--the URI will * be accessed (using the set HttpStreamFactory or URL::openStream), opened, read and then passed into the CSS parser. * The result is packed up into an CSSResource for later consumption. * * @param uri Location of the CSS source. * @return A CSSResource containing the parsed CSS. */ @Override public CSSResource getCSSResource(String uri) { return new CSSResource(resolveAndOpenReader(uri)); } /** * Retrieves the image located at the given URI. It's assumed the URI does point to an image--the URI will * be accessed (using the set HttpStreamFactory or URL::openStream), opened, read and then passed into the JDK image-parsing routines. * The result is packed up into an ImageResource for later consumption. * * @param uri Location of the image source. * @return An ImageResource containing the image. */ public ImageResource getImageResource(String uri) { ImageResource ir; if (ImageUtil.isEmbeddedBase64Image(uri)) { BufferedImage image = ImageUtil.loadEmbeddedBase64Image(uri); ir = createImageResource(null, image); } else { uri = resolveURI(uri); ir = _imageCache.get(uri); //TODO: check that cached image is still valid if (ir == null) { InputStream is = resolveAndOpenStream(uri); if (is != null) { try { BufferedImage img = ImageIO.read(is); if (img == null) { throw new IOException("ImageIO.read() returned null"); } ir = createImageResource(uri, img); _imageCache.put(uri, ir); } catch (FileNotFoundException e) { XRLog.exception("Can't read image file; image at URI '" + uri + "' not found"); } catch (IOException e) { XRLog.exception("Can't read image file; unexpected problem for URI '" + uri + "'", e); } finally { try { is.close(); } catch (IOException e) { // ignore } } } } if (ir == null) { ir = createImageResource(uri, null); } } return ir; } /** * Factory method to generate ImageResources from a given Image. May be overridden in subclass. * * @param uri The URI for the image, resolved to an absolute URI. * @param img The image to package; may be null (for example, if image could not be loaded). * * @return An ImageResource containing the image. */ protected ImageResource createImageResource(String uri, Image img) { return new ImageResource(uri, AWTFSImage.createImage(img)); } /** * Retrieves the XML located at the given URI. It's assumed the URI does point to a XML--the URI will * be accessed (using the set HttpStreamFactory or URL::openStream), opened, read and then passed into the XML parser (XMLReader) * configured for Flying Saucer. The result is packed up into an XMLResource for later consumption. * * @param uri Location of the XML source. * @return An XMLResource containing the image. */ public XMLResource getXMLResource(String uri) { Reader inputReader = resolveAndOpenReader(uri); XMLResource xmlResource; try { xmlResource = XMLResource.load(inputReader); } finally { if (inputReader != null) { try { inputReader.close(); } catch (IOException e) { // swallow } } } return xmlResource; } public byte[] getBinaryResource(String uri) { InputStream is = resolveAndOpenStream(uri); if (is==null) return null; try { ByteArrayOutputStream result = new ByteArrayOutputStream(); byte[] buf = new byte[10240]; int i; while ((i = is.read(buf)) != -1) { result.write(buf, 0, i); } is.close(); is = null; return result.toByteArray(); } catch (IOException e) { return null; } finally { if (is != null) { try { is.close(); } catch (IOException e) { // ignore } } } } /** * Returns true if the given URI was visited, meaning it was requested at some point since initialization. * * @param uri A URI which might have been visited. * @return Always false; visits are not tracked in the NaiveUserAgent. */ public boolean isVisited(String uri) { return false; } /** * URL relative to which URIs are resolved. * * @param url A URI which anchors other, possibly relative URIs. */ public void setBaseURL(String url) { _baseURL = url; } /** * Resolves the URI; if absolute, leaves as is, if relative, returns an absolute URI based on the baseUrl for * the agent. * * @param uri A URI, possibly relative. * * @return A URI as String, resolved, or null if there was an exception (for example if the URI is malformed). */ public String resolveURI(String uri) { if (uri == null) return null; String ret = null; if (_baseURL == null) {//first try to set a base URL try { URI result = new URI(uri); if (result.isAbsolute()) setBaseURL(result.toString()); } catch (URISyntaxException e) { XRLog.exception("The default NaiveUserAgent could not use the URL as base url: " + uri, e); } if (_baseURL == null) { // still not set -> fallback to current working directory try { setBaseURL(new File(".").toURI().toURL().toExternalForm()); } catch (Exception e1) { XRLog.exception("The default NaiveUserAgent doesn't know how to resolve the base URL for " + uri); return null; } } } // test if the URI is valid; if not, try to assign the base url as its parent try { URI result = new URI(uri); if (!result.isAbsolute()) { XRLog.load(uri + " is not a URL; may be relative. Testing using parent URL " + _baseURL); result=new URI(_baseURL).resolve(result); } ret = result.toString(); } catch (URISyntaxException e) { XRLog.exception("The default NaiveUserAgent cannot resolve the URL " + uri + " with base URL " + _baseURL); } return ret; } /** * Returns the current baseUrl for this class. */ public String getBaseURL() { return _baseURL; } public void documentStarted() { shrinkImageCache(); } public void documentLoaded() { /* ignore*/ } public void onLayoutException(Throwable t) { /* ignore*/ } public void onRenderException(Throwable t) { /* ignore*/ } } /* * $Id$ * * $Log$ * Revision 1.40 2009/05/15 16:20:10 pdoubleya * ImageResource now tracks the URI for the image that was created and handles mutable images. * * Revision 1.39 2009/04/12 11:16:51 pdoubleya * Remove proposed patch for URLs that are incorrectly handled on Windows; need a more reliable solution. * * Revision 1.38 2008/04/30 23:14:18 peterbrant * Do a better job of cleaning up open file streams (patch by Christophe Marchand) * * Revision 1.37 2007/11/23 07:03:30 pdoubleya * Applied patch from N. Barozzi to allow either toolkit or buffered images to be used, see https://xhtmlrenderer.dev.java.net/servlets/ReadMsg?list=dev&msgNo=3847 * * Revision 1.36 2007/10/31 23:14:43 peterbrant * Add rudimentary support for @font-face rules * * Revision 1.35 2007/06/20 12:24:31 pdoubleya * Fix bug in shrink cache, trying to modify iterator without using safe remove(). * * Revision 1.34 2007/06/19 21:25:41 pdoubleya * Cleanup for caching in NUA, making it more suitable to use as a reusable UAC. NUA is also now a document listener and uses this to try and trim its cache down. PanelManager and iTextUA are now NUA subclasses. * * Revision 1.33 2007/05/20 23:25:33 peterbrant * Various code cleanups (e.g. remove unused imports) * * Patch from Sean Bright * * Revision 1.32 2007/05/09 21:52:06 pdoubleya * Fix for rendering problems introduced by removing GraphicsUtil class. Use Image instead of BufferedImage in most cases, convert to AWT image if necessary. Not complete, requires cleanup. * * Revision 1.31 2007/05/05 21:08:27 pdoubleya * Changed image-related interfaces (FSImage, ImageUtil, scaling) to all use BufferedImage, since there were no Image-specific APIs we depended on, and we have more control over what we do with BIs as compared to Is. * * Revision 1.30 2007/05/05 18:05:21 pdoubleya * Remove references to GraphicsUtil and the class itself, no longer needed * * Revision 1.29 2007/04/10 20:46:02 pdoubleya * Fix, was not closing XML source stream when done * * Revision 1.28 2007/02/07 16:33:31 peterbrant * Initial commit of rewritten table support and associated refactorings * * Revision 1.27 2006/06/28 13:46:59 peterbrant * ImageIO.read() can apparently return sometimes null instead of throwing an exception when processing an invalid image * * Revision 1.26 2006/04/27 13:28:48 tobega * Handle situations without base url and no file access gracefully * * Revision 1.25 2006/04/25 00:23:20 peterbrant * Fixes from Mike Curtis * * Revision 1.23 2006/04/08 08:21:24 tobega * relative urls and linked stylesheets * * Revision 1.22 2006/02/02 02:47:33 peterbrant * Support non-AWT images * * Revision 1.21 2005/10/25 19:40:38 tobega * Suggestion from user to use File.toURI.toURL instead of File.toURL because the latter is buggy * * Revision 1.20 2005/10/09 09:40:27 tobega * Use current directory as default base URL * * Revision 1.19 2005/08/11 01:35:37 joshy * removed debugging * updated stylesheet to use right aligns * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.17 2005/06/25 19:27:47 tobega * UAC now supplies Resources * * Revision 1.16 2005/06/25 17:23:35 tobega * first refactoring of UAC: ImageResource * * Revision 1.15 2005/06/21 17:52:10 joshy * new hover code * removed some debug statements * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.14 2005/06/20 23:45:56 joshy * hack to fix the mangled background images on osx * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.13 2005/06/20 17:26:45 joshy * debugging for image issues * font scale stuff * * Issue number: * Obtained from: * Submitted by: * Reviewed by: * * Revision 1.12 2005/06/15 11:57:18 tobega * Making Browser a better model application with UserAgentCallback * * Revision 1.11 2005/06/15 11:53:47 tobega * Changed UserAgentCallback to getInputStream instead of getReader. Fixed up some consequences of previous change. * * Revision 1.10 2005/06/13 06:50:16 tobega * Fixed a bug in table content resolution. * Various "tweaks" in other stuff. * * Revision 1.9 2005/06/03 00:29:49 tobega * fixed potential bug * * Revision 1.8 2005/06/01 21:36:44 tobega * Got image scaling working, and did some refactoring along the way * * Revision 1.7 2005/03/28 14:24:22 pdoubleya * Remove stack trace on loading images. * * Revision 1.6 2005/02/02 12:14:01 pdoubleya * Clean, format, buffer reader. * * */





© 2015 - 2024 Weber Informatics LLC | Privacy Policy