All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.mklinger.qetcher.liferay.client.impl.htmlinliner.HtmlElementInliner Maven / Gradle / Ivy

The newest version!
package de.mklinger.qetcher.liferay.client.impl.htmlinliner;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.mklinger.qetcher.liferay.abstraction.DocumentLibraryFileContents;
import de.mklinger.qetcher.liferay.client.impl.abstraction.liferay71.LiferayAbstractionFactory;

/**
 * Rewrites an HTML document so that resources it references are embedded in
 * the document itself. Depending on the {@link QetcherHtmlInlineConfiguration},
 * this covers {@code img} sources, {@code link rel="stylesheet"} elements,
 * {@code @import url(...)} rules inside CSS and {@code script src} elements.
 * <p>
 * Inlining is best-effort: a failure for a single element is logged and the
 * element is left as it was.
 *
 * @author Marc Klinger - mklinger[at]mklinger[dot]de
 */
public class HtmlElementInliner implements Closeable {
	private static final Pattern CSS_IMPORT_PATTERN = Pattern.compile("@import\\s+url\\s*\\(\\s*(.*?)\\s*\\)\\s*;?", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
	private static final Logger LOG = LoggerFactory.getLogger(HtmlElementInliner.class);
	private final QetcherHtmlInlineConfiguration configuration;
	private final ResourceLoader resourceLoader;

	/**
	 * Creates an inliner that owns its own {@link ResourceLoaderImpl}; call
	 * {@link #close()} to release it.
	 *
	 * @param configuration switches that select which resource kinds are inlined
	 */
	public HtmlElementInliner(final QetcherHtmlInlineConfiguration configuration) {
		this.configuration = configuration;
		this.resourceLoader = new ResourceLoaderImpl();
	}

	/**
	 * Parses the given HTML, inlines the resource kinds enabled in the
	 * configuration and returns the serialized result.
	 *
	 * @param htmlInputStream the HTML to process; the charset is detected by jsoup
	 * @param baseUri the URI relative references in the document are resolved against
	 * @return the resulting HTML, encoded with the document's output charset
	 * @throws IOException if the input cannot be read or parsed
	 */
	public byte[] inline(final InputStream htmlInputStream, final String baseUri) throws IOException {
		final Document htmlDocument = Jsoup.parse(htmlInputStream, null, baseUri);

		if (configuration.isInlineExternalImagesForHtml() || configuration.isInlineInternalImagesForHtml()) {
			inlineImages(htmlDocument, baseUri);
		}

		if (configuration.isInlineCssForHtml()) {
			inlineCssLinks(htmlDocument, baseUri);
			inlineStyleImports(htmlDocument, baseUri);
		}

		if (configuration.isInlineJsForHtml()) {
			inlineScripts(htmlDocument, baseUri);
		}

		final byte[] htmlBytes = htmlDocument.outerHtml().getBytes(htmlDocument.outputSettings().charset());

		if (LOG.isTraceEnabled()) {
			LOG.trace("HTML after inlining:\n{}", new String(htmlBytes, htmlDocument.outputSettings().charset()));
		}

		return htmlBytes;
	}

	/** Replaces the {@code src} of every {@code img} for which inline contents could be obtained. */
	private void inlineImages(final Document htmlDocument, final String baseUri) {
		for (final Element img : htmlDocument.getElementsByTag("img")) {
			try {
				final String url = img.absUrl("src");
				// absUrl yields an empty string (not null) when no absolute URL can be built
				if (url == null || url.isEmpty()) {
					continue;
				}
				LOG.debug("Trying to inline {}", url);
				final Optional<String> optionalNewImgSrc = getNewImgSrc(url, baseUri);
				if (optionalNewImgSrc.isPresent()) {
					final String newImgSrc = optionalNewImgSrc.get();
					LOG.debug("Inlining url {}: {} inlined chars", url, newImgSrc.length());
					img.attr("src", newImgSrc);
				}
			} catch (final Exception e) {
				LOG.info("Error inlining image: {}", e.toString());
			}
		}
	}

	/** Replaces every stylesheet {@code link} whose contents could be loaded with a {@code style} element. */
	private void inlineCssLinks(final Document htmlDocument, final String baseUri) {
		for (final Element link : htmlDocument.getElementsByTag("link")) {
			try {
				if (!"stylesheet".equalsIgnoreCase(link.attr("rel"))) {
					continue;
				}
				final String href = link.absUrl("href");
				if (href == null || href.isEmpty()) {
					continue;
				}
				LOG.debug("Trying to inline {}", href);
				final Node newCssLink = getNewCssLink(link, baseUri);
				if (newCssLink != null) {
					LOG.debug("Inlining css link {}", link);
					link.replaceWith(newCssLink);
				}
			} catch (final Exception e) {
				LOG.info("Error inlining link css: {}", e.toString());
			}
		}
	}

	/** Resolves {@code @import url(...)} rules inside the document's {@code style} elements. */
	private void inlineStyleImports(final Document htmlDocument, final String baseUri) {
		for (final Element style : htmlDocument.getElementsByTag("style")) {
			try {
				final String css = style.html();
				if (css == null || css.trim().isEmpty()) {
					continue;
				}
				final String newCss = resolveCss(baseUri, css);
				if (newCss == null || newCss.equals(css)) {
					// nothing was inlined, leave the element untouched
					continue;
				}
				LOG.debug("Inlining css: {} inlined chars", newCss.length());
				final DataNode data = DataNode.createFromEncoded(newCss, baseUri);
				style.html("");
				style.appendChild(data);
			} catch (final Exception e) {
				LOG.info("Error inlining style css: {}", e.toString());
			}
		}
	}

	/** Replaces every {@code script} with a {@code src} whose contents could be loaded with an inline script. */
	private void inlineScripts(final Document htmlDocument, final String baseUri) {
		for (final Element script : htmlDocument.getElementsByTag("script")) {
			try {
				final String src = script.absUrl("src");
				if (src == null || src.isEmpty()) {
					continue;
				}
				LOG.debug("Trying to inline {}", src);
				final Node newScript = getNewScript(script, baseUri);
				if (newScript != null) {
					LOG.debug("Inlining js url {}", src);
					script.replaceWith(newScript);
				}
			} catch (final Exception e) {
				LOG.info("Error inlining script: {}", e.toString());
			}
		}
	}

	/**
	 * Replaces {@code @import url(...)} rules in the given CSS with the contents
	 * of the imported stylesheets, recursively. Imports that cannot be resolved
	 * or loaded are kept as they are.
	 *
	 * @param baseUri the URI the import URLs are resolved against
	 * @param css the CSS text to process
	 * @return the CSS with resolvable imports inlined; the unchanged input if it contains no import
	 * @throws URISyntaxException if {@code baseUri} or a resolved import URL is not a valid URI
	 */
	private String resolveCss(final String baseUri, final String css) throws URISyntaxException {
		final Set<String> importChain = new HashSet<>();
		// seed with the base so that a stylesheet importing itself is detected immediately
		importChain.add(baseUri);
		return resolveCss(baseUri, css, importChain);
	}

	/**
	 * Worker for {@link #resolveCss(String, String)}.
	 *
	 * @param importChain URLs of the stylesheets currently being resolved, used to break circular imports
	 */
	private String resolveCss(final String baseUri, final String css, final Set<String> importChain) throws URISyntaxException {
		final Matcher matcher = CSS_IMPORT_PATTERN.matcher(css);
		if (!matcher.find()) {
			return css;
		}
		final StringBuilder newCss = new StringBuilder();
		int lastEnd = 0;
		do {
			// copy the text between the previous import and this one
			newCss.append(css, lastEnd, matcher.start());
			lastEnd = matcher.end();
			final String importedCss = loadImportedCss(baseUri, stripQuotes(matcher.group(1)), importChain);
			if (importedCss != null) {
				newCss.append(importedCss);
			} else {
				// keep the import
				newCss.append(matcher.group());
			}
		} while (matcher.find());
		newCss.append(css, lastEnd, css.length());
		return newCss.toString();
	}

	/**
	 * Loads and recursively resolves one imported stylesheet.
	 *
	 * @return the resolved contents, or {@code null} if the URL cannot be made
	 *         absolute, is already being resolved (circular import) or yields no contents
	 */
	private String loadImportedCss(final String baseUri, final String url, final Set<String> importChain) throws URISyntaxException {
		final String absoluteUrl = getAbsoluteUrl(baseUri, url);
		if (absoluteUrl == null) {
			return null;
		}
		if (!importChain.add(absoluteUrl)) {
			// without this guard a circular import recurses until the stack overflows
			LOG.debug("Not inlining circular css import {}", absoluteUrl);
			return null;
		}
		try {
			final Optional<String> optionalContents = resourceLoader.getContents(new URI(absoluteUrl), new URI(baseUri));
			if (!optionalContents.isPresent()) {
				return null;
			}
			return resolveCss(absoluteUrl, optionalContents.get(), importChain);
		} finally {
			// only the current chain matters; the same sheet may legitimately be imported twice side by side
			importChain.remove(absoluteUrl);
		}
	}

	/** Removes one pair of matching single or double quotes surrounding the given URL, if present. */
	private static String stripQuotes(final String url) {
		if (url.length() > 2 && ((url.startsWith("\"") && url.endsWith("\"")) || (url.startsWith("'") && url.endsWith("'")))) {
			return url.substring(1, url.length() - 1);
		}
		return url;
	}

	/**
	 * Resolves a possibly relative URL against a base URI.
	 *
	 * @param baseUri the base to resolve against
	 * @param relUrl the URL to resolve; may already be absolute
	 * @return the absolute URL, or {@code null} if none can be built
	 */
	private String getAbsoluteUrl(final String baseUri, String relUrl) {
		URL base;
		try {
			try {
				base = new URL(baseUri);
			} catch (final MalformedURLException e) {
				// the base is unsuitable, but the attribute may be abs on its own, so try that
				final URL abs = new URL(relUrl);
				return abs.toExternalForm();
			}
			// workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
			if (relUrl.startsWith("?")) {
				relUrl = base.getPath() + relUrl;
			}
			final URL abs = new URL(base, relUrl);
			return abs.toExternalForm();
		} catch (final MalformedURLException e) {
			return null;
		}
	}

	/**
	 * Builds an inline {@code script} element holding the contents of the given
	 * script's {@code src}.
	 *
	 * @return the new element, or {@code null} if the contents could not be loaded
	 * @throws URISyntaxException if the script URL or {@code baseUri} is not a valid URI
	 */
	private Node getNewScript(final Element script, final String baseUri) throws URISyntaxException {
		final Optional<String> optionalJs = resourceLoader.getContents(new URI(script.absUrl("src")), new URI(baseUri));
		if (!optionalJs.isPresent()) {
			return null;
		}
		final Node scriptNode = parseSingleElement("script", optionalJs.get());
		if (scriptNode != null) {
			copyType(script, scriptNode);
		}
		return scriptNode;
	}

	/**
	 * Builds a {@code style} element holding the contents of the stylesheet the
	 * given {@code link} points to, with that stylesheet's own imports resolved.
	 *
	 * @return the new element, or {@code null} if the contents could not be loaded
	 * @throws URISyntaxException if the stylesheet URL or {@code baseUri} is not a valid URI
	 */
	private Node getNewCssLink(final Element link, final String baseUri) throws URISyntaxException {
		final String cssUri = link.absUrl("href");
		final Optional<String> optionalCss = resourceLoader.getContents(new URI(cssUri), new URI(baseUri));
		if (!optionalCss.isPresent()) {
			return null;
		}
		// imports inside the stylesheet are relative to the stylesheet, not to the document
		final String css = resolveCss(cssUri, optionalCss.get());
		final Node styleNode = parseSingleElement("style", css);
		if (styleNode != null) {
			copyType(link, styleNode);
		}
		return styleNode;
	}

	/**
	 * Parses {@code <tag>contents</tag>} and returns the resulting element.
	 * NOTE(review): contents that contain the literal closing tag end the
	 * element early; confirm whether that needs handling.
	 *
	 * @return the first element with the given tag name, or {@code null} if parsing produced none
	 */
	private static Node parseSingleElement(final String tag, final String contents) {
		final String html = "<" + tag + ">" + contents + "</" + tag + ">";
		final Document doc = Jsoup.parse(html);
		return doc.select(tag).first();
	}

	/** Copies the {@code type} attribute to the new node, if the replaced node declares one. */
	private void copyType(final Node link, final Node newLink) {
		// attr() returns an empty string for a missing attribute, so test for presence explicitly
		if (link.hasAttr("type")) {
			newLink.attr("type", link.attr("type"));
		}
	}

	/**
	 * Obtains the replacement {@code src} value for an image. If internal
	 * inlining is enabled, the document library is asked first; if that yields
	 * nothing and external inlining is enabled, the resource loader is asked.
	 * Failures are logged without stack trace and treated as "not available".
	 *
	 * @param url the absolute image URL
	 * @param baseUri the base URI of the document
	 * @return the new {@code src} value (presumably a data URI produced by the resource loader), or empty
	 */
	private Optional<String> getNewImgSrc(final String url, final String baseUri) {
		if (configuration.isInlineInternalImagesForHtml()) {
			try (final DocumentLibraryFileContents localContents = LiferayAbstractionFactory.getInstance().getDLTool().getDocumentLibraryFileContents(new URI(url))) {
				if (localContents != null) {
					return Optional.of(resourceLoader
							.getInlineImgSrc(localContents.getContentType(), localContents.getContents()));
				}
			} catch (final Exception e) {
				// no stack trace here.
				LOG.info("Error fetching local contents for inline image: {}: {}", url, e.getMessage());
			}
		}
		if (configuration.isInlineExternalImagesForHtml()) {
			try {
				return resourceLoader.getExternalInlineImgSrc(new URI(url), new URI(baseUri));
			} catch (final Exception e) {
				// no stack trace here.
				LOG.info("Error fetching external contents for inline image: {}: {}", url, e.getMessage());
			}
		}
		return Optional.empty();
	}

	/** Closes the underlying resource loader. */
	@Override
	public void close() throws IOException {
		resourceLoader.close();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy