/*
 * de.mklinger.qetcher.liferay.client.impl.htmlinliner.HtmlElementInliner (Maven / Gradle / Ivy)
 * Artifact: qetcher-adapter-liferay-71 - Qetcher Liferay 7.1.x Adapter (the newest version).
 * This listing was extracted from an artifact download page; the page's navigation links
 * ("Go to download", "Show all versions", "Show documentation") are not part of the source.
 */
package de.mklinger.qetcher.liferay.client.impl.htmlinliner;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.mklinger.qetcher.liferay.abstraction.DocumentLibraryFileContents;
import de.mklinger.qetcher.liferay.client.impl.abstraction.liferay71.LiferayAbstractionFactory;
/**
* @author Marc Klinger - mklinger[at]mklinger[dot]de
*/
public class HtmlElementInliner implements Closeable {
private static final Pattern CSS_IMPORT_PATTERN = Pattern.compile("@import\\s+url\\s*\\(\\s*(.*?)\\s*\\)\\s*;?", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
private static final Logger LOG = LoggerFactory.getLogger(HtmlElementInliner.class);
private final QetcherHtmlInlineConfiguration configuration;
private final ResourceLoader resourceLoader;
public HtmlElementInliner(final QetcherHtmlInlineConfiguration configuration) {
this.configuration = configuration;
this.resourceLoader = new ResourceLoaderImpl();
}
public byte[] inline(final InputStream htmlInputStream, final String baseUri) throws IOException {
final Document htmlDocument = Jsoup.parse(htmlInputStream, null, baseUri);
if (configuration.isInlineExternalImagesForHtml() || configuration.isInlineInternalImagesForHtml()) {
final Elements imgs = htmlDocument.getElementsByTag("img");
for (final Element img : imgs) {
try {
final String url = img.absUrl("src");
if (url != null) {
LOG.debug("Trying to inline {}", url);
final Optional optionalNewImgSrc = getNewImgSrc(url, baseUri);
if (optionalNewImgSrc.isPresent()) {
final String newImgSrc = optionalNewImgSrc.get();
LOG.debug("Inlining url {}: {} inlined chars", url, newImgSrc.length());
img.attr("src", newImgSrc);
}
}
} catch (final Exception e) {
LOG.info("Error inlining image: {}", e.toString());
}
}
}
if (configuration.isInlineCssForHtml()) {
final Elements links = htmlDocument.getElementsByTag("link");
for (final Element link : links) {
try {
final String rel = link.attr("rel");
if ("stylesheet".equalsIgnoreCase(rel)) {
final String href = link.absUrl("href");
if (href != null && !href.isEmpty()) {
LOG.debug("Trying to inline {}", href);
final Node newCssLink = getNewCssLink(link, baseUri);
if (newCssLink != null) {
LOG.debug("Inlining css link {}", link);
link.replaceWith(newCssLink);
}
}
}
} catch (final Exception e) {
LOG.info("Error inlining link css: {}", e.toString());
}
}
final Elements styles = htmlDocument.getElementsByTag("style");
for (final Element style : styles) {
try {
final String css = style.html();
if (css == null || css.trim().isEmpty()) {
continue;
}
final String newCss = resolveCss(baseUri, css);
if (newCss != null) {
LOG.debug("Inlining css: {} inlined chars", newCss.length());
final DataNode data = DataNode.createFromEncoded(newCss, baseUri);
style.html("");
style.appendChild(data);
}
} catch (final Exception e) {
LOG.info("Error inlining style css: {}", e.toString());
}
}
}
if (configuration.isInlineJsForHtml()) {
final Elements scripts = htmlDocument.getElementsByTag("script");
for (final Element script : scripts) {
try {
final String src = script.absUrl("src");
if (src != null && !src.isEmpty()) {
LOG.debug("Trying to inline {}", src);
final Node newScript = getNewScript(script, baseUri);
if (newScript != null) {
LOG.debug("Inlining js url {}", src);
script.replaceWith(newScript);
}
}
} catch (final Exception e) {
LOG.info("Error inlining script: {}", e.toString());
}
}
}
final byte[] htmlBytes = htmlDocument.outerHtml().getBytes(htmlDocument.outputSettings().charset());
if (LOG.isTraceEnabled()) {
LOG.trace("HTML after inlining:\n{}", new String(htmlBytes, htmlDocument.outputSettings().charset()));
}
return htmlBytes;
}
private String resolveCss(final String baseUri, final String css) throws URISyntaxException {
final Matcher matcher = CSS_IMPORT_PATTERN.matcher(css);
final StringBuilder newCss = new StringBuilder();
int lastEnd = 0;
while (matcher.find()) {
final int start = matcher.start();
if (start > 0) {
newCss.append(css, lastEnd, start);
}
lastEnd = matcher.end();
String url = matcher.group(1);
if (url.length() > 2 && ((url.startsWith("\"") && url.endsWith("\"")) || (url.startsWith("'") && url.endsWith("'")))) {
url = url.substring(1, url.length() - 1);
}
boolean replaced = false;
final String absoluteUrl = getAbsoluteUrl(baseUri, url);
if (absoluteUrl != null) {
final Optional optionalContents = resourceLoader.getContents(new URI(absoluteUrl), new URI(baseUri));
if (optionalContents.isPresent()) {
final String contents = optionalContents.get();
final String resolvedContents = resolveCss(absoluteUrl, contents);
if (resolvedContents != null) {
newCss.append(resolvedContents);
} else {
newCss.append(contents);
}
replaced = true;
}
}
if (!replaced) {
// keep the import
newCss.append(matcher.group());
}
}
if (lastEnd > 0) {
if (lastEnd < css.length()) {
newCss.append(css, lastEnd, css.length());
}
return newCss.toString();
}
return css;
}
private String getAbsoluteUrl(final String baseUri, String relUrl) {
URL base;
try {
try {
base = new URL(baseUri);
} catch (final MalformedURLException e) {
// the base is unsuitable, but the attribute may be abs on its own, so try that
final URL abs = new URL(relUrl);
return abs.toExternalForm();
}
// workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
if (relUrl.startsWith("?")) {
relUrl = base.getPath() + relUrl;
}
final URL abs = new URL(base, relUrl);
return abs.toExternalForm();
} catch (final MalformedURLException e) {
return null;
}
}
private Node getNewScript(final Element script, final String baseUri) throws URISyntaxException {
final Optional optionalJs = resourceLoader.getContents(new URI(script.absUrl("src")), new URI(baseUri));
if (!optionalJs.isPresent()) {
return null;
}
final String js = optionalJs.get();
final String html = "";
final Document doc = Jsoup.parse(html);
final Node scriptNode = doc.select("script").first();
copyType(script, scriptNode);
return scriptNode;
}
private Node getNewCssLink(final Element link, final String baseUri) throws URISyntaxException {
final String cssUri = link.absUrl("href");
final Optional optionalCss = resourceLoader.getContents(new URI(cssUri), new URI(baseUri));
if (!optionalCss.isPresent()) {
return null;
}
String css = optionalCss.get();
css = resolveCss(cssUri, css);
final String html = "";
final Document doc = Jsoup.parse(html);
final Node styleNode = doc.select("style").first();
copyType(link, styleNode);
return styleNode;
}
private void copyType(final Node link, final Node newLink) {
final String type = link.attr("type");
if (type != null) {
newLink.attr("type", type);
}
}
private Optional getNewImgSrc(final String url, final String baseUri) {
if (configuration.isInlineInternalImagesForHtml()) {
try (final DocumentLibraryFileContents localContents = LiferayAbstractionFactory.getInstance().getDLTool().getDocumentLibraryFileContents(new URI(url))) {
if (localContents != null) {
return Optional.of(resourceLoader
.getInlineImgSrc(localContents.getContentType(), localContents.getContents()));
}
} catch (final Exception e) {
// no stack trace here.
LOG.info("Error fetching local contents for inline image: {}: {}", url, e.getMessage());
}
}
if (configuration.isInlineExternalImagesForHtml()) {
try {
return resourceLoader.getExternalInlineImgSrc(new URI(url), new URI(baseUri));
} catch (final Exception e) {
// no stack trace here.
LOG.info("Error fetching external contents for inline image: {}: {}", url, e.getMessage());
}
}
return Optional.empty();
}
@Override
public void close() throws IOException {
resourceLoader.close();
}
}