com.seanox.pdf.Service Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of seanox-pdf-service Show documentation
PDF service for generating/rendering PDFs based on Open HTML to PDF
There is a newer version: 4.4.0
/**
 * LIZENZBEDINGUNGEN - Seanox Software Solutions ist ein Open-Source-Projekt, im
 * Folgenden Seanox Software Solutions oder kurz Seanox genannt.
 * Diese Software unterliegt der Version 2 der Apache License.
 *
 * PDF Service
 * Copyright (C) 2020 Seanox Software Solutions
 *  
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *  
 * http://www.apache.org/licenses/LICENSE-2.0
 *  
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.seanox.pdf;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.Overlay;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.beans.factory.config.BeanDefinition;
import org.springframework.context.annotation.ClassPathScanningCandidateComponentProvider;
import org.springframework.core.type.filter.AnnotationTypeFilter;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import com.seanox.pdf.Service.Template.Resources;
import com.seanox.pdf.Service.Template.TemplateException;

/**
 * Static service for creating PDF based on templates and meta-objects.
 *
 * Examples of use:
 * <pre>
 *   Service.generate(template, meta);
 *
 *   Files.write(new File(template + ".pdf").toPath(), Service.generate(template, meta), StandardOpenOption.CREATE);
 * </pre>
 *  
 * How it works:
 * The creation of PDFs is based on an HTML-to-PDF converter (openhtmltopdf).
 * In the first step, an HTML is created that contains all data records.
 * The HTML is based on a markup template with placeholders. A generator or
 * renderer fills the placeholders in the template and creates a
 * single-page HTML document as text.
 * The HTML-to-PDF converter creates the PDF from the HTML.
 * The pages are separated by CSS.

 * The service works data neutral. There is no special data object.
 * Only the key value entries in the maps and the placeholders in the template
 * determine the content.
 *  
 * Useful information:
 * Templates are based on an implementation of the {@link Template} and the
 * annotation {@link Resources}, which with {@link Resources#base()} and
 * {@link Resources#template()} contains information about the base directory
 * of the resources (stylesheets, images, fonts, ...), as well as the path of
 * the markup template with the same name.
 * It is practical if the template implementation, the markup template and any
 * template extensions (properties, ...) are stored in the same package.
 * 
 * The resources (stylesheets, images, fonts, ...) use HTML-to-PDF from the
 * ClassPath, means the base URI required by the HTML-to-PDF converter refers to
 * the ClassPath of this class. The location in the ClassPath can be defined
 * with {@link Resources#base()}.
 *  
 * About the templates
 * The template implementation takes over the rendering of the templates.
 * The implementation decides which generator, renderer, engine, ... it uses.
 * As engine {@link Generator} is used, here you can find more details.
 * The most important in short form:
 *  
 * #[placeholder]
 * Simple placeholder, global or in a section.
 *  
 * #[section[[...]]]
 * Section/Block can contain more substructures.
 * Sections/blocks are only rendered if a corresponding map entry exists.
 *  
 * #[locale]
 * Placeholder provided by {@link Service} with the current language.
 * Available in all sections (header, content/data, footer).
 *  
 * #[page]
 * Placeholder provided by {@link Service} with the current page number.
 * Available in sections: header, footer
 *  
 * #[pages]
 * Placeholder provided by {@link Service} with the total page number.
 * Available in sections: header, footer
 *
 * Service 3.5.1 20200316<br>
 * Copyright (C) 2020 Seanox Software Solutions<br>
 * All rights reserved.
 *
 * @author  Seanox Software Solutions
 * @version 3.5.1 20200316
 */
public class Service {
    
    /**
     * Creates a PDF for a template class and data as meta object.
     * The passed template class is instantiated via its no-argument
     * constructor and the instance is handed over to
     * {@link #generate(Template, Meta)}.
     * @param  template class of the {@link Template} implementation to use;
     *     it must be instantiable through a no-argument constructor
     * @param  meta     {@link Meta}
     * @return the created PDF as byte array
     * @throws TemplateException
     *     If the template class cannot be instantiated (also when it is
     *     {@code null} or not a {@link Template}).
     * @throws ServiceException
     *     In case of unexpected errors.
     */
    public static byte[] generate(Class<?> template, Meta meta)
            throws ServiceException {

        // Instantiate the class that was passed in. Template itself is
        // abstract, so Template.class can never be instantiated directly.
        // The cast is inside the try block so that a wrong type is reported
        // the same way as any other instantiation problem.
        Template instance;
        try {
            instance = (Template)template.getDeclaredConstructor().newInstance();
        } catch (Exception exception) {
            throw new Template.TemplateException(exception);
        }

        try {
            return Service.generate(instance, meta);
        } catch (ServiceException exception) {
            // Already the declared exception type, do not wrap it again.
            throw exception;
        } catch (Exception exception) {
            throw new ServiceException(exception);
        }
    }   
    
    /**
     * Renders the given template with the given meta object into a PDF.
     * Before rendering, the base of the template is checked to be parsable
     * as {@link URI}. If no meta object is passed, a new empty {@link Meta}
     * is used instead.
     * @param  template {@link Template}
     * @param  meta     {@link Meta}, may be {@code null}
     * @return the created PDF as byte array
     * @throws TemplateException
     *     If the base of the template cannot be determined or parsed.
     * @throws ServiceException
     *     In case of unexpected errors.
     */
    public static byte[] generate(Template template, Meta meta)
            throws ServiceException {

        // The URI is created for validation only, the result is not needed.
        try {
            new URI(template.getBase().toString());
        } catch (Exception exception) {
            throw new Template.TemplateException("Invalid base URI", exception);
        }

        final Meta content = meta != null ? meta : new Meta();
        try {
            return template.generate(content);
        } catch (Exception exception) {
            throw new ServiceException(exception);
        }
    }

    /**
     * Data carrier for the creation of PDFs.
     * A meta object holds a locale and four key-value maps: header, data,
     * footer and statics. It contains no logic besides accessors and a
     * shallow copy; the maps are stored and returned as passed, without
     * copying.
     */
    public static class Meta {

        /** key-value map for the header */
        private Map header;

        /** key-value map for the data */
        private Map data;

        /** key-value map for the footer */
        private Map footer;

        /** key-value map for the static texts */
        private Map statics;

        /** locale */
        private Locale locale;

        /** Constructor, creates a new Meta object without any values. */
        public Meta() {
        }

        /**
         * Returns the locale.
         * @return the locale, {@code null} if none has been set
         */
        public Locale getLocale() {
            return locale;
        }

        /**
         * Sets the locale.
         * @param locale the locale to use
         */
        public void setLocale(Locale locale) {
            this.locale = locale;
        }

        /**
         * Returns the key-value map for the header.
         * @return the header map, {@code null} if none has been set
         */
        public Map getHeader() {
            return header;
        }

        /**
         * Sets the key-value map for the header.
         * @param header the header map
         */
        public void setHeader(Map header) {
            this.header = header;
        }

        /**
         * Returns the key-value map for the data.
         * @return the data map, {@code null} if none has been set
         */
        public Map getData() {
            return data;
        }

        /**
         * Sets the key-value map for the data.
         * @param data the data map
         */
        public void setData(Map data) {
            this.data = data;
        }

        /**
         * Returns the key-value map for the static texts.
         * @return the statics map, {@code null} if none has been set
         */
        public Map getStatics() {
            return statics;
        }

        /**
         * Sets the key-value map for the static texts.
         * @param statics the statics map
         */
        public void setStatics(Map statics) {
            this.statics = statics;
        }

        /**
         * Returns the key-value map for the footer.
         * @return the footer map, {@code null} if none has been set
         */
        public Map getFooter() {
            return footer;
        }

        /**
         * Sets the key-value map for the footer.
         * @param footer the footer map
         */
        public void setFooter(Map footer) {
            this.footer = footer;
        }

        /**
         * Creates a shallow copy: a new Meta object that refers to the same
         * locale and the same map instances as this one.
         * @return the new Meta object
         */
        @Override
        protected Object clone() {

            final Meta copy = new Meta();
            copy.locale  = this.locale;
            copy.header  = this.header;
            copy.data    = this.data;
            copy.footer  = this.footer;
            copy.statics = this.statics;
            return copy;
        }

        /**
         * Meta-Type is required during generation so that the corresponding
         * meta data (key-value map) is used for the markup.
         */
        public enum Type {

            /** Meta-Type HEADER */
            HEADER,

            /** Meta-Type DATA */
            DATA,

            /** Meta-Type FOOTER */
            FOOTER
        }
    }

    /** Abstract class for implementing templates. */
    public static abstract class Template {
        
        /**
         * Annotation for implementations of {@link Template} that declares
         * where the template finds its resources in the ClassPath.
         * {@link Resources#base()} names the base directory of the resources
         * (stylesheets, images, fonts, ...) and {@link Resources#template()}
         * the path of the markup template.
         * The annotation is applicable to types and is retained at runtime.
         */
        @Retention(RetentionPolicy.RUNTIME)
        @Target(ElementType.TYPE)
        @Documented
        public @interface Resources {

            /**
             * Base URI of the resources in the ClassPath.
             * @return the base URI, by default {@code "/"} as the root in the
             *     ClassPath
             */
            String base() default "/";

            /**
             * Path of the markup template in the ClassPath.
             * The default is an empty string, which is documented to stand
             * for the path of the class in the ClassPath with the extension
             * 'html'.
             * @return the path of the markup template, by default empty
             */
            String template() default "";
        }
        
        /** Array of template implementations detected in the ClassPath */
        private static Class