All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.janeluo.easypdf.TextParserDocHandler Maven / Gradle / Ivy

The newest version!
/* Copyright (c) 2021 janeluo
 * easy-pdf is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
package com.janeluo.easypdf;


import com.alibaba.fastjson.JSONObject;
import com.itextpdf.kernel.geom.PageSize;
import com.janeluo.easypdf.enums.DocType;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.EmptyStackException;
import java.util.List;
import java.util.Stack;

/**
 * 解析 XML 模板,并生成 PDF 文件
 *
 * @author janeluo
 */
@Slf4j
public class TextParserDocHandler extends DefaultHandler {
    public static final String[] BLOCK_ELEMENTS = {
            "title", "chapter", "section", "para",
            "pagebreak", "table"
    };


    // 页面大小常数定义
    private final Object[][] pageSizeMap = {
            {"a0", PageSize.A0}, {"a1", PageSize.A1},
            {"a2", PageSize.A2}, {"a3", PageSize.A3},
            {"a4", PageSize.A4}, {"a5", PageSize.A5},
            {"a6", PageSize.A6}, {"a7", PageSize.A7},
            {"a8", PageSize.A8}, {"a9", PageSize.A9},
            {"a10", PageSize.A10},

            {"b0", PageSize.B0}, {"b1", PageSize.B1},
            {"b2", PageSize.B2}, {"b3", PageSize.B3},
            {"b4", PageSize.B4}, {"b5", PageSize.B5},
            {"b6", PageSize.B6}, {"b7", PageSize.B7},
            {"b8", PageSize.B8}, {"b9", PageSize.B9},
            {"b10", PageSize.B10},
    };

    private final TextParser parser;
    private final TextDoc textDoc;
    private final List chunkList;
    private final Stack chunkStack;
    private final StringBuilder contentsBuilder;
    private JSONObject jsonData;
    private TextTable table = null;

    public TextParserDocHandler(TextParser parser, DocType docType) throws IOException {
        chunkList = new ArrayList<>();
        chunkStack = new Stack<>();
        contentsBuilder = new StringBuilder();

        this.parser = parser;

        switch (docType) {
            case DPF:
                textDoc = new PDFDoc(parser.templateStream, parser.outStream, parser.fontFamily);
                break;

            case HTML:
                textDoc = new HTMLDoc(parser.outStream);
                HTMLDoc textDoc = (HTMLDoc) this.textDoc;
                textDoc.setLinkPaths(parser.cssPaths, parser.jsPaths);
                if (parser.htmlDeclare != null) {
                    textDoc.setDeclare(parser.htmlDeclare);
                }
                if (parser.htmlExtra != null) {
                    textDoc.setExtra(parser.htmlExtra);
                }
                break;
            default:
                log.error("Document type unsupported.");
                throw new IOException("Document type unsupported.");
        }

        if (parser.outputEncoding != null) {
            textDoc.setEncoding(parser.outputEncoding);
        }
    }

    /**
     * 文档开始解析时回调
     */
    @Override
    public void startDocument() throws SAXException {
        if (log.isDebugEnabled()) {
            log.debug("解析文件开始");
        }
        try {
            if (parser.jsonStream != null) {
                final String jsonStr = IOUtils.toString(parser.jsonStream, StandardCharsets.UTF_8);

                JSONObject jsonObject = JSONObject.parseObject(jsonStr);

                if (textDoc instanceof PDFDoc) {
                    if (!jsonObject.containsKey("data")) {
                        if (log.isErrorEnabled()) {
                            log.error("JSON source missing 'data' key, please check!");
                        } else {
                            System.err.println(
                                    "JSON source missing 'data' key, please check!");
                        }
                    } else {
                        Object value = jsonObject.get("data");
                        if (!(value instanceof JSONObject)) {
                            if (log.isErrorEnabled()) {
                                log.error("JSON 'data' must be a object.");
                            } else {
                                System.err.println("JSON 'data' must be a object.");
                            }
                        } else {
                            jsonData = (JSONObject) value;
                        }
                    }
                } else if (textDoc instanceof HTMLDoc) {
                    ((HTMLDoc) textDoc).setJSONObject(jsonObject);
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            throw new SAXException("Failed to parse JSON stream");
        }
    }

    /**
     * 文档解析结束时回调
     */
    @Override
    public void endDocument() {
    }


    private void setupPage(Attributes attrs) {
        // 页面大小
        String value = attrs.getValue("size");
        if (value != null) {
            for (Object[] item : pageSizeMap) {
                if (value.equalsIgnoreCase((String) item[0])) {
                    textDoc.setPageSize((PageSize) item[1]);
                    break;
                }
            }
        }

        // 页面边距
        value = attrs.getValue("margin");
        if (value != null) {
            String[] array = value.split(",");
            if (array.length < 4) {

                if (log.isErrorEnabled()) {
                    log.error("Page margin format error.");
                } else {
                    System.err.println("Page margin format error.");
                }
            } else {
                try {
                    textDoc.setPageMargin(
                            Integer.parseInt(array[0].trim()),
                            Integer.parseInt(array[1].trim()),
                            Integer.parseInt(array[2].trim()),
                            Integer.parseInt(array[3].trim()));
                } catch (Exception ex) {
                    if (log.isErrorEnabled()) {
                        log.error("Page margin format error.");
                    } else {
                        System.err.println("Page margin format error.");
                    }
                }
            }
        }
    }

    /**
     * 元素开始时回调
     */
    @Override
    public void startElement(String namespaceUri,
                             String localName, String qName, Attributes attrs)
            throws SAXException {
        if (log.isTraceEnabled()) {
            log.trace(">>>>>> start qName:[{}]", qName);
        }
        TextChunk prevChunk = null;

        if ("easy-pdf".equalsIgnoreCase(qName)) {
            if (textDoc.isOpen()) {
                throw new SAXException("'textpdf' must be root element.");
            }
            if (!textDoc.open()) {
                throw new SAXException("Open document failed.");
            }
            return;
        }

        if (!textDoc.isOpen()) {
            throw new SAXException("Document unopen yet. "
                    + "check your xml root element is 'textpdf'");
        }

        // Block 元素不可嵌套
        for (String label : BLOCK_ELEMENTS) {
            if (label.equalsIgnoreCase(qName)) {
                chunkList.clear();
                break;
            }
        }

        if ("table".equalsIgnoreCase(qName)) {
            table = new TextTable();
            table.addAttrs(attrs);
            return;
        }
        if (table != null) {
            if (!"cell".equalsIgnoreCase(qName)) {
                throw new SAXException(qName + " is not child of table");
            }
            TextChunk chunk = new TextChunk();
            chunk.addAttrs(attrs);
            table.addCell(chunk);
            contentsBuilder.setLength(0);
            return;
        }

        if ("page".equalsIgnoreCase(qName)) {
            setupPage(attrs);
            textDoc.newPage();
            return;
        }
        if ("hrule".equalsIgnoreCase(qName)) {
            textDoc.addHrule(attrs);
            return;
        }
        if ("img".equalsIgnoreCase(qName)) {
            textDoc.addImage(attrs);
            return;
        }

        try {
            prevChunk = chunkStack.peek();
            String contents = contentsBuilder.toString();
            if (contents.length() > 0) {
                prevChunk.setContents(contents);
                contentsBuilder.setLength(0);
                chunkList.add(prevChunk.clone());
            }
        } catch (EmptyStackException ignored) {
        }

        TextChunk chunk = new TextChunk();
        if (prevChunk != null) {
            chunk.addAttrs(prevChunk.getAttrs());
        }
        chunk.addAttrs(attrs);

        if ("value".equalsIgnoreCase(qName)) {
            chunk.setIsValue(true);

            String id = attrs.getValue("id");
            if (id == null) {

                if (log.isErrorEnabled()) {
                    log.error("Value element missing 'id' attribute.");
                } else {
                    System.err.println("Value element missing 'id' attribute.");
                }
            } else {
                if (textDoc instanceof PDFDoc) {
                    if (jsonData != null) {
                        if (!jsonData.containsKey(id)) {
                            if (log.isErrorEnabled()) {
                                log.error("JSON data key '" + id
                                        + "' not found!");
                            } else {
                                System.err.println("JSON data key '" + id
                                        + "' not found!");
                            }

                        } else {
                            Object value = jsonData.get(id);
                            if (!(value instanceof String)) {
                                if (log.isErrorEnabled()) {
                                    log.error("JSON  data key '" + id
                                            + "' must has a string value.");
                                } else {
                                    System.err.println("JSON  data key '" + id
                                            + "' must has a string value.");
                                }
                            } else {
                                contentsBuilder.append(value);
                                if (attrs.getValue("font-style") == null) {
                                    chunk.addAttr("font-style", "bold,underline");
                                }
                            }
                        }
                    }
                }
            }
        } else if ("hspace".equalsIgnoreCase(qName)) {
            String value = attrs.getValue("size");
            if (value == null || value.length() == 0) {
                if (log.isErrorEnabled()) {
                    log.error("hspace need a size attribute.");
                } else {
                    System.err.println("hspace need a size attribute.");
                }
            } else {
                try {
                    int size = Integer.parseInt(value);
                    for (int i = 0; i < size; i++) {
                        contentsBuilder.append(' ');
                    }
                } catch (Exception ex) {
                    if (log.isErrorEnabled()) {
                        log.error("size attribute need a integer value", ex);
                    } else {
                        System.err.println("size attribute need a integer value");
                    }
                }
            }
        }
        chunkStack.push(chunk);
    }

    /**
     * 标签字符串处理
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        String contents = new String(ch, start, length);
        contentsBuilder.append(
                contents.replaceAll("\\s*\n+\\s*", "").trim());
    }

    /**
     * 元素结束时回调
     */
    @Override
    public void endElement(String namespaceUri,
                           String localName, String qName) throws SAXException {
        if (log.isTraceEnabled()) {
            log.info(">>>>>> end qName:[{}]", qName);
        }

        if ("easy-pdf".equalsIgnoreCase(qName)) {
            textDoc.close();
            return;
        }
        if ("pagebreak".equalsIgnoreCase(qName)) {
            textDoc.newPage();
            return;
        }
        if ("break".equalsIgnoreCase(qName)) {
            contentsBuilder.append("\n");
            return;
        }

        if ("cell".equalsIgnoreCase(qName)) {
            TextChunk chunk = table.lastCell();
            chunk.setContents(contentsBuilder.toString());
        }
        if ("table".equalsIgnoreCase(qName)) {
            if (table.getCells().size() > 0) {
                try {
                    textDoc.writeTable(table);
                } catch (IOException e) {
                    throw new SAXException(e);
                }
            }
            contentsBuilder.setLength(0);
            table = null;
            return;
        }

        TextChunk chunk = null;
        try {
            chunk = chunkStack.pop();
        } catch (Exception ignored) {
        }

        if (chunk == null) {
            return;
        }
        String contents = contentsBuilder.toString();
        if (contents.length() > 0 ||
                "value".equalsIgnoreCase(qName) ||
                "hspace".equalsIgnoreCase(qName)) {
            chunk.setContents(contents);
            contentsBuilder.setLength(0);
            chunkList.add(chunk.clone());
        }

        for (String label : BLOCK_ELEMENTS) {
            // 空段落,需要增加一个空 TextChunk 对象去模拟空段落
            if (chunkList.size() == 0 && "para".equalsIgnoreCase(label)) {
                chunk.setContents(" ");
                chunkList.add(chunk.clone());
            }

            if (chunkList.size() > 0) {
                if (label.equalsIgnoreCase(qName)) {
                    try {
                        textDoc.writeBlock(qName, chunkList);
                    } catch (Exception e) {
                        e.printStackTrace();
                        throw new SAXException("Write to PDF failed.");
                    } finally {
                        chunkList.clear();
                    }
                    break;
                }
            }
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy