All downloads are free. The search and download features use the official Maven repository.

cn.afterturn.easypoi.pdf.imports.PdfImportService Maven / Gradle / Ivy

package cn.afterturn.easypoi.pdf.imports;

import cn.afterturn.easypoi.entity.BaseTypeConstants;
import cn.afterturn.easypoi.excel.annotation.ExcelTarget;
import cn.afterturn.easypoi.excel.entity.params.ExcelCollectionParams;
import cn.afterturn.easypoi.excel.entity.params.ExcelImportEntity;
import cn.afterturn.easypoi.excel.entity.result.ExcelImportResult;
import cn.afterturn.easypoi.excel.imports.base.ImportBaseService;
import cn.afterturn.easypoi.exception.excel.ExcelImportException;
import cn.afterturn.easypoi.exception.excel.enums.ExcelImportEnum;
import cn.afterturn.easypoi.pdf.entity.PdfImportParams;
import cn.afterturn.easypoi.util.PoiPublicUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.ReflectionToStringBuilder;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.poi.ss.usermodel.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import technology.tabula.*;
import technology.tabula.extractors.ExtractionAlgorithm;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.util.*;

/**
 * Excel 导入服务
 * 参考 https://blog.csdn.net/gengzhy/article/details/128386973
 *
 * @author JueYue
 * @dete 2023年7月17日
 */
public class PdfImportService extends ImportBaseService {

    private static Logger LOGGER = LoggerFactory.getLogger(PdfImportService.class);
    private PDDocument document;
    private ExtractionAlgorithm algorithm =  new SpreadsheetExtractionAlgorithm();

    /**
     * Excel 导入 field 字段类型 Integer,Long,Double,Date,String,Boolean
     */
    public ExcelImportResult importExcelByIs(InputStream inputstream, Class pojoClass,
                                             PdfImportParams params, boolean needMore) throws Exception {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Excel import start ,class is {}", pojoClass);
        }
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int len;
        while ((len = inputstream.read(buffer)) > -1) {
            baos.write(buffer, 0, len);
        }
        baos.flush();
        InputStream userIs = new ByteArrayInputStream(baos.toByteArray());
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Excel clone success");
        }
        return importExcelByIs(PDDocument.load(userIs), pojoClass, params, needMore);
    }

    /**
     * Excel 导入 field 字段类型 Integer,Long,Double,Date,String,Boolean
     */
    public ExcelImportResult importExcelByIs(PDDocument pdDocument, Class pojoClass,
                                             PdfImportParams params, boolean needMore) throws Exception {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Excel import start ,class is {}", pojoClass);
        }
        List result = new ArrayList();
        ExcelImportResult importResult;
        String targetId = null;
        Field[] fileds = PoiPublicUtil.getClassFields(pojoClass);
        ExcelTarget etarget = pojoClass.getAnnotation(ExcelTarget.class);
        if (etarget != null) {
            targetId = etarget.value();
        }
        Map excelParams = new HashMap<>();
        List excelCollection = new ArrayList<>();
        getAllExcelField("", fileds, excelParams, excelCollection, pojoClass, null, null);
        document = pdDocument;
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Document create success");
        }
        importResult = new ExcelImportResult();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug(" start to read excel by is ,startTime is {}", new Date());
        }
        importExcel(result, document, pojoClass, params, excelParams, excelCollection);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug(" end to read excel list by sheet ,endTime is {}", new Date());
        }
        if (params.isReadSingleCell()) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug(" read Key-Value ,endTime is {}", System.currentTimeMillis());
            }
        }

        importResult.setList(result);

        return importResult;
    }

    private Collection importExcel(List result, PDDocument document, Class pojoClass, PdfImportParams params, Map excelParams, List excelCollection) throws Exception {
        // 读取表格
        List> maps = new ArrayList<>();
        PageIterator pi = new ObjectExtractor(document).extract();
        Map cellMap;
        int rowNum = 0;// 行计数器
        int pageNum = 0;// 页码计数器
        Map titlemap = null;
        while (pi.hasNext()) {
            Page page = pi.next();
            if (pageNum++ < 1) {
                // 第一页获取一下表头
                List tables = (List
) algorithm.extract(page); for (Table table : tables) { List> rows = table.getRows(); if (rows.size() <= params.getHeadRows()) { throw new RuntimeException("标题行不正确"); } for (List row : rows) { //跳过表头行 if (rowNum < params.getTitleRows()) { rowNum++; continue; } // 拿到标题行 if (rowNum < params.getTitleRows() + params.getHeadRows()) { rowNum++; titlemap = getTitleMap(row, params, excelCollection, excelParams); continue; } //跳过无用行数 if (rowNum < params.getTitleRows() + params.getHeadRows() + params.getStartRows()) { rowNum++; continue; } Object object = PoiPublicUtil.createObject(pojoClass, ""); for (int k = 0; k < row.size(); k++) { RectangularTextContainer cell = row.get(k); String cellText; if (params.getCellHandler() != null){ cellText = params.getCellHandler().getValue(cell); } else { cellText = cell.getText(); cellText = cellText == null ? "" : cellText.trim(); } String titleString = (String) titlemap.get(k); if (excelParams.containsKey(titleString)) { setValues(excelParams.get(titleString), object, cellText); } } result.add(object); rowNum++; } } } else { List
tables = (List
) algorithm.extract(page); for (Table table : tables) { List> rows = table.getRows(); for (List row : rows) { Object object = PoiPublicUtil.createObject(pojoClass, ""); for (int k = 0; k < row.size(); k++) { RectangularTextContainer cell = row.get(k); String cellText = cell.getText(); cellText = cellText == null ? "" : cellText.trim(); String titleString = (String) titlemap.get(k); if (excelParams.containsKey(titleString)) { setValues(excelParams.get(titleString), object, cellText); } } result.add(object); rowNum++; } } } } return result; } private Map getTitleMap(List row, PdfImportParams params, List excelCollection, Map excelParams) { Map titlemap = new LinkedHashMap(); String collectionName = null; for (int k = 0; k < row.size(); k++) { RectangularTextContainer cell = row.get(k); String cellText = cell.getText(); String value = cellText == null ? "" : cellText.trim(); value = value.replace("\n", ""); titlemap.put(k, value); } // 处理指定列的情况 Set keys = excelParams.keySet(); for (String key : keys) { if (key.startsWith("FIXED_")) { String[] arr = key.split("_"); titlemap.put(Integer.parseInt(arr[1]), key); } } return titlemap; } public void setAlgorithm(ExtractionAlgorithm algorithm) { this.algorithm = algorithm; } }