All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.day.cq.dam.handler.standard.pdf.PdfHandler Maven / Gradle / Ivy

/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.dam.handler.standard.pdf;

import java.awt.Dimension;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
import java.util.Iterator;
import java.util.List;

import javax.imageio.ImageIO;
import javax.jcr.Node;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.ReferencePolicyOption;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.api.resource.PersistenceException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.commons.contentdetection.ContentAwareMimeTypeService;
import org.apache.sling.commons.osgi.PropertiesUtil;
import org.jpedal.jbig2.JBIG2Decoder;
import org.jpedal.jbig2.JBIG2Exception;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.granite.asset.api.AssetException;
import com.adobe.granite.asset.api.AssetManager;
import com.adobe.granite.asset.api.AssetRelation;
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.ByteWriter;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.internal.io.RandomAccessFileByteWriter;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFIOException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFInvalidDocumentException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFSecurityException;
import com.adobe.internal.pdftoolkit.core.filter.FilterParams;
import com.adobe.internal.pdftoolkit.core.filter.spi.CustomDecodeFilter;
import com.adobe.internal.pdftoolkit.core.filter.spi.CustomFilter;
import com.adobe.internal.pdftoolkit.core.filter.spi.CustomFilterException;
import com.adobe.internal.pdftoolkit.core.fontset.PDFFontSet;
import com.adobe.internal.pdftoolkit.core.types.ASName;
import com.adobe.internal.pdftoolkit.pdf.document.PDFDocument;
import com.adobe.internal.pdftoolkit.pdf.document.PDFEncryptionType;
import com.adobe.internal.pdftoolkit.pdf.document.PDFOpenOptions;
import com.adobe.internal.pdftoolkit.pdf.document.PDFSaveFullOptions;
import com.adobe.internal.pdftoolkit.pdf.graphics.PDFRectangle;
import com.adobe.internal.pdftoolkit.pdf.graphics.optionalcontent.PDFOCBaseState;
import com.adobe.internal.pdftoolkit.pdf.graphics.optionalcontent.PDFOCConfig;
import com.adobe.internal.pdftoolkit.pdf.graphics.optionalcontent.PDFOCGroup;
import com.adobe.internal.pdftoolkit.pdf.graphics.optionalcontent.PDFOCGroupArray;
import com.adobe.internal.pdftoolkit.pdf.graphics.optionalcontent.PDFOCProperties;
import com.adobe.internal.pdftoolkit.pdf.page.PDFPage;
import com.adobe.internal.pdftoolkit.pdf.page.PDFPageTree;
import com.adobe.internal.pdftoolkit.services.manipulations.PMMOptions;
import com.adobe.internal.pdftoolkit.services.manipulations.PMMService;
import com.adobe.internal.pdftoolkit.services.rasterizer.PageRasterizer;
import com.adobe.internal.pdftoolkit.services.rasterizer.RasterizationOptions;
import com.adobe.internal.pdftoolkit.services.security.SecurityKeyPassword;
import com.adobe.internal.pdftoolkit.services.xmp.Metadata;
import com.adobe.internal.pdftoolkit.services.xmp.XMPService;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.AssetReferenceResolver;
import com.day.cq.dam.api.DamConstants;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.api.metadata.ExtractedMetadata;
import com.day.cq.dam.commons.handler.AbstractAssetHandler;
import com.day.cq.dam.handler.gibson.fontmanager.FontManagerService;
import com.day.image.Layer;

/**
 * The <code>PdfHandler</code> class handles PDF and Adobe Illustrator files.
 */
@Component(inherit = true, metatype = true)
@Service
public class PdfHandler extends AbstractAssetHandler {

    /**
     * the default logger
     */
    private static final Logger log = LoggerFactory.getLogger(PdfHandler.class);

    /**
     * Mime type detection service. {@link #processSubAssets(Asset)} consults it
     * (when it is non-null) to re-detect the mime type of assets that are not
     * already flagged as Illustrator files.
     */
    @Reference(
            policy = ReferencePolicy.STATIC,
            policyOption = ReferencePolicyOption.GREEDY // ensures service-resolution to the one ranked highest
            )
    protected ContentAwareMimeTypeService mimeTypeService;

    /**
     * Name of the boolean configuration property (default <code>false</code>)
     * that is read in {@link #activate(ComponentContext)} and stored in
     * {@link #rasterAnn}.
     */
    @Property(boolValue = false, name = "raster.annotation")
    private static final String RASTER_ANNOTATION = "raster.annotation";

    /**
     * Mime type of PDF documents; also used as the mime type of the per-page
     * sub assets created by this handler.
     */
    public static final String CONTENT_MIMETYPE = "application/pdf";
    
    /**
     * Mime types for illustrator files. Only <code>AI_MIMETYPE_2</code> is
     * announced by {@link #getMimeTypes()}; both are checked when deciding how
     * to create sub assets.
     */
    private static final String AI_MIMETYPE_1 = "application/postscript";
    private static final String AI_MIMETYPE_2 = "application/illustrator";

    /**
     * 1 PDF unit equals 1/72 of an inch
     */
    private static final double PDF_UNITS_PER_INCH = 72;

    /**
     * Supplies the {@link PDFFontSet} handed to the rasterizer.
     */
    @Reference
    private FontManagerService fontManagerService;
    
    /**
     * Resolves the related assets of non-sub-assets in
     * {@link #processRelated(Asset)}.
     */
    @Reference(policy = ReferencePolicy.STATIC)
    private AssetReferenceResolver caRefResolver;

    /**
     * Value of the {@link #RASTER_ANNOTATION} configuration property; passed to
     * <code>RasterizationOptions.processAnnotations</code> before rasterizing.
     */
    protected boolean rasterAnn;

    // NOTE(review): maxPages is neither assigned nor read anywhere in this
    // class; it may be used by subclasses - confirm before removing.
    protected int maxPages;

    // ----------------------< AssetHandler >-----------------------------------
    /**
     * {@inheritDoc}
     * <p>
     * Opens the original rendition of the asset as a PDF document and copies
     * the page count, the physical size of the first page (in inches), the
     * document information entries (author, dates, producer, subject, title,
     * trapped) and the embedded XMP packet into the returned metadata. If the
     * XMP packet cannot be exported, the generic processor is run on a fresh
     * stream of the original instead. Errors are logged and never propagated;
     * whatever was extracted up to that point is returned.
     *
     * @param asset the asset to extract the metadata from
     * @return the extracted metadata, never <code>null</code>
     */
    public ExtractedMetadata extractMetadata(final Asset asset) {
        ExtractedMetadata metadata = new ExtractedMetadata();
        log.debug("extractMetadata: importing asset [{}]", asset.getPath());

        InputStream is = null;
        ByteReader byteReader = null;
        PDFDocument pdfDoc = null;

        try {
            is = asset.getOriginal().getStream();
            byteReader = new InputStreamByteReader(is);
            pdfDoc = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());

            // an encrypted document that cannot be unlocked is skipped entirely
            boolean canRead = true;
            if (pdfDoc.isEncrypted()) {
                try {
                    unlock(pdfDoc);
                } catch (Exception e) {
                    log.warn("extractMetadata: unable to decrypt document [{}]: ", asset.getPath(), e);
                    canRead = false;
                }
            }

            if (canRead) {
                Metadata pdfMetadata = XMPService.getDocumentMetadata(pdfDoc);

                try {
                    // find no. of pages by reading the pdfDoc
                    PDFPageTree pdfPageTree = pdfDoc.requirePages();
                    int numPages = pdfPageTree.getNumPages();
                    metadata.setMetaDataProperty("numPages", numPages);

                    // write page width and height of the first page in inches
                    if (numPages > 0) {
                        PDFRectangle cropBox = pdfPageTree.getPage(0).getCropBox();
                        if ((cropBox != null) && (cropBox.width() > 0) && (cropBox.height() > 0)) {
                            metadata.setMetaDataProperty("Physical width in inches", cropBox.width() / PDF_UNITS_PER_INCH);
                            metadata.setMetaDataProperty("Physical height in inches", cropBox.height() / PDF_UNITS_PER_INCH);
                        }
                    }
                } catch (Exception e) {
                    // page information is optional: log (including the asset path) and carry on
                    log.error("extractMetadata: error extracting number of pages in the pdf [{}]: ", asset.getPath(), e);
                }

                // set pdf properties
                if (pdfMetadata.getAuthor() != null) {
                    metadata.setMetaDataProperty("Author", pdfMetadata.getAuthor());
                }
                if (pdfMetadata.getCreationDate() != null) {
                    metadata.setMetaDataProperty("CreationDate", pdfMetadata.getCreationDate());
                }
                if (pdfMetadata.getModificationDate() != null) {
                    metadata.setMetaDataProperty("ModificationDate", pdfMetadata.getModificationDate());
                }
                if (pdfMetadata.getProducer() != null) {
                    metadata.setMetaDataProperty("Producer", pdfMetadata.getProducer());
                }
                if (pdfMetadata.getSubject() != null) {
                    metadata.setMetaDataProperty("Subject", pdfMetadata.getSubject());
                }
                if (pdfMetadata.getTitle() != null) {
                    metadata.setMetaDataProperty("Title", pdfMetadata.getTitle());
                }
                if (pdfMetadata.getTrapped() != null) {
                    metadata.setMetaDataProperty("Trapped", pdfMetadata.getTrapped());
                }

                // xmp
                try {
                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    pdfMetadata.exportXMP(out);
                    metadata.setXmp(new ByteArrayInputStream(out.toByteArray()));
                } catch (Exception e) {
                    log.error("extractMetadata: cannot import embedded XMP data for document [{}]: ", asset.getPath(), e);
                    log.warn("Failed to set xmp metadata for the asset {}, trying to extract using DefaultFormatHandler", asset.getPath());

                    // The fallback reads a second, independent stream of the original;
                    // close it here so it does not leak.
                    // NOTE(review): assumes execGenericProcessor consumes the stream
                    // before returning - confirm against AbstractAssetHandler.
                    InputStream fallbackStream = null;
                    try {
                        fallbackStream = asset.getOriginal().getStream();
                        execGenericProcessor(fallbackStream, metadata);
                    } finally {
                        IOUtils.closeQuietly(fallbackStream);
                    }
                }
            }

        } catch (IOException e) {
            log.warn("extractMetadata: error while extracting metadata from PDF [{}]: ", asset.getPath(), e);
        } catch (PDFException e) {
            log.warn("extractMetadata: error while extracting metadata from PDF [{}]: ", asset.getPath(), e);
        } finally {
            // release in reverse order of acquisition; each close is independent
            if (pdfDoc != null) {
                try {
                    pdfDoc.close();
                } catch (Exception e) {
                    log.debug("extractMetadata: error while closing PDF document [{}]: ", asset.getPath(), e);
                }
            }
            if (byteReader != null) {
                try {
                    byteReader.close();
                } catch (IOException e) {
                    log.debug("extractMetadata: error while closing byteReader [{}]: ", asset.getPath(), e);
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    log.debug("extractMetadata: error while closing inputStream [{}]: ", asset.getPath(), e);
                }
            }
        }

        setMimetype(metadata, asset);
        return metadata;
    }

    /**
     * Reads the component configuration on activation: the
     * <code>raster.annotation</code> property is stored in {@link #rasterAnn},
     * falling back to <code>false</code>.
     *
     * @param componentContext the context carrying the component properties
     */
    @Activate
    protected void activate(ComponentContext componentContext) {
        final Object configuredValue = componentContext.getProperties().get(RASTER_ANNOTATION);
        rasterAnn = PropertiesUtil.toBoolean(configuredValue, false);
    }

    /**
     * Unlocks a password-encrypted pdf with the default key obtained from
     * {@link SecurityKeyPassword#newInstance}. Documents using any other
     * encryption type are left untouched.
     *
     * @param pdfDoc the document to unlock
     * @throws PDFSecurityException
     * @throws PDFIOException
     * @throws PDFInvalidDocumentException
     */
    private void unlock(PDFDocument pdfDoc) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        if (!pdfDoc.getEncryptionType().equals(PDFEncryptionType.Password)) {
            return;
        }
        pdfDoc.unlock(SecurityKeyPassword.newInstance(pdfDoc));
    }

    /**
     * Opens the given file for random read/write access.
     *
     * @param file   the file to open
     * @param create if <code>true</code>, any pre-existing file is deleted
     *               first, missing parent directories are created and a new
     *               empty file is created before it is opened
     * @return the file opened in "rw" mode
     * @throws IOException if a pre-existing file cannot be deleted or the file
     *                     cannot be created or opened
     */
    private RandomAccessFile getRandomAccessFileForWriting(File file, boolean create) throws IOException {
        if (!create) {
            return new RandomAccessFile(file, "rw");
        }

        file.delete();
        if (file.exists()) {
            throw new IOException("Failed to delete pre-existing file: \"" + file.getAbsolutePath() + "\".");
        }

        final File parentDir = file.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        file.createNewFile();

        return new RandomAccessFile(file, "rw");
    }

	/**
	 * Creates a {@link ByteWriter} backed by a freshly (re-)created file.
	 *
	 * @param file the file to write to; a pre-existing file is replaced
	 * @return a byte writer on top of a random access file
	 * @throws IOException if the file cannot be (re-)created or opened
	 */
	public ByteWriter getRAFByteWriter(File file) throws IOException {
	    return new RandomAccessFileByteWriter(getRandomAccessFileForWriting(file, true));
	}

    /**
     * {@inheritDoc}
     * <p>
     * This handler always reports sub asset support; see
     * {@link #processSubAssets(Asset)}.
     */
    public boolean canHandleSubAssets() {
        return true;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Illustrator files get one PNG sub asset per rasterized image of each
     * optional content group (layer); all other PDFs get one single-page PDF
     * sub asset per page. Existing sub assets and the pages relation are
     * removed first. Sub assets themselves are never processed, to avoid
     * endless recursion. Errors are logged and never propagated.
     *
     * @param asset the asset to create sub assets for
     * @return the paths of the sub assets that were created (possibly empty)
     */
    public List<String> processSubAssets(final Asset asset) {
        final List<String> subAssets = new ArrayList<String>();
        if (asset.isSubAsset()) {
            // we do not continue processing here, otherwise we would enter an
            // endless processing stack
            return subAssets;
        }

        InputStream is = null;
        PDFDocument pdfDoc = null;

        try {
            is = asset.getOriginal().getStream();
            pdfDoc = parseDocument(is);
            boolean canRead = true;
            if (pdfDoc.isEncrypted()) {
                try {
                    unlock(pdfDoc);
                } catch (Exception e) {
                    log.warn("processSubAssets: unable to decrypt document [{}]: ", asset.getPath(), e);
                    canRead = false;
                }
            }
            if (canRead) {
                String mimeType = asset.getMimeType();
                // changes made to the asset are not saved until manually later on.
                final boolean oldBatchMode = asset.isBatchMode();
                asset.setBatchMode(true);
                try {
                    AssetManager assetManager = asset.getOriginal().getResourceResolver().adaptTo(AssetManager.class);
                    mimeType = detectMimeType(asset, mimeType);

                    if (isIllustratorMimeType(mimeType)) {
                        cleanup(asset, assetManager);
                        createLayerSubAssets(asset, pdfDoc, subAssets);
                    } else {
                        PDFPageTree pages = pdfDoc.requirePages();
                        cleanup(asset, assetManager);
                        if (pages == null) {
                            return Collections.emptyList();
                        }
                        createPageSubAssets(asset, pdfDoc, pages, subAssets);
                    }

                    // now save the changes made to the asset.
                    asset.adaptTo(Node.class).getSession().save();
                } finally {
                    // restore the caller's batch mode on every exit path,
                    // including the early return and any exception
                    asset.setBatchMode(oldBatchMode);
                }
            }
        } catch (Exception e) {
            log.warn("processSubAssets: error while creating sub assets for asset [{}]: ", asset.getPath(), e);
        } finally {
            if (pdfDoc != null) {
                try {
                    pdfDoc.close();
                } catch (Exception e) {
                    log.warn("processSubAssets: error while closing parent pdf document [{}]: ", asset.getPath(), e);
                }
            }
            IOUtils.closeQuietly(is);
        }
        return subAssets;
    }

    /**
     * Tells whether the given mime type is one of the two Illustrator mime types.
     */
    private static boolean isIllustratorMimeType(final String mimeType) {
        return StringUtils.equals(mimeType, AI_MIMETYPE_1) || StringUtils.equals(mimeType, AI_MIMETYPE_2);
    }

    /**
     * Re-detects the mime type from the content of the original rendition,
     * unless the asset is already flagged as an Illustrator file or no mime
     * type service is bound; in those cases the given mime type is returned.
     */
    private String detectMimeType(final Asset asset, final String mimeType) throws IOException {
        if (isIllustratorMimeType(mimeType) || (null == mimeTypeService)) {
            return mimeType;
        }
        // PdfHandler#processSubAssets behaves differently for AI PDF files and "regular" PDF files
        // DamMimeTypeServiceImpl#isAIFile is asked to determine if input PDF was AI-generated
        // (DamMimeTypeServiceImpl#isAIFile is invoked via DamMimeTypeServiceImpl#getMimeType)
        InputStream pdfContentStream = asset.getOriginal().getStream();
        try {
            // wrap 'pdfContentStream' to BufferedInputStream when it does not support
            // mark/reset since that's what mimeTypeService#getMimeType expects
            InputStream markableStream = pdfContentStream.markSupported()
                    ? pdfContentStream
                    : new BufferedInputStream(pdfContentStream);
            return mimeTypeService.getMimeType(asset.getName(), markableStream);
        } finally {
            IOUtils.closeQuietly(pdfContentStream);
        }
    }

    /**
     * Rasterizes the document once per optional content group (with only that
     * group switched on) and adds every rendered image as a PNG sub asset
     * named <code>layerN.png</code>. A failure on one image is logged and the
     * remaining images are still processed. Declared with a broad
     * <code>throws</code> because the caller handles every failure uniformly.
     */
    private void createLayerSubAssets(final Asset asset, final PDFDocument pdfDoc, final List<String> subAssets)
            throws Exception {
        PDFFontSet fontSet = fontManagerService.getPdfFontSet();
        RasterizationOptions options = new RasterizationOptions();
        options.setFontSet(fontSet);
        options.processAnnotations(rasterAnn);

        PDFOCProperties ocProps = pdfDoc.requireCatalog().getOCProperties();
        if (ocProps == null) {
            // document has no optional content, hence no layers to extract
            return;
        }

        PDFOCConfig defaultOCConfig = ocProps.getDefaultOCConfigDict();
        defaultOCConfig.setBaseState(PDFOCBaseState.OFF);
        Iterator<PDFOCGroup> itr = ocProps.getOCGs().iterator();
        int layer = 1;
        while (itr.hasNext()) {
            PDFOCGroup ocg = itr.next();
            // show only the current group
            defaultOCConfig.setONList(PDFOCGroupArray.newInstance(pdfDoc, ocg));
            defaultOCConfig.setOFFList(null);
            PageRasterizer rasterizer = new PageRasterizer(pdfDoc.requirePages(), options);
            while (rasterizer.hasNext()) {
                FileOutputStream itout = null;
                File imageTmpFile = null;
                InputStream iis = null;
                try {
                    BufferedImage image = rasterizer.next();
                    imageTmpFile = File.createTempFile("image", ".tmp");
                    Layer layerImage = new Layer(image);
                    String fileName = "layer" + layer + ".png";
                    itout = FileUtils.openOutputStream(imageTmpFile);
                    layerImage.write(DamConstants.THUMBNAIL_MIMETYPE, 1.0, itout);
                    iis = FileUtils.openInputStream(imageTmpFile);
                    Asset subAsset = asset.addSubAsset(fileName, DamConstants.THUMBNAIL_MIMETYPE, iis);
                    subAssets.add(subAsset.getPath());
                    updatePageRelations(asset, subAsset.getPath());
                } catch (Exception e) {
                    log.warn("processSubAssets: error while creating sub assets for asset [{}]: ", asset.getPath(), e);
                } finally {
                    IOUtils.closeQuietly(iis);
                    IOUtils.closeQuietly(itout);
                    FileUtils.deleteQuietly(imageTmpFile);
                }
                // the counter advances even when an image failed
                layer++;
            }
        }
    }

    /**
     * Extracts every page into a single-page PDF and adds it as a sub asset
     * named <code>pageN.pdf</code>. A failure while storing one page is logged
     * and the remaining pages are still processed. Declared with a broad
     * <code>throws</code> because the caller handles every failure uniformly.
     */
    private void createPageSubAssets(final Asset asset, final PDFDocument pdfDoc, final PDFPageTree pages,
            final List<String> subAssets) throws Exception {
        Iterator<PDFPage> iter = pages.iterator();
        PMMService pmmService = new PMMService(pdfDoc);
        for (int i = 0; iter.hasNext(); i++) {
            PDFDocument document = pmmService.extractPages(iter.next(), 1, PMMOptions.newInstance(PMMOptions.AllOptions),
                PDFOpenOptions.newInstance());
            final String fileName = "page" + (i + 1) + ".pdf";

            File tmpFile = null;
            FileInputStream fis = null;
            ByteWriter byteWriter = null;

            try {
                // write to temp first
                tmpFile = File.createTempFile(fileName, ".pdf");
                byteWriter = getRAFByteWriter(tmpFile);
                document.save(byteWriter, PDFSaveFullOptions.newInstance());
                fis = new FileInputStream(tmpFile);
                // create subasset
                final Asset subAsset = asset.addSubAsset(fileName, CONTENT_MIMETYPE, fis);
                subAssets.add(subAsset.getPath());
                updatePageRelations(asset, subAsset.getPath());
            } catch (Exception e) {
                log.warn("createSubAsset: error while creating subasset [" + (i + 1) + "] for [{}]: ", asset.getPath(), e);
            } finally {
                if (document != null) {
                    try {
                        document.close();
                    } catch (Exception e) {
                        log.warn("processSubAssets: error while closing subAsset document [{}]: ", asset.getPath(), e);
                    }
                }
                if (byteWriter != null) {
                    try {
                        byteWriter.close();
                    } catch (IOException e) {
                        log.warn("processSubAssets: error while closing byteWriter [{}]: ", asset.getPath(), e);
                    }
                }

                IOUtils.closeQuietly(fis);
                FileUtils.deleteQuietly(tmpFile);
            }
        }
    }

    /**
     * Records the given sub asset in the pages relation
     * ({@link DamConstants#RELATION_ASSET_PAGES}) of the asset.
     *
     * @param asset        the parent asset
     * @param subAssetPath path of the sub asset to relate
     */
    private void updatePageRelations(Asset asset, String subAssetPath) {
        asset.adaptTo(com.adobe.granite.asset.api.Asset.class)
            .addRelation(DamConstants.RELATION_ASSET_PAGES, subAssetPath);
    }

    /**
     * Removes all existing sub assets of the asset as well as its pages
     * relation. If any sub asset was removed the change is committed, even
     * when removing the relation fails. Failures are logged at debug level and
     * not propagated.
     *
     * @param asset        the asset to clean up
     * @param assetManager the manager used to remove the sub assets
     */
    private void cleanup(final Asset asset, AssetManager assetManager) {
        // cleanup subassets
        ResourceResolver resolver = (asset.adaptTo(Resource.class)).getResourceResolver();
        Collection<Asset> subAssets = asset.getSubAssets();
        boolean saveReqd = !subAssets.isEmpty();
        for (Asset subAsset : subAssets) {
            assetManager.removeAsset(subAsset.getPath());
        }

        try {
            // clean up pages relation
            com.adobe.granite.asset.api.Asset graniteAsset = asset.adaptTo(com.adobe.granite.asset.api.Asset.class);
            graniteAsset.removeRelation(DamConstants.RELATION_ASSET_PAGES);
        } catch (AssetException ae) {
            log.debug("Exception occurred while deleting "
                + DamConstants.RELATION_ASSET_PAGES + " relation", ae);
        }

        // commit separately so that a failing relation removal does not leave
        // the sub asset removals uncommitted
        if (saveReqd) {
            try {
                resolver.commit();
            } catch (PersistenceException e) {
                log.debug("Exception occurred while removing subassets", e);
            }
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Announces the PDF mime type and <code>application/illustrator</code>.
     */
    public String[] getMimeTypes() {
        // let PostScriptHandler take care of 'AI_MIMETYPE_1'
        final String[] supportedMimeTypes = { CONTENT_MIMETYPE, AI_MIMETYPE_2 };
        return supportedMimeTypes;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Delegates to {@link #getImage(Rendition, Dimension)} without a size
     * constraint.
     */
    public BufferedImage getImage(final Rendition rendition) throws IOException {
        final Dimension noSizeConstraint = null;
        return getImage(rendition, noSizeConstraint);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Rasterizes the first page of the rendition. Rasterization errors are
     * ignored where possible and annotations are rendered according to the
     * <code>raster.annotation</code> configuration.
     *
     * @param rendition the PDF rendition to render
     * @param dim       maximum size of the image, may be <code>null</code>
     * @return the rendered image, or <code>null</code> if it could not be
     *         generated
     * @throws IOException if the rasterization options cannot be created
     */
    public BufferedImage getImage(final Rendition rendition, Dimension dim) throws IOException {
        RasterizationOptions rasterizationOptions = null;
        try {
            rasterizationOptions = new RasterizationOptions();
        } catch (Throwable e) {
            // Throwable is caught on purpose (kept from the original code);
            // the cause is preserved so the failure can be diagnosed.
            log.warn("getImage: unable to create rasterization option for [{}]: ", rendition.getPath(), e);
            throw new IOException(e.getMessage(), e);
        }
        try {
            PDFFontSet fontSet = fontManagerService.getPdfFontSet();
            rasterizationOptions.setFontSet(fontSet);
        } catch (Exception e) {
            // best effort: rasterize without the font set
            log.warn("exception: fontmanagerserviceimpl with [{}]", e.getMessage(), e);
        }
        // Required for ignoring the errors occurred due to absence of the
        // JPXDecoder. Fix for CQ5-17670.
        rasterizationOptions.ignoreErrors(true);
        rasterizationOptions.processAnnotations(rasterAnn);
        try {
            return getImage(rendition, rasterizationOptions, dim);
        } catch (Exception e) {
            log.warn("getImage: unable to generate image for [{}]: ", rendition.getPath(), e);
        }
        return null;
    }

    /**
     * Opens the rendition as a PDF document (with the custom JPX and JBIG2
     * decode filters registered) and rasterizes its first page.
     *
     * @param rendition            the PDF rendition to render
     * @param rasterizationOptions options for the rasterizer; width and height
     *                             are set by this method
     * @param dim                  maximum size of the image, may be <code>null</code>
     * @return the image of the first page, or <code>null</code> if the document
     *         is encrypted and cannot be unlocked or no image was produced
     * @throws Exception if the document cannot be read or rasterized
     */
    private BufferedImage getImage(Rendition rendition, RasterizationOptions rasterizationOptions, Dimension dim) throws Exception {
        InputStream stream = null;
        ByteReader byteReader = null;
        PDFDocument pdfDoc = null;
        BufferedImage image = null;

        try {
            stream = rendition.getStream();
            byteReader = new InputStreamByteReader(stream);
            PDFOpenOptions openOptions = PDFOpenOptions.newInstance();
            List<CustomFilter> filters = new ArrayList<CustomFilter>();
            filters.add(new JPXDecodeFilter());
            filters.add(new JBIG2CustomFilter());
            openOptions.registerCustomFilters(filters);
            pdfDoc = PDFDocument.newInstance(byteReader, openOptions);
            if (pdfDoc.isEncrypted()) {
                // avoid display of whitepage when pdf is secured.
                // TODO: fix CQ5-6981
                try {
                    unlock(pdfDoc);
                } catch (Exception e) {
                    log.warn("getImage: unable to decrypt document [{}]: ", rendition.getPath(), e);
                    return null;
                }
            }
            PDFPageTree pages = pdfDoc.requirePages();
            PDFPage page = pages.getPage(0);
            Dimension d = calculateRasterizeDim(dim, page.getCropBox());
            rasterizationOptions.setWidth(d.width);
            rasterizationOptions.setHeight(d.height);
            PageRasterizer rasterizer = new PageRasterizer(pages, rasterizationOptions);
            if (rasterizer.hasNext()) {
                // get the first page.
                image = rasterizer.next();
                if (image != null) {
                    Layer layer = new Layer(image);
                    image = layer.getImage();
                }
            }

        } finally {
            // nested so that a failing close cannot prevent the remaining
            // resources from being released
            try {
                if (pdfDoc != null) {
                    pdfDoc.close();
                }
            } finally {
                try {
                    if (byteReader != null) {
                        byteReader.close();
                    }
                } finally {
                    IOUtils.closeQuietly(stream);
                }
            }
        }
        return image;
    }

    // ----------------------< helpers >----------------------------------------
    /**
     * This will parse a document. The caller owns the input stream and the
     * returned document and has to close both. The byte reader created on top
     * of the stream is closed here only if the document cannot be opened.
     * 
     * @param input The input stream for the document.
     * @return The document.
     * @throws IOException If there is an error parsing the document.
     * @throws PDFException If there is any error in reading the PDF document.
     */
    private static PDFDocument parseDocument(InputStream input) throws IOException, PDFException {
        final ByteReader byteReader = new InputStreamByteReader(input);
        boolean opened = false;
        try {
            PDFDocument pdfDoc = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
            opened = true;
            return pdfDoc;
        } catch (PDFException e) {
            log.warn("parseDocument: error while reading PDF: ", e);
            throw e;
        } finally {
            if (!opened) {
                // nobody else will ever see this reader, so release it here
                try {
                    byteReader.close();
                } catch (IOException closeError) {
                    log.debug("parseDocument: error while closing byteReader: ", closeError);
                }
            }
        }
    }

    /**
     * Computes the size the page has to be rasterized with so that it fits
     * into the requested dimension while keeping the aspect ratio of the crop
     * box. A requested width or height of zero or less is unconstrained; if
     * both are unconstrained the crop box size is used unscaled.
     *
     * @param dim     the requested maximum size, may be <code>null</code>
     * @param cropBox the crop box of the page, may be <code>null</code>
     * @return the rasterization size; an empty (0x0) dimension if either
     *         argument is <code>null</code> or the crop box has a zero side
     * @throws PDFException if the crop box cannot be read
     */
    private Dimension calculateRasterizeDim(Dimension dim, PDFRectangle cropBox) throws PDFException {
        if (dim == null || cropBox == null) {
            return new Dimension();
        }

        final double boxWidth = cropBox.width();
        final double boxHeight = cropBox.height();
        if (boxWidth == 0 || boxHeight == 0) {
            return new Dimension();
        }

        final boolean widthRequested = dim.getWidth() > 0;
        final boolean heightRequested = dim.getHeight() > 0;

        final double factor;
        if (widthRequested && heightRequested) {
            // fit into both constraints: the smaller factor wins
            factor = Math.min(dim.getWidth() / boxWidth, dim.getHeight() / boxHeight);
        } else if (widthRequested) {
            factor = dim.getWidth() / boxWidth;
        } else if (heightRequested) {
            factor = dim.getHeight() / boxHeight;
        } else {
            factor = 1.0;
        }

        return new Dimension((int) Math.round(factor * boxWidth), (int) Math.round(factor * boxHeight));
    }

    /**
     * {@inheritDoc}
     * <p>
     * Sub assets are handled by the super class; for every other asset the
     * references are resolved through the asset reference resolver.
     */
    public Iterator processRelated(final Asset asset) {
        if (asset.isSubAsset()) {
            return super.processRelated(asset);
        }
        return caRefResolver.resolve(asset);
    }
    
    private static class JPXDecodeFilter implements CustomDecodeFilter {

        public ASName getName() {
            return ASName.k_JPXDecode;
        }

        public InputStream decode(InputStream is, FilterParams filterParams) throws CustomFilterException {

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            try {
                BufferedImage bufImage = ImageIO.read(is);
                if (bufImage != null) {
                    return new ByteArrayInputStream(((DataBufferByte) bufImage.getData().getDataBuffer()).getData());
                }
                return null;
            } catch (IOException e) {
                log.error("Error decoding the JPX: ", e.getMessage(), e);
            } finally {
                try {
                    os.close();
                } catch (IOException e) {
                    log.error("Error closing the stream: ", e.getMessage(), e);
                }
            }
            return null;
        }

    }
    /**
     * Custom decode filter registered under the <code>JBIG2Decode</code>
     * filter name, backed by the JPedal {@link JBIG2Decoder}.
     */
    private static class JBIG2CustomFilter implements CustomDecodeFilter {

        public ASName getName() {
            return ASName.k_JBIG2Decode;
        }

        /**
         * Decodes a JBIG2 encoded stream and returns the bitmap data of its
         * first page.
         *
         * @param in           the encoded data; it is read completely
         * @param filterParams filter parameters; a non-null
         *                     <code>JBIG2Globals</code> entry is passed to the
         *                     decoder as global data
         * @return a stream over the decoded bitmap data
         * @throws CustomFilterException if reading or decoding fails
         */
        public InputStream decode(InputStream in, FilterParams filterParams)
                throws CustomFilterException
        {
            try {
                final byte[] encoded = IOUtils.toByteArray(in);

                final JBIG2Decoder decoder = new JBIG2Decoder();
                if (filterParams != null && filterParams.containsKey("JBIG2Globals")) {
                    final byte[] globals = (byte[]) filterParams.get("JBIG2Globals");
                    if (globals != null) {
                        decoder.setGlobalData(globals);
                    }
                }
                decoder.decodeJBIG2(encoded);

                return new ByteArrayInputStream(decoder.getPageAsJBIG2Bitmap(0).getData(true));
            } catch (IOException e) {
                throw new CustomFilterException("Could not decode using custom filter", e);
            } catch (JBIG2Exception e) {
                throw new CustomFilterException("Could not decode using custom filter", e);
            }
        }

    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy