net.sf.filePiper.processors.Pdf2JpegProcessor Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of file-piper Show documentation

This project is a GUI utility for processing files. It allows selecting a set of source files and a pipeline of processes to apply to those files. The application presents a nice-looking user interface where you can define profiles for your repetitive tasks. It provides pre-defined processors for common file manipulation tasks such as Copy, Head, Tail, Chunk, Search, Replace, Zip, Unzip... But the biggest value of this file processor tool is the ability to easily add custom file processors written in Java.

The newest version!

package net.sf.filePiper.processors;


import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import net.sf.filePiper.model.ExecutionPhase;
import net.sf.filePiper.model.FileProcessor;
import net.sf.filePiper.model.FileProcessorEnvironment;
import net.sf.filePiper.model.InputFileInfo;
import net.sf.filePiper.model.StatusHolder;
import net.sf.sfac.file.FilePathUtils;
import net.sf.sfac.gui.editor.ObjectEditor;
import net.sf.sfac.gui.editor.cmp.ReadOnlyObjectEditor;
import net.sf.sfac.setting.Settings;

import org.apache.log4j.Logger;

import com.lowagie.text.pdf.PRStream;
import com.lowagie.text.pdf.PdfName;
import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.pdf.PdfReader;
import com.lowagie.text.pdf.PdfStream;


/**
 * Processor extracting Jpeg images from Pdf files.
 * <p>
 * Each input Pdf is scanned object by object; every stream object whose subtype is <code>Image</code> and whose filter
 * is <code>DCTDecode</code> is written out, un-decoded, as one output file named <code>imgNNN.jpg</code>.
 * 
 * @author BEROL
 */
public class Pdf2JpegProcessor implements FileProcessor {


    Logger log = Logger.getLogger(Pdf2JpegProcessor.class);

    /** Tracks input/output/byte counters and builds the status messages shown while running and when done. */
    private StatusHolder holder = new StatusHolder() {


        /**
         * Builds the "in progress" message: number of Jpeg written so far, number of Pdf started, bytes written.
         */
        @Override
        protected String getRunningMessage() {
            StringBuilder sb = new StringBuilder();
            sb.append("Extracting ");
            appendCount(getOutputFileCount(), "Jpeg", sb);
            sb.append(" from ");
            appendCount(getInputFileCount(), "Pdf", sb);
            sb.append(" (");
            appendCount(getByteCount(), "byte", sb);
            sb.append(")...");
            return sb.toString();
        }


        /**
         * Builds the final message. Jpeg images are the outputs and Pdf files are the inputs, so the counters are
         * used in the same roles as in {@link #getRunningMessage()}.
         */
        @Override
        protected String getDoneMessage() {
            StringBuilder sb = new StringBuilder();
            appendCount(getOutputFileCount(), "Jpeg", sb);
            sb.append(" extracted from ");
            appendCount(getInputFileCount(), "Pdf", sb);
            sb.append(" (");
            appendCount(getByteCount(), "byte", sb);
            sb.append(").");
            return sb.toString();
        }
    };


    /**
     * @return the display name of this processor.
     */
    public String getProcessorName() {
        return "Pdf to Jpg";
    }


    /**
     * Nothing to initialize: this processor does not read anything from the settings.
     * 
     * @param sett
     *            the settings (unused).
     */
    public void init(Settings sett) {
    }


    /**
     * @param inputCardinality
     *            the cardinality of the input (ignored).
     * @return always <code>MANY</code>, whatever the input cardinality.
     */
    public int getOutputCardinality(int inputCardinality) {
        return MANY;
    }


    /**
     * Extracts every DCT-encoded image stream of the given Pdf and writes each one to its own output.
     * <p>
     * For each extracted image, <code>info</code> is updated before asking the environment for an output stream: the
     * input is set to <code>&lt;pdfPath&gt;#imgNNN.jpg</code>, the proposed path to the Pdf directory concatenated with
     * the Pdf proposed name, the proposed name to <code>imgNNN</code> and the proposed extension to <code>jpg</code>.
     * The scan stops early as soon as <code>env.shouldContinue()</code> returns false.
     * 
     * @param is
     *            the stream from which the Pdf content is read.
     * @param info
     *            information about the current input file; modified for each extracted image.
     * @param env
     *            the environment providing output streams and the continuation flag.
     * @throws IOException
     *             if the Pdf cannot be read or an image cannot be written.
     */
    public void process(InputStream is, InputFileInfo info, FileProcessorEnvironment env) throws IOException {
        // Capture the Pdf location and name up front: 'info' is overwritten for every extracted image below.
        String pdfPath = info.getInput().getAbsolutePath();
        String pdfDirectory = FilePathUtils.getDirectoryPath(pdfPath);
        String pdfName = info.getProposedName();
        if (log.isDebugEnabled()) log.debug("Processing PDF: " + pdfPath);
        holder.inputFileStarted();

        PdfReader reader = new PdfReader(is);
        int xrefSize = reader.getXrefSize();
        int count = 0;
        for (int i = 0; (i < xrefSize) && env.shouldContinue(); i++) {
            PdfObject pdfobj = reader.getPdfObject(i);
            if (pdfobj != null && pdfobj.isStream()) {
                PdfStream stream = (PdfStream) pdfobj;
                PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE);
                PdfObject filter = stream.get(PdfName.FILTER);
                // Only image streams with a DCTDecode filter are taken; their raw bytes are written as-is to a .jpg.
                if (PdfName.IMAGE.equals(pdfsubtype) && PdfName.DCTDECODE.equals(filter)) {
                    count++;
                    byte[] image = PdfReader.getStreamBytesRaw((PRStream) stream);
                    String imageName = "img" + intToString(count);
                    info.setInput(new File(pdfPath + "#" + imageName + ".jpg"));
                    info.setProposedPath(FilePathUtils.concatPaths(pdfDirectory, pdfName));
                    info.setProposedName(imageName);
                    info.setProposedExtension("jpg");
                    OutputStream fos = new BufferedOutputStream(env.getOutputStream(info));
                    try {
                        holder.outputFileStarted();
                        fos.write(image);
                        holder.bytesProcessed(image.length);
                    } finally {
                        // Always release the output, even if the write fails.
                        fos.close();
                    }
                }
            }
        }
        if (log.isDebugEnabled()) log.debug("Extracted " + count + " images from PDF.");
    }


    /**
     * Formats a number with leading zeros so that it is at least 3 characters long (e.g. 7 gives "007").
     * 
     * @param i
     *            the number to format (image counter, starting at 1).
     * @return the zero-padded decimal representation.
     */
    private String intToString(int i) {
        StringBuilder sb = new StringBuilder();
        if (i < 100) sb.append('0');
        if (i < 10) sb.append('0');
        sb.append(i);
        return sb.toString();
    }


    /**
     * @return a read-only editor that only displays a description: this processor has no editable option.
     */
    public ObjectEditor getEditor() {
        return new ReadOnlyObjectEditor("Extract Jpeg images from Pdf files");
    }


    /**
     * Resets the status counters at the start of a batch.
     * 
     * @param env
     *            the processing environment (unused here).
     */
    public void startBatch(FileProcessorEnvironment env) {
        holder.reset(ExecutionPhase.STARTING);
    }


    /**
     * Copies the environment's current phase into the status holder at the end of a batch.
     * 
     * @param env
     *            the processing environment providing the current phase.
     */
    public void endBatch(FileProcessorEnvironment env) {
        holder.setCurrentPhase(env.getCurrentPhase());
    }


    /**
     * @return the status message built by the status holder.
     */
    public String getStatusMessage() {
        return holder.getStatusMessage();
    }


}