All downloads are free. Search and download functionality uses the official Maven repository.

com.zebrunner.carina.utils.PDFUtil Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2020-2022 Zebrunner Inc (https://www.zebrunner.com).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package com.zebrunner.carina.utils;

import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;

import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Static helper that extracts plain text from PDF documents.
 *
 * @author Sergey Zagriychuk
 * @deprecated old/useless logic.
 */
@Deprecated(forRemoval = true, since = "1.0.5")
public final class PDFUtil {

    private PDFUtil() {
        // utility class: static access only, never instantiated
    }

    /**
     * Extracts the text of a page range from a PDF supplied as a stream.
     * <p>
     * A non-null stream is closed by this method once parsing has finished
     * (successfully or not). Text is extracted with position-based sorting
     * enabled on the {@link PDFTextStripper}.
     *
     * @param inputStream stream holding the PDF data; must not be {@code null}
     * @param startPage first page to extract, handed to {@link PDFTextStripper#setStartPage(int)}
     * @param endPage last page to extract, handed to {@link PDFTextStripper#setEndPage(int)}
     * @return the text produced by the stripper for the requested pages
     * @throws IllegalArgumentException if {@code inputStream} is {@code null}
     * @throws UncheckedIOException if parsing or text extraction raises an {@link IOException}
     */
    public static String readTxtFromPDF(InputStream inputStream, int startPage, int endPage) {
        if (inputStream == null) {
            throw new IllegalArgumentException("inputStream argument cannot be null");
        }

        final PDFParser parsed = parse(inputStream);

        // Both document wrappers are released when this try block exits.
        try (COSDocument lowLevelDocument = parsed.getDocument();
                PDDocument document = new PDDocument(lowLevelDocument)) {
            final PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);
            return stripper.getText(document);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Runs the PDF parser over the stream and returns the parser once parsing is done.
     * The stream and the random-access wrapper built on it are closed before returning.
     *
     * @param inputStream non-null stream holding the PDF data
     * @return parser on which {@code parse()} has already been invoked
     * @throws UncheckedIOException if reading or parsing raises an {@link IOException}
     */
    private static PDFParser parse(InputStream inputStream) {
        try (inputStream;
                RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(inputStream)) {
            final PDFParser parser = new PDFParser(source);
            parser.parse();
            return parser;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy