com.adobe.platform.operation.internal.service.ExtractPDFAPI Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdftools-extract-sdk Show documentation
PDFTools Extract SDK
The newest version!
/*
 * Copyright 2019 Adobe
 * All Rights Reserved.
 *
 * NOTICE: Adobe permits you to use, modify, and distribute this file in
 * accordance with the terms of the Adobe license agreement accompanying
 * it. If you have received this file from a source other than Adobe,
 * then your use, modification, or distribution of it requires the prior
 * written permission of Adobe.
 */

package com.adobe.platform.operation.internal.service;

import com.adobe.platform.operation.exception.SdkException;
import com.adobe.platform.operation.internal.ExtensionMediaTypeMapping;
import com.adobe.platform.operation.internal.FileRefImpl;
import com.adobe.platform.operation.internal.InternalExecutionContext;
import com.adobe.platform.operation.internal.api.platform.CPFApi;
import com.adobe.platform.operation.internal.cpf.constants.CPFConstants;
import com.adobe.platform.operation.internal.cpf.constants.OperationKey;
import com.adobe.platform.operation.internal.cpf.dto.request.ExtractPDFOutputFormat;
import com.adobe.platform.operation.internal.cpf.dto.request.ExtractPDFParams;
import com.adobe.platform.operation.internal.cpf.dto.request.platform.CPFContentAnalyzerRequests;
import com.adobe.platform.operation.internal.cpf.dto.request.platform.Inputs;
import com.adobe.platform.operation.internal.cpf.dto.request.platform.Outputs;
import com.adobe.platform.operation.internal.cpf.dto.response.ExtractPDFOutput;
import com.adobe.platform.operation.internal.cpf.dto.response.platform.CPFContentAnalyzerResponse;
import com.adobe.platform.operation.internal.http.DefaultRequestHeaders;
import com.adobe.platform.operation.internal.http.HttpResponse;
import com.adobe.platform.operation.internal.http.MultiPartHttpResponse;
import com.adobe.platform.operation.pdfops.constants.PDFElementType;
import com.adobe.platform.operation.pdfops.constants.TableStructureType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.mail.internet.MimeBodyPart;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Static entry points for the Extract PDF operation: one call submits the extract request through
 * {@link CPFApi}, the other fetches the result and writes it to a destination path.
 *
 * <p>NOTE(review): several declarations below use raw {@code List}/{@code HttpResponse} types. They
 * are left as found because the generic declarations of the collaborating classes are not visible
 * from this file - confirm against those declarations before parameterizing.
 */
public class ExtractPDFAPI {

    private static final String INPUT_FORMAT_SPECIFIER = "application/pdf";
    private static final String OUTPUT_EXTRACT_INFO_FORMAT_SPECIFIER = "application/json";
    private static final String OUTPUT_EXTRACT_RENDITION_FORMAT_SPECIFIER = "text/directory";
    private static final Logger LOGGER = LoggerFactory.getLogger(ExtractPDFAPI.class);

    /**
     * Builds the extract request for a single source file and submits it via
     * {@link CPFApi#cpfCreateOpsApi}.
     *
     * @param context                     execution context; its client config may override the analyzer id
     * @param sourceFileRef               the file to run the extraction on
     * @param elementsToExtract           elements to extract, passed through to {@link ExtractPDFParams}
     * @param elementsToExtractRenditions elements to render, passed through to {@link ExtractPDFParams}
     * @param tableOutFormat              table output format, passed through to {@link ExtractPDFParams}
     * @param charInfo                    character-info flag, passed through to {@link ExtractPDFParams}
     * @return the value of the {@code LOCATION_HEADER_NAME} header of the create response
     * @throws FileNotFoundException propagated unchanged from the underlying calls
     */
    public static String extractPdf(InternalExecutionContext context,
                                    FileRefImpl sourceFileRef, List elementsToExtract,
                                    List elementsToExtractRenditions, TableStructureType tableOutFormat, Boolean charInfo)
            throws FileNotFoundException {

        long startTimeMs = System.currentTimeMillis();
        ExtractPDFParams extractPDFParams = new ExtractPDFParams(elementsToExtract, elementsToExtractRenditions, tableOutFormat, charInfo);
        Inputs inputs = Inputs.builder(INPUT_FORMAT_SPECIFIER)
                .setParams(extractPDFParams)
                .build();
        Outputs outputs = new ExtractPDFOutputFormat(OUTPUT_EXTRACT_INFO_FORMAT_SPECIFIER,
                OUTPUT_EXTRACT_RENDITION_FORMAT_SPECIFIER);

        // A client-configured analyzer id wins; otherwise fall back to the built-in asset id.
        String configuredAnalyzerId = context.getClientConfig().getExtractAnalyzerId();
        String extractAnalyzerID = configuredAnalyzerId != null
                ? configuredAnalyzerId : CPFConstants.ExtractPDF.PAPI_CHAIN_ASSET_ID;
        CPFContentAnalyzerRequests analyzerRequests
                = new CPFContentAnalyzerRequests(extractAnalyzerID, inputs, outputs);

        // Prepare the sourceFileRefList
        List sourceFileRefList = new ArrayList<>();
        sourceFileRefList.add(sourceFileRef);

        HttpResponse response = CPFApi.cpfCreateOpsApi(context, analyzerRequests,
                sourceFileRefList, String.class, OperationKey.EXTRACT_PDF.toString());
        LOGGER.debug("Upload Operation Success Info - Latency(ms): {}", System.currentTimeMillis() - startTimeMs);
        return response.getHeaders().get(DefaultRequestHeaders.LOCATION_HEADER_NAME);
    }

    /**
     * Fetches the operation result from {@code location} via {@link CPFApi#cpfStatusApi}, parses the
     * multipart body with {@code ExtractDataParser} and writes it to {@code destinationPath} with
     * {@code ExtractDataZipper}. The response is always consumed before this method returns or throws.
     *
     * @param context         execution context used for the status call
     * @param location        location value identifying the operation result to fetch
     * @param destinationPath path the extracted output is written to
     * @param outputFormat    not used by this method
     * @throws SdkException if parsing or writing the downloaded result fails for any reason
     */
    public static void downloadAndSave(InternalExecutionContext context, String location, String destinationPath,
                                       ExtensionMediaTypeMapping outputFormat) {

        long startTimeMs = System.currentTimeMillis();
        HttpResponse response = CPFApi.cpfStatusApi(context, location, CPFContentAnalyzerResponse.class);
        LOGGER.debug("Download Operation Success Info - Latency(ms): {}", System.currentTimeMillis() - startTimeMs);
        try {
            // Cast and body access happen inside the try so the response is consumed even if they fail.
            MultiPartHttpResponse multiPartData = (MultiPartHttpResponse) response;
            List responseData = multiPartData.getResponseBodyParts();
            startTimeMs = System.currentTimeMillis();
            ExtractPDFOutput extractPDFOutput = ExtractDataParser.frameExtractSpecificOutput(responseData);
            ExtractDataZipper.zipExtractOutput(extractPDFOutput, destinationPath);
            LOGGER.debug("Response Manipulation Success Info - Latency(ms): {}", System.currentTimeMillis() - startTimeMs);
        } catch (IOException e) {
            // The trailing throwable argument makes SLF4J log the stack trace; the placeholder alone only prints toString().
            LOGGER.error("Error {} while writing downloaded file to location {} ", e, destinationPath, e);
            throw new SdkException("Exception encountered while downloading file", e);
        } catch (SdkException e) {
            // Already an SDK-level failure: let it reach the caller untouched.
            throw e;
        } catch (Exception e) {
            // Previously logged at debug and dropped, which made a failed save look like a success.
            LOGGER.error("Error while processing downloaded response for location {} ", destinationPath, e);
            throw new SdkException("Exception encountered while processing downloaded file", e);
        } finally {
            try {
                response.consume();
            } catch (IOException e) {
                LOGGER.error("Error while consuming file download response ", e);
            }
        }
    }
}