// com.adobe.platform.operation.internal.service.ExtractPDFAPI Maven / Gradle / Ivy
// The newest version!
/*
* Copyright 2019 Adobe
* All Rights Reserved.
*
* NOTICE: Adobe permits you to use, modify, and distribute this file in
* accordance with the terms of the Adobe license agreement accompanying
* it. If you have received this file from a source other than Adobe,
* then your use, modification, or distribution of it requires the prior
* written permission of Adobe.
*/
package com.adobe.platform.operation.internal.service;
import com.adobe.platform.operation.exception.SdkException;
import com.adobe.platform.operation.internal.ExtensionMediaTypeMapping;
import com.adobe.platform.operation.internal.FileRefImpl;
import com.adobe.platform.operation.internal.InternalExecutionContext;
import com.adobe.platform.operation.internal.api.platform.CPFApi;
import com.adobe.platform.operation.internal.cpf.constants.CPFConstants;
import com.adobe.platform.operation.internal.cpf.constants.OperationKey;
import com.adobe.platform.operation.internal.cpf.dto.request.ExtractPDFOutputFormat;
import com.adobe.platform.operation.internal.cpf.dto.request.ExtractPDFParams;
import com.adobe.platform.operation.internal.cpf.dto.request.platform.CPFContentAnalyzerRequests;
import com.adobe.platform.operation.internal.cpf.dto.request.platform.Inputs;
import com.adobe.platform.operation.internal.cpf.dto.request.platform.Outputs;
import com.adobe.platform.operation.internal.cpf.dto.response.ExtractPDFOutput;
import com.adobe.platform.operation.internal.cpf.dto.response.platform.CPFContentAnalyzerResponse;
import com.adobe.platform.operation.internal.http.DefaultRequestHeaders;
import com.adobe.platform.operation.internal.http.HttpResponse;
import com.adobe.platform.operation.internal.http.MultiPartHttpResponse;
import com.adobe.platform.operation.pdfops.constants.PDFElementType;
import com.adobe.platform.operation.pdfops.constants.TableStructureType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.internet.MimeBodyPart;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Static helpers that drive the Extract PDF operation through {@link CPFApi}: one call submits
 * the extraction request, the other fetches the result and writes it to a destination path.
 *
 * NOTE(review): several declarations below use raw types ({@code List}, {@code HttpResponse},
 * {@code MultiPartHttpResponse}). The otherwise-unused imports of {@code PDFElementType} and
 * {@code MimeBodyPart} suggest type arguments were lost somewhere — confirm against the
 * signatures of the called APIs before parameterizing them.
 */
public class ExtractPDFAPI {

    /** Media type declared for the input document. */
    private static final String INPUT_FORMAT_SPECIFIER = "application/pdf";

    /** Media type requested for the extracted element information. */
    private static final String OUTPUT_EXTRACT_INFO_FORMAT_SPECIFIER = "application/json";

    /** Media type requested for the extracted renditions. */
    private static final String OUTPUT_EXTRACT_RENDITION_FORMAT_SPECIFIER = "text/directory";

    private static final Logger LOGGER = LoggerFactory.getLogger(ExtractPDFAPI.class);

    /**
     * Submits an Extract PDF request for the given source file.
     *
     * @param context                     execution context; its client config may override the analyzer id
     * @param sourceFileRef               the file to run the extraction on
     * @param elementsToExtract           elements to extract, forwarded to {@link ExtractPDFParams}
     * @param elementsToExtractRenditions elements to extract renditions for, forwarded to {@link ExtractPDFParams}
     * @param tableOutFormat              table output format, forwarded to {@link ExtractPDFParams}
     * @param charInfo                    character-info flag, forwarded to {@link ExtractPDFParams}
     * @return the value of the {@link DefaultRequestHeaders#LOCATION_HEADER_NAME} header of the create response
     * @throws FileNotFoundException propagated unchanged from the underlying create call
     */
    public static String extractPdf(InternalExecutionContext context,
                                    FileRefImpl sourceFileRef, List elementsToExtract,
                                    List elementsToExtractRenditions, TableStructureType tableOutFormat,
                                    Boolean charInfo)
            throws FileNotFoundException {
        long startTimeMs = System.currentTimeMillis();

        ExtractPDFParams extractPDFParams =
                new ExtractPDFParams(elementsToExtract, elementsToExtractRenditions, tableOutFormat, charInfo);
        Inputs inputs = Inputs.builder(INPUT_FORMAT_SPECIFIER)
                .setParams(extractPDFParams)
                .build();
        Outputs outputs = new ExtractPDFOutputFormat(OUTPUT_EXTRACT_INFO_FORMAT_SPECIFIER,
                OUTPUT_EXTRACT_RENDITION_FORMAT_SPECIFIER);

        // A client-configured analyzer id takes precedence over the built-in default.
        String configuredAnalyzerId = context.getClientConfig().getExtractAnalyzerId();
        String extractAnalyzerID = configuredAnalyzerId != null
                ? configuredAnalyzerId
                : CPFConstants.ExtractPDF.PAPI_CHAIN_ASSET_ID;

        CPFContentAnalyzerRequests analyzerRequests =
                new CPFContentAnalyzerRequests(extractAnalyzerID, inputs, outputs);

        // Prepare the sourceFileRefList
        List sourceFileRefList = new ArrayList<>();
        sourceFileRefList.add(sourceFileRef);

        HttpResponse response = CPFApi.cpfCreateOpsApi(context, analyzerRequests,
                sourceFileRefList, String.class, OperationKey.EXTRACT_PDF.toString());
        LOGGER.debug("Upload Operation Success Info - Latency(ms): {}", System.currentTimeMillis() - startTimeMs);

        return response.getHeaders().get(DefaultRequestHeaders.LOCATION_HEADER_NAME);
    }

    /**
     * Fetches the result at {@code location}, parses the multipart body into an
     * {@link ExtractPDFOutput} and zips it to {@code destinationPath}. The response is always
     * consumed once it has been obtained, whether or not processing succeeds.
     *
     * @param context         execution context passed to the status call
     * @param location        location to query for the result
     * @param destinationPath path the zipped extract output is written to
     * @param outputFormat    not used by this method
     * @throws SdkException if parsing or writing the output fails with a checked exception
     */
    public static void downloadAndSave(InternalExecutionContext context, String location, String destinationPath,
                                       ExtensionMediaTypeMapping outputFormat) {
        long startTimeMs = System.currentTimeMillis();
        HttpResponse response = CPFApi.cpfStatusApi(context, location, CPFContentAnalyzerResponse.class);
        LOGGER.debug("Download Operation Success Info - Latency(ms): {}", System.currentTimeMillis() - startTimeMs);

        try {
            // Cast and body-part access live inside the try so the response is consumed
            // even when the response is not of the expected multipart shape.
            MultiPartHttpResponse multiPartData = (MultiPartHttpResponse) response;
            List responseData = multiPartData.getResponseBodyParts();

            startTimeMs = System.currentTimeMillis();
            ExtractPDFOutput extractPDFOutput = ExtractDataParser.frameExtractSpecificOutput(responseData);
            ExtractDataZipper.zipExtractOutput(extractPDFOutput, destinationPath);
            LOGGER.debug("Response Manipulation Success Info - Latency(ms): {}", System.currentTimeMillis() - startTimeMs);
        } catch (IOException e) {
            // Throwable goes last so SLF4J logs the stack trace instead of formatting it into the message.
            LOGGER.error("Error while writing downloaded file to location {} ", destinationPath, e);
            throw new SdkException("Exception encountered while downloading file", e);
        } catch (RuntimeException e) {
            // Surface the failure: the output was not written, so the caller must not see success.
            LOGGER.error("Error while processing extract output for location {} ", destinationPath, e);
            throw e;
        } catch (Exception e) {
            LOGGER.error("Error while processing extract output for location {} ", destinationPath, e);
            throw new SdkException("Exception encountered while processing extract output", e);
        } finally {
            try {
                response.consume();
            } catch (IOException e) {
                LOGGER.error("Error while consuming file download response ", e);
            }
        }
    }
}