com.google.cloud.spring.vision.DocumentOcrTemplate Maven / Gradle / Ivy
Show all versions of spring-cloud-gcp-vision Show documentation
/*
* Copyright 2017-2019 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spring.vision;
import com.google.api.core.ApiFutureCallback;
import com.google.api.core.ApiFutures;
import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.paging.Page;
import com.google.cloud.spring.storage.GoogleStorageLocation;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Storage.BlobListOption;
import com.google.cloud.vision.v1.AsyncAnnotateFileRequest;
import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesResponse;
import com.google.cloud.vision.v1.Feature;
import com.google.cloud.vision.v1.Feature.Type;
import com.google.cloud.vision.v1.GcsDestination;
import com.google.cloud.vision.v1.GcsSource;
import com.google.cloud.vision.v1.ImageAnnotatorClient;
import com.google.cloud.vision.v1.InputConfig;
import com.google.cloud.vision.v1.OperationMetadata;
import com.google.cloud.vision.v1.OutputConfig;
import com.google.cloud.vision.v1.TextAnnotation;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.stream.StreamSupport;
import org.springframework.util.Assert;
/**
* Template providing convenient operations for interfacing with Google Cloud Vision's Document OCR
* feature, which allows you to run OCR algorithms on documents (PDF or TIFF format) stored on
* Google Cloud Storage.
*/
public class DocumentOcrTemplate {
private static final Feature DOCUMENT_OCR_FEATURE =
Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
private final ImageAnnotatorClient imageAnnotatorClient;
private final Storage storage;
private final Executor executor;
private final int jsonOutputBatchSize;
public DocumentOcrTemplate(
ImageAnnotatorClient imageAnnotatorClient,
Storage storage,
Executor executor,
int jsonOutputBatchSize) {
this.imageAnnotatorClient = imageAnnotatorClient;
this.storage = storage;
this.executor = executor;
this.jsonOutputBatchSize = jsonOutputBatchSize;
}
/**
* Runs OCR processing for a specified {@code document} and generates OCR output files under the
* path specified by {@code outputFilePathPrefix}.
*
* For example, if you specify an {@code outputFilePathPrefix} of
* "gs://bucket_name/ocr_results/myDoc_", all the output files of OCR processing will be saved
* under prefix, such as:
*
*
* - gs://bucket_name/ocr_results/myDoc_output-1-to-5.json
*
- gs://bucket_name/ocr_results/myDoc_output-6-to-10.json
*
- gs://bucket_name/ocr_results/myDoc_output-11-to-15.json
*
*
* Note: OCR processing operations may take several minutes to complete, so it may not be
* advisable to block on the completion of the operation. One may use the returned {@link
* CompletableFuture} to register callbacks or track the status of the operation.
*
* @param document The {@link GoogleStorageLocation} of the document to run OCR processing
* @param outputFilePathPrefix The {@link GoogleStorageLocation} of a file, folder, or a bucket
* describing the path for which all output files shall be saved under
* @return A {@link CompletableFuture} allowing you to register callbacks or wait for the
* completion of the operation.
*/
public CompletableFuture runOcrForDocument(
GoogleStorageLocation document, GoogleStorageLocation outputFilePathPrefix) {
Assert.isTrue(
document.isFile(), "Provided document location is not a valid file location: " + document);
GcsSource gcsSource = GcsSource.newBuilder().setUri(document.uriString()).build();
String contentType = extractContentType(document);
InputConfig inputConfig =
InputConfig.newBuilder().setMimeType(contentType).setGcsSource(gcsSource).build();
GcsDestination gcsDestination =
GcsDestination.newBuilder().setUri(outputFilePathPrefix.uriString()).build();
OutputConfig outputConfig =
OutputConfig.newBuilder()
.setGcsDestination(gcsDestination)
.setBatchSize(this.jsonOutputBatchSize)
.build();
AsyncAnnotateFileRequest request =
AsyncAnnotateFileRequest.newBuilder()
.addFeatures(DOCUMENT_OCR_FEATURE)
.setInputConfig(inputConfig)
.setOutputConfig(outputConfig)
.build();
OperationFuture result =
imageAnnotatorClient.asyncBatchAnnotateFilesAsync(Collections.singletonList(request));
return extractOcrResultFuture(result);
}
/**
* Parses the OCR output files who have the specified {@code jsonFilesetPrefix}. This method
* assumes that all of the OCR output files with the prefix are a part of the same document.
*
* @param jsonOutputFilePathPrefix the folder location containing all of the JSON files of OCR
* output
* @return A {@link DocumentOcrResultSet} describing the OCR content of a document
*/
public DocumentOcrResultSet readOcrOutputFileSet(GoogleStorageLocation jsonOutputFilePathPrefix) {
String nonNullPrefix =
(jsonOutputFilePathPrefix.getBlobName() == null)
? ""
: jsonOutputFilePathPrefix.getBlobName();
Page blobsInFolder =
this.storage.list(
jsonOutputFilePathPrefix.getBucketName(),
BlobListOption.currentDirectory(),
BlobListOption.prefix(nonNullPrefix));
List blobPages =
StreamSupport.stream(blobsInFolder.getValues().spliterator(), false)
.filter(blob -> blob.getContentType().equals("application/octet-stream"))
.toList();
return new DocumentOcrResultSet(blobPages);
}
/**
* Parses a single JSON output file and returns the list of pages stored in the file.
*
* Each page of the document is represented as a {@link TextAnnotation} which contains the
* parsed OCR data.
*
* @param jsonFile the location of the JSON output file
* @return the list of {@link TextAnnotation} containing the OCR results
* @throws RuntimeException if the JSON file cannot be deserialized into a {@link TextAnnotation}
* object
*/
public DocumentOcrResultSet readOcrOutputFile(GoogleStorageLocation jsonFile) {
if (!jsonFile.isFile()) {
throw new IllegalArgumentException(
"Provided jsonOutputFile location is not a valid file location: " + jsonFile);
}
Blob jsonOutputBlob =
this.storage.get(BlobId.of(jsonFile.getBucketName(), jsonFile.getBlobName()));
return new DocumentOcrResultSet(Collections.singletonList(jsonOutputBlob));
}
private CompletableFuture extractOcrResultFuture(
OperationFuture grpcFuture) {
CompletableFuture result = new CompletableFuture<>();
ApiFutures.addCallback(
grpcFuture,
new ApiFutureCallback<>() {
@Override
public void onFailure(Throwable throwable) {
result.completeExceptionally(throwable);
}
@Override
public void onSuccess(AsyncBatchAnnotateFilesResponse asyncBatchAnnotateFilesResponse) {
String outputLocationUri =
asyncBatchAnnotateFilesResponse
.getResponsesList()
.get(0)
.getOutputConfig()
.getGcsDestination()
.getUri();
GoogleStorageLocation outputFolderLocation =
new GoogleStorageLocation(outputLocationUri);
result.complete(readOcrOutputFileSet(outputFolderLocation));
}
},
this.executor);
return result;
}
private String extractContentType(GoogleStorageLocation document) {
Blob documentBlob =
this.storage.get(BlobId.of(document.getBucketName(), document.getBlobName()));
if (documentBlob == null) {
throw new IllegalArgumentException("Provided document does not exist: " + document);
}
return documentBlob.getContentType();
}
}