All downloads are free. Search and download functionality uses the official Maven repository.

io.camunda.connector.textract.caller.PollingTextractCalller Maven / Gradle / Ivy

/*
 * Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH
 * under one or more contributor license agreements. Licensed under a proprietary license.
 * See the License.txt file for more information. You may not use this file
 * except in compliance with the proprietary license.
 */
package io.camunda.connector.textract.caller;

import static java.util.concurrent.TimeUnit.SECONDS;

import com.amazonaws.services.textract.AmazonTextract;
import com.amazonaws.services.textract.AmazonTextractAsync;
import com.amazonaws.services.textract.model.*;
import io.camunda.connector.textract.model.TextractRequestData;
import io.camunda.connector.textract.model.TextractTask;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link TextractCaller} that starts an asynchronous Textract document analysis job and then
 * retrieves its result page by page, merging the blocks of every page into the last page's
 * result object.
 *
 * <p>Each page request is wrapped in a {@link TextractTask} and run on a single-thread scheduled
 * executor that lives only for the duration of one {@link #call} invocation.
 *
 * <p>NOTE(review): this class never inspects the job status of a returned page; it only follows
 * {@code nextToken}. Presumably {@link TextractTask} (or the service) takes care of waiting for
 * job completion - confirm.
 */
public class PollingTextractCalller implements TextractCaller {
  /** Delay, in seconds, applied before every follow-up page request (the first one runs at once). */
  public static final long DELAY_BETWEEN_POLLING = 5;

  /** Value passed as {@code maxResults} on every {@link GetDocumentAnalysisRequest}. */
  public static final int MAX_RESULT = 1000;

  private static final Logger LOGGER = LoggerFactory.getLogger(PollingTextractCalller.class);

  /**
   * Starts a document analysis job and collects the blocks of all result pages.
   *
   * @param requestData connector input used to build the feature types and document location
   * @param textractClient client used to start the job and to fetch its results; it is later cast
   *     to {@link AmazonTextractAsync}, so it must be an instance of that type
   * @return the last fetched page, with its block list replaced by the blocks of all pages in the
   *     order they were fetched
   * @throws Exception if starting the job fails, if a page task fails (surfaced by {@code
   *     Future.get()}), or if the calling thread is interrupted while waiting
   */
  @Override
  public GetDocumentAnalysisResult call(
      TextractRequestData requestData, AmazonTextract textractClient) throws Exception {
    LOGGER.debug("Starting polling task for document analysis with request data: {}", requestData);
    final StartDocumentAnalysisRequest startDocReq =
        new StartDocumentAnalysisRequest()
            .withFeatureTypes(this.prepareFeatureTypes(requestData))
            .withDocumentLocation(this.prepareDocumentLocation(requestData));

    final StartDocumentAnalysisResult result = textractClient.startDocumentAnalysis(startDocReq);

    try (ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor()) {
      final String jobId = result.getJobId();

      // First page: no continuation token and no delay.
      GetDocumentAnalysisResult lastDocumentResult =
          executeTask(prepareTextractTask(jobId, null, textractClient), 0, executorService);

      // Mutable copy of the first page's blocks; the element type is inferred from getBlocks(),
      // so the list stays fully typed (no raw List).
      final var allBlocks = new ArrayList<>(lastDocumentResult.getBlocks());
      String nextToken = lastDocumentResult.getNextToken();

      // Follow the continuation token until the service stops returning one.
      while (StringUtils.isNotEmpty(nextToken)) {
        lastDocumentResult =
            executeTask(
                prepareTextractTask(jobId, nextToken, textractClient),
                DELAY_BETWEEN_POLLING,
                executorService);
        allBlocks.addAll(lastDocumentResult.getBlocks());
        nextToken = lastDocumentResult.getNextToken();
      }

      // The last page carries the final metadata; hand it back with the merged block list.
      lastDocumentResult.setBlocks(allBlocks);
      return lastDocumentResult;
    }
  }

  /**
   * Builds the task that fetches one page of analysis results.
   *
   * @param jobId id of the analysis job returned by {@code startDocumentAnalysis}
   * @param nextToken continuation token of the page to fetch; {@code null} or empty for the first
   *     page, in which case no token is set on the request
   * @param textractClient client handed to the task after being cast to {@link
   *     AmazonTextractAsync}
   * @return a task wrapping the page request
   * @throws ClassCastException if {@code textractClient} is not an {@link AmazonTextractAsync}
   */
  private TextractTask prepareTextractTask(
      String jobId, String nextToken, AmazonTextract textractClient) {
    GetDocumentAnalysisRequest documentAnalysisReq =
        new GetDocumentAnalysisRequest().withJobId(jobId).withMaxResults(MAX_RESULT);

    if (StringUtils.isNotEmpty(nextToken)) {
      documentAnalysisReq.withNextToken(nextToken);
    }

    // NOTE(review): unchecked downcast - callers must supply an async-capable client.
    return new TextractTask(documentAnalysisReq, (AmazonTextractAsync) textractClient);
  }

  /**
   * Schedules {@code task} after {@code delay} seconds and blocks until it has produced a result.
   *
   * @param task page request to run
   * @param delay seconds to wait before the task is started
   * @param executorService executor the task is scheduled on
   * @return the page returned by the task
   * @throws Exception whatever {@code Future.get()} throws: a failure of the task itself or an
   *     {@link InterruptedException} if the waiting thread is interrupted
   */
  private GetDocumentAnalysisResult executeTask(
      TextractTask task, long delay, ScheduledExecutorService executorService) throws Exception {
    final ScheduledFuture<GetDocumentAnalysisResult> nextDocumentResultFuture =
        executorService.schedule(task, delay, SECONDS);
    try {
      return nextDocumentResultFuture.get();
    } catch (InterruptedException e) {
      // Restore the interrupt flag before propagating so code further up the stack can still
      // observe the interruption.
      Thread.currentThread().interrupt();
      throw e;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy