All downloads are free. Search and download functionality uses the official Maven repository.

com.amazonaws.services.comprehend.model.DocumentReaderConfig Maven / Gradle / Ivy

Go to download

The AWS Java SDK for Amazon Comprehend module holds the client classes that are used for communicating with Amazon Comprehend Service

There is a newer version: 1.12.772
Show newest version
/*
 * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
 * the License. A copy of the License is located at
 * 
 * http://aws.amazon.com/apache2.0
 * 
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package com.amazonaws.services.comprehend.model;

import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;

/**
 * 

* Provides configuration parameters to override the default actions for extracting text from PDF documents and image * files. *

*

* By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file * type: *

*
    *
  • *

    * Word files - Amazon Comprehend parser extracts the text. *

    *
  • *
  • *

    * Digital PDF files - Amazon Comprehend parser extracts the text. *

    *
  • *
  • *

    * Image files and scanned PDF files - Amazon Comprehend uses the Amazon Textract DetectDocumentText * API to extract the text. *

    *
  • *
*

* DocumentReaderConfig does not apply to plain text files or Word files. *

*

* For image files and PDF documents, you can override these default actions using the fields listed below. For more * information, see Setting * text extraction options in the Comprehend Developer Guide. *

* * @see AWS * API Documentation */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class DocumentReaderConfig implements Serializable, Cloneable, StructuredPojo { /** *

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
    *
  • *

    * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

    *
  • *
  • *

    * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *

    *
  • *
*/ private String documentReadAction; /** *

* Determines the text extraction actions for PDF files. Enter one of the following values: *

*
    *
  • *

    * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

    *
  • *
  • *

    * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *

    *
  • *
*/ private String documentReadMode; /** *

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
    *
  • *

    * TABLES - Returns information about any tables that are detected in the input document. *

    *
  • *
  • *

    * FORMS - Returns information and the data from any forms that are detected in the input document. *

    *
  • *
*/ private java.util.List featureTypes; /** *

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
    *
  • *

    * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

    *
  • *
  • *

    * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *

    *
  • *
* * @param documentReadAction * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF * files and image files. Enter one of the following values:

*
    *
  • *

    * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

    *
  • *
  • *

    * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *

    *
  • * @see DocumentReadAction */ public void setDocumentReadAction(String documentReadAction) { this.documentReadAction = documentReadAction; } /** *

    * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

    *
      *
    • *

      * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

      *
    • *
    • *

      * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *

      *
    • *
    * * @return This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF * files and image files. Enter one of the following values:

    *
      *
    • *

      * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

      *
    • *
    • *

      * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *

      *
    • * @see DocumentReadAction */ public String getDocumentReadAction() { return this.documentReadAction; } /** *

      * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

      *
        *
      • *

        * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

        *
      • *
      • *

        * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *

        *
      • *
      * * @param documentReadAction * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF * files and image files. Enter one of the following values:

      *
        *
      • *

        * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

        *
      • *
      • *

        * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *

        *
      • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadAction */ public DocumentReaderConfig withDocumentReadAction(String documentReadAction) { setDocumentReadAction(documentReadAction); return this; } /** *

        * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

        *
          *
        • *

          * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

          *
        • *
        • *

          * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *

          *
        • *
        * * @param documentReadAction * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF * files and image files. Enter one of the following values:

        *
          *
        • *

          * TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *

          *
        • *
        • *

          * TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *

          *
        • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadAction */ public DocumentReaderConfig withDocumentReadAction(DocumentReadAction documentReadAction) { this.documentReadAction = documentReadAction.toString(); return this; } /** *

          * Determines the text extraction actions for PDF files. Enter one of the following values: *

          *
            *
          • *

            * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

            *
          • *
          • *

            * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *

            *
          • *
          * * @param documentReadMode * Determines the text extraction actions for PDF files. Enter one of the following values:

          *
            *
          • *

            * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

            *
          • *
          • *

            * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *

            *
          • * @see DocumentReadMode */ public void setDocumentReadMode(String documentReadMode) { this.documentReadMode = documentReadMode; } /** *

            * Determines the text extraction actions for PDF files. Enter one of the following values: *

            *
              *
            • *

              * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

              *
            • *
            • *

              * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *

              *
            • *
            * * @return Determines the text extraction actions for PDF files. Enter one of the following values:

            *
              *
            • *

              * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

              *
            • *
            • *

              * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *

              *
            • * @see DocumentReadMode */ public String getDocumentReadMode() { return this.documentReadMode; } /** *

              * Determines the text extraction actions for PDF files. Enter one of the following values: *

              *
                *
              • *

                * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

                *
              • *
              • *

                * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *

                *
              • *
              * * @param documentReadMode * Determines the text extraction actions for PDF files. Enter one of the following values:

              *
                *
              • *

                * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

                *
              • *
              • *

                * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *

                *
              • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadMode */ public DocumentReaderConfig withDocumentReadMode(String documentReadMode) { setDocumentReadMode(documentReadMode); return this; } /** *

                * Determines the text extraction actions for PDF files. Enter one of the following values: *

                *
                  *
                • *

                  * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

                  *
                • *
                • *

                  * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *

                  *
                • *
                * * @param documentReadMode * Determines the text extraction actions for PDF files. Enter one of the following values:

                *
                  *
                • *

                  * SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *

                  *
                • *
                • *

                  * FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *

                  *
                • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadMode */ public DocumentReaderConfig withDocumentReadMode(DocumentReadMode documentReadMode) { this.documentReadMode = documentReadMode.toString(); return this; } /** *

                  * Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

                  *
                    *
                  • *

                    * TABLES - Returns information about any tables that are detected in the input document. *

                    *
                  • *
                  • *

                    * FORMS - Returns information and the data from any forms that are detected in the input document. *

                    *
                  • *
                  * * @return Specifies the type of Amazon Textract features to apply. If you chose * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or both of the following * values:

                  *
                    *
                  • *

                    * TABLES - Returns information about any tables that are detected in the input document. *

                    *
                  • *
                  • *

                    * FORMS - Returns information and the data from any forms that are detected in the input * document. *

                    *
                  • * @see DocumentReadFeatureTypes */ public java.util.List getFeatureTypes() { return featureTypes; } /** *

                    * Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

                    *
                      *
                    • *

                      * TABLES - Returns information about any tables that are detected in the input document. *

                      *
                    • *
                    • *

                      * FORMS - Returns information and the data from any forms that are detected in the input document. *

                      *
                    • *
                    * * @param featureTypes * Specifies the type of Amazon Textract features to apply. If you chose * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or both of the following * values:

                    *
                      *
                    • *

                      * TABLES - Returns information about any tables that are detected in the input document. *

                      *
                    • *
                    • *

                      * FORMS - Returns information and the data from any forms that are detected in the input * document. *

                      *
                    • * @see DocumentReadFeatureTypes */ public void setFeatureTypes(java.util.Collection featureTypes) { if (featureTypes == null) { this.featureTypes = null; return; } this.featureTypes = new java.util.ArrayList(featureTypes); } /** *

                      * Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

                      *
                        *
                      • *

                        * TABLES - Returns information about any tables that are detected in the input document. *

                        *
                      • *
                      • *

                        * FORMS - Returns information and the data from any forms that are detected in the input document. *

                        *
                      • *
                      *

                      * NOTE: This method appends the values to the existing list (if any). Use * {@link #setFeatureTypes(java.util.Collection)} or {@link #withFeatureTypes(java.util.Collection)} if you want to * override the existing values. *

                      * * @param featureTypes * Specifies the type of Amazon Textract features to apply. If you chose * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or both of the following * values:

                      *
                        *
                      • *

                        * TABLES - Returns information about any tables that are detected in the input document. *

                        *
                      • *
                      • *

                        * FORMS - Returns information and the data from any forms that are detected in the input * document. *

                        *
                      • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadFeatureTypes */ public DocumentReaderConfig withFeatureTypes(String... featureTypes) { if (this.featureTypes == null) { setFeatureTypes(new java.util.ArrayList(featureTypes.length)); } for (String ele : featureTypes) { this.featureTypes.add(ele); } return this; } /** *

                        * Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

                        *
                          *
                        • *

                          * TABLES - Returns information about any tables that are detected in the input document. *

                          *
                        • *
                        • *

                          * FORMS - Returns information and the data from any forms that are detected in the input document. *

                          *
                        • *
                        * * @param featureTypes * Specifies the type of Amazon Textract features to apply. If you chose * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or both of the following * values:

                        *
                          *
                        • *

                          * TABLES - Returns information about any tables that are detected in the input document. *

                          *
                        • *
                        • *

                          * FORMS - Returns information and the data from any forms that are detected in the input * document. *

                          *
                        • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadFeatureTypes */ public DocumentReaderConfig withFeatureTypes(java.util.Collection featureTypes) { setFeatureTypes(featureTypes); return this; } /** *

                          * Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

                          *
                            *
                          • *

                            * TABLES - Returns information about any tables that are detected in the input document. *

                            *
                          • *
                          • *

                            * FORMS - Returns information and the data from any forms that are detected in the input document. *

                            *
                          • *
                          * * @param featureTypes * Specifies the type of Amazon Textract features to apply. If you chose * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or both of the following * values:

                          *
                            *
                          • *

                            * TABLES - Returns information about any tables that are detected in the input document. *

                            *
                          • *
                          • *

                            * FORMS - Returns information and the data from any forms that are detected in the input * document. *

                            *
                          • * @return Returns a reference to this object so that method calls can be chained together. * @see DocumentReadFeatureTypes */ public DocumentReaderConfig withFeatureTypes(DocumentReadFeatureTypes... featureTypes) { java.util.ArrayList featureTypesCopy = new java.util.ArrayList(featureTypes.length); for (DocumentReadFeatureTypes value : featureTypes) { featureTypesCopy.add(value.toString()); } if (getFeatureTypes() == null) { setFeatureTypes(featureTypesCopy); } else { getFeatureTypes().addAll(featureTypesCopy); } return this; } /** * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be * redacted from this string using a placeholder value. * * @return A string representation of this object. * * @see java.lang.Object#toString() */ @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("{"); if (getDocumentReadAction() != null) sb.append("DocumentReadAction: ").append(getDocumentReadAction()).append(","); if (getDocumentReadMode() != null) sb.append("DocumentReadMode: ").append(getDocumentReadMode()).append(","); if (getFeatureTypes() != null) sb.append("FeatureTypes: ").append(getFeatureTypes()); sb.append("}"); return sb.toString(); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (obj instanceof DocumentReaderConfig == false) return false; DocumentReaderConfig other = (DocumentReaderConfig) obj; if (other.getDocumentReadAction() == null ^ this.getDocumentReadAction() == null) return false; if (other.getDocumentReadAction() != null && other.getDocumentReadAction().equals(this.getDocumentReadAction()) == false) return false; if (other.getDocumentReadMode() == null ^ this.getDocumentReadMode() == null) return false; if (other.getDocumentReadMode() != null && other.getDocumentReadMode().equals(this.getDocumentReadMode()) == false) return false; if (other.getFeatureTypes() == null ^ 
this.getFeatureTypes() == null) return false; if (other.getFeatureTypes() != null && other.getFeatureTypes().equals(this.getFeatureTypes()) == false) return false; return true; } @Override public int hashCode() { final int prime = 31; int hashCode = 1; hashCode = prime * hashCode + ((getDocumentReadAction() == null) ? 0 : getDocumentReadAction().hashCode()); hashCode = prime * hashCode + ((getDocumentReadMode() == null) ? 0 : getDocumentReadMode().hashCode()); hashCode = prime * hashCode + ((getFeatureTypes() == null) ? 0 : getFeatureTypes().hashCode()); return hashCode; } @Override public DocumentReaderConfig clone() { try { return (DocumentReaderConfig) super.clone(); } catch (CloneNotSupportedException e) { throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e); } } @com.amazonaws.annotation.SdkInternalApi @Override public void marshall(ProtocolMarshaller protocolMarshaller) { com.amazonaws.services.comprehend.model.transform.DocumentReaderConfigMarshaller.getInstance().marshall(this, protocolMarshaller); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy