com.amazonaws.services.comprehend.model.DocumentReaderConfig Maven / Gradle / Ivy
Show all versions of aws-java-sdk-comprehend Show documentation
/*
* Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package com.amazonaws.services.comprehend.model;
import java.io.Serializable;
import javax.annotation.Generated;
import com.amazonaws.protocol.StructuredPojo;
import com.amazonaws.protocol.ProtocolMarshaller;
/**
*
* Provides configuration parameters to override the default actions for extracting text from PDF documents and image
* files.
*
*
* By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file
* type:
*
*
* -
*
* Word files - Amazon Comprehend parser extracts the text.
*
*
* -
*
* Digital PDF files - Amazon Comprehend parser extracts the text.
*
*
* -
*
* Image files and scanned PDF files - Amazon Comprehend uses the Amazon Textract DetectDocumentText
* API to extract the text.
*
*
*
*
* DocumentReaderConfig
does not apply to plain text files or Word files.
*
*
* For image files and PDF documents, you can override these default actions using the fields listed below. For more
* information, see Setting
* text extraction options in the Comprehend Developer Guide.
*
*
* @see AWS
* API Documentation
*/
@Generated("com.amazonaws:aws-java-sdk-code-generator")
public class DocumentReaderConfig implements Serializable, Cloneable, StructuredPojo {
/**
*
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
*
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the AnalyzeDocument
API
* operation.
*
*
*
*/
private String documentReadAction;
/**
*
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by DocumentReadAction
* for all PDF files, including digital PDF files.
*
*
*
*/
private String documentReadMode;
/**
*
* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT
as
* the read action, you must specify one or both of the following values:
*
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input document.
*
*
*
*/
private java.util.List featureTypes;
/**
*
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
*
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the AnalyzeDocument
API
* operation.
*
*
*
*
* @param documentReadAction
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF
* files and image files. Enter one of the following values:
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the
* AnalyzeDocument
API operation.
*
*
* @see DocumentReadAction
*/
public void setDocumentReadAction(String documentReadAction) {
this.documentReadAction = documentReadAction;
}
/**
*
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
*
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the AnalyzeDocument
API
* operation.
*
*
*
*
* @return This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF
* files and image files. Enter one of the following values:
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the
* AnalyzeDocument
API operation.
*
*
* @see DocumentReadAction
*/
public String getDocumentReadAction() {
return this.documentReadAction;
}
/**
*
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
*
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the AnalyzeDocument
API
* operation.
*
*
*
*
* @param documentReadAction
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF
* files and image files. Enter one of the following values:
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the
* AnalyzeDocument
API operation.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadAction
*/
public DocumentReaderConfig withDocumentReadAction(String documentReadAction) {
setDocumentReadAction(documentReadAction);
return this;
}
/**
*
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
*
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the AnalyzeDocument
API
* operation.
*
*
*
*
* @param documentReadAction
* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF
* files and image files. Enter one of the following values:
*
* -
*
* TEXTRACT_DETECT_DOCUMENT_TEXT
- The Amazon Comprehend service uses the
* DetectDocumentText
API operation.
*
*
* -
*
* TEXTRACT_ANALYZE_DOCUMENT
- The Amazon Comprehend service uses the
* AnalyzeDocument
API operation.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadAction
*/
public DocumentReaderConfig withDocumentReadAction(DocumentReadAction documentReadAction) {
this.documentReadAction = documentReadAction.toString();
return this;
}
/**
*
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by DocumentReadAction
* for all PDF files, including digital PDF files.
*
*
*
*
* @param documentReadMode
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by
* DocumentReadAction for all PDF files, including digital PDF files.
*
*
* @see DocumentReadMode
*/
public void setDocumentReadMode(String documentReadMode) {
this.documentReadMode = documentReadMode;
}
/**
*
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by DocumentReadAction
* for all PDF files, including digital PDF files.
*
*
*
*
* @return Determines the text extraction actions for PDF files. Enter one of the following values:
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by
* DocumentReadAction for all PDF files, including digital PDF files.
*
*
* @see DocumentReadMode
*/
public String getDocumentReadMode() {
return this.documentReadMode;
}
/**
*
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by DocumentReadAction
* for all PDF files, including digital PDF files.
*
*
*
*
* @param documentReadMode
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by
* DocumentReadAction for all PDF files, including digital PDF files.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadMode
*/
public DocumentReaderConfig withDocumentReadMode(String documentReadMode) {
setDocumentReadMode(documentReadMode);
return this;
}
/**
*
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by DocumentReadAction
* for all PDF files, including digital PDF files.
*
*
*
*
* @param documentReadMode
* Determines the text extraction actions for PDF files. Enter one of the following values:
*
* -
*
* SERVICE_DEFAULT
- use the Amazon Comprehend service defaults for PDF files.
*
*
* -
*
* FORCE_DOCUMENT_READ_ACTION
- Amazon Comprehend uses the Textract API specified by
* DocumentReadAction for all PDF files, including digital PDF files.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadMode
*/
public DocumentReaderConfig withDocumentReadMode(DocumentReadMode documentReadMode) {
this.documentReadMode = documentReadMode.toString();
return this;
}
/**
*
* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT
as
* the read action, you must specify one or both of the following values:
*
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input document.
*
*
*
*
* @return Specifies the type of Amazon Textract features to apply. If you chose
* TEXTRACT_ANALYZE_DOCUMENT
as the read action, you must specify one or both of the following
* values:
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input
* document.
*
*
* @see DocumentReadFeatureTypes
*/
public java.util.List getFeatureTypes() {
return featureTypes;
}
/**
*
* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT
as
* the read action, you must specify one or both of the following values:
*
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input document.
*
*
*
*
* @param featureTypes
* Specifies the type of Amazon Textract features to apply. If you chose
* TEXTRACT_ANALYZE_DOCUMENT
as the read action, you must specify one or both of the following
* values:
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input
* document.
*
*
* @see DocumentReadFeatureTypes
*/
public void setFeatureTypes(java.util.Collection featureTypes) {
if (featureTypes == null) {
this.featureTypes = null;
return;
}
this.featureTypes = new java.util.ArrayList(featureTypes);
}
/**
*
* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT
as
* the read action, you must specify one or both of the following values:
*
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input document.
*
*
*
*
* NOTE: This method appends the values to the existing list (if any). Use
* {@link #setFeatureTypes(java.util.Collection)} or {@link #withFeatureTypes(java.util.Collection)} if you want to
* override the existing values.
*
*
* @param featureTypes
* Specifies the type of Amazon Textract features to apply. If you chose
* TEXTRACT_ANALYZE_DOCUMENT
as the read action, you must specify one or both of the following
* values:
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input
* document.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadFeatureTypes
*/
public DocumentReaderConfig withFeatureTypes(String... featureTypes) {
if (this.featureTypes == null) {
setFeatureTypes(new java.util.ArrayList(featureTypes.length));
}
for (String ele : featureTypes) {
this.featureTypes.add(ele);
}
return this;
}
/**
*
* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT
as
* the read action, you must specify one or both of the following values:
*
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input document.
*
*
*
*
* @param featureTypes
* Specifies the type of Amazon Textract features to apply. If you chose
* TEXTRACT_ANALYZE_DOCUMENT
as the read action, you must specify one or both of the following
* values:
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input
* document.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadFeatureTypes
*/
public DocumentReaderConfig withFeatureTypes(java.util.Collection featureTypes) {
setFeatureTypes(featureTypes);
return this;
}
/**
*
* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT
as
* the read action, you must specify one or both of the following values:
*
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input document.
*
*
*
*
* @param featureTypes
* Specifies the type of Amazon Textract features to apply. If you chose
* TEXTRACT_ANALYZE_DOCUMENT
as the read action, you must specify one or both of the following
* values:
*
* -
*
* TABLES
- Returns information about any tables that are detected in the input document.
*
*
* -
*
* FORMS
- Returns information and the data from any forms that are detected in the input
* document.
*
*
* @return Returns a reference to this object so that method calls can be chained together.
* @see DocumentReadFeatureTypes
*/
public DocumentReaderConfig withFeatureTypes(DocumentReadFeatureTypes... featureTypes) {
java.util.ArrayList featureTypesCopy = new java.util.ArrayList(featureTypes.length);
for (DocumentReadFeatureTypes value : featureTypes) {
featureTypesCopy.add(value.toString());
}
if (getFeatureTypes() == null) {
setFeatureTypes(featureTypesCopy);
} else {
getFeatureTypes().addAll(featureTypesCopy);
}
return this;
}
/**
* Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be
* redacted from this string using a placeholder value.
*
* @return A string representation of this object.
*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("{");
if (getDocumentReadAction() != null)
sb.append("DocumentReadAction: ").append(getDocumentReadAction()).append(",");
if (getDocumentReadMode() != null)
sb.append("DocumentReadMode: ").append(getDocumentReadMode()).append(",");
if (getFeatureTypes() != null)
sb.append("FeatureTypes: ").append(getFeatureTypes());
sb.append("}");
return sb.toString();
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (obj instanceof DocumentReaderConfig == false)
return false;
DocumentReaderConfig other = (DocumentReaderConfig) obj;
if (other.getDocumentReadAction() == null ^ this.getDocumentReadAction() == null)
return false;
if (other.getDocumentReadAction() != null && other.getDocumentReadAction().equals(this.getDocumentReadAction()) == false)
return false;
if (other.getDocumentReadMode() == null ^ this.getDocumentReadMode() == null)
return false;
if (other.getDocumentReadMode() != null && other.getDocumentReadMode().equals(this.getDocumentReadMode()) == false)
return false;
if (other.getFeatureTypes() == null ^ this.getFeatureTypes() == null)
return false;
if (other.getFeatureTypes() != null && other.getFeatureTypes().equals(this.getFeatureTypes()) == false)
return false;
return true;
}
@Override
public int hashCode() {
final int prime = 31;
int hashCode = 1;
hashCode = prime * hashCode + ((getDocumentReadAction() == null) ? 0 : getDocumentReadAction().hashCode());
hashCode = prime * hashCode + ((getDocumentReadMode() == null) ? 0 : getDocumentReadMode().hashCode());
hashCode = prime * hashCode + ((getFeatureTypes() == null) ? 0 : getFeatureTypes().hashCode());
return hashCode;
}
@Override
public DocumentReaderConfig clone() {
try {
return (DocumentReaderConfig) super.clone();
} catch (CloneNotSupportedException e) {
throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e);
}
}
@com.amazonaws.annotation.SdkInternalApi
@Override
public void marshall(ProtocolMarshaller protocolMarshaller) {
com.amazonaws.services.comprehend.model.transform.DocumentReaderConfigMarshaller.getInstance().marshall(this, protocolMarshaller);
}
}