google.cloud.documentai.v1beta3.document_service.proto Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of proto-google-cloud-document-ai-v1beta3 Show documentation
Show all versions of proto-google-cloud-document-ai-v1beta3 Show documentation
PROTO library for proto-google-cloud-document-ai-v1beta3
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta3;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/dataset.proto";
import "google/cloud/documentai/v1beta3/document.proto";
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/cloud/documentai/v1beta3/operation_metadata.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/rpc/status.proto";
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentService";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
// Service for managing a document collection (dataset) through Cloud
// DocumentAI.
service DocumentService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Updates the metadata associated with a dataset.
  //
  // Returns a long-running operation whose response type is `Dataset` and
  // whose metadata type is `UpdateDatasetOperationMetadata`.
  rpc UpdateDataset(UpdateDatasetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1beta3/{dataset.name=projects/*/locations/*/processors/*/dataset}"
      body: "dataset"
    };
    option (google.api.method_signature) = "dataset,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Dataset"
      metadata_type: "UpdateDatasetOperationMetadata"
    };
  }

  // Imports documents into a dataset.
  //
  // Returns a long-running operation whose response type is
  // `ImportDocumentsResponse` and whose metadata type is
  // `ImportDocumentsMetadata`.
  rpc ImportDocuments(ImportDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:importDocuments"
      body: "*"
    };
    option (google.api.method_signature) = "dataset";
    option (google.longrunning.operation_info) = {
      response_type: "ImportDocumentsResponse"
      metadata_type: "ImportDocumentsMetadata"
    };
  }

  // Fetches the requested document, returning the relevant fields present
  // in it.
  rpc GetDocument(GetDocumentRequest) returns (GetDocumentResponse) {
    option (google.api.http) = {
      get: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:getDocument"
    };
    option (google.api.method_signature) = "dataset";
  }

  // Lists the documents present in the dataset.
  rpc ListDocuments(ListDocumentsRequest) returns (ListDocumentsResponse) {
    option (google.api.http) = {
      post: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:listDocuments"
      body: "*"
    };
    option (google.api.method_signature) = "dataset";
  }

  // Deletes a set of documents.
  //
  // Returns a long-running operation whose response type is
  // `BatchDeleteDocumentsResponse` and whose metadata type is
  // `BatchDeleteDocumentsMetadata`.
  rpc BatchDeleteDocuments(BatchDeleteDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:batchDeleteDocuments"
      body: "*"
    };
    option (google.api.method_signature) = "dataset";
    option (google.longrunning.operation_info) = {
      response_type: "BatchDeleteDocumentsResponse"
      metadata_type: "BatchDeleteDocumentsMetadata"
    };
  }

  // Retrieves the `DatasetSchema` of a `Dataset`.
  rpc GetDatasetSchema(GetDatasetSchemaRequest) returns (DatasetSchema) {
    option (google.api.http) = {
      get: "/v1beta3/{name=projects/*/locations/*/processors/*/dataset/datasetSchema}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates a `DatasetSchema` and returns it.
  rpc UpdateDatasetSchema(UpdateDatasetSchemaRequest) returns (DatasetSchema) {
    option (google.api.http) = {
      patch: "/v1beta3/{dataset_schema.name=projects/*/locations/*/processors/*/dataset/datasetSchema}"
      body: "dataset_schema"
    };
    option (google.api.method_signature) = "dataset_schema,update_mask";
  }
}
// The split a dataset document is placed in. Documents belonging to a
// dataset are divided into groups referred to as splits: train, test.
enum DatasetSplitType {
  // Default value, used when the enum is not set.
  DATASET_SPLIT_TYPE_UNSPECIFIED = 0;

  // Marks the train documents.
  DATASET_SPLIT_TRAIN = 1;

  // Marks the test documents.
  DATASET_SPLIT_TEST = 2;

  // Marks the unassigned documents.
  DATASET_SPLIT_UNASSIGNED = 3;
}
// The labeling status of a document.
enum DocumentLabelingState {
  // Default value, used when the enum is not set.
  DOCUMENT_LABELING_STATE_UNSPECIFIED = 0;

  // The document has been labeled.
  DOCUMENT_LABELED = 1;

  // The document has not been labeled.
  DOCUMENT_UNLABELED = 2;

  // The document has been auto-labeled.
  DOCUMENT_AUTO_LABELED = 3;
}
// Request for `UpdateDataset`.
message UpdateDatasetRequest {
  // Required. The dataset to update. Its `name` field identifies the
  // resource to be updated.
  Dataset dataset = 1 [(google.api.field_behavior) = REQUIRED];

  // The update mask that applies to the resource.
  google.protobuf.FieldMask update_mask = 2;
}
// Metadata of the long-running operation returned by `UpdateDataset`.
message UpdateDatasetOperationMetadata {
  // Basic metadata of the long-running operation.
  CommonOperationMetadata common_metadata = 1;
}
// Request for `ImportDocuments`.
message ImportDocumentsRequest {
  // Import config for one batch of documents.
  // Every batch can carry its own dataset split type.
  message BatchDocumentsImportConfig {
    // Config for auto-split.
    message AutoSplitConfig {
      // Ratio of the training dataset split.
      float training_split_ratio = 1;
    }

    // How the documents of this batch are assigned to a split; at most one
    // of the members can be set.
    oneof split_type_config {
      // Target dataset split in which the documents must be stored.
      DatasetSplitType dataset_split = 2;

      // If set, documents are automatically divided between the training
      // and test split categories using the specified ratio.
      AutoSplitConfig auto_split_config = 3;
    }

    // Common config specifying the set of documents used as input.
    BatchDocumentsInputConfig batch_input_config = 1;
  }

  // Required. The resource name of the dataset.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  string dataset = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Dataset"
    }
  ];

  // Required. The Cloud Storage URI containing the raw documents that must
  // be imported.
  repeated BatchDocumentsImportConfig batch_documents_import_configs = 4
      [(google.api.field_behavior) = REQUIRED];
}
// Response of the `ImportDocuments` operation. Currently carries no fields.
message ImportDocumentsResponse {
}
// Metadata of the `ImportDocuments` operation.
message ImportDocumentsMetadata {
  // Import status of a single document.
  message IndividualImportStatus {
    // Source Cloud Storage URI of the document.
    string input_gcs_source = 1;

    // Status of importing the document.
    google.rpc.Status status = 2;

    // Document id of the imported document if the import succeeded;
    // otherwise empty.
    DocumentId output_document_id = 4;
  }

  // Validation status of a single import config. The status is an error if
  // there are no documents to import in the `import_config`, or `OK` if the
  // operation will try to proceed with at least one document.
  message ImportConfigValidationResult {
    // Source Cloud Storage URI specified in the import config.
    string input_gcs_source = 1;

    // Validation status of the import config.
    google.rpc.Status status = 2;
  }

  // Basic metadata of the long-running operation.
  CommonOperationMetadata common_metadata = 1;

  // Response details for each document.
  repeated IndividualImportStatus individual_import_statuses = 2;

  // Validation statuses of the batch documents import configs.
  repeated ImportConfigValidationResult import_config_validation_results = 4;

  // Total number of documents that qualify for importing.
  int32 total_document_count = 3;
}
// Request for `GetDocument`.
message GetDocumentRequest {
  // Required. The resource name of the dataset that the document belongs to.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  string dataset = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Dataset"
    }
  ];

  // Required. Identifier of the document.
  DocumentId document_id = 2 [(google.api.field_behavior) = REQUIRED];

  // If set, only the fields listed here are returned. Otherwise all fields
  // are returned by default.
  google.protobuf.FieldMask read_mask = 3;

  // Pages for which the fields specified in the `read_mask` must be served.
  DocumentPageRange page_range = 4;
}
// Response for `GetDocument`.
message GetDocumentResponse {
  // The document returned by `GetDocument`.
  Document document = 1;
}
// Request for `ListDocuments`.
message ListDocumentsRequest {
  // Required. The resource name of the dataset to be listed.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  string dataset = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Dataset"
    }
  ];

  // Maximum number of documents to return. The service may return fewer
  // than this value.
  // If unspecified, at most 20 documents are returned.
  // The maximum value is 100; values above 100 are coerced to 100.
  int32 page_size = 2;

  // A page token received from a previous `ListDocuments` call.
  // Provide it to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to `ListDocuments`
  // must match the call that provided the page token.
  string page_token = 3;

  // Optional. Query to filter the documents, based on
  // https://google.aip.dev/160.
  //
  // Currently supported query strings are:
  //
  // - `SplitType=DATASET_SPLIT_TEST|DATASET_SPLIT_TRAIN|DATASET_SPLIT_UNASSIGNED`
  // - `LabelingState=DOCUMENT_LABELED|DOCUMENT_UNLABELED|DOCUMENT_AUTO_LABELED`
  // - `DisplayName=\"file_name.pdf\"`
  // - `EntityType=abc/def`
  // - `TagName=\"auto-labeling-running\"|\"sampled\"`
  //
  // Note:
  // - Only `AND`, `=` and `!=` are supported,
  //   e.g. `DisplayName=file_name AND EntityType!=abc` IS supported.
  // - The wildcard `*` is supported only in the `DisplayName` filter.
  // - Duplicate filter keys are not allowed,
  //   e.g. `EntityType=a AND EntityType=b` is NOT supported.
  // - String matching is case sensitive (for the `DisplayName` and
  //   `EntityType` filters).
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls whether the request requires a total size of the
  // matched documents. See
  // [ListDocumentsResponse.total_size][google.cloud.documentai.v1beta3.ListDocumentsResponse.total_size].
  //
  // Enabling this flag may adversely impact performance.
  //
  // Defaults to false.
  bool return_total_size = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of results to skip, counted from the `page_token` if
  // one is provided. See https://google.aip.dev/158#skipping-results. The
  // value must be a non-negative integer; negative values are rejected.
  // Note that this is not the number of pages to skip. If this value moves
  // the cursor past the end of the results,
  // [ListDocumentsResponse.document_metadata][google.cloud.documentai.v1beta3.ListDocumentsResponse.document_metadata]
  // and
  // [ListDocumentsResponse.next_page_token][google.cloud.documentai.v1beta3.ListDocumentsResponse.next_page_token]
  // will be empty.
  int32 skip = 8 [(google.api.field_behavior) = OPTIONAL];
}
// Response for `ListDocuments`.
message ListDocumentsResponse {
  // Metadata of each of the listed documents.
  repeated DocumentMetadata document_metadata = 1;

  // Token that can be sent as
  // [ListDocumentsRequest.page_token][google.cloud.documentai.v1beta3.ListDocumentsRequest.page_token]
  // to retrieve the next page. If this field is omitted, there are no
  // subsequent pages.
  string next_page_token = 2;

  // Total count of the documents queried.
  int32 total_size = 3;
}
// Request for `BatchDeleteDocuments`.
message BatchDeleteDocumentsRequest {
  // Required. The dataset resource name.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  //
  // Annotated as a reference to the `Dataset` resource, matching the
  // `dataset` field of the other request messages in this file.
  string dataset = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Dataset"
    }
  ];

  // Required. Dataset documents input. If given `filter`, all documents
  // satisfying the filter will be deleted. If given documentIds, a maximum of
  // 50 documents can be deleted in a batch. The request will be rejected if
  // more than 50 document_ids are provided.
  BatchDatasetDocuments dataset_documents = 3
      [(google.api.field_behavior) = REQUIRED];
}
// Response of the `BatchDeleteDocuments` operation. Currently carries no
// fields.
message BatchDeleteDocumentsResponse {
}
// Metadata of the long-running operation returned by `BatchDeleteDocuments`.
message BatchDeleteDocumentsMetadata {
  // Status of a single document in the batch delete process.
  message IndividualBatchDeleteStatus {
    // Document id of the document.
    DocumentId document_id = 1;

    // Status of deleting the document in storage.
    google.rpc.Status status = 2;
  }

  // Basic metadata of the long-running operation.
  CommonOperationMetadata common_metadata = 1;

  // Response details for each document.
  repeated IndividualBatchDeleteStatus individual_batch_delete_statuses = 2;

  // Total number of documents being deleted from the dataset.
  int32 total_document_count = 3;

  // Total number of documents that failed to be deleted in storage.
  int32 error_document_count = 4;
}
// Request message for the `GetDatasetSchema` method.
message GetDatasetSchemaRequest {
  // Required. The resource name of the dataset schema.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/DatasetSchema"
    }
  ];

  // If set, only the visible fields of the schema are returned.
  bool visible_fields_only = 2;
}
// Request message for the `UpdateDatasetSchema` method.
message UpdateDatasetSchemaRequest {
  // Required. The schema to update. Its `name` field identifies the
  // resource to be updated.
  DatasetSchema dataset_schema = 1 [(google.api.field_behavior) = REQUIRED];

  // The update mask that applies to the resource.
  google.protobuf.FieldMask update_mask = 2;
}
// A range of pages present in a document.
message DocumentPageRange {
  // Number of the first page to be returned (one-based index).
  int32 start = 1;

  // Number of the last page to be returned (one-based index).
  int32 end = 2;
}
// Metadata describing a document.
message DocumentMetadata {
  // Identifier of the document.
  DocumentId document_id = 1;

  // Number of pages the document has.
  int32 page_count = 2;

  // Type of the dataset split the document belongs to.
  DatasetSplitType dataset_type = 3;

  // Labeling state of the document.
  DocumentLabelingState labeling_state = 5;

  // Display name of the document.
  string display_name = 6;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy