// File: google.cloud.documentai.v1beta3.document_schema.proto
// PROTO library for proto-google-cloud-document-ai-v1beta3
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta3;
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
// Metadata describing how a document should be summarized.
message SummaryOptions {
  // Selectable summary lengths.
  enum Length {
    // Default value.
    LENGTH_UNSPECIFIED = 0;

    // Short summary, one or two sentences long.
    BRIEF = 1;

    // Summary of about one paragraph.
    MODERATE = 2;

    // The longest summary option available.
    COMPREHENSIVE = 3;
  }

  // Selectable summary output formats.
  enum Format {
    // Default value.
    FORMAT_UNSPECIFIED = 0;

    // Output is formatted as paragraphs.
    PARAGRAPH = 1;

    // Output is formatted as bullets.
    BULLETS = 2;
  }

  // Desired length of the summary.
  Length length = 1;

  // Desired format of the summary.
  Format format = 2;
}
// Describes how the value of this field is extracted.
message FieldExtractionMetadata {
  // Configuration of the summary options.
  SummaryOptions summary_options = 2;
}
// Additional metadata attached to a property.
message PropertyMetadata {
  // When true, the property should be treated as "inactive".
  bool inactive = 3;

  // Metadata describing how the property's field value is extracted.
  FieldExtractionMetadata field_extraction_metadata = 9;
}
// Additional metadata attached to an entity type.
message EntityTypeMetadata {
  // When true, the entity type should be treated as inactive.
  bool inactive = 5;
}
// Schema describing the output a processor produces for a processed
// document.
message DocumentSchema {
  // Wraps a label of the corresponding model together with detailed
  // attributes and limitations, for entity-based processors. Several types
  // can be combined into a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values this enum values type can include.
      repeated string values = 1;
    }

    // A property that can be part of the entity type.
    message Property {
      // How often the entity type occurs in the document. Occurrences count
      // instances of an entity, not mentions of it. For example, a bank
      // statement might have only one `account_number`, even though that
      // account number can be mentioned in several places on the document;
      // `account_number` is then a `REQUIRED_ONCE` entity type. If instead a
      // bank statement is expected to contain the status of multiple
      // different accounts for the customers, the occurrence type is set to
      // `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Occurrence type was not specified.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // Zero or one instance of this entity type is present. The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type appears zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type appears exactly once. The same entity instance
        // may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type appears one or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // Name of the property. Subject to the same guidelines as the
      // EntityType name.
      string name = 1;

      // User-defined name for the property.
      string display_name = 6;

      // Reference to the value type of the property. This type follows the
      // same conventions as the `Entity.base_types` field.
      string value_type = 2;

      // Limits how many instances of an entity type appear in the document.
      OccurrenceType occurrence_type = 3;

      // Holds any additional metadata about the property.
      PropertyMetadata property_metadata = 5;
    }

    // Where the set of possible values for this entity type is defined.
    oneof value_source {
      // When specified, lists every possible value for this entity. Keep
      // this to no more than a handful of values. If there are more than 10
      // values, or the values could change frequently, use the
      // `EntityType.value_ontology` field instead and list all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User-defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and cannot
    // be a "Common Type". Naming conventions:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
    //   compatibility, internal infrastructure and tooling can handle any
    //   ASCII character.)
    // - The `/` is sometimes used to denote a property of a type, for
    //   example `line_item/amount`. This convention is deprecated, but is
    //   still honored for backward compatibility.
    string name = 1;

    // The entity type this type is derived from. For now, one and only one
    // should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;

    // Metadata for the entity type.
    EntityTypeMetadata entity_type_metadata = 11;
  }

  // Metadata governing schema-wide behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to a subdocument
    // (splitting). Otherwise it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, a given page can be covered by multiple `document`
    // annotations.
    bool document_allow_multiple_labels = 2;

    // If set, all nested entities must be prefixed with their parents.
    bool prefixed_naming_on_properties = 6;

    // If set, naming format validation is skipped for the schema, so the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` are not checked.
    bool skip_naming_validation = 7;
  }

  // Display name shown to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types contained in the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}
// Hosting-site footer (not part of the proto definition):
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy