// All Downloads are FREE. Search and download functionality uses the official Maven repository.
//
// google.cloud.documentai.v1beta3.document_schema.proto Maven / Gradle / Ivy
//
// There is a newer version: 0.71.0
// Show newest version
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Metadata for document summarization: the requested length of the summary
// and the format its output should take.
message SummaryOptions {
  // How long the summary should be.
  enum Length {
    // Length not specified. This is the zero value and therefore the default.
    LENGTH_UNSPECIFIED = 0;

    // A brief summary of one or two sentences.
    BRIEF = 1;

    // A paragraph-length summary.
    MODERATE = 2;

    // The longest option available.
    COMPREHENSIVE = 3;
  }

  // How the summary output should be laid out.
  enum Format {
    // Format not specified. This is the zero value and therefore the default.
    FORMAT_UNSPECIFIED = 0;

    // Format the output in paragraphs.
    PARAGRAPH = 1;

    // Format the output in bullets.
    BULLETS = 2;
  }

  // How long the summary should be.
  Length length = 1;

  // The format the summary should be in.
  Format format = 2;
}

// Metadata describing how a field value is extracted.
message FieldExtractionMetadata {
  // NOTE(review): the only field declared here uses number 2; number 1 is
  // unused in this file. Presumably it belongs to a field that is not
  // published here -- confirm before assigning it.

  // Summary options configuration; see `SummaryOptions`.
  SummaryOptions summary_options = 2;
}

// Metadata about a property.
message PropertyMetadata {
  // NOTE(review): field numbers here are non-contiguous (3, 9). Presumably
  // the gaps belong to fields that are not published in this file -- confirm
  // before assigning new numbers.

  // Whether the property should be considered "inactive".
  bool inactive = 3;

  // Field extraction metadata for the property; see
  // `FieldExtractionMetadata`.
  FieldExtractionMetadata field_extraction_metadata = 9;
}

// Metadata about an entity type.
message EntityTypeMetadata {
  // NOTE(review): the only field declared here uses number 5; numbers 1-4 are
  // unused in this file. Presumably they belong to fields that are not
  // published here -- confirm before assigning them.

  // Whether the entity type should be considered inactive.
  bool inactive = 5;
}

// The schema defines the output of a document processed by a processor. It
// carries a display name, a description, the entity types that make up the
// schema, and schema-wide metadata.
message DocumentSchema {
  // NOTE(review): several nested messages use non-contiguous field numbers
  // (for example, `EntityType` uses 1, 2, 6, 11, 13 and 14). Presumably the
  // gaps belong to fields that are not published in this file -- confirm
  // before assigning new numbers.

  // EntityType is the wrapper of a label of the corresponding model with
  // detailed attributes and limitations for entity-based processors. Multiple
  // types can also compose a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values that this enum values type can include.
      repeated string values = 1;
    }

    // Defines properties that can be part of the entity type.
    message Property {
      // Types of occurrences of the entity type in the document.  This
      // represents the number of instances, not mentions, of an entity.
      // For example, a bank statement might only have one
      // `account_number`, but this account number can be mentioned in several
      // places on the document.  In this case, the `account_number` is
      // considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      // expect a bank statement to contain the status of multiple different
      // accounts for the customers, the occurrence type is set to
      // `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Unspecified occurrence type. This is the zero value and therefore
        // the default.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // There will be zero or one instance of this entity type.  The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type will appear exactly once.  The same
        // entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type will appear one or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // The name of the property.  Follows the same guidelines as the
      // EntityType name.
      string name = 1;

      // User-defined name for the property.
      string display_name = 6;

      // A reference to the value type of the property.  This type is subject
      // to the same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Occurrence type limits the number of instances of an entity type that
      // appear in the document.
      OccurrenceType occurrence_type = 3;

      // Any additional metadata about the property can be added here.
      PropertyMetadata property_metadata = 5;
    }

    // NOTE(review): the `enum_values` comment below refers to
    // `EntityType.value_ontology`, but no such field is declared in this
    // message as shown here -- confirm that it exists upstream.

    // Source of the possible values for this entity type. As a oneof, at most
    // one member can be set; `enum_values` is the only member declared here.
    oneof value_source {
      // If specified, lists all the possible values for this entity.  This
      // should not be more than a handful of values.  If the number of values
      // is greater than 10 or could change frequently, use the
      // `EntityType.value_ontology` field and specify a list of all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User-defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and
    // cannot be a "Common Type".  The following naming conventions are used:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: `[a-z0-9_-]` (ASCII only).  (For backward
    //   compatibility, internal infrastructure and tooling can handle any
    //   ASCII character.)
    // - The `/` is sometimes used to denote a property of a type.  For example
    //   `line_item/amount`.  This convention is deprecated, but will still be
    //   honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from.  Although the field is
    // repeated, for now one and only one value should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;

    // Metadata for the entity type.
    EntityTypeMetadata entity_type_metadata = 11;
  }

  // Metadata for global schema behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to a subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, a given page can be covered by multiple `document`
    // annotations.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, naming format validation is skipped for the schema, so the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` will not be checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy