google.cloud.documentai.v1beta3.document.proto Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of proto-google-cloud-document-ai-v1beta3 Show documentation
Show all versions of proto-google-cloud-document-ai-v1beta3 Show documentation
PROTO library for proto-google-cloud-document-ai-v1beta3
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta3;
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta3/barcode.proto";
import "google/cloud/documentai/v1beta3/geometry.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/date.proto";
import "google/type/datetime.proto";
import "google/type/money.proto";
import "google/type/postal_address.proto";
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
// Document represents the canonical document resource in Document AI. It is an
// interchange format that provides insights into documents and allows for
// collaboration between users and Document AI to iterate and optimize for
// quality.
message Document {
// Identifies one shard of a sharded document. Large documents may be split
// into several document shards, and each shard carries this message to say
// which shard it is.
message ShardInfo {
  // Zero-based index of this shard.
  int64 shard_index = 1;

  // Number of shards in total.
  int64 shard_count = 2;

  // Index, within the global text of the overall document, of the first
  // character in
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  int64 text_offset = 3;
}
// Annotation carrying common text style attributes. CSS conventions are
// followed as closely as possible.
message Style {
  // A font size together with its unit.
  message FontSize {
    // Size of the font for the text.
    float size = 1;

    // Unit of the font size, using CSS naming (for example `in`, `px`, or
    // `pt`).
    string unit = 2;
  }

  // Text anchor that indexes into
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  TextAnchor text_anchor = 1;

  // Color of the text.
  google.type.Color color = 2;

  // Color of the text background.
  google.type.Color background_color = 3;

  // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
  // One of `normal`, `bold`, `bolder`, or `lighter`.
  string font_weight = 4;

  // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
  // One of `normal`, `italic`, or `oblique`.
  string text_style = 5;

  // [Text
  // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
  // Follows the CSS standard.
  string text_decoration = 6;

  // Size of the font.
  FontSize font_size = 7;

  // Font family, for example `Arial` or `Times New Roman`. See
  // https://www.w3schools.com/cssref/pr_font_font-family.asp
  string font_family = 8;
}
// A page in a [Document][google.cloud.documentai.v1beta3.Document].
message Page {
// The dimension of a page.
message Dimension {
  // Width of the page.
  float width = 1;

  // Height of the page.
  float height = 2;

  // Unit in which the dimension is expressed.
  string unit = 3;
}
// Contents of the image rendered for a page.
message Image {
  // The image as raw bytes.
  bytes content = 1;

  // [Media type (MIME
  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
  // in which the image is encoded.
  string mime_type = 2;

  // Image width, in pixels.
  int32 width = 3;

  // Image height, in pixels.
  int32 height = 4;
}
// A transformation matrix. Intended to be compatible with, and used
// together with, the OpenCV format for image manipulation.
message Matrix {
  // Row count of the matrix.
  int32 rows = 1;

  // Column count of the matrix.
  int32 cols = 2;

  // Encodes which data type the matrix uses. For example, 0 (CV_8U) is an
  // unsigned 8-bit image. The full list of OpenCV primitive data types is
  // available at
  // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
  int32 type = 3;

  // Data of the matrix.
  bytes data = 4;
}
// A visual element that describes one layout unit on a page.
message Layout {
  // The human reading orientation that was detected.
  enum Orientation {
    // No orientation specified.
    ORIENTATION_UNSPECIFIED = 0;

    // Aligned with page up.
    PAGE_UP = 1;

    // Aligned with page right. To read, turn the head 90 degrees clockwise
    // from upright.
    PAGE_RIGHT = 2;

    // Aligned with page down. To read, turn the head 180 degrees from
    // upright.
    PAGE_DOWN = 3;

    // Aligned with page left. To read, turn the head 90 degrees
    // counterclockwise from upright.
    PAGE_LEFT = 4;
  }

  // Text anchor that indexes into
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  TextAnchor text_anchor = 1;

  // Confidence of this
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout], taken in
  // the context of the object the layout is for. Depending on that context
  // the confidence may apply to a single token, a table, a visual element,
  // and so on. Range `[0, 1]`.
  float confidence = 2;

  // Bounding polygon of the
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
  BoundingPoly bounding_poly = 3;

  // Orientation detected for the
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
  Orientation orientation = 4;
}
// A set of lines, collected into paragraphs, that share a common
// line-spacing and orientation.
message Block {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [Block][google.cloud.documentai.v1beta3.Document.Page.Block].
  Layout layout = 1;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 2;

  // History of this annotation. Deprecated.
  Provenance provenance = 3 [deprecated = true];
}
// A group of lines that a human reader would perceive as one paragraph.
message Paragraph {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph].
  Layout layout = 1;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 2;

  // History of this annotation. Deprecated.
  Provenance provenance = 3 [deprecated = true];
}
// A group of tokens that a human reader would perceive as one line. A line
// does not cross column boundaries and may be horizontal, vertical, etc.
message Line {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [Line][google.cloud.documentai.v1beta3.Document.Page.Line].
  Layout layout = 1;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 2;

  // History of this annotation. Deprecated.
  Provenance provenance = 3 [deprecated = true];
}
// A token detected on the page.
message Token {
  // The break detected at the end of a
  // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
  message DetectedBreak {
    // Kinds of break that can be found.
    enum Type {
      // No break type specified.
      TYPE_UNSPECIFIED = 0;

      // One whitespace.
      SPACE = 1;

      // A whitespace that is wider.
      WIDE_SPACE = 2;

      // A hyphen signalling that a token was split across lines.
      HYPHEN = 3;
    }

    // Type of the detected break.
    Type type = 1;
  }

  // Font and other style attributes of the text.
  message StyleInfo {
    // Size of the font, in points (`1` point is `¹⁄₇₂` inches).
    int32 font_size = 1;

    // Size of the font in pixels, equal to _unrounded
    // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_
    // * _resolution_ ÷ `72.0`.
    double pixel_font_size = 2;

    // Spacing between letters, in points.
    double letter_spacing = 3;

    // Name or style of the font.
    string font_type = 4;

    // True when the text is bold (equivalent to
    // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight]
    // being at least `700`).
    bool bold = 5;

    // True when the text is italic.
    bool italic = 6;

    // True when the text is underlined.
    bool underlined = 7;

    // True when the text is strikethrough.
    bool strikeout = 8;

    // True when the text is a subscript.
    bool subscript = 9;

    // True when the text is a superscript.
    bool superscript = 10;

    // True when the text is in small caps.
    bool smallcaps = 11;

    // TrueType weight, on a scale from `100` (thin) to `1000`
    // (ultra-heavy). `400` is normal and `700` is bold.
    int32 font_weight = 12;

    // True when the text is handwritten.
    bool handwritten = 13;

    // Text color.
    google.type.Color text_color = 14;

    // Background color.
    google.type.Color background_color = 15;
  }

  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
  Layout layout = 1;

  // The break detected at the end of a
  // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
  DetectedBreak detected_break = 2;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 3;

  // History of this annotation. Deprecated.
  Provenance provenance = 4 [deprecated = true];

  // Style attributes of the text.
  StyleInfo style_info = 5;
}
// A symbol detected on the page.
message Symbol {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
  Layout layout = 1;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 2;
}
// A non-text visual element detected on the page, for example a checkbox or
// a signature.
message VisualElement {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
  Layout layout = 1;

  // The type of this
  // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
  string type = 2;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 3;
}
// Representation of a table, similar to the HTML table structure.
message Table {
  // One row of table cells.
  message TableRow {
    // The cells this row is made of.
    repeated TableCell cells = 1;
  }

  // Representation of a single cell inside the table.
  message TableCell {
    // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
    // [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell].
    Layout layout = 1;

    // Number of rows spanned by this cell.
    int32 row_span = 2;

    // Number of columns spanned by this cell.
    int32 col_span = 3;

    // Detected languages, each paired with a confidence.
    repeated DetectedLanguage detected_languages = 4;
  }

  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [Table][google.cloud.documentai.v1beta3.Document.Page.Table].
  Layout layout = 1;

  // The table's header rows.
  repeated TableRow header_rows = 2;

  // The table's body rows.
  repeated TableRow body_rows = 3;

  // Detected languages, each paired with a confidence.
  repeated DetectedLanguage detected_languages = 4;

  // History of this table. Deprecated.
  Provenance provenance = 5 [deprecated = true];
}
// A form field that was detected on the page.
message FormField {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
  // name, for example `Address`, `Email`, `Grand total`, or `Phone number`.
  Layout field_name = 1;

  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
  // value.
  Layout field_value = 2;

  // Languages detected for the name, each paired with a confidence.
  repeated DetectedLanguage name_detected_languages = 3;

  // Languages detected for the value, each paired with a confidence.
  repeated DetectedLanguage value_detected_languages = 4;

  // Type of the value when it is non-textual. The currently valid values
  // are:
  //
  // - blank (this indicates the `field_value` is normal text)
  // - `unfilled_checkbox`
  // - `filled_checkbox`
  string value_type = 5;

  // Created for the Labeling UI to export key text. Holds the correction
  // when the text identified by `field_name.text_anchor` was corrected.
  string corrected_key_text = 6;

  // Created for the Labeling UI to export value text. Holds the correction
  // when the text identified by `field_value.text_anchor` was corrected.
  string corrected_value_text = 7;

  // History of this annotation.
  Provenance provenance = 8;
}
// A barcode detected on the page.
message DetectedBarcode {
  // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] of the
  // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
  Layout layout = 1;

  // Detailed barcode information for the
  // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
  Barcode barcode = 2;
}
// A language detected for a structural component.
message DetectedLanguage {
  // [BCP-47 language
  // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier)
  // of the language, for example `en-US` or `sr-Latn`.
  string language_code = 1;

  // Confidence of the detected language. Range `[0, 1]`.
  float confidence = 2;
}
// Quality scores of the page image.
message ImageQualityScores {
  // A defect detected in the image quality.
  message DetectedDefect {
    // Name of the type of defect. The supported values are:
    //
    // - `quality/defect_blurry`
    // - `quality/defect_noisy`
    // - `quality/defect_dark`
    // - `quality/defect_faint`
    // - `quality/defect_text_too_small`
    // - `quality/defect_document_cutoff`
    // - `quality/defect_text_cutoff`
    // - `quality/defect_glare`
    string type = 1;

    // Confidence of the detected defect. Range `[0, 1]`, where `1` means
    // strong confidence that the defect exists.
    float confidence = 2;
  }

  // Overall quality score. Range `[0, 1]`, where `1` is perfect quality.
  float quality_score = 1;

  // The defects that were detected.
  repeated DetectedDefect detected_defects = 2;
}
// 1-based index of the current
// [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent
// [Document][google.cloud.documentai.v1beta3.Document]. Useful when a page
// is taken out of a [Document][google.cloud.documentai.v1beta3.Document]
// for individual processing.
int32 page_number = 1;
// Rendered image for this page. This image is preprocessed to remove any
// skew, rotation, and distortions such that the annotation bounding boxes
// can be upright and axis-aligned.
Image image = 13;
// Transformation matrices that were applied to the original document image
// to produce
// [Page.image][google.cloud.documentai.v1beta3.Document.Page.image].
repeated Matrix transforms = 14;
// Physical dimension of the page.
Dimension dimension = 2;
// [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
// page.
Layout layout = 3;
// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 4;
// A list of visually detected text blocks on the page.
// A block has a set of lines (collected into paragraphs) that have a common
// line-spacing and orientation.
repeated Block blocks = 5;
// A list of visually detected text paragraphs on the page.
// A paragraph is a collection of lines that a human would perceive as a
// paragraph.
repeated Paragraph paragraphs = 6;
// A list of visually detected text lines on the page.
// A line is a collection of tokens that a human would perceive as a line.
repeated Line lines = 7;
// A list of visually detected tokens on the page.
repeated Token tokens = 8;
// A list of detected non-text visual elements on the page, for example
// checkboxes and signatures.
repeated VisualElement visual_elements = 9;
// A list of visually detected tables on the page.
repeated Table tables = 10;
// A list of visually detected form fields on the page.
repeated FormField form_fields = 11;
// A list of visually detected symbols on the page.
repeated Symbol symbols = 12;
// A list of detected barcodes.
repeated DetectedBarcode detected_barcodes = 15;
// Image quality scores.
ImageQualityScores image_quality_scores = 17;
// The history of this page. Deprecated.
Provenance provenance = 16 [deprecated = true];
}
// An entity that could be a phrase in the text or a property that belongs to
// the document. It is a known entity type, such as a person, an organization,
// or location.
message Entity {
  // Parsed and normalized entity value.
  message NormalizedValue {
    // An optional structured entity value.
    // Must match entity type defined in schema if
    // known. If this field is present, the `text` field could also be
    // populated.
    oneof structured_value {
      // Money value. See also:
      // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
      google.type.Money money_value = 2;

      // Date value. Includes year, month, day. See also:
      // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
      google.type.Date date_value = 3;

      // DateTime value. Includes date, time, and timezone. See also:
      // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
      google.type.DateTime datetime_value = 4;

      // Postal address. See also:
      // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
      google.type.PostalAddress address_value = 5;

      // Boolean value. Can be used for entities with binary values, or for
      // checkboxes.
      bool boolean_value = 6;

      // Integer value.
      int32 integer_value = 7;

      // Float value.
      float float_value = 8;
    }

    // Optional. An optional field to store a normalized string.
    // For some entity types, one of respective `structured_value` fields may
    // also be populated. Also not all the types of `structured_value` will be
    // normalized. For example, some processors may not generate `float`
    // or `integer` normalized text by default.
    //
    // Below are sample formats mapped to structured values.
    //
    // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
    // - Date type (`date_value`) is in the ISO 8601 text format.
    // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
    string text = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Provenance of the entity.
  // Text anchor indexing into the
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Entity type from a schema e.g. `Address`.
  string type = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
  string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Deprecated. Use `id` field instead.
  //
  // Carries the `deprecated` option so that generated code flags remaining
  // uses; the field number and type are unchanged, so the wire format is
  // unaffected.
  string mention_id = 4
      [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Optional. Confidence of detected Schema entity. Range `[0, 1]`.
  float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Represents the provenance of this entity wrt. the location on
  // the page where it was found.
  PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Canonical id. This will be a unique value in the entity list
  // for this document.
  string id = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Normalized entity value. Absent if the extracted value could
  // not be converted or the type (e.g. address) is not supported for certain
  // parsers. This field is also only populated for certain supported document
  // types.
  NormalizedValue normalized_value = 9
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Entities can be nested to form a hierarchical data structure
  // representing the content in the document.
  repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The history of this annotation.
  Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Whether the entity will be redacted for de-identification
  // purposes.
  bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
}
// A relationship between
// [Entities][google.cloud.documentai.v1beta3.Document.Entity].
message EntityRelation {
  // Id of the subject entity.
  string subject_id = 1;

  // Id of the object entity.
  string object_id = 2;

  // Description of the relationship.
  string relation = 3;
}
// A reference to text that indexes into
// [Document.text][google.cloud.documentai.v1beta3.Document.text].
message TextAnchor {
  // One segment of text in
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. The
  // indices may be out of bounds, which indicates that the text extends into
  // another document shard of a large sharded document. See
  // [ShardInfo.text_offset][google.cloud.documentai.v1beta3.Document.ShardInfo.text_offset]
  message TextSegment {
    // Start UTF-8 char index of the
    // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
    // in
    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
    int64 start_index = 1;

    // Half-open end UTF-8 char index of the
    // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
    // in
    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
    int64 end_index = 2;
  }

  // Text segments taken from
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  repeated TextSegment text_segments = 1;

  // Content of the text span, included so that users do not have to look it
  // up in the text_segments. Always populated for formFields.
  string content = 2;
}
// References the visual context of the entity in
// [Document.pages][google.cloud.documentai.v1beta3.Document.pages]. A page
// anchor can be cross-page, can consist of multiple bounding polygons, and
// can optionally reference specific layout element types.
message PageAnchor {
  // A weak reference to a page element within a document.
  message PageRef {
    // Type of the layout being referenced.
    enum LayoutType {
      // Layout unspecified.
      LAYOUT_TYPE_UNSPECIFIED = 0;

      // References a
      // [Page.blocks][google.cloud.documentai.v1beta3.Document.Page.blocks]
      // element.
      BLOCK = 1;

      // References a
      // [Page.paragraphs][google.cloud.documentai.v1beta3.Document.Page.paragraphs]
      // element.
      PARAGRAPH = 2;

      // References a
      // [Page.lines][google.cloud.documentai.v1beta3.Document.Page.lines]
      // element.
      LINE = 3;

      // References a
      // [Page.tokens][google.cloud.documentai.v1beta3.Document.Page.tokens]
      // element.
      TOKEN = 4;

      // References a
      // [Page.visual_elements][google.cloud.documentai.v1beta3.Document.Page.visual_elements]
      // element.
      VISUAL_ELEMENT = 5;

      // References a
      // [Page.tables][google.cloud.documentai.v1beta3.Document.Page.tables]
      // element.
      TABLE = 6;

      // References a
      // [Page.form_fields][google.cloud.documentai.v1beta3.Document.Page.form_fields]
      // element.
      FORM_FIELD = 7;
    }

    // Required. Index into the
    // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]
    // element, for example using
    // `[Document.pages][page_refs.page]` to locate the related page element.
    // This field is skipped when its value is the default `0`. See
    // https://developers.google.com/protocol-buffers/docs/proto3#json.
    int64 page = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. Type of the layout element being referenced, if any.
    LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Deprecated. Use
    // [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly]
    // instead.
    string layout_id = 3
        [deprecated = true, (google.api.field_behavior) = OPTIONAL];

    // Optional. Identifies the bounding polygon of a layout element on the
    // page. If `layout_type` is set, the bounding polygon must be exactly
    // the same as that of the layout element it refers to.
    BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Confidence of the detected page element, if applicable.
    // Range `[0, 1]`.
    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // One or more references to visual page elements.
  repeated PageRef page_refs = 1;
}
// Identifies provenance relationships between annotations in different
// revisions.
message Provenance {
  // The parent element that the current element is based on. Used for
  // referencing/aligning, removal and replacement operations.
  message Parent {
    // Index into the current revision's parent_ids list.
    int32 revision = 1;

    // Index of the parent item in the corresponding item list (eg. list of
    // entities, properties within entities, etc.) in the parent revision.
    int32 index = 3;

    // Id of the parent provenance. Deprecated.
    int32 id = 2 [deprecated = true];
  }

  // The explicit operation a processor or agent performs on existing
  // elements, if any.
  enum OperationType {
    // Operation type unspecified. When no operation is specified, a
    // provenance entry is simply used to match against a `parent`.
    OPERATION_TYPE_UNSPECIFIED = 0;

    // Add an element.
    ADD = 1;

    // Remove an element identified by `parent`.
    REMOVE = 2;

    // Updates any fields within the given provenance scope of the message.
    // It overwrites the fields rather than replacing them. Use this when
    // you want to update a field value of an entity without also updating
    // all the child properties.
    UPDATE = 7;

    // Currently unused. Replace an element identified by `parent`.
    REPLACE = 3;

    // Deprecated. Request human review for the element identified by
    // `parent`.
    EVAL_REQUESTED = 4 [deprecated = true];

    // Deprecated. Element is reviewed and approved at human review;
    // confidence will be set to 1.0.
    EVAL_APPROVED = 5 [deprecated = true];

    // Deprecated. Element is skipped in the validation process.
    EVAL_SKIPPED = 6 [deprecated = true];
  }

  // Index of the revision that produced this element. Deprecated.
  int32 revision = 1 [deprecated = true];

  // Id of this operation. Needs to be unique within the scope of the
  // revision. Deprecated.
  int32 id = 2 [deprecated = true];

  // References to the original elements that are replaced.
  repeated Parent parents = 3;

  // Type of the provenance operation.
  OperationType type = 4;
}
// Holds past or forward revisions of this document.
message Revision {
  // Human Review information for the document.
  message HumanReview {
    // State of the human review, e.g. `requested`, `succeeded`, `rejected`.
    string state = 1;

    // Message with more details about the current state of processing, for
    // example the rejection reason when the state is `rejected`.
    string state_message = 2;
  }

  // Who or what made the change.
  oneof source {
    // When the change was made by a person, the name or id of that person.
    string agent = 4;

    // When the annotation was made by a processor, the resource name that
    // identifies the processor.
    string processor = 5;
  }

  // Id of the revision, internally generated by doc proto storage. Unique
  // within the context of the document.
  string id = 1;

  // The revisions that this revision is based on. This can include one or
  // more parents (when documents are merged). This field represents the
  // index into the `revisions` field. Deprecated.
  repeated int32 parent = 2 [deprecated = true];

  // The revisions that this revision is based on. Must include all the ids
  // that have anything to do with this revision - eg. there are
  // `provenance.parent.revision` fields that index into this field.
  repeated string parent_ids = 7;

  // Time at which the revision was created, internally generated by doc
  // proto storage at the time of create.
  google.protobuf.Timestamp create_time = 3;

  // Human Review information for this revision.
  HumanReview human_review = 6;
}
// Describes a text change, also known as an OCR correction.
message TextChange {
  // Provenance of the correction.
  // Text anchor that indexes into
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. Only a
  // single `TextAnchor.text_segments` element is allowed. When the start
  // and end index of the text segment are equal, the text change is
  // inserted before that index.
  TextAnchor text_anchor = 1;

  // Replacement for the text identified in the `text_anchor`.
  string changed_text = 2;

  // History of this annotation. Deprecated.
  repeated Provenance provenance = 3 [deprecated = true];
}
// Original source document from the user. At most one of the members below
// is set, since they share a `oneof`.
oneof source {
// Optional. Currently supports Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported.
// For more information, refer to [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Inline document content, represented as a stream of bytes.
// Note: As with all `bytes` fields, protocol buffers use a pure binary
// representation, whereas JSON representations use base64.
bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
}
// An IANA published [media type (MIME
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
string mime_type = 3;
// Optional. UTF-8 encoded text in reading order from the document.
string text = 4 [(google.api.field_behavior) = OPTIONAL];
// Styles for the
// [Document.text][google.cloud.documentai.v1beta3.Document.text].
// Deprecated.
repeated Style text_styles = 5 [deprecated = true];
// Visual page layout for the
// [Document][google.cloud.documentai.v1beta3.Document].
repeated Page pages = 6;
// A list of entities detected on
// [Document.text][google.cloud.documentai.v1beta3.Document.text]. For
// document shards, entities in this list may cross shard boundaries.
repeated Entity entities = 7;
// Placeholder. Relationship among
// [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
repeated EntityRelation entity_relations = 8;
// Placeholder. A list of text corrections made to
// [Document.text][google.cloud.documentai.v1beta3.Document.text]. This is
// usually used for annotating corrections to OCR mistakes. Text changes for
// a given revision may not overlap with each other.
repeated TextChange text_changes = 14;
// Information about the sharding if this document is a sharded part of a
// larger document. If the document is not sharded, this message is not
// specified.
ShardInfo shard_info = 9;
// Any error that occurred while processing this document.
google.rpc.Status error = 10;
// Placeholder. Revision history of this document.
repeated Revision revisions = 13;
}
// A reference that specifies which revision of the document to read.
message RevisionRef {
  // A set of predefined revision cases.
  enum RevisionCase {
    // Unspecified case; falls back to reading the `LATEST_HUMAN_REVIEW`.
    REVISION_CASE_UNSPECIFIED = 0;

    // The latest revision that was made by a human.
    LATEST_HUMAN_REVIEW = 1;

    // The latest revision according to timestamp.
    LATEST_TIMESTAMP = 2;

    // The first (OCR) revision.
    BASE_OCR_REVISION = 3;
  }

  // Selects the revision to read.
  oneof source {
    // Read the revision selected by the predefined case.
    RevisionCase revision_case = 1;

    // Read the revision that has the given id.
    string revision_id = 2;

    // Read the revision generated by the processor version.
    // The format takes the full resource name of processor version.
    // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
    string latest_processor_version = 3;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy