
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.visionai.v1;
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
option csharp_namespace = "Google.Cloud.VisionAI.V1";
option go_package = "cloud.google.com/go/visionai/apiv1/visionaipb;visionaipb";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.cloud.visionai.v1";
option php_namespace = "Google\\Cloud\\VisionAI\\V1";
option ruby_package = "Google::Cloud::VisionAI::V1";
// Enum describing all possible types of a stream annotation.
enum StreamAnnotationType {
// Type UNSPECIFIED.
STREAM_ANNOTATION_TYPE_UNSPECIFIED = 0;
// active_zone annotation defines a polygon on top of the content from an
// image/video based stream. Subsequent processing will only focus on the
// content inside the active zone.
STREAM_ANNOTATION_TYPE_ACTIVE_ZONE = 1;
// crossing_line annotation defines a polyline on top of the content from an
// image/video based Vision AI stream. Events happening across the line will
// be captured. For example, the counts of people who go across the line
// in the Occupancy Analytics Processor.
STREAM_ANNOTATION_TYPE_CROSSING_LINE = 2;
}
// Output format for Personal Protective Equipment Detection Operator.
message PersonalProtectiveEquipmentDetectionOutput {
// The entity info for annotations from the person detection prediction
// result.
message PersonEntity {
// Entity id.
int64 person_entity_id = 1;
}
// The entity info for annotations from the PPE detection prediction result.
message PPEEntity {
// Label id.
int64 ppe_label_id = 1;
// Human readable string of the label (Examples: helmet, glove, mask).
string ppe_label_string = 2;
// Human readable string of the super category label (Examples: head_cover,
// hands_cover, face_cover).
string ppe_supercategory_label_string = 3;
// Entity id.
int64 ppe_entity_id = 4;
}
// Bounding Box in the normalized coordinates.
message NormalizedBoundingBox {
// Min in x coordinate.
float xmin = 1;
// Min in y coordinate.
float ymin = 2;
// Width of the bounding box.
float width = 3;
// Height of the bounding box.
float height = 4;
}
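// As an illustrative example (values assumed, not from any real stream):
// with a 1920x1080 frame, a NormalizedBoundingBox of xmin=0.25, ymin=0.5,
// width=0.1, height=0.2 covers pixels starting at (0.25*1920, 0.5*1080) =
// (480, 540) and spanning (0.1*1920, 0.2*1080) = (192, 216) pixels.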
// PersonIdentified box contains the location and the entity info of the
// person.
message PersonIdentifiedBox {
// A unique id for this box.
int64 box_id = 1;
// Bounding Box in the normalized coordinates.
NormalizedBoundingBox normalized_bounding_box = 2;
// Confidence score associated with this box.
float confidence_score = 3;
// Person entity info.
PersonEntity person_entity = 4;
}
// PPEIdentified box contains the location and the entity info of the PPE.
message PPEIdentifiedBox {
// A unique id for this box.
int64 box_id = 1;
// Bounding Box in the normalized coordinates.
NormalizedBoundingBox normalized_bounding_box = 2;
// Confidence score associated with this box.
float confidence_score = 3;
// PPE entity info.
PPEEntity ppe_entity = 4;
}
// DetectedPerson contains the detected person, their associated
// PPE, and the protection information.
message DetectedPerson {
// The id of the detected person.
int64 person_id = 1;
// The identified box of the detected person.
PersonIdentifiedBox detected_person_identified_box = 2;
// The identified boxes of the PPE associated with the detected person.
repeated PPEIdentifiedBox detected_ppe_identified_boxes = 3;
// Coverage scores for each body part follow.
// Coverage score for face.
optional float face_coverage_score = 4;
// Coverage score for eyes.
optional float eyes_coverage_score = 5;
// Coverage score for head.
optional float head_coverage_score = 6;
// Coverage score for hands.
optional float hands_coverage_score = 7;
// Coverage score for body.
optional float body_coverage_score = 8;
// Coverage score for feet.
optional float feet_coverage_score = 9;
}
// Current timestamp.
google.protobuf.Timestamp current_time = 1;
// A list of DetectedPersons.
repeated DetectedPerson detected_persons = 2;
}
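// A minimal sketch of a PersonalProtectiveEquipmentDetectionOutput in text
// format; all ids, labels, and scores below are invented for illustration:
//
//   current_time { seconds: 1700000000 }
//   detected_persons {
//     person_id: 1
//     detected_person_identified_box {
//       box_id: 10
//       normalized_bounding_box { xmin: 0.1 ymin: 0.1 width: 0.2 height: 0.6 }
//       confidence_score: 0.9
//       person_entity { person_entity_id: 1 }
//     }
//     detected_ppe_identified_boxes {
//       box_id: 11
//       normalized_bounding_box { xmin: 0.12 ymin: 0.1 width: 0.08 height: 0.1 }
//       confidence_score: 0.8
//       ppe_entity {
//         ppe_label_id: 2
//         ppe_label_string: "helmet"
//         ppe_supercategory_label_string: "head_cover"
//         ppe_entity_id: 5
//       }
//     }
//     head_coverage_score: 0.95
//   }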
// Prediction output format for Generic Object Detection.
message ObjectDetectionPredictionResult {
// The entity info for annotations from the object detection prediction
// result.
message Entity {
// Label id.
int64 label_id = 1;
// Human readable string of the label.
string label_string = 2;
}
// Identified box contains location and the entity of the object.
message IdentifiedBox {
// Bounding Box in the normalized coordinates.
message NormalizedBoundingBox {
// Min in x coordinate.
float xmin = 1;
// Min in y coordinate.
float ymin = 2;
// Width of the bounding box.
float width = 3;
// Height of the bounding box.
float height = 4;
}
// A unique id for this box.
int64 box_id = 1;
// Bounding Box in the normalized coordinates.
NormalizedBoundingBox normalized_bounding_box = 2;
// Confidence score associated with this box.
float confidence_score = 3;
// Entity of this box.
Entity entity = 4;
}
// Current timestamp.
google.protobuf.Timestamp current_time = 1;
// A list of identified boxes.
repeated IdentifiedBox identified_boxes = 2;
}
// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
// The resource IDs of the AnnotationSpecs that had been identified, ordered
// by confidence score in descending order. These are the id segments, not
// the full resource names.
repeated int64 ids = 1;
// The display names of the AnnotationSpecs that had been identified, order
// matches the IDs.
repeated string display_names = 2;
// The Model's confidences in the correctness of the predicted IDs; a higher
// value means higher confidence. Order matches the IDs.
repeated float confidences = 3;
// Bounding boxes, i.e. the rectangles over the image, that pinpoint
// the found AnnotationSpecs. Given in the order that matches the IDs. Each
// bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
// `yMax`, which represent the extremal coordinates of the box. They are
// relative to the image size, and the point 0,0 is in the top left
// of the image.
repeated google.protobuf.ListValue bboxes = 4;
}
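// For illustration (values assumed): a single detection of AnnotationSpec id
// 42 named "cat" with confidence 0.8 in the top-left quadrant would carry
// ids: [42], display_names: ["cat"], confidences: [0.8], and
// bboxes: [[0.0, 0.5, 0.0, 0.5]] (xMin, xMax, yMin, yMax).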
// Prediction output format for Image and Text Classification.
message ClassificationPredictionResult {
// The resource IDs of the AnnotationSpecs that had been identified.
repeated int64 ids = 1;
// The display names of the AnnotationSpecs that had been identified, order
// matches the IDs.
repeated string display_names = 2;
// The Model's confidences in the correctness of the predicted IDs; a higher
// value means higher confidence. Order matches the IDs.
repeated float confidences = 3;
}
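// The three fields above are parallel arrays. As an assumed example,
// ids: [7, 3], display_names: ["dog", "cat"], confidences: [0.9, 0.4] means
// AnnotationSpec 7 ("dog") was predicted with confidence 0.9 and
// AnnotationSpec 3 ("cat") with confidence 0.4.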
// Prediction output format for Image Segmentation.
message ImageSegmentationPredictionResult {
// A PNG image where each pixel in the mask represents the category to which
// the pixel in the original image was predicted to belong. The size of
// this image will be the same as the original image. The mapping between the
// AnnotationSpec and the color can be found in the model's metadata. The
// model will choose the most likely category and if none of the categories
// reach the confidence threshold, the pixel will be marked as background.
string category_mask = 1;
// A one-channel image encoded as an 8-bit lossless PNG. The size of the
// image will be the same as the original image. For a specific pixel, a
// darker color means less confidence in the correctness of the category in
// the categoryMask for the corresponding pixel. Black means no confidence
// and white means complete confidence.
string confidence_mask = 2;
}
// Prediction output format for Video Action Recognition.
message VideoActionRecognitionPredictionResult {
// Each IdentifiedAction is one particular identification of an action
// specified with the AnnotationSpec id, display_name and the associated
// confidence score.
message IdentifiedAction {
// The resource ID of the AnnotationSpec that had been identified.
string id = 1;
// The display name of the AnnotationSpec that had been identified.
string display_name = 2;
// The Model's confidence in the correctness of this identification; a
// higher value means higher confidence.
float confidence = 3;
}
// The beginning, inclusive, of the video's time segment in which the
// actions have been identified.
google.protobuf.Timestamp segment_start_time = 1;
// The end, inclusive, of the video's time segment in which the actions have
// been identified. Particularly, if the end is the same as the start, it
// means the identification happens on a specific video frame.
google.protobuf.Timestamp segment_end_time = 2;
// All of the actions identified in the time range.
repeated IdentifiedAction actions = 3;
}
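// A sketch of a frame-level VideoActionRecognitionPredictionResult in text
// format (the id, name, and score are assumed for illustration); start and
// end times being equal marks a single video frame:
//
//   segment_start_time { seconds: 100 }
//   segment_end_time { seconds: 100 }
//   actions { id: "123" display_name: "running" confidence: 0.7 }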
// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
// Bounding box for the detected object, i.e. the rectangle over the video
// frame pinpointing the found AnnotationSpec. The coordinates are relative
// to the frame size, and the point 0,0 is in the top left of the frame.
message BoundingBox {
// The leftmost coordinate of the bounding box.
float x_min = 1;
// The rightmost coordinate of the bounding box.
float x_max = 2;
// The topmost coordinate of the bounding box.
float y_min = 3;
// The bottommost coordinate of the bounding box.
float y_max = 4;
}
// Each DetectedObject is one particular identification of an object
// specified with the AnnotationSpec id and display_name, the bounding box,
// the associated confidence score and the corresponding track_id.
message DetectedObject {
// The resource ID of the AnnotationSpec that had been identified.
string id = 1;
// The display name of the AnnotationSpec that had been identified.
string display_name = 2;
// Bounding box.
BoundingBox bounding_box = 3;
// The Model's confidence in the correctness of this identification; a
// higher value means higher confidence.
float confidence = 4;
// The same object may be identified on multiple frames which are typically
// adjacent. The set of frames where a particular object has been detected
// forms a track. This track_id can be used to trace down all frames for a
// detected object.
int64 track_id = 5;
}
// The beginning, inclusive, of the video's time segment in which the
// current identifications happen.
google.protobuf.Timestamp segment_start_time = 1;
// The end, inclusive, of the video's time segment in which the current
// identifications happen. Particularly, if the end is the same as the start,
// it means the identifications happen on a specific video frame.
google.protobuf.Timestamp segment_end_time = 2;
// All of the objects detected in the specified time range.
repeated DetectedObject objects = 3;
}
// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
// Each IdentifiedClassification is one particular identification of a
// classification specified with the AnnotationSpec id and display_name,
// and the associated confidence score.
message IdentifiedClassification {
// The resource ID of the AnnotationSpec that had been identified.
string id = 1;
// The display name of the AnnotationSpec that had been identified.
string display_name = 2;
// The Model's confidence in the correctness of this identification; a
// higher value means higher confidence.
float confidence = 3;
}
// The beginning, inclusive, of the video's time segment in which the
// classifications have been identified.
google.protobuf.Timestamp segment_start_time = 1;
// The end, inclusive, of the video's time segment in which the
// classifications have been identified. Particularly, if the end is the same
// as the start, it means the identification happens on a specific video
// frame.
google.protobuf.Timestamp segment_end_time = 2;
// All of the classifications identified in the time range.
repeated IdentifiedClassification classifications = 3;
}
// The prediction result proto for occupancy counting.
message OccupancyCountingPredictionResult {
// The entity info for annotations from the occupancy counting operator.
message Entity {
// Label id.
int64 label_id = 1;
// Human readable string of the label.
string label_string = 2;
}
// Identified box contains location and the entity of the object.
message IdentifiedBox {
// Bounding Box in the normalized coordinates.
message NormalizedBoundingBox {
// Min in x coordinate.
float xmin = 1;
// Min in y coordinate.
float ymin = 2;
// Width of the bounding box.
float width = 3;
// Height of the bounding box.
float height = 4;
}
// A unique id for this box.
int64 box_id = 1;
// Bounding Box in the normalized coordinates.
NormalizedBoundingBox normalized_bounding_box = 2;
// Confidence score associated with this box.
float score = 3;
// Entity of this box.
Entity entity = 4;
// A unique id to identify a track. It should be consistent across frames.
// It only exists if tracking is enabled.
int64 track_id = 5;
}
// The statistics info for annotations from the occupancy counting operator.
message Stats {
// The object info and instant count for annotations from the occupancy
// counting operator.
message ObjectCount {
// Entity of this object.
Entity entity = 1;
// Count of the object.
int32 count = 2;
}
// The object info and accumulated count for annotations from the occupancy
// counting operator.
message AccumulatedObjectCount {
// The start time of the accumulated count.
google.protobuf.Timestamp start_time = 1;
// The object count for the accumulated count.
ObjectCount object_count = 2;
}
// Message for Crossing line count.
message CrossingLineCount {
// Line annotation from the user.
StreamAnnotation annotation = 1;
// The direction that follows the right-hand rule.
repeated ObjectCount positive_direction_counts = 2;
// The direction that is opposite to the right-hand rule.
repeated ObjectCount negative_direction_counts = 3;
// The accumulated positive count.
repeated AccumulatedObjectCount accumulated_positive_direction_counts = 4;
// The accumulated negative count.
repeated AccumulatedObjectCount accumulated_negative_direction_counts = 5;
}
// Message for the active zone count.
message ActiveZoneCount {
// Active zone annotation from the user.
StreamAnnotation annotation = 1;
// Counts in the zone.
repeated ObjectCount counts = 2;
}
// Counts of the full frame.
repeated ObjectCount full_frame_count = 1;
// Crossing line counts.
repeated CrossingLineCount crossing_line_counts = 2;
// Active zone counts.
repeated ActiveZoneCount active_zone_counts = 3;
}
// The track info for annotations from the occupancy counting operator.
message TrackInfo {
// A unique id to identify a track. It should be consistent across frames.
string track_id = 1;
// Start timestamp of this track.
google.protobuf.Timestamp start_time = 2;
}
// The dwell time info for annotations from the occupancy counting operator.
message DwellTimeInfo {
// A unique id to identify a track. It should be consistent across frames.
string track_id = 1;
// The unique id for the zone in which the object is dwelling/waiting.
string zone_id = 2;
// The beginning time when a dwelling object has been identified in a zone.
google.protobuf.Timestamp dwell_start_time = 3;
// The end time when a dwelling object has exited a zone.
google.protobuf.Timestamp dwell_end_time = 4;
}
// Current timestamp.
google.protobuf.Timestamp current_time = 1;
// A list of identified boxes.
repeated IdentifiedBox identified_boxes = 2;
// Detection statistics.
Stats stats = 3;
// Track related information. All the tracks that are live at this timestamp.
// It only exists if tracking is enabled.
repeated TrackInfo track_info = 4;
// Dwell time related information. All the tracks that are live in a given
// zone, with their dwell start and end timestamps.
repeated DwellTimeInfo dwell_time_info = 5;
// The presentation timestamp of the frame.
optional int64 pts = 6;
}
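// A minimal sketch of an OccupancyCountingPredictionResult with one tracked
// person in the full frame (all values assumed for illustration):
//
//   current_time { seconds: 1700000000 }
//   identified_boxes {
//     box_id: 1
//     normalized_bounding_box { xmin: 0.4 ymin: 0.3 width: 0.1 height: 0.4 }
//     score: 0.85
//     entity { label_id: 1 label_string: "Person" }
//     track_id: 77
//   }
//   stats {
//     full_frame_count { entity { label_id: 1 label_string: "Person" } count: 1 }
//   }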
// Message describing annotations on a Vision AI stream resource.
message StreamAnnotation {
oneof annotation_payload {
// Annotation for type ACTIVE_ZONE
NormalizedPolygon active_zone = 5;
// Annotation for type CROSSING_LINE
NormalizedPolyline crossing_line = 6;
}
// ID of the annotation. It must be unique within a certain context. For
// example, among all the annotations on one input stream of a Vision AI
// application.
string id = 1;
// User-friendly name for the annotation.
string display_name = 2;
// The Vision AI stream resource name.
string source_stream = 3;
// The actual type of Annotation.
StreamAnnotationType type = 4;
}
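// An assumed example of an active-zone StreamAnnotation in text format (the
// id, names, stream resource name, and coordinates are illustrative only):
//
//   id: "zone-1"
//   display_name: "entrance"
//   source_stream: "projects/my-project/locations/us-central1/clusters/my-cluster/streams/my-stream"
//   type: STREAM_ANNOTATION_TYPE_ACTIVE_ZONE
//   active_zone {
//     normalized_vertices { x: 0.1 y: 0.1 }
//     normalized_vertices { x: 0.9 y: 0.1 }
//     normalized_vertices { x: 0.9 y: 0.9 }
//     normalized_vertices { x: 0.1 y: 0.9 }
//   }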
// A wrapper of repeated StreamAnnotation.
message StreamAnnotations {
// Multiple annotations.
repeated StreamAnnotation stream_annotations = 1;
}
// Normalized Polygon.
message NormalizedPolygon {
// The bounding polygon normalized vertices. Top left corner of the image
// will be [0, 0].
repeated NormalizedVertex normalized_vertices = 1;
}
// Normalized Polyline, which represents a curve consisting of connected
// straight-line segments.
message NormalizedPolyline {
// A sequence of vertices connected by straight lines.
repeated NormalizedVertex normalized_vertices = 1;
}
// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
// X coordinate.
float x = 1;
// Y coordinate.
float y = 2;
}
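// For example, the center of the image is { x: 0.5 y: 0.5 } regardless of
// the image's pixel dimensions.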
// Message of essential metadata of App Platform.
// This message is usually attached to a certain processor output annotation
// for the customer to identify the source of the data.
message AppPlatformMetadata {
// The application resource name.
string application = 1;
// The instance resource id. Instance is the nested resource of application
// under collection 'instances'.
string instance_id = 2;
// The node name of the application graph.
string node = 3;
// The referred processor resource name of the application node.
string processor = 4;
}
// For any Cloud Function based customer processing logic, the customer's
// Cloud Function is expected to receive AppPlatformCloudFunctionRequest as
// the request and send back AppPlatformCloudFunctionResponse as the response.
// Message of request from AppPlatform to Cloud Function.
message AppPlatformCloudFunctionRequest {
// A general annotation message that uses struct format to represent different
// concrete annotation protobufs.
message StructedInputAnnotation {
// The ingestion time of the current annotation.
int64 ingestion_time_micros = 1;
// The struct format of the actual annotation.
google.protobuf.Struct annotation = 2;
}
// The metadata of the AppPlatform for the customer to identify the source of
// the payload.
AppPlatformMetadata app_platform_metadata = 1;
// The actual annotations to be processed by the customized Cloud Function.
repeated StructedInputAnnotation annotations = 2;
}
// Message of the response from customer's Cloud Function to AppPlatform.
message AppPlatformCloudFunctionResponse {
// A general annotation message that uses struct format to represent different
// concrete annotation protobufs.
message StructedOutputAnnotation {
// The struct format of the actual annotation.
google.protobuf.Struct annotation = 1;
}
// The modified annotations that are returned to AppPlatform.
// If the annotations fields are empty, then those annotations will be
// dropped by AppPlatform.
repeated StructedOutputAnnotation annotations = 2;
// If set to true, AppPlatform will use the original annotations instead of
// dropping them, even if the annotations field is empty.
bool annotation_passthrough = 3;
// The event notifications that are returned to AppPlatform. Typically they
// will then be configured to be consumed/forwarded to an operator that
// handles events, such as the Pub/Sub operator.
repeated AppPlatformEventBody events = 4;
}
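// A sketch of a response that keeps the original annotations and emits one
// event (the message text and id are assumed):
//
//   annotation_passthrough: true
//   events {
//     event_message: "Shelf is empty!"
//     event_id: "shelf-empty"
//   }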
// Message of the content of an AppPlatform event.
message AppPlatformEventBody {
// Human readable string of the event, like "There are more than 6 people in
// the scene" or "Shelf is empty!".
string event_message = 1;
// For the case of Pub/Sub, it will be stored in the message attributes.
// pubsub.proto
google.protobuf.Struct payload = 2;
// User defined Event Id, used to classify events. Within a delivery
// interval, events from the same application instance with the same id will
// be de-duplicated and only the first one will be sent out. An empty
// event_id will be treated as "".
string event_id = 3;
}
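// An assumed example of an AppPlatformEventBody in text format; the message,
// payload keys, and event id are invented for illustration:
//
//   event_message: "There are more than 6 people in the scene"
//   payload {
//     fields {
//       key: "count"
//       value { number_value: 7 }
//     }
//   }
//   event_id: "occupancy-alert"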