// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// google.cloud.dataplex.v1.metadata.proto Maven / Gradle / Ivy

// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "MetadataProto";
option java_package = "com.google.cloud.dataplex.v1";

// Metadata service manages metadata resources such as tables, filesets and
// partitions. Entities are addressed as children of a zone, and partitions as
// children of an entity.
service MetadataService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a metadata entity in the given parent zone.
  rpc CreateEntity(CreateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
      body: "entity"
    };
    option (google.api.method_signature) = "parent,entity";
  }

  // Updates a metadata entity. Only supports full resource update; the entity
  // to replace is identified by `entity.name`.
  rpc UpdateEntity(UpdateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      put: "/v1/{entity.name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
      body: "entity"
    };
  }

  // Deletes a metadata entity.
  rpc DeleteEntity(DeleteEntityRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a metadata entity.
  rpc GetEntity(GetEntityRequest) returns (Entity) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists metadata entities in a zone.
  rpc ListEntities(ListEntitiesRequest) returns (ListEntitiesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
    };
    option (google.api.method_signature) = "parent";
  }

  // Creates a metadata partition under the given parent entity.
  rpc CreatePartition(CreatePartitionRequest) returns (Partition) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
      body: "partition"
    };
    option (google.api.method_signature) = "parent,partition";
  }

  // Deletes a metadata partition. The partition is identified by its value
  // path, which may span several "/"-separated URL segments.
  rpc DeletePartition(DeletePartitionRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a metadata partition of an entity. The partition is identified by its
  // value path, which may span several "/"-separated URL segments.
  rpc GetPartition(GetPartitionRequest) returns (Partition) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists metadata partitions of an entity.
  rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
    };
    option (google.api.method_signature) = "parent";
  }
}

// Request message for `CreateEntity`.
message CreateEntityRequest {
  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Zone" }
  ];

  // Required. The entity resource to create.
  Entity entity = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `UpdateEntity`.
// The existing entity will be fully replaced by the entity in the request.
// The entity ID is mutable. To modify the ID, use the current entity ID in the
// request URL and specify the new ID in the request body.
message UpdateEntityRequest {
  // Required. The entity that replaces the existing one.
  Entity entity = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `DeleteEntity`.
message DeleteEntityRequest {
  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Required. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][] request.
  string etag = 2 [(google.api.field_behavior) = REQUIRED];
}

// Request message for `ListEntities`.
message ListEntitiesRequest {
  // Entity views, used to restrict a list request to one kind of entity.
  enum EntityView {
    // The default unset value. Return both table and fileset entities
    // if unspecified.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Only list table entities.
    TABLES = 1;

    // Only list fileset entities.
    FILESETS = 2;
  }

  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Zone" }
  ];

  // Required. Specify the entity view to make a partial list request.
  EntityView view = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Maximum number of entities to return. The service may return
  // fewer than this value. If unspecified, 100 entities will be returned by
  // default. The maximum value is 500; larger values will be truncated to
  // 500.
  int32 page_size = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListEntities` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListEntities` must match the call that provided the
  // page token.
  string page_token = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The following filter parameters can be added to the URL to limit
  // the entities returned by the API:
  //
  // - Entity ID: ?filter="id=entityID"
  // - Asset ID: ?filter="asset=assetID"
  // - Data path: ?filter="data_path=gs://my-bucket"
  // - Is HIVE compatible: ?filter="hive_compatible=true"
  // - Is BigQuery compatible: ?filter="bigquery_compatible=true"
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for `ListEntities`.
message ListEntitiesResponse {
  // Entities in the specified parent zone.
  repeated Entity entities = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Request message for `GetEntity`.
message GetEntityRequest {
  // Entity views that select how much of the entity is returned.
  enum EntityView {
    // The API will default to the `BASIC` view.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Minimal view that does not include the schema.
    BASIC = 1;

    // Include basic information and schema.
    SCHEMA = 2;

    // Include everything. Currently, this is the same as the `SCHEMA` view.
    FULL = 4;
  }

  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Used to select the subset of entity information to return.
  // Defaults to `BASIC`.
  EntityView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `ListPartitions`.
message ListPartitionsRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Maximum number of partitions to return. The service may return
  // fewer than this value. If unspecified, 100 partitions will be returned by
  // default. The maximum page size is 500; larger values will be truncated
  // to 500.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListPartitions` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListPartitions` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter the partitions returned to the caller using a key value
  // pair expression. Supported operators and syntax:
  //
  // - logic operators: AND, OR
  // - comparison operators: <, >, >=, <=, =, !=
  // - LIKE operators:
  //   - The right hand of a LIKE operator supports "." and
  //     "*" for wildcard searches, for example `value1 LIKE ".*oo.*"`
  // - parenthetical grouping: ( )
  //
  // Sample filter expression: `?filter="key1 < value1 OR key2 > value2"`
  //
  // **Notes:**
  //
  // - Keys to the left of operators are case insensitive.
  // - Partition results are sorted first by creation time, then by
  //   lexicographic order.
  // - Up to 20 key value filter pairs are allowed, but due to performance
  //   considerations, only the first 10 will be used as a filter.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `CreatePartition`.
message CreatePartitionRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Required. The partition resource to create.
  Partition partition = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for `DeletePartition`.
message DeletePartitionRequest {
  // Required. The resource name of the partition:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Optional. The etag associated with the partition. This field is
  // deprecated.
  string etag = 2 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// Response message for `ListPartitions`.
message ListPartitionsResponse {
  // Partitions under the specified parent entity.
  repeated Partition partitions = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Request message for `GetPartition`.
message GetPartitionRequest {
  // Required. The resource name of the partition:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];
}

// Represents table and fileset metadata contained within a zone.
message Entity {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Entity"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}"
  };

  // The type of entity.
  enum Type {
    // Type unspecified.
    TYPE_UNSPECIFIED = 0;

    // Structured and semi-structured data.
    TABLE = 1;

    // Unstructured data.
    FILESET = 2;
  }

  // Provides compatibility information for various metadata stores.
  message CompatibilityStatus {
    // Provides compatibility information for a specific metadata store.
    message Compatibility {
      // Output only. Whether the entity is compatible and can be represented in
      // the metadata store.
      bool compatible = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. Provides additional detail if the entity is incompatible
      // with the metadata store.
      string reason = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. Whether this entity is compatible with Hive Metastore.
    Compatibility hive_metastore = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Whether this entity is compatible with BigQuery.
    Compatibility bigquery = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the entity, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Display name must be shorter than or equal to 256 characters.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly longer description text. Must be shorter than or
  // equal to 1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The time when the entity was created.
  google.protobuf.Timestamp create_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the entity was last updated.
  google.protobuf.Timestamp update_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. A user-provided entity ID. It is mutable, and will be used as the
  // published table name. Specifying a new ID in an update entity
  // request will override the existing value.
  // The ID must contain only letters (a-z, A-Z), numbers (0-9), and
  // underscores, and consist of 256 or fewer characters.
  string id = 7 [(google.api.field_behavior) = REQUIRED];

  // Optional. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][] request. Required for update and delete requests.
  string etag = 8 [(google.api.field_behavior) = OPTIONAL];

  // Required. Immutable. The type of entity.
  Type type = 10 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The ID of the asset associated with the storage
  // location containing the entity data. The entity must be within the same
  // zone as the asset.
  string asset = 11 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The storage path of the entity data.
  // For Cloud Storage data, this is the fully-qualified path to the entity,
  // such as `gs://bucket/path/to/data`. For BigQuery data, this is the name of
  // the table resource, such as
  // `projects/project_id/datasets/dataset_id/tables/table_id`.
  string data_path = 12 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The set of items within the data path constituting the data in
  // the entity, represented as a glob path. Example:
  // `gs://bucket/path/to/data/**/*.csv`.
  string data_path_pattern = 13 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The name of the associated Data Catalog entry.
  string catalog_entry = 14 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Immutable. Identifies the storage system of the entity data.
  StorageSystem system = 15 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Identifies the storage format of the entity data.
  // It does not apply to entities with data stored in BigQuery.
  StorageFormat format = 16 [(google.api.field_behavior) = REQUIRED];

  // Output only. Metadata stores that the entity is compatible with.
  CompatibilityStatus compatibility = 19
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Identifies the access mechanism to the entity. Not user
  // settable.
  StorageAccess access = 21 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated unique ID for the Entity. This ID will be
  // different if the Entity is deleted and re-created with the same name.
  string uid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The description of the data structure and layout.
  // The schema is not included in list responses. It is only included in
  // `SCHEMA` and `FULL` entity views of a `GetEntity` response.
  Schema schema = 50 [(google.api.field_behavior) = REQUIRED];
}

// Represents partition metadata contained within entity instances.
message Partition {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Partition"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}/partitions/{partition}"
  };

  // Output only. Partition values used in the HTTP URL must be
  // double encoded. For example, `url_encode(url_encode(value))` can be used
  // to encode "US:CA/CA#Sunnyvale" so that the request URL ends
  // with "/partitions/US%253ACA/CA%2523Sunnyvale".
  // The name field in the response retains the encoded format.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Required. Immutable. The set of values representing the partition, which
  // correspond to the partition schema defined in the parent entity.
  repeated string values = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The location of the entity data within the partition,
  // for example, `gs://bucket/path/to/entity/key1=value1/key2=value2`, or
  // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
  string location = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The etag for this partition. This field is deprecated.
  string etag = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// Schema information describing the structure and layout of the data.
message Schema {
  // Type information for fields in schemas and partition schemas.
  enum Type {
    // SchemaType unspecified.
    TYPE_UNSPECIFIED = 0;

    // Boolean field.
    BOOLEAN = 1;

    // Single byte numeric field.
    BYTE = 2;

    // 16-bit numeric field.
    INT16 = 3;

    // 32-bit numeric field.
    INT32 = 4;

    // 64-bit numeric field.
    INT64 = 5;

    // Floating point numeric field.
    FLOAT = 6;

    // Double precision numeric field.
    DOUBLE = 7;

    // Real value numeric field.
    DECIMAL = 8;

    // Sequence of characters field.
    STRING = 9;

    // Sequence of bytes field.
    BINARY = 10;

    // Date and time field.
    TIMESTAMP = 11;

    // Date field.
    DATE = 12;

    // Time field.
    TIME = 13;

    // Structured field. Nested fields that define the structure of the map.
    // If all nested fields are nullable, this field represents a union.
    RECORD = 14;

    // Null field that does not have values.
    NULL = 100;
  }

  // Additional qualifiers to define field semantics.
  enum Mode {
    // Mode unspecified.
    MODE_UNSPECIFIED = 0;

    // The field has required semantics.
    REQUIRED = 1;

    // The field has optional semantics, and may be null.
    NULLABLE = 2;

    // The field has repeated (0 or more) semantics, and is a list of values.
    REPEATED = 3;
  }

  // Represents a column field within a table schema.
  message SchemaField {
    // Required. The name of the field. Must contain only letters, numbers and
    // underscores, with a maximum length of 767 characters,
    // and must begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. User friendly field description. Must be less than or equal to
    // 1024 characters.
    string description = 2 [(google.api.field_behavior) = OPTIONAL];

    // Required. The type of field.
    Type type = 3 [(google.api.field_behavior) = REQUIRED];

    // Required. Additional field semantics.
    Mode mode = 4 [(google.api.field_behavior) = REQUIRED];

    // Optional. Any nested field for complex types.
    repeated SchemaField fields = 10 [(google.api.field_behavior) = OPTIONAL];
  }

  // Represents a key field within the entity's partition structure. You could
  // have up to 20 partition fields, but only the first 10 partitions have the
  // filtering ability due to performance consideration. **Note:**
  // Partition fields are immutable.
  message PartitionField {
    // Required. Partition field name must consist of letters, numbers, and
    // underscores only, with a maximum length of 256 characters, and must
    // begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Immutable. The type of field.
    Type type = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];
  }

  // The structure of paths within the entity, which represent partitions.
  enum PartitionStyle {
    // PartitionStyle unspecified.
    PARTITION_STYLE_UNSPECIFIED = 0;

    // Partitions are hive-compatible.
    // Examples: `gs://bucket/path/to/table/dt=2019-10-31/lang=en`,
    // `gs://bucket/path/to/table/dt=2019-10-31/lang=en/late`.
    HIVE_COMPATIBLE = 1;
  }

  // Required. Set to `true` if user-managed or `false` if managed by Dataplex.
  // The default is `false` (managed by Dataplex).
  //
  // - Set to `false` to enable Dataplex discovery to update the schema,
  //   including new data discovery, schema inference, and schema evolution.
  //   Users retain the ability to input and edit the schema. Dataplex
  //   treats schema input by the user as though produced
  //   by a previous Dataplex discovery operation, and it will
  //   evolve the schema and take action based on that treatment.
  //
  // - Set to `true` to fully manage the entity
  //   schema. This setting guarantees that Dataplex will not
  //   change schema fields.
  bool user_managed = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The sequence of fields describing data in table entities.
  // **Note:** BigQuery SchemaFields are immutable.
  repeated SchemaField fields = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The sequence of fields describing the partition structure in
  // entities. If this field is empty, there are no partitions within the data.
  repeated PartitionField partition_fields = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The structure of paths containing partition data within the
  // entity.
  PartitionStyle partition_style = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Describes the format of the data within its storage location.
message StorageFormat {
  // Describes CSV and similar semi-structured data formats.
  message CsvOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII",
    // "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of rows to interpret as header rows that should be
    // skipped when reading data rows. Defaults to 0.
    int32 header_rows = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The delimiter used to separate values. Defaults to ','.
    string delimiter = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The character used to quote column values. Accepts '"'
    // (double quotation mark) or ''' (single quotation mark). Defaults to
    // '"' (double quotation mark) if unspecified.
    string quote = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes JSON data format.
  message JsonOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII",
    // "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes Iceberg data format.
  message IcebergOptions {
    // Optional. The location where the Iceberg metadata is present. Must be
    // within the table path.
    string metadata_location = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // The specific file format of the data.
  enum Format {
    // Format unspecified.
    FORMAT_UNSPECIFIED = 0;

    // Parquet-formatted structured data.
    PARQUET = 1;

    // Avro-formatted structured data.
    AVRO = 2;

    // Orc-formatted structured data.
    ORC = 3;

    // Csv-formatted semi-structured data.
    CSV = 100;

    // Json-formatted semi-structured data.
    JSON = 101;

    // Image data formats (such as jpg and png).
    IMAGE = 200;

    // Audio data formats (such as mp3 and wav).
    AUDIO = 201;

    // Video data formats (such as mp4 and mpg).
    VIDEO = 202;

    // Textual data formats (such as txt and xml).
    TEXT = 203;

    // TensorFlow record format.
    TFRECORD = 204;

    // Data that doesn't match a specific format.
    OTHER = 1000;

    // Data of an unknown format.
    UNKNOWN = 1001;
  }

  // The specific compressed file format of the data.
  enum CompressionFormat {
    // CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0;

    // GZip compressed set of files.
    GZIP = 2;

    // BZip2 compressed set of files.
    BZIP2 = 3;
  }

  // Output only. The data format associated with the stored data, which
  // represents content type values. The value is inferred from mime type.
  Format format = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The compression type associated with the stored data.
  // If unspecified, the data is uncompressed.
  CompressionFormat compression_format = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The mime type descriptor for the data. Must match the pattern
  // {type}/{subtype}. Supported values:
  //
  // - application/x-parquet
  // - application/x-avro
  // - application/x-orc
  // - application/x-tfrecord
  // - application/x-parquet+iceberg
  // - application/x-avro+iceberg
  // - application/x-orc+iceberg
  // - application/json
  // - application/{subtypes}
  // - text/csv
  // - text/{subtypes}
  // - image/{image subtype}
  // - video/{video subtype}
  // - audio/{audio subtype}
  string mime_type = 3 [(google.api.field_behavior) = REQUIRED];

  // Additional format-specific options. At most one of these can be set.
  oneof options {
    // Optional. Additional information about CSV formatted data.
    CsvOptions csv = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about JSON formatted data.
    JsonOptions json = 11 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about Iceberg tables.
    IcebergOptions iceberg = 12 [(google.api.field_behavior) = OPTIONAL];
  }
}

// Describes the access mechanism of the data within its storage location.
message StorageAccess {
  // Access mode determines how data stored within the entity is read.
  enum AccessMode {
    // Access mode unspecified.
    ACCESS_MODE_UNSPECIFIED = 0;

    // Default. Data is accessed directly using storage APIs.
    DIRECT = 1;

    // Data is accessed through a managed interface using BigQuery APIs.
    MANAGED = 2;
  }

  // Output only. Describes the read access mechanism of the data. Not user
  // settable.
  AccessMode read = 21 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Identifies the cloud system that manages the data storage of an entity.
enum StorageSystem {
  // Storage system unspecified.
  STORAGE_SYSTEM_UNSPECIFIED = 0;

  // The entity data is contained within a Cloud Storage bucket.
  CLOUD_STORAGE = 1;

  // The entity data is contained within a BigQuery dataset.
  BIGQUERY = 2;
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy