// All downloads are free. Search and download functionality uses the official Maven repository.
//
// a.clickzetta-java.1.3.15.source-code.table_common.proto Maven / Gradle / Ivy
//
// There is a newer version: 2.0.0
// Show newest version
syntax = "proto3";

import "data_type.proto";
import "file_format_type.proto";
import "file_system.proto";
import 'object_identifier.proto';
import "storage_location.proto";
import "connection_meta.proto";
import 'expression.proto';
import "property.proto";

package cz.proto;
option java_package = "cz.proto";
option java_multiple_files = true;

// Sort direction; used by SortedField.sort_order.
enum Order {
  // Zero value, therefore the proto3 default when the field is not set.
  ASC = 0;
  DESC = 1;
}

// Placement of NULL values when sorting.
// In the visible part of this file it is only referenced from a commented-out
// field in SortedField.
enum NullOrder {
  LOW = 0; // NULLS FIRST for ASC, NULLS LAST for DESC, this is for compatibility of existing tables
  FIRST = 1;
  LAST = 2;
}

// Kind of clustering; stored in ClusterInfo.cluster_type.
enum ClusterType {
  // Zero value, therefore the proto3 default when the field is not set.
  NORMAL = 0;
  RANGE = 1;
  HASH = 2;
}

// Bucketing variant of a hash cluster; stored in HashCluster.bucket_type.
enum HashBucketType {
  // Zero value, therefore the proto3 default when the field is not set.
  HASH_MOD = 0;
  HASH_RANGE = 1;
}

// Kind of table-like object.
// The numeric values are not contiguous (0, 2, 4, 6, 8, 99); they are the
// wire values and must not be renumbered.
// Not referenced by any message in the visible part of this file.
enum TableType {
  // Zero value, therefore the proto3 default when the field is not set.
  MANAGED_TABLE = 0;
  EXTERNAL_TABLE = 2;
  VIRTUAL_VIEW = 4;
  MATERIALIZED_VIEW = 6;
  STREAM = 8;
  UNKNOWN_TABLE = 99;
}

// Kind of index; stored in Index.type.
enum IndexType {
  // Zero value, therefore the proto3 default when the field is not set.
  BLOOM_FILTER = 0;
  BITSET = 1;
}

// Reference to a field, either by id or by name.
// Both members live in one oneof, so at most one of them is set at a time.
message FieldRef {
  oneof field {
    // 1. field_id and field_name are FieldSchema's id and name respectively
    // 2. field_name is only used for creating table, when field_id has not been assigned yet.
    // 3. field_id is used for almost all the cases except item 2.
    uint32 field_id = 1;
    string field_name = 2;
  }
}

// One entry of SortOrder.sorted_fields: a field reference plus its direction.
message SortedField {
  FieldRef field = 1;
  Order sort_order = 2;
  // Field number 3 is not in use; the declaration below is disabled.
  // NullOrder null_order = 3;
}

// Hash-specific clustering settings; carried in the `hash` branch of
// ClusterInfo.cluster.
message HashCluster {
  // NOTE(review): presumably the version of the hash function used for
  // bucketing — confirm with the producer of this message.
  uint32 function_version = 1;
  HashBucketType bucket_type = 2;
}

// Range-specific clustering settings; carried in the `range` branch of
// ClusterInfo.cluster.
message RangeCluster {
  RangeType range_type = 1;
}

// Variant of range clustering; stored in RangeCluster.range_type.
// The numeric values are not contiguous (0, 2, 4); they are the wire values
// and must not be renumbered.
enum RangeType {
  FIXED_POINT = 0; // like hive partition
  FIXED_RANGE = 2; // like oracle partition that has a range but it must be constant, e.g. less than 100
  DYNAMIC_RANGE = 4;
}

// Clustering description: the cluster type, the fields involved, and
// type-specific settings.
message ClusterInfo {
  ClusterType cluster_type = 1;
  repeated FieldRef clustered_fields = 2;
  // Declared `optional`, so an unset value is distinguishable from 0.
  optional uint64 buckets_count = 3;
  // Declared `optional`, so an unset value is distinguishable from "".
  optional string path_pattern = 4;

  // Type-specific settings; at most one branch is set.
  // NOTE(review): presumably the branch matches cluster_type
  // (HASH -> hash, RANGE -> range) — confirm against the writers.
  oneof cluster {
    HashCluster hash = 10;
    RangeCluster range = 11;
  }
}

// A list of sorted fields, each with its own direction.
// Used as the `sort_order` branch of FieldSpec.spec.
message SortOrder {
  repeated SortedField sorted_fields = 1;
}
/*
enum ConstraintType {
  UNKNOWN_CONSTRAINT = 0;
  PRIMARY_KEY = 1;
  FOREIGN_KEY = 2;
  UNIQUE = 3;
  CHECK = 4;
}

message ConstraintProperties {
  bool enable = 1;
  bool validate = 2;
  bool rely = 3;
}

message RefTableKey {
  ObjectIdentifier ref_table = 1;
  repeated FieldRef ref_fields = 2;
}

message KeyConstraintInfo {
  repeated FieldRef fields = 1;
  RefTableKey ref_key = 2;
}

message Constraint {
  ConstraintType type = 1;
  string name = 2;
  ConstraintProperties properties = 3;
  oneof key_or_expression {
    KeyConstraintInfo key = 4;
    ScalarExpression check_expression = 5;
  }
}*/

// Unique-key definition over a set of fields.
// Used as the `unique_key` branch of FieldSpec.spec.
message UniqueKey {
  repeated FieldRef unique_fields = 1;
  // NOTE(review): enable / validate / rely presumably mirror the usual SQL
  // constraint properties (ENABLE, VALIDATE, RELY) — confirm with consumers.
  bool enable = 2;
  bool validate = 3;
  bool rely = 4;
}

// Primary-key definition over a set of fields.
// Used as the `primary_key` branch of FieldSpec.spec.
message PrimaryKey {
  repeated FieldRef fields = 1;
  // NOTE(review): enable / validate / rely presumably mirror the usual SQL
  // constraint properties (ENABLE, VALIDATE, RELY) — confirm with consumers.
  bool enable = 2;
  bool validate = 3;
  bool rely = 4;
}

// Foreign-key definition: local fields plus the referenced table and fields.
// Used as the `foreign_key` branch of FieldSpec.spec.
message ForeignKey {
  // Fields of the table that owns the key.
  repeated FieldRef fields = 1;
  // Referenced table. ObjectIdentifier is declared outside this file.
  ObjectIdentifier ref_table = 2;
  // Fields of ref_table.
  // NOTE(review): presumably paired position-by-position with `fields` — confirm.
  repeated FieldRef ref_fields = 3;

  // NOTE(review): enable / validate / rely presumably mirror the usual SQL
  // constraint properties (ENABLE, VALIDATE, RELY) — confirm with consumers.
  bool enable = 4;
  bool validate = 5;
  bool rely = 6;
}

// The fields an index is built on; used by Index.key.
message IndexKey {
  repeated FieldRef fields = 1;
}

// Index definition: its type, its key fields, and a table identifier.
// Used as the `index` branch of FieldSpec.spec.
message Index {
  IndexType type = 1;
  IndexKey key = 2;
  // ObjectIdentifier is declared outside this file.
  // NOTE(review): presumably the table the index belongs to — confirm.
  ObjectIdentifier table = 3;
}

// A versioned specification over fields. Exactly which kind of specification
// it is depends on the branch set in `spec`.
message FieldSpec {
  uint32 spec_id = 1;  // field spec version
  // At most one branch is set. Branch numbers start at 10.
  oneof spec {
    ClusterInfo cluster_info = 10;
    SortOrder sort_order = 11;
    UniqueKey unique_key = 12;
    PrimaryKey primary_key = 13;
    Index index = 14;
    ForeignKey foreign_key = 15;
    // partition spec
  }
}


// Schema of a single field (column): name, type, flags, and optional
// expression / transform.
// NOTE(review): field number 1 is not declared here, and FieldRef's comment
// mentions a FieldSchema id that this message does not carry. If an id field
// was removed, consider reserving number 1.
message FieldSchema {
  string name = 2;
  // DataType is declared outside this file.
  DataType type = 3;
  // The three flags below are `optional`, so unset is distinguishable from false.
  optional bool virtual = 4;
  optional bool hidden = 5;
  optional bool un_output = 6;
  string comment = 7;
  // ScalarExpression is declared outside this file.
  ScalarExpression expr = 8;
  optional string transform = 9;
}

// Schema of a table: its fields plus a schema id.
message TableSchema {
  repeated FieldSchema fields = 1;
  uint32 schema_id = 2;
  string type = 3;  // meta internal use, for compatible with Iceberg, default to "struct"
}

// Settings for the text file format; used as a branch of FileFormat.format.
message TextFileFormat {
  // TODO(gang.wu): remove options and explicitly define properties
  // Free-form key/value options.
  // NOTE(review): the key/value types were missing from this copy of the
  // file (`map options = 1;` is not a valid map declaration);
  // <string, string> is assumed — confirm against the upstream schema.
  map<string, string> options = 1;
}

// Settings for the Parquet file format; used as a branch of FileFormat.format.
// All three sizes are expressed in bytes, as their names state.
message ParquetFileFormat {
  int64 row_group_size_bytes = 1;
  int64 page_size_bytes = 2;
  int64 dict_size_bytes = 3;
}

// Settings for the ORC file format; used as a branch of FileFormat.format.
// Currently declares no fields.
message OrcFileFormat {
}

// Settings for the CSV file format; used as a branch of FileFormat.format.
message CsvFileFormat {
  // TODO(gang.wu): remove options and explicitly define properties
  // Free-form key/value options.
  // NOTE(review): the key/value types were missing from this copy of the
  // file (`map options = 1;` is not a valid map declaration);
  // <string, string> is assumed — confirm against the upstream schema.
  map<string, string> options = 1;
}

// Settings for the Hive result file format; used as a branch of
// FileFormat.format.
message HiveResultFileFormat {
  // TODO(gang.wu): remove options and explicitly define properties
  // Free-form key/value options.
  // NOTE(review): the key/value types were missing from this copy of the
  // file (`map options = 1;` is not a valid map declaration);
  // <string, string> is assumed — confirm against the upstream schema.
  map<string, string> options = 1;
}

// Settings for the Avro file format; used as a branch of FileFormat.format.
// Currently declares no fields.
message AvroFileFormat {

}

// Settings for the Arrow file format; used as a branch of FileFormat.format.
// Currently declares no fields.
message ArrowFileFormat {
  // TODO(gang.wu): define arrow ipc version and other properties
}

// A file format: a type tag plus the matching format-specific settings.
message FileFormat {
  // FileFormatType is declared outside this file.
  FileFormatType type = 1;
  // Format-specific settings; at most one branch is set.
  // NOTE(review): presumably the branch matches `type` — confirm.
  oneof format {
    // Spelled in camelCase, unlike its snake_case siblings. Renaming would
    // change generated accessors and JSON/text names, so it is left as-is.
    TextFileFormat textFile = 2;
    ParquetFileFormat parquet_file = 3;
    OrcFileFormat orc_file = 4;
    CsvFileFormat csv_file = 5;
    HiveResultFileFormat hive_result_file = 6;
    AvroFileFormat avro_file = 7;
    ArrowFileFormat arrow_file = 8;
  }
}

// File-based data source: file system type, path, and file format.
// Used as the `file` branch of DataSourceInfo.dataSourceInfo.
message FileDataSourceInfo {
  // FileSystemType is declared outside this file.
  // Spelled in camelCase; renaming would change generated accessors and
  // JSON/text names, so it is left as-is.
  FileSystemType fileSystemType = 1;
  string path = 2;
  FileFormat format = 3;
}

// Used as the `dql` branch of DataSourceInfo.dataSourceInfo.
// Currently declares no fields, so it only marks which branch is set.
message DqlDataSourceInfo {
}

// Data source described by a storage location, its connection info, and a
// list of properties.
// Used as the `location_directory` branch of DataSourceInfo.dataSourceInfo.
// StorageLocation, FileSystemConnectionInfo and Property are declared
// outside this file.
message LocationDirectoryDataSourceInfo {
  StorageLocation storage_location = 1;
  FileSystemConnectionInfo connection_info = 2;
  repeated Property properties = 3;
}

// Physical data properties: data distribution, data order, and so on.
// Used by DataSourceInfo.data_props.
// TODO(yun.chen): decide which further properties should be added.
message DataProperties {
  // NOTE(review): presumably each entry has the `cluster_info` branch of
  // FieldSpec.spec set — confirm.
  repeated FieldSpec cluster_info_spec = 1;
  // NOTE(review): presumably has the `sort_order` branch of FieldSpec.spec
  // set — confirm.
  FieldSpec sort_order_spec = 2;
}


// Description of one data source. Entries are collected in
// DataSource.data_source_infos.
// Field numbers are not in declaration order; they are the wire identifiers
// and must not be renumbered.
message DataSourceInfo {
  int32 data_source_type = 7;

  // Source-specific details; at most one branch is set.
  oneof dataSourceInfo {
    FileDataSourceInfo file = 1;
    DqlDataSourceInfo dql = 5;
    LocationDirectoryDataSourceInfo location_directory = 8;
  }
  // Free-form key/value options.
  // NOTE(review): the key/value types were missing from this copy of the
  // file (`map options = 2;` is not a valid map declaration);
  // <string, string> is assumed — confirm against the upstream schema.
  map<string, string> options = 2;
  string location = 3;
  // Declared `optional`, so an unset value is distinguishable from 0.
  optional uint32 data_source_id = 4;
  optional DataProperties data_props = 6;
}

// Collection of data sources plus id bookkeeping.
// Field numbers start at 17; they are the wire identifiers and must not be
// renumbered.
message DataSource {
  repeated DataSourceInfo data_source_infos = 17;
  // NOTE(review): presumably refers to a DataSourceInfo.data_source_id in
  // data_source_infos — confirm.
  uint32 default_data_source_id = 18;
  // Declared `optional`, so an unset value is distinguishable from 0.
  optional uint32 next_data_source_id = 19;
}

// A source table of a materialized view, together with the snapshot it was
// read at. Collected in MVExtension.mv_source_tables.
message MVSource {
  // TODO(chendong): engine use identifier for parameter, while store table id in meta later
  ObjectIdentifier table_identifier = 1;

  // the source table's snapshot version by which the MV is created
  // used for checking expiration of MV
  int64 snapshot = 2;
}

// Refresh configuration; used by MVExtension.refresh_option.
message RefreshOption {
  // When a refresh happens.
  enum Type {
    // Zero value, therefore the proto3 default when `type` is not set.
    ON_DEMAND = 0;
    ON_COMMIT = 1;
    ON_SCHEDULE = 2;
  }
  Type type = 1;
  // NOTE(review): unit and epoch are not stated in this file — confirm.
  int64 start_time = 2;
  // Interval in minutes, as the name states.
  // NOTE(review): presumably start_time and interval_in_minute only apply to
  // ON_SCHEDULE — confirm.
  int64 interval_in_minute = 3;
}

// Extra settings carried in MVExtension.incremental_extension.
// Both fields are spelled in camelCase; renaming would change generated
// accessors and JSON/text names, so they are left as-is.
message IncrementalExtension {
  bool isValueSemantics = 1;
  int64 formatVersion = 2;
}

// Materialized-view specific data attached to a View via View.mv_extension.
message MVExtension {
  // NOTE(review): encoding of the plan inside this string is not stated in
  // this file — confirm.
  string mv_plan = 1;
  repeated MVSource mv_source_tables = 2;
  RefreshOption refresh_option = 3;
  // Declared `optional`, so an unset value is distinguishable from 0.
  optional int64 mv_snapshot_id = 4;
  optional IncrementalExtension incremental_extension = 5;
}

// View definition: expanded and original text, plus an optional
// materialized-view extension.
// Field numbers 3 and 4 are not declared here.
// NOTE(review): if they were used in the past, consider reserving them.
message View {
  string view_expanded_text = 1;
  string view_original_text = 2;

  MVExtension mv_extension = 5;
}

// Table stream definition.
// Field number 2 is not declared here.
// NOTE(review): if it was used in the past, consider reserving it.
message TableStream {
  // ObjectIdentifier is declared outside this file.
  // NOTE(review): presumably the object the stream reads from — confirm.
  ObjectIdentifier provider = 1;

  // Declared `optional`, so an unset value is distinguishable from 0.
  // NOTE(review): unit and epoch are not stated in this file — confirm.
  optional int64 at_timestamp = 3;

  int64 offset = 4;
}

// State of a table stream expressed as a pair of snapshots.
message TableStreamState {
  // Identifier of the stream. ObjectIdentifier is declared outside this file.
  ObjectIdentifier stream = 1;
  // NOTE(review): whether the from/to bounds are inclusive or exclusive is
  // not stated in this file — confirm.
  int64 from_snapshot = 2;
  int64 to_snapshot = 3;
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy