// operator.proto — source file from clickzetta-java 1.3.15,
// the Java SDK for ClickZetta's Lakehouse.
// This schema uses proto3 syntax.
syntax = "proto3";
// Every type declared in this file belongs to the cz.proto package.
package cz.proto;
// Emit one Java source file per top-level message/enum.
option java_multiple_files = true;
// Name of the Java wrapper class generated for this file.
option java_outer_classname = "OperatorProto";
import "table_meta.proto";
import "table_common.proto";
import "data_type.proto";
import "expression.proto";
import "file_format_type.proto";
import "file_meta_data.proto";
import "input_split.proto";
import "file_system.proto";
import "virtual_value_info.proto";
import "property.proto";
import "runtime_run_span_stats.proto";
// Pairs one output id with one input id.
// NOTE(review): presumably column ids of an operator's output and input
// schemas — confirm against the code that fills Operator.columnMappings.
message ColumnMapping {
  uint64 outputId = 1;
  uint64 inputId = 2;
}
// Distribution descriptor: a distribution kind plus its parameters.
message Distribution {
  // Kind of distribution. ANY (0) is the proto3 default when `type` is unset.
  enum Type {
    ANY = 0;
    HASH = 1;
    RANGE = 2;
    SINGLETON = 3;
    BROADCAST = 4;
    ROUND_ROBIN = 5;
  }

  Type type = 1;
  // NOTE(review): presumably the field ids the data is distributed on.
  repeated uint64 keys = 2;

  // Each has_* flag is declared right before a value field.
  // NOTE(review): presumably the flag says whether the paired value is
  // meaningful (hand-rolled presence tracking) — confirm.
  bool has_dop = 5;
  uint32 dop = 6;
  bool has_hash_version = 10;
  uint32 hash_version = 11;
  bool has_bucket_type = 12;
  HashBucketType bucket_type = 13;

  // Valid only when type = RANGE.
  uint32 range_dist_id = 20;
}
// An ordering specification expressed as a list of keys.
message Collation {
  // One ordering key: a field id with its Order and NullOrder.
  message Key {
    uint64 field = 1;
    Order order = 2;
    NullOrder null_order = 3;
  }

  repeated Key orders = 1;
}
// Bundle of traits: an engine, two distributions and a collation.
message Traits {
  // Engine selector. NONE (0) is the proto3 default when `engine` is unset.
  enum EngineType {
    NONE = 0;
    CZ = 1;
  }

  EngineType engine = 1;
  Distribution distribution = 2;
  Distribution local_distribution = 3;
  Collation collation = 4;
}
// One operator node: common attributes plus at most one operator-specific
// payload, selected through the `op` oneof.
message Operator {
  string id = 1;
  // NOTE(review): presumably the `id` values of this operator's inputs.
  repeated string inputIds = 2;
  DataType schema = 3;
  repeated ColumnMapping columnMappings = 4;
  uint64 signature = 5;
  // NOTE(review): a plain uint32 rather than a Traits message — presumably an
  // index into a traits table kept elsewhere; confirm.
  uint32 traits = 6;

  // Operator-specific payload; setting one member clears the others.
  oneof op {
    // Tags 10-27.
    TableScan table_scan = 10;
    TableSink table_sink = 11;
    Calc calc = 12;
    MergeSort merge_sort = 13;
    ShuffleWrite shuffle_write = 14;
    ShuffleRead shuffle_read = 15;
    Values values = 16;
    HashAggregate hash_agg = 17;
    SortedAggregate sorted_agg = 18;
    SortMergeJoin merge_join = 19;
    HashJoin hash_join = 20;
    LocalSort local_sort = 21;
    UnionAll union_all = 22;
    Buffer buffer = 23;
    Window window = 24;
    Expand expand = 25;
    LateralView lateral_view = 26;
    Grouping grouping = 27;

    // Tags 50-60.
    LogicalJoin join = 50;
    LogicalAggregate aggregate = 51;
    LogicalCalc logical_calc = 52;
    LogicalSort logical_sort = 53;
    SetOperator set_operator = 54;
    AggregatePhase agg_phase = 55;
    Spool spool = 56;
    PartialWindowFilter partial_window_filter = 57;
    TreeJoin tree_join = 58;
    TreeJoinLeaf tree_join_leaf = 59;
    LocalExchange local_exchange = 60;
  }

  ParseTreeInfo pt = 100;
}
// Stage of an aggregation. DUPLICATE (0) is the proto3 default, so an unset
// stage field reads back as DUPLICATE.
enum AggStage {
  DUPLICATE = 0;
  PARTIAL1 = 1;
  PARTIAL2 = 2;
  FINAL = 3;
  COMPLETE = 4;
}
// A single aggregate function call and its modifiers.
message AggregateCall {
  ScalarExpression function = 1;
  bool distinct = 2;
  AggStage stage = 3;
  repeated OrderByDesc orders = 4;
  Reference filter = 5;
  repeated uint64 output_fields = 6;

  // The initial data type of an aggregate function.
  DataType initial_Type = 10;
  // The partial data type of an aggregate function.
  DataType partial_Type = 11;
  // The output data type of an aggregate function.
  DataType output_Type = 12;
}
// Aggregate definition: key references plus the aggregate calls.
// Embedded by HashAggregate, SortedAggregate and AggregatePhase.
message LogicalAggregate {
  repeated Reference keys = 1;
  repeated AggregateCall aggregate_calls = 2;
  bool adaptive = 3;
}
// A LogicalAggregate paired with an AggStage (Operator.hash_agg payload).
message HashAggregate {
  LogicalAggregate aggregate = 1;
  AggStage stage = 2;
}
// A LogicalAggregate paired with an AggStage and a list of orderings
// (Operator.sorted_agg payload).
message SortedAggregate {
  LogicalAggregate aggregate = 1;
  AggStage stage = 2;
  repeated OrderByDesc orders = 3;
}
// A LogicalAggregate paired with an AggStage (Operator.agg_phase payload).
message AggregatePhase {
  LogicalAggregate aggregate = 1;
  AggStage stage = 2;
}
// Join kind. INNER (0) is the proto3 default when the field is unset.
enum JoinType {
  INNER = 0;
  LEFT = 1;
  RIGHT = 2;
  FULL = 3;
  LEFT_SEMI = 4;
  LEFT_ANTI = 5;
}
// Dynamic filter kind. DF_NONE (0) is the proto3 default when unset.
enum DynamicFilterType {
  DF_NONE = 0;
  DF_GLOBAL = 1;
  DF_BROADCAST = 2;
  DF_SHUFFLED = 3;
}
// Dynamic filter settings; carried by LogicalJoin.dynamic_filter.
message DynamicFilterInfo {
  DynamicFilterType type = 1;
  bool consumer = 2;
  double selectivity = 3;
  // NOTE(review): meaning of the uint32 values `probe` and
  // `table_scan_parents` is not defined in this file.
  uint32 probe = 4;
  bool partition_filter = 5;
  uint32 table_scan_parents = 6;
  bool sort_filter = 7;
  bool bitset_filter = 8;
}
// Join hints; carried by LogicalJoin.hintInfo.
// NOTE(review): both fields are raw int32 codes whose value sets are not
// defined in this file.
message JoinHintInfo {
  int32 build_side = 1;
  int32 shuffle_type = 2;
}
// Holds a list of nested Operator messages (Operator.tree_join payload).
message TreeJoin {
  repeated Operator root_operators = 1;
}
// Operator.tree_join_leaf payload.
// NOTE(review): presumably `input_index` selects one input of the enclosing
// TreeJoin; the value set of `hint_build` is not defined here.
message TreeJoinLeaf {
  int32 input_index = 1;
  int32 hint_build = 2;
}
// Join definition: kind, condition and auxiliary settings.
// Embedded by SortMergeJoin and HashJoin.
message LogicalJoin {
  JoinType type = 1;
  ScalarExpression condition = 2;
  repeated uint64 input_references = 3;
  DynamicFilterInfo dynamic_filter = 4;
  JoinHintInfo hintInfo = 5;
}
// A LogicalJoin plus one ordering list per side (lhs / rhs).
message SortMergeJoin {
  LogicalJoin join = 1;
  repeated OrderByDesc lhs_orders = 2;
  repeated OrderByDesc rhs_orders = 3;
}
// A LogicalJoin plus hash-join specific settings.
message HashJoin {
  LogicalJoin join = 1;
  // NOTE(review): presumably matches an Operator.id — confirm.
  string probe_operator_id = 2;
  bool broadcast = 3;
}
// A pair of time measurements; both are in nanoseconds per the field names.
message Timing {
  uint64 cpu_nanos = 1;
  uint64 wall_nanos = 2;
}
// Runtime statistics reported for one operator: common counters, at most one
// operator-specific stats payload (`op_stats`), and execution metadata.
message OperatorStats {
  // NOTE(review): presumably matches Operator.id — confirm.
  string operator_id = 1;
  uint64 row_count = 2;
  Timing timing = 3;

  // Operator-specific statistics; setting one member clears the others.
  oneof op_stats {
    TableScanStats table_scan_stats = 4;
    TableSinkStats table_sink_stats = 5;
    CalcStats calc_stats = 6;
    HashJoinStats hash_join_stats = 7;
    MergeJoinStats merge_join_stats = 8;
    HashAggregateStats hash_aggregate_stats = 9;
    MergeAggregateStats merge_aggregate_stats = 10;
    LocalSortStats local_sort_stats = 11;
    MergeSortStats merge_sort_stats = 12;
    ValuesStats values_stats = 13;
    ExchangeSinkStats exchange_sink_stats = 14;
    ExchangeSourceStats exchange_source_stats = 15;
    UnionAllStats union_all_stats = 16;
    BufferStats buffer_stats = 17;
    WindowStats window_stats = 18;
    ExpandStats expand_stats = 19;
    LateralViewStats lateral_view_stats = 20;
    PartialWindowStats partial_window_stats = 21;
    LocalExchangeStats local_exchange_stats = 22;
  }

  Timing init_timing = 100;
  uint64 batch_count = 101;
  uint64 peak_memory = 102;
  // Nanoseconds per the field names; the epoch/clock is not defined here.
  uint64 start_time_nanos = 103;
  uint64 end_time_nanos = 104;
  uint64 batch_signature = 105;
  string exec_node_id = 106;
  uint64 pipeline_id = 107;
  uint64 driver_sequence = 108;
  uint64 block_timing_nanos = 109;
  Timing construct_timing = 110;
  Timing finish_timing = 111;
  uint64 data_size_bytes = 112;

  repeated RuntimeRunSpanStats run_stats = 200;

  bytes extra_stats_binary = 254;
  // Deprecated. The field option makes the deprecation visible to generated
  // code instead of only to readers of this file.
  // NOTE(review): presumably superseded by extra_stats_binary — confirm.
  string extra_stats = 255 [deprecated = true];
}
// One ordering term: a reference with its Order and NullOrder.
message OrderByDesc {
  Reference reference = 1;
  Order order = 2;
  NullOrder null_order = 3;
}
// Operator.merge_sort payload: ordering terms plus input references.
message MergeSort {
  repeated OrderByDesc orders = 1;
  repeated uint64 input_references = 2;
  // The same flag also exists on UnionAll.
  bool isIncrMergeUnion = 3;
}
// Operator.union_all payload.
message UnionAll {
  repeated uint64 input_references = 1;
  // The same flag also exists on MergeSort.
  bool isIncrMergeUnion = 2;
}
// Operator.buffer payload; carries a single `shared` flag.
message Buffer {
  bool shared = 1;
}
// Operator.partial_window_filter payload: a function, a window spec and a
// limit.
message PartialWindowFilter {
  ScalarExpression function = 1;
  WindowSpec spec = 2;
  uint64 limit = 3;
}
// Operator.window payload: window groups plus input references.
message Window {
  repeated WindowGroup groups = 1;
  repeated uint64 input_references = 2;
}
// A set of window calls that share one WindowSpec.
message WindowGroup {
  repeated WindowCall functions = 1;
  WindowSpec spec = 2;
}
// A single window function call.
message WindowCall {
  ScalarExpression function = 1;
  bool distinct = 2;
  DataType partial_type = 3;
}
// Window specification: key references, ordering terms and frame bounds.
message WindowSpec {
  // Frame boundary kind. ROWS (0) is the proto3 default when unset.
  enum BoundaryType {
    ROWS = 0;
    RANGE = 1;
    GROUP = 2;
  }

  repeated Reference keys = 1;
  repeated OrderByDesc orders = 2;
  BoundaryType boundary_type = 3;
  WindowBoundary lower_bound = 4;
  WindowBoundary upper_bound = 5;
}
// One frame bound of a WindowSpec.
message WindowBoundary {
  // Declared `optional`, so unset is distinguishable from false.
  optional bool preceding = 1;
  Constant offset = 2;
}
// Operator.lateral_view payload: table function calls plus input references.
message LateralView {
  repeated TableFunctionCall functions = 1;
  repeated uint64 input_references = 2;
}
// A single table function call, as used by LateralView.
message TableFunctionCall {
  ScalarExpression function = 1;
  bool outer = 2;
  repeated uint64 used_fields = 3;
}
// Operator.spool payload; identifies a spool by numeric id.
message Spool {
  uint64 spool_id = 1;
}
// Lazy-evaluation mode, used by Calc.lazy. NOT_LAZY (0) is the proto3
// default.
enum LazyEval {
  NOT_LAZY = 0;
  LAZY_IN_CONDITION = 1;
  ALWAYS_LAZY = 2;
}
// Operator.calc payload: an expression list, an optional filter and a
// projection list.
message Calc {
  repeated ScalarExpression expressions = 1;

  // Either `no_filter` or `filter` is set, never both.
  // NOTE(review): `filter` and `projects` are presumably indexes into
  // `expressions` — confirm.
  oneof optional_filter {
    bool no_filter = 2;
    uint64 filter = 3;
  }

  repeated uint64 projects = 4;
  repeated LazyEval lazy = 5;
  // Declared `optional`, so unset is distinguishable from 0.0.
  optional double partial_window_filter_selectivity = 6;
}
// Operator.logical_calc payload: a condition plus projection expressions.
message LogicalCalc {
  ScalarExpression condition = 1;
  repeated ScalarExpression projects = 2;
  // Declared `optional`, so unset is distinguishable from 0.0.
  optional double partial_window_filter_selectivity = 3;
}
// Wrapper around a list of shuffle keys; used by Expand.shuffle_keys.
message ExpandShuffleKeys {
  repeated uint64 shuffle_keys = 1;
}
// Operator.expand payload.
message Expand {
  repeated ScalarExpression expressions = 1;
  // A repeated field cannot carry presence itself, so the key list lives in a
  // wrapper message.
  optional ExpandShuffleKeys shuffle_keys = 2;
}
// One set of key references; Grouping holds a list of these.
message GroupingKeySet {
  repeated Reference keys = 1;
}
// Operator.grouping payload: keys, key sets and aggregate calls.
message Grouping {
  repeated Reference keys = 1;
  repeated GroupingKeySet keySets = 2;
  repeated AggregateCall aggregate_calls = 3;
  uint64 grouping_id_start_from = 4;
  // Declared `optional`, so unset is distinguishable from 0.
  optional uint64 grouping_id_col_offset = 5;
}
// Stats payload for OperatorStats.calc_stats; currently has no fields.
message CalcStats {}
// Hash table counters; embedded by HashJoinStats and HashAggregateStats.
message HashTableStats {
  uint64 num_buckets = 1;
  uint64 num_keys = 2;
  uint64 num_resize = 3;
  uint64 num_accesses = 4;
  uint64 num_collisions = 5;
  // NOTE(review): unit not stated here; presumably bytes.
  uint64 used_memory = 6;
}
// Stats payload for OperatorStats.hash_join_stats.
message HashJoinStats {
  // Build-side timings.
  Timing build_timing = 1;
  Timing finish_build_timing = 2;

  // Probe-side timings.
  Timing probe_timing = 3;
  Timing post_probe_timing = 4;

  HashTableStats ht_stats = 5;

  // Build-side row counters.
  uint64 num_build_rows = 6;
  uint64 num_distinct_build_rows = 7;
  uint64 max_equal_build_rows = 8;

  // Spill statistics, one per side.
  SpillStats build_spill_stats = 9;
  SpillStats probe_spill_stats = 10;

  // Further probe timings.
  // NOTE(review): whether these are included in probe_timing is not stated.
  Timing probe_find_ht_timing = 11;
  Timing probe_output_timing = 12;
  Timing probe_eval_conjunct_timing = 13;
  Timing probe_output_conjunct_timing = 14;

  uint64 num_null_rows = 15;
}
// Stats payload for OperatorStats.merge_join_stats.
message MergeJoinStats {
  // Group counters.
  uint64 drive_close_group_count = 1;
  uint64 drive_open_group_count = 2;
  uint64 matched_group_count = 3;
  uint64 non_matched_group_count = 4;

  // Comparison counters.
  // NOTE(review): "ovc" is not expanded anywhere in this file.
  uint64 num_compare_ovc = 5;
  uint64 num_compare_data = 6;
  uint64 num_ovc_conflict = 7;

  Timing match_group_timing = 8;
  Timing output_timing = 9;
  Timing eval_conjunct_timing = 10;
  Timing output_conjunct_timing = 11;

  // Left-side stats use tags 100-102.
  Timing left_add_input_timing = 100;
  Timing left_advance_timing = 101;
  uint64 left_materialize_rows = 102;

  // Right-side stats use tags 200-202.
  Timing right_add_input_timing = 200;
  Timing right_advance_timing = 201;
  uint64 right_materialize_rows = 202;
}
// Stats payload for OperatorStats.hash_aggregate_stats.
message HashAggregateStats {
  // The first three timings also appear in MergeAggregateStats.
  Timing assign_states_timing = 1;
  Timing update_states_timing = 2;
  Timing output_timing = 3;

  HashTableStats ht_stats = 4;
  // NOTE(review): unit not stated here; presumably bytes.
  uint64 states_used_memory = 5;
  uint64 pass_through_rows = 6;
  SpillStats input_spill_stats = 7;
  SpillStats aggregated_spill_stats = 8;
}
// Stats payload for OperatorStats.merge_aggregate_stats: three timings.
message MergeAggregateStats {
  Timing assign_states_timing = 1;
  Timing update_states_timing = 2;
  Timing output_timing = 3;
}
// Stats payload for OperatorStats.local_sort_stats.
message LocalSortStats {
  SpillStats spill_stats = 1;
  Timing generate_run_timing = 2;
  Timing merge_run_timing = 3;
  Timing init_merge_timing = 4;

  // Included in generate_run_timing.
  // NOTE(review): the original note sits directly above
  // accumulate_block_timing; whether it also covers the three timings that
  // follow is not stated.
  Timing accumulate_block_timing = 5;
  Timing sort_key_timing = 6;
  Timing permute_payload_timing = 7;
  Timing spill_run_timing = 8;
}
// Stats payload for OperatorStats.merge_sort_stats; currently has no fields.
message MergeSortStats {}
// Stats payload for OperatorStats.union_all_stats; currently has no fields.
message UnionAllStats {}
// Stats payload for OperatorStats.values_stats; currently has no fields.
message ValuesStats {}
// Stats payload for OperatorStats.buffer_stats; only spill statistics.
message BufferStats {
  SpillStats spill_stats = 1;
}
// Stats payload for OperatorStats.window_stats; currently has no fields.
message WindowStats {}
// Stats payload for OperatorStats.partial_window_stats; currently has no
// fields.
message PartialWindowStats {}
// Stats payload for OperatorStats.expand_stats; currently has no fields.
message ExpandStats {}
// Stats payload for OperatorStats.lateral_view_stats; currently has no fields.
message LateralViewStats {}
// Stats payload for OperatorStats.local_exchange_stats; currently has no
// fields.
message LocalExchangeStats {}
// Stats payload for OperatorStats.exchange_sink_stats.
message ExchangeSinkStats {
  // Byte counters.
  uint64 sent_byte_count = 1;
  uint64 compress_input_byte_count = 2;

  // Timings.
  Timing serialize_write_timing = 3;
  Timing serialize_flush_timing = 4;
  Timing compress_timing = 5;
  Timing acquire_buffer_timing = 6;
  Timing submit_buffer_timing = 7;
  Timing close_timing = 8;
  Timing submit_buffer_async_timing = 9;

  // Flush counters.
  uint64 flush_auto_count = 10;
  uint64 flush_manual_count = 11;
}
// Stats payload for OperatorStats.exchange_source_stats.
message ExchangeSourceStats {
  // Byte counters.
  uint64 received_byte_count = 1;
  uint64 decompress_output_byte_count = 2;

  // Timings.
  Timing deserialize_timing = 3;
  Timing decompress_timing = 4;
  Timing read_buffer_timing = 5;
  Timing sort_timing = 6;

  SpillStats sorter_spill_stats = 7;
}
// Table descriptor shared by TableScan and TableSink.
// NOTE(review): tags 4, 5 and 7 are unused in this message; if they belonged
// to removed fields they should be `reserved` — confirm history.
message Table {
  repeated string path = 1;
  TableMeta table_meta = 2;
  int64 instance_id = 3;
  int64 table_id = 6;
  repeated Property properties = 8;
}
// Operator.values payload: a table of expressions.
// NOTE(review): `data` is a flat list, presumably holding
// row_count * col_count entries; the row/column layout is not stated here.
message Values {
  uint32 row_count = 1;
  uint32 col_count = 2;
  repeated ScalarExpression data = 3;
  bool broadcast = 4;
}
// One sort key identified by a numeric id, with its Order and NullOrder.
// Used by TableScan.orders.
message SortKeyDesc {
  uint32 id = 1;
  Order order = 2;
  NullOrder null_order = 3;
}
// Operator.table_scan payload: the table to read plus column selection,
// filters and ordering/range settings.
message TableScan {
  Table table = 1;
  int32 data_source_info_id = 2;
  repeated uint64 cols = 3;

  // Filter expressions.
  // NOTE(review): how `filter`, `ensuredFilter` and `filter4Meta` differ is
  // not stated in this file.
  ScalarExpression filter = 4;
  ScalarExpression ensuredFilter = 5;

  // A map field must spell out its key and value types; a bare `map props`
  // does not parse.
  // NOTE(review): <string, string> is assumed — confirm against the upstream
  // schema before relying on it.
  map<string, string> props = 6;

  bool align = 7;
  uint64 alignDop = 8;
  repeated SortKeyDesc orders = 9;
  repeated uint32 range_keys = 10;
  uint32 range_distribution_id = 11;
  IncrementalTableProperty incremental_table_property = 12;
  ScalarExpression filter4Meta = 13;
  SubFieldsPruning subfields = 14;
}
// File pruning counters; carried by TableScanStats.pruning_stats.
message FilePruningStats {
  int32 fileCount = 1;
  int32 prunedFileCount = 2;
}
// Stats payload for OperatorStats.table_scan_stats.
message TableScanStats {
  DataInputStats input_stats = 1;
  FilePruningStats pruning_stats = 2;
}
// Incremental-read settings; carried by
// TableScan.incremental_table_property.
// NOTE(review): the units of `from` / `to` (timestamps, versions, ...) are
// not stated in this file.
message IncrementalTableProperty {
  int64 from = 1;
  int64 to = 2;
  bool consolidate = 3;
  int64 fromMetaVersion = 4;
  int64 toMetaVersion = 5;
  int64 rowCount = 6;
  int64 baseRowCount = 7;
}
// The shuffle type. The enum is wrapped in a message so that its value names
// do not conflict with other enums.
message ShuffleType {
  // HASH (0) is the proto3 default when `type` is unset.
  enum Type {
    HASH = 0;
    RANGE = 1;
    BROADCAST = 2;
    SINGLE = 3;
    RANDOM = 4;
    PAIR_WIZE = 5;
    ADAPTIVE_HASH = 6;
    ADAPTIVE_RANGE = 7;
    ROUND_ROBIN = 8;
  }

  Type type = 1;
}
// Operator.table_sink payload: the target table plus write settings.
message TableSink {
  Table table = 1;
  bool overwrite = 2;
  int32 data_source_info_id = 3;
  repeated Reference keys = 4;
  // NOTE(review): the bit layout of `flags` is not defined in this file.
  uint32 flags = 5;
  repeated Reference part_sort_keys = 6;
  repeated uint64 input_fields = 7;
  repeated Reference file_slice_keys = 8;
  bool static_partition = 9;
  VirtualValueInfo part_values = 10;
  bool nop = 11;
}
// Operator.shuffle_write payload.
message ShuffleWrite {
  ShuffleType shuffleType = 1;
  repeated Reference keys = 2;
  repeated OrderByDesc orders = 3;
  // 0 means no limit.
  uint32 limit = 4;
  uint32 function_version = 5;
  HashBucketType bucket_type = 6;
  uint32 range_distribution_id = 7;
}
// Operator.shuffle_read payload.
// NOTE(review): tag 6 is unused in this message; if it belonged to a removed
// field it should be `reserved` — confirm history.
message ShuffleRead {
  repeated OrderByDesc orders = 1;
  // 0 means no limit.
  uint32 limit = 2;
  uint32 offset = 3;
  ShuffleType shuffleType = 4;
  bool multi_accessed = 5;
  bool merge_sort = 7;
}
// Operator.local_exchange payload.
message LocalExchange {
  ShuffleType shuffleType = 1;
  repeated Reference keys = 2;
  repeated OrderByDesc orders = 3;
  // 0 means no limit.
  uint32 limit = 4;
  uint32 offset = 5;
  uint32 function_version = 6;
}
// Operator.local_sort payload: ordering terms with limit/offset.
message LocalSort {
  repeated OrderByDesc orders = 1;
  // NOTE(review): unlike the shuffle messages, no "0 means no limit" note is
  // given for this field — confirm the convention.
  uint64 limit = 2;
  uint64 offset = 3;
  uint32 sorted_prefix_cnt = 4;
}
// Operator.logical_sort payload. Unlike LocalSort, limit and offset are
// expressions rather than integers.
message LogicalSort {
  repeated Reference keys = 1;
  repeated OrderByDesc orders = 2;
  ScalarExpression limit = 3;
  ScalarExpression offset = 4;
  bool global = 5;
}
// Set operation kind. UNION (0) is the proto3 default when unset.
enum SetOpType {
  UNION = 0;
  INTERSECT = 1;
  EXCEPT = 2;
}
// Operator.set_operator payload: the set operation kind and its `all` flag.
message SetOperator {
  SetOpType type = 1;
  bool all = 2;
  repeated uint64 input_references = 3;
}
// Counters for a delta apply; carried by FileInputStats.delta_apply_stats.
message DeltaApplyStats {
  // Number of rows deleted in the delta apply. A row that is deleted but not
  // used in the delta apply is not counted.
  uint64 deleted_row_count = 1;
  // Number of rows updated in the delta apply. A row that is updated but not
  // used in the delta apply is not counted.
  uint64 updated_row_count = 2;
  uint64 copied_dest_row_count = 3;
  // Microseconds, per the `_us` suffix.
  uint64 copied_time_elapsed_us = 4;
}
// Input statistics: common counters plus at most one source-specific
// payload. The type is recursive through FileRangesInputStats and
// FileInputStats, which both hold repeated DataInputStats.
message DataInputStats {
  uint64 raw_input_byte_count = 1;
  uint64 row_count = 2;

  // Setting one member clears the other.
  oneof input_stats {
    FileRangesInputStats file_input_stats = 3;
    FileInputStats file_format_stats = 4;
  }

  // Including decoding/decompression/io...
  uint64 time_elapsed_us = 5;
}
// Input statistics aggregated over several file inputs.
message FileRangesInputStats {
  repeated DataInputStats file_input_stats = 1;
  // Filters are kept in the same order as the basic filter and
  // applyNewFilter.
  repeated string ppd_filter = 2;
  uint64 pruned_file_count = 3;
  uint64 prefetched_file_count = 4;
}
// Input statistics for one file range: the format, the range, at most one
// format-specific payload, and IO / delta statistics.
message FileInputStats {
  FileFormatType format_type = 1;
  FileRange range = 2;

  // Format-specific statistics; setting one member clears the others.
  oneof file_format_input_stats {
    TextInputStats text_input_stats = 3;
    ParquetInputStats parquet_input_stats = 4;
    MemoryTableInputStats memory_input_stats = 5;
    OrcInputStats orc_input_stats = 8;
    DummyInputStats dummy_input_stats = 9;
    CSVInputStats csv_input_stats = 10;
    AvroInputStats avro_input_stats = 11;
    ArrowInputStats arrow_input_stats = 12;
  }

  FileIOInputStats io_stats = 6;
  repeated DataInputStats delta_file_stats = 7;
  optional DeltaApplyStats delta_apply_stats = 13;
}
// IO-level read statistics; carried by FileInputStats.io_stats.
message FileIOInputStats {
  // IO request count.
  uint64 read_count = 1;
  // IO bytes.
  uint64 read_bytes = 2;
  // IO stream time, including the benefit of prefetch.
  uint64 time_elapsed_us = 3;
  PrefetchStats prefetch_stats = 4;
  FileInputStreamStats input_stats = 5;
}
// IO-level write statistics; carried by FileOutputStats.io_stats.
message FileIOOutputStats {
  // IO stream time, i.e. the time spent blocking on IO (SYNC).
  uint64 time_elapsed_us = 1;
}
// Prefetch statistics; carried by FileIOInputStats.prefetch_stats.
message PrefetchStats {
  // Declared first but carries the highest tag (8).
  string driver_type = 8;

  // IO requests from outside.
  uint64 read_count = 1;
  // IO bytes from the prefetch cache.
  uint64 read_bytes = 2;
  // Prefetch cache hit count.
  uint64 read_hit_cache = 3;
  // Total IO request cost (SYNC).
  uint64 read_time_elapsed_us = 4;

  // Merged IO count in the prefetch thread/fiber.
  uint64 io_count = 5;
  // Merged IO bytes in the prefetch thread/fiber.
  uint64 io_bytes = 6;
  // Merged IO cost (ASYNC).
  uint64 io_time_elapsed_us = 7;
}
// Input stream statistics: the file system type plus at most one
// stream-specific payload.
// NOTE(review): tags 2 and 3 are unused in this message; if they belonged to
// removed fields they should be `reserved` — confirm history.
message FileInputStreamStats {
  FileSystemType file_system_type = 1;

  // Setting one member clears the other.
  oneof file_input_stream_stats {
    CacheFileInputStreamStats cache_file_input_stream_stats = 4;
    COSFileInputStreamStats cos_file_input_stream_stats = 5;
  }
}
// Statistics for a cache-backed input stream.
message CacheFileInputStreamStats {
  bool cache_hit = 1;
  string short_circuit_stream_type = 2;
  bool segment_mode = 3;
  uint64 non_read_time_elapsed_us = 4;

  // RPC read counters.
  uint64 rpc_read_bytes = 5;
  uint64 rpc_read_time_elapsed_us = 6;

  // Direct read counters.
  uint64 direct_read_bytes = 7;
  uint64 direct_read_time_elapsed_us = 8;

  // If cache hit, base_stream_stats is not set.
  optional FileInputStreamStats base_stream_stats = 9;
}
// Statistics for a COS input stream.
message COSFileInputStreamStats {
  // Declared `optional`, so unset is distinguishable from 0.
  // NOTE(review): whether this is a retry count or a duration is not stated.
  optional uint32 retry_time = 1;
}
// Text-format input statistics: two warning counters.
message TextInputStats {
  uint64 missing_field_warned_count = 1;
  uint64 extra_field_warned_count = 2;
}
// Memory-table input statistics: a batch counter.
message MemoryTableInputStats {
  uint64 batch_count = 1;
}
// Parquet input statistics.
message ParquetInputStats {
  uint64 batch_count = 1;

  // Latencies in nanoseconds, per the `_ns` suffix.
  uint64 decompression_latency_ns = 2;
  uint64 levels_decoding_latency_ns = 3;
  uint64 data_loading_latency_ns = 4;
  uint64 ppd_inclusive_latency_ns = 5;
  uint64 reader_inclusive_latency_ns = 6;

  uint64 requested_row_count = 7;
  uint64 read_row_count = 8;

  uint64 open_inclusive_latency_ns = 9;
  uint64 open_blocking_latency_ns = 10;

  // File-level pruning flags.
  bool file_bloom_filter_pruned = 20;
  bool file_bitmap_filter_pruned = 21;

  // Row groups requested before pruning.
  // NOTE(review): the original note also gives the expression
  // `request_blocks - sum of pruned row group number`, presumably the number
  // of row groups left after pruning — confirm.
  uint32 request_blocks = 22;
  optional uint32 row_group_bloom_filter_pruned = 23;
  optional uint32 row_group_stats_filter_pruned = 24;
  optional uint32 row_group_dict_filter_pruned = 25;

  // When a ParquetInput runs applyNewFilter it closes the current
  // ParquetTableReader and opens a new one. The stats of the closed reader
  // must not be lost.
  repeated DataInputStats apply_new_filter_stats = 26;
}
// ORC input statistics: a batch counter.
message OrcInputStats {
  uint64 batch_count = 1;
}
// CSV input statistics; currently has no fields.
message CSVInputStats {}
// Dummy input statistics: a batch counter.
message DummyInputStats {
  uint64 batch_count = 1;
}
// Avro input statistics; currently has no fields.
message AvroInputStats {}
// Arrow input statistics; currently has no fields.
message ArrowInputStats {}
// Output statistics: common counters plus at most one payload. The type is
// recursive through MultipleFileOutputStats, which holds repeated
// DataOutputStats.
message DataOutputStats {
  uint64 raw_output_byte_count = 1;
  uint64 row_count = 2;

  // Setting one member clears the other.
  oneof outputStats {
    FileOutputStats file_output_stats = 3;
    MultipleFileOutputStats multiple_file_output_stats = 4;
  }
}
// Output statistics for one file: its metadata, at most one format-specific
// payload, and IO statistics.
message FileOutputStats {
  FileMetaData file_meta_data = 1;

  // Format-specific statistics; setting one member clears the others.
  oneof file_format_output_stats {
    TextOutputStats text_output_stats = 2;
    ParquetOutputStats parquet_output_stats = 3;
    OrcOutputStats orc_output_stats = 6;
    AvroOutputStats avro_output_stats = 7;
    ArrowOutputStats arrow_output_stats = 8;
  }

  FileIOOutputStats io_stats = 4;
  repeated FileMetaData delete_file_metas = 5;
}
// A list of per-output DataOutputStats entries.
message MultipleFileOutputStats {
  repeated DataOutputStats file_output_stats = 1;
}
// Text-format output statistics; currently has no fields.
message TextOutputStats {}
// Parquet output statistics: three time counters.
// NOTE(review): the time unit is not stated for any of these fields.
message ParquetOutputStats {
  uint64 arrow_casting_time = 1;
  uint64 page_compress_time = 2;
  uint64 encoding_time = 3;
}
// ORC output statistics; currently has no fields.
message OrcOutputStats {}
// Avro output statistics; currently has no fields.
message AvroOutputStats {}
// Arrow output statistics; currently has no fields.
message ArrowOutputStats {}
// Stats payload for OperatorStats.table_sink_stats.
message TableSinkStats {
  DataOutputStats output_stats = 1;
}
// Spill statistics; embedded by several operator stats messages.
message SpillStats {
  // NOTE(review): units not stated here; presumably bytes.
  uint64 compressed_size = 1;
  uint64 raw_size = 2;
  // Deprecated. The field option makes the deprecation visible to generated
  // code instead of only to readers of this file.
  uint64 spill_count = 3 [deprecated = true];
  uint64 row_count = 4;
  uint64 run_count = 5;
  uint64 file_count = 6;
  Timing write_timing = 7;
  Timing read_timing = 8;
}
// (Hosting-page footer, not part of the schema:)
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy