org.dinky.shaded.paimon.schema.TableSchema Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.schema;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.JsonSerdeUtil;
import org.dinky.shaded.paimon.utils.Preconditions;
import org.dinky.shaded.paimon.utils.StringUtils;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.dinky.shaded.paimon.CoreOptions.BUCKET_KEY;
/**
 * Schema of a table. Compared with {@link Schema}, it carries additional information, including
 * the schema id and the field ids.
 *
 * <p>The constructor validates the primary keys against the partition keys and validates the
 * bucket keys configured in the options, so an instance that was constructed successfully has
 * passed both checks at construction time.
 */
public class TableSchema implements Serializable {

    private static final long serialVersionUID = 1L;

    private final long id;

    private final List<DataField> fields;

    /** Not available from fields, as some fields may have been deleted. */
    private final int highestFieldId;

    private final List<String> partitionKeys;

    private final List<String> primaryKeys;

    private final Map<String, String> options;

    private final String comment;

    private final long timeMillis;

    /**
     * Creates a schema whose {@link #timeMillis()} is the current system time.
     *
     * @param id schema id
     * @param fields fields of the table
     * @param highestFieldId highest field id handed out so far
     * @param partitionKeys names of the partition fields
     * @param primaryKeys names of the primary key fields
     * @param options table options; exposed through an unmodifiable view
     * @param comment table comment
     */
    public TableSchema(
            long id,
            List<DataField> fields,
            int highestFieldId,
            List<String> partitionKeys,
            List<String> primaryKeys,
            Map<String, String> options,
            String comment) {
        this(
                id,
                fields,
                highestFieldId,
                partitionKeys,
                primaryKeys,
                options,
                comment,
                System.currentTimeMillis());
    }

    /**
     * Creates a schema with an explicit timestamp.
     *
     * @param id schema id
     * @param fields fields of the table
     * @param highestFieldId highest field id handed out so far
     * @param partitionKeys names of the partition fields
     * @param primaryKeys names of the primary key fields
     * @param options table options; exposed through an unmodifiable view
     * @param comment table comment
     * @param timeMillis timestamp of this schema in milliseconds
     * @throws RuntimeException if the bucket keys configured in {@code options} are invalid, see
     *     {@link #bucketKeys()}; the primary key check of {@link #trimmedPrimaryKeys()} is run
     *     as well
     */
    public TableSchema(
            long id,
            List<DataField> fields,
            int highestFieldId,
            List<String> partitionKeys,
            List<String> primaryKeys,
            Map<String, String> options,
            String comment,
            long timeMillis) {
        this.id = id;
        this.fields = fields;
        this.highestFieldId = highestFieldId;
        this.partitionKeys = partitionKeys;
        this.primaryKeys = primaryKeys;
        this.options = Collections.unmodifiableMap(options);
        this.comment = comment;
        this.timeMillis = timeMillis;

        // Results are discarded on purpose: both calls only run for their validation.
        // try to trim to validate primary keys
        trimmedPrimaryKeys();

        // try to validate bucket keys
        originalBucketKeys();
    }

    /** Returns the schema id. */
    public long id() {
        return id;
    }

    /** Returns the fields of this schema. */
    public List<DataField> fields() {
        return fields;
    }

    /** Returns the names of all fields, in field order. */
    public List<String> fieldNames() {
        return fields.stream().map(DataField::name).collect(Collectors.toList());
    }

    /** Returns the highest field id; it cannot be derived from the current fields alone. */
    public int highestFieldId() {
        return highestFieldId;
    }

    /** Returns the names of the partition fields. */
    public List<String> partitionKeys() {
        return partitionKeys;
    }

    /** Returns the names of the primary key fields, including any partition fields. */
    public List<String> primaryKeys() {
        return primaryKeys;
    }

    /**
     * Returns the primary keys with all partition keys removed.
     *
     * <p>When no primary key is defined, the (empty) primary key list is returned as is. When
     * primary keys are defined but every one of them is also a partition key, the state check
     * performed through {@link Preconditions#checkState} fails.
     */
    public List<String> trimmedPrimaryKeys() {
        if (!primaryKeys.isEmpty()) {
            List<String> adjusted =
                    primaryKeys.stream()
                            .filter(pk -> !partitionKeys.contains(pk))
                            .collect(Collectors.toList());

            Preconditions.checkState(
                    !adjusted.isEmpty(),
                    String.format(
                            "Primary key constraint %s should not be same with partition fields %s,"
                                    + " this will result in only one record in a partition",
                            primaryKeys, partitionKeys));

            return adjusted;
        }

        return primaryKeys;
    }

    /** Returns the table options as an unmodifiable view. */
    public Map<String, String> options() {
        return options;
    }

    /**
     * Returns the effective bucket keys.
     *
     * <p>The keys are resolved in this order: the bucket keys configured in the options, then the
     * trimmed primary keys, and finally all field names.
     *
     * @throws RuntimeException if the configured bucket keys are invalid
     */
    public List<String> bucketKeys() {
        List<String> bucketKeys = originalBucketKeys();
        if (bucketKeys.isEmpty()) {
            bucketKeys = trimmedPrimaryKeys();
        }
        if (bucketKeys.isEmpty()) {
            bucketKeys = fieldNames();
        }
        return bucketKeys;
    }

    /**
     * Returns {@code true} when both primary keys and partition keys are defined and the primary
     * keys do not contain all partition keys; returns {@code false} otherwise.
     */
    public boolean crossPartitionUpdate() {
        if (primaryKeys.isEmpty() || partitionKeys.isEmpty()) {
            return false;
        }

        return !primaryKeys.containsAll(partitionKeys);
    }

    /**
     * Original bucket keys, maybe empty.
     *
     * <p>The keys are read from the {@code BUCKET_KEY} option and split on {@code ','}. The split
     * parts are used verbatim (no trimming is applied here).
     *
     * @throws RuntimeException if a bucket key is not a field name, is a partition key, or (when
     *     primary keys are defined) is not a primary key
     */
    private List<String> originalBucketKeys() {
        String key = options.get(BUCKET_KEY.key());
        if (StringUtils.isNullOrWhitespaceOnly(key)) {
            return Collections.emptyList();
        }

        List<String> bucketKeys = Arrays.asList(key.split(","));
        List<String> names = fieldNames();
        if (!containsAll(names, bucketKeys)) {
            throw new RuntimeException(
                    String.format(
                            "Field names %s should contains all bucket keys %s.",
                            names, bucketKeys));
        }
        if (bucketKeys.stream().anyMatch(partitionKeys::contains)) {
            throw new RuntimeException(
                    String.format(
                            "Bucket keys %s should not in partition keys %s.",
                            bucketKeys, partitionKeys));
        }
        if (!primaryKeys.isEmpty() && !containsAll(primaryKeys, bucketKeys)) {
            throw new RuntimeException(
                    String.format(
                            "Primary keys %s should contains all bucket keys %s.",
                            primaryKeys, bucketKeys));
        }
        return bucketKeys;
    }

    /** Returns whether every element of {@code contains} is present in {@code all}. */
    private boolean containsAll(List<String> all, List<String> contains) {
        // Only the looked-up side needs hashing; the other side is merely iterated.
        return new HashSet<>(all).containsAll(contains);
    }

    /** Returns the table comment. */
    public String comment() {
        return comment;
    }

    /** Returns the timestamp of this schema in milliseconds. */
    public long timeMillis() {
        return timeMillis;
    }

    /** Returns a row type built from all fields. */
    public RowType logicalRowType() {
        return new RowType(fields);
    }

    /** Returns a row type built from the partition fields. */
    public RowType logicalPartitionType() {
        return projectedLogicalRowType(partitionKeys);
    }

    /** Returns a row type built from the effective bucket keys, see {@link #bucketKeys()}. */
    public RowType logicalBucketKeyType() {
        return projectedLogicalRowType(bucketKeys());
    }

    /** Returns a row type built from the trimmed primary keys. */
    public RowType logicalTrimmedPrimaryKeysType() {
        return projectedLogicalRowType(trimmedPrimaryKeys());
    }

    /** Returns a row type built from the primary keys. */
    public RowType logicalPrimaryKeysType() {
        return projectedLogicalRowType(primaryKeys());
    }

    /** Returns the fields that make up the primary keys. */
    public List<DataField> primaryKeysFields() {
        return projectedDataFields(primaryKeys());
    }

    /** Returns the fields that make up the trimmed primary keys. */
    public List<DataField> trimmedPrimaryKeysFields() {
        return projectedDataFields(trimmedPrimaryKeys());
    }

    /**
     * Maps field names to their positions in this schema.
     *
     * @param projectedFieldNames names of the fields to project
     * @return the index of each name in {@link #fieldNames()}, in the given order; a name that is
     *     not a field of this schema maps to {@code -1}
     */
    public int[] projection(List<String> projectedFieldNames) {
        List<String> fieldNames = fieldNames();
        return projectedFieldNames.stream().mapToInt(fieldNames::indexOf).toArray();
    }

    /**
     * Looks up the fields with the given names, in the given order. A name that is not a field
     * of this schema resolves to index {@code -1} and therefore makes the lookup fail.
     */
    private List<DataField> projectedDataFields(List<String> projectedFieldNames) {
        List<String> fieldNames = fieldNames();
        return projectedFieldNames.stream()
                .map(k -> fields.get(fieldNames.indexOf(k)))
                .collect(Collectors.toList());
    }

    /** Returns a row type built from the fields with the given names, in the given order. */
    public RowType projectedLogicalRowType(List<String> projectedFieldNames) {
        return new RowType(projectedDataFields(projectedFieldNames));
    }

    /**
     * Returns a copy of this schema in which only the options are replaced. The id, fields, keys,
     * comment and timestamp are carried over, and the constructor validation runs again against
     * the new options.
     */
    public TableSchema copy(Map<String, String> newOptions) {
        return new TableSchema(
                id,
                fields,
                highestFieldId,
                partitionKeys,
                primaryKeys,
                newOptions,
                comment,
                timeMillis);
    }

    /** Returns the result of {@link JsonSerdeUtil#toJson} for this schema. */
    @Override
    public String toString() {
        return JsonSerdeUtil.toJson(this);
    }

    /**
     * Compares fields, partition keys, primary keys, options and comment. The id, the highest
     * field id and the timestamp do not take part in equality.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        TableSchema tableSchema = (TableSchema) o;
        return Objects.equals(fields, tableSchema.fields)
                && Objects.equals(partitionKeys, tableSchema.partitionKeys)
                && Objects.equals(primaryKeys, tableSchema.primaryKeys)
                && Objects.equals(options, tableSchema.options)
                && Objects.equals(comment, tableSchema.comment);
    }

    /** Hashes the same members that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hash(fields, partitionKeys, primaryKeys, options, comment);
    }

    /** Returns the fields of the given row type. */
    public static List<DataField> newFields(RowType rowType) {
        return rowType.getFields();
    }
}