// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.dinky.shaded.paimon.schema.TableSchema Maven / Gradle / Ivy
//
// The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.schema;

import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.JsonSerdeUtil;
import org.dinky.shaded.paimon.utils.Preconditions;
import org.dinky.shaded.paimon.utils.StringUtils;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

import static org.dinky.shaded.paimon.CoreOptions.BUCKET_KEY;

/**
 * Schema of a table. Unlike {@link Schema}, it carries additional information such as the schema
 * id and the highest field id ever assigned.
 *
 * <p>The constructor eagerly runs the primary-key and bucket-key validations, so an instance that
 * was constructed successfully has already passed both checks.
 */
public class TableSchema implements Serializable {

    private static final long serialVersionUID = 1L;

    private final long id;

    private final List<DataField> fields;

    /** Not available from fields, as some fields may have been deleted. */
    private final int highestFieldId;

    private final List<String> partitionKeys;

    private final List<String> primaryKeys;

    private final Map<String, String> options;

    private final String comment;

    private final long timeMillis;

    /**
     * Creates a schema stamped with the current wall-clock time ({@link
     * System#currentTimeMillis()}).
     *
     * @param id schema id
     * @param fields fields of the table
     * @param highestFieldId highest field id assigned so far
     * @param partitionKeys names of the partition fields
     * @param primaryKeys names of the primary key fields
     * @param options table options; wrapped in an unmodifiable view
     * @param comment table comment
     */
    public TableSchema(
            long id,
            List<DataField> fields,
            int highestFieldId,
            List<String> partitionKeys,
            List<String> primaryKeys,
            Map<String, String> options,
            String comment) {
        this(
                id,
                fields,
                highestFieldId,
                partitionKeys,
                primaryKeys,
                options,
                comment,
                System.currentTimeMillis());
    }

    /**
     * Creates a schema with an explicit timestamp and validates its primary keys and bucket keys.
     *
     * @param id schema id
     * @param fields fields of the table
     * @param highestFieldId highest field id assigned so far
     * @param partitionKeys names of the partition fields
     * @param primaryKeys names of the primary key fields
     * @param options table options; wrapped in an unmodifiable view
     * @param comment table comment
     * @param timeMillis timestamp associated with this schema, in milliseconds
     * @throws RuntimeException if the bucket keys configured in {@code options} are invalid (see
     *     {@link #bucketKeys()}); the primary key check of {@link #trimmedPrimaryKeys()} may
     *     also fail
     */
    public TableSchema(
            long id,
            List<DataField> fields,
            int highestFieldId,
            List<String> partitionKeys,
            List<String> primaryKeys,
            Map<String, String> options,
            String comment,
            long timeMillis) {
        this.id = id;
        this.fields = fields;
        this.highestFieldId = highestFieldId;
        this.partitionKeys = partitionKeys;
        this.primaryKeys = primaryKeys;
        this.options = Collections.unmodifiableMap(options);
        this.comment = comment;
        this.timeMillis = timeMillis;

        // try to trim to validate primary keys
        trimmedPrimaryKeys();

        // try to validate bucket keys
        originalBucketKeys();
    }

    /** Returns the schema id. */
    public long id() {
        return id;
    }

    /** Returns the fields of this schema. */
    public List<DataField> fields() {
        return fields;
    }

    /** Returns the names of all fields, in field order. A new list is built on every call. */
    public List<String> fieldNames() {
        return fields.stream().map(DataField::name).collect(Collectors.toList());
    }

    /** Returns the highest field id assigned so far. */
    public int highestFieldId() {
        return highestFieldId;
    }

    /** Returns the partition key names. */
    public List<String> partitionKeys() {
        return partitionKeys;
    }

    /** Returns the primary key names, including those that are also partition keys. */
    public List<String> primaryKeys() {
        return primaryKeys;
    }

    /**
     * Returns the primary keys with every partition key removed.
     *
     * <p>When there are no primary keys the (empty) primary key list itself is returned. When
     * primary keys exist but all of them are partition keys, the {@code Preconditions.checkState}
     * check fails.
     *
     * @return primary keys that are not partition keys
     */
    public List<String> trimmedPrimaryKeys() {
        if (!primaryKeys.isEmpty()) {
            List<String> adjusted =
                    primaryKeys.stream()
                            .filter(pk -> !partitionKeys.contains(pk))
                            .collect(Collectors.toList());

            Preconditions.checkState(
                    !adjusted.isEmpty(),
                    String.format(
                            "Primary key constraint %s should not be same with partition fields %s,"
                                    + " this will result in only one record in a partition",
                            primaryKeys, partitionKeys));

            return adjusted;
        }

        return primaryKeys;
    }

    /** Returns the table options as an unmodifiable map. */
    public Map<String, String> options() {
        return options;
    }

    /**
     * Returns the effective bucket keys.
     *
     * <p>Resolution order: the bucket keys configured through the {@code BUCKET_KEY} option, then
     * the trimmed primary keys, then all field names.
     *
     * @throws RuntimeException if the configured bucket keys reference unknown fields, overlap
     *     with the partition keys, or (when primary keys exist) are not all primary keys
     */
    public List<String> bucketKeys() {
        List<String> bucketKeys = originalBucketKeys();
        if (bucketKeys.isEmpty()) {
            bucketKeys = trimmedPrimaryKeys();
        }
        if (bucketKeys.isEmpty()) {
            bucketKeys = fieldNames();
        }
        return bucketKeys;
    }

    /**
     * Returns {@code true} when both primary keys and partition keys are defined and the primary
     * keys do not contain every partition key; {@code false} otherwise.
     */
    public boolean crossPartitionUpdate() {
        if (primaryKeys.isEmpty() || partitionKeys.isEmpty()) {
            return false;
        }

        return !primaryKeys.containsAll(partitionKeys);
    }

    /** Original bucket keys, maybe empty. */
    private List<String> originalBucketKeys() {
        String key = options.get(BUCKET_KEY.key());
        if (StringUtils.isNullOrWhitespaceOnly(key)) {
            return Collections.emptyList();
        }
        // Entries are split on ',' only; surrounding whitespace is NOT trimmed.
        List<String> bucketKeys = Arrays.asList(key.split(","));
        if (!containsAll(fieldNames(), bucketKeys)) {
            throw new RuntimeException(
                    String.format(
                            "Field names %s should contains all bucket keys %s.",
                            fieldNames(), bucketKeys));
        }
        if (bucketKeys.stream().anyMatch(partitionKeys::contains)) {
            throw new RuntimeException(
                    String.format(
                            "Bucket keys %s should not in partition keys %s.",
                            bucketKeys, partitionKeys));
        }
        if (!primaryKeys.isEmpty()) {
            if (!containsAll(primaryKeys, bucketKeys)) {
                throw new RuntimeException(
                        String.format(
                                "Primary keys %s should contains all bucket keys %s.",
                                primaryKeys, bucketKeys));
            }
        }
        return bucketKeys;
    }

    /** Returns whether every element of {@code contains} is present in {@code all}. */
    private boolean containsAll(List<String> all, List<String> contains) {
        return new HashSet<>(all).containsAll(new HashSet<>(contains));
    }

    /** Returns the table comment. */
    public String comment() {
        return comment;
    }

    /** Returns the timestamp associated with this schema, in milliseconds. */
    public long timeMillis() {
        return timeMillis;
    }

    /** Returns a {@link RowType} built from all fields. */
    public RowType logicalRowType() {
        return new RowType(fields);
    }

    /** Returns a {@link RowType} built from the partition key fields. */
    public RowType logicalPartitionType() {
        return projectedLogicalRowType(partitionKeys);
    }

    /** Returns a {@link RowType} built from the effective bucket key fields. */
    public RowType logicalBucketKeyType() {
        return projectedLogicalRowType(bucketKeys());
    }

    /** Returns a {@link RowType} built from the trimmed primary key fields. */
    public RowType logicalTrimmedPrimaryKeysType() {
        return projectedLogicalRowType(trimmedPrimaryKeys());
    }

    /** Returns a {@link RowType} built from the primary key fields. */
    public RowType logicalPrimaryKeysType() {
        return projectedLogicalRowType(primaryKeys());
    }

    /** Returns the {@link DataField}s of the primary keys, in primary key order. */
    public List<DataField> primaryKeysFields() {
        return projectedDataFields(primaryKeys());
    }

    /** Returns the {@link DataField}s of the trimmed primary keys, in key order. */
    public List<DataField> trimmedPrimaryKeysFields() {
        return projectedDataFields(trimmedPrimaryKeys());
    }

    /**
     * Maps field names to their positions in {@link #fieldNames()}.
     *
     * @param projectedFieldNames names to look up
     * @return one index per requested name, in request order; {@code -1} for a name that is not
     *     a field of this schema
     */
    public int[] projection(List<String> projectedFieldNames) {
        List<String> fieldNames = fieldNames();
        return projectedFieldNames.stream().mapToInt(fieldNames::indexOf).toArray();
    }

    /**
     * Looks up the {@link DataField} for each requested name, in request order. A name that is
     * not a field of this schema makes the lookup index {@code -1}, which the underlying {@code
     * fields.get} call rejects.
     */
    private List<DataField> projectedDataFields(List<String> projectedFieldNames) {
        List<String> fieldNames = fieldNames();
        return projectedFieldNames.stream()
                .map(k -> fields.get(fieldNames.indexOf(k)))
                .collect(Collectors.toList());
    }

    /**
     * Returns a {@link RowType} made of the named fields, in the order given.
     *
     * @param projectedFieldNames names of the fields to include
     */
    public RowType projectedLogicalRowType(List<String> projectedFieldNames) {
        return new RowType(projectedDataFields(projectedFieldNames));
    }

    /**
     * Returns a new schema identical to this one (same id and timestamp) except for its options.
     * The new instance goes through the constructor validation again.
     *
     * @param newOptions options of the copy
     */
    public TableSchema copy(Map<String, String> newOptions) {
        return new TableSchema(
                id,
                fields,
                highestFieldId,
                partitionKeys,
                primaryKeys,
                newOptions,
                comment,
                timeMillis);
    }

    /** Returns the result of {@code JsonSerdeUtil.toJson} applied to this schema. */
    @Override
    public String toString() {
        return JsonSerdeUtil.toJson(this);
    }

    /**
     * Compares fields, partition keys, primary keys, options and comment. The schema id, the
     * highest field id and the timestamp do not take part in equality.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        TableSchema tableSchema = (TableSchema) o;
        return Objects.equals(fields, tableSchema.fields)
                && Objects.equals(partitionKeys, tableSchema.partitionKeys)
                && Objects.equals(primaryKeys, tableSchema.primaryKeys)
                && Objects.equals(options, tableSchema.options)
                && Objects.equals(comment, tableSchema.comment);
    }

    /** Hashes the same components that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hash(fields, partitionKeys, primaryKeys, options, comment);
    }

    /** Returns the fields of the given row type, as reported by {@code rowType.getFields()}. */
    public static List<DataField> newFields(RowType rowType) {
        return rowType.getFields();
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy