All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.cdc.common.utils.SchemaUtils Maven / Gradle / Ivy

There is a newer version: 3.3.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.cdc.common.utils;

import org.apache.flink.cdc.common.annotation.PublicEvolving;
import org.apache.flink.cdc.common.annotation.VisibleForTesting;
import org.apache.flink.cdc.common.data.RecordData;
import org.apache.flink.cdc.common.event.AddColumnEvent;
import org.apache.flink.cdc.common.event.AlterColumnTypeEvent;
import org.apache.flink.cdc.common.event.DropColumnEvent;
import org.apache.flink.cdc.common.event.RenameColumnEvent;
import org.apache.flink.cdc.common.event.SchemaChangeEvent;
import org.apache.flink.cdc.common.schema.Column;
import org.apache.flink.cdc.common.schema.Schema;
import org.apache.flink.cdc.common.types.DataType;
import org.apache.flink.cdc.common.types.DataTypeFamily;
import org.apache.flink.cdc.common.types.DataTypeRoot;
import org.apache.flink.cdc.common.types.DataTypes;
import org.apache.flink.cdc.common.types.DecimalType;
import org.apache.flink.cdc.common.types.LocalZonedTimestampType;
import org.apache.flink.cdc.common.types.TimestampType;
import org.apache.flink.cdc.common.types.ZonedTimestampType;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/** Utils for {@link Schema} to perform the ability of evolution. */
@PublicEvolving
public class SchemaUtils {

    /**
     * Creates a list of {@link RecordData.FieldGetter} from the columns of the given {@link
     * Schema}, used to read field values out of a {@link RecordData}.
     *
     * @param schema the schema whose columns are used
     * @return the field getters produced for {@code schema.getColumns()}
     */
    public static List<RecordData.FieldGetter> createFieldGetters(Schema schema) {
        return createFieldGetters(schema.getColumns());
    }

    /**
     * Creates a list of {@link RecordData.FieldGetter} from the given {@link Column}s, used to
     * read field values out of a {@link RecordData}.
     *
     * @param columns the columns to create getters for
     * @return one field getter per column; the getter at index {@code i} is created from the type
     *     of {@code columns.get(i)} and the field position {@code i}
     */
    public static List<RecordData.FieldGetter> createFieldGetters(List<Column> columns) {
        List<RecordData.FieldGetter> fieldGetters = new ArrayList<>(columns.size());
        for (int i = 0; i < columns.size(); i++) {
            fieldGetters.add(RecordData.createFieldGetter(columns.get(i).getType(), i));
        }
        return fieldGetters;
    }

    /**
     * Restore original data fields from RecordData structure.
     *
     * @param recordData the record to read from, may be {@code null}
     * @param fieldGetters the getters applied to {@code recordData}, in order
     * @return the value returned by each getter's {@code getFieldOrNull}, in getter order; an
     *     immutable empty list if {@code recordData} is {@code null}
     */
    public static List<Object> restoreOriginalData(
            @Nullable RecordData recordData, List<RecordData.FieldGetter> fieldGetters) {
        if (recordData == null) {
            return Collections.emptyList();
        }
        // The result size is known up front, so presize the list.
        List<Object> actualFields = new ArrayList<>(fieldGetters.size());
        for (RecordData.FieldGetter fieldGetter : fieldGetters) {
            actualFields.add(fieldGetter.getFieldOrNull(recordData));
        }
        return actualFields;
    }

    /**
     * Merge compatible upstream schemas.
     *
     * <p>The schemas are folded from left to right with {@link #inferWiderSchema(Schema, Schema)}.
     *
     * @param schemas the schemas to merge
     * @return the merged schema; the single element itself if only one schema is given; {@code
     *     null} if {@code schemas} is empty
     * @throws IllegalStateException if any pair of schemas cannot be merged
     */
    @Nullable
    public static Schema inferWiderSchema(List<Schema> schemas) {
        // Folding from a null seed covers the empty case (null is returned) and the
        // single-element case (the pairwise merge returns the right side when the left is null).
        Schema outputSchema = null;
        for (Schema schema : schemas) {
            outputSchema = inferWiderSchema(outputSchema, schema);
        }
        return outputSchema;
    }

    /**
     * Try to combine two schemas with potential incompatible type.
     *
     * <p>Both schemas must agree on column count, primary keys, partition keys, options and
     * comment. Columns are then merged position by position with {@link
     * #inferWiderColumn(Column, Column)}.
     *
     * @param lSchema the left schema; if {@code null}, {@code rSchema} is returned unchanged
     * @param rSchema the right schema
     * @return a copy of {@code lSchema} carrying the merged columns
     * @throws IllegalStateException if the schemas differ in any of the attributes listed above,
     *     or if a pair of columns cannot be merged
     */
    @VisibleForTesting
    public static Schema inferWiderSchema(@Nullable Schema lSchema, Schema rSchema) {
        if (lSchema == null) {
            return rSchema;
        }

        // The checks run in a fixed order, so the first mismatching attribute is the one reported.
        checkSchemaAttribute(
                lSchema.getColumnCount() == rSchema.getColumnCount(),
                lSchema,
                rSchema,
                "column counts");
        checkSchemaAttribute(
                lSchema.primaryKeys().equals(rSchema.primaryKeys()),
                lSchema,
                rSchema,
                "primary keys");
        checkSchemaAttribute(
                lSchema.partitionKeys().equals(rSchema.partitionKeys()),
                lSchema,
                rSchema,
                "partition keys");
        checkSchemaAttribute(
                lSchema.options().equals(rSchema.options()), lSchema, rSchema, "options");
        checkSchemaAttribute(
                Objects.equals(lSchema.comment(), rSchema.comment()),
                lSchema,
                rSchema,
                "comments");

        List<Column> leftColumns = lSchema.getColumns();
        List<Column> rightColumns = rSchema.getColumns();

        List<Column> mergedColumns =
                IntStream.range(0, lSchema.getColumnCount())
                        .mapToObj(i -> inferWiderColumn(leftColumns.get(i), rightColumns.get(i)))
                        .collect(Collectors.toList());

        return lSchema.copy(mergedColumns);
    }

    /** Throws an {@link IllegalStateException} naming the differing attribute on a mismatch. */
    private static void checkSchemaAttribute(
            boolean matches, Schema lSchema, Schema rSchema, String attribute) {
        if (!matches) {
            throw new IllegalStateException(
                    String.format(
                            "Unable to merge schema %s and %s with different %s.",
                            lSchema, rSchema, attribute));
        }
    }

    /**
     * Merges two columns that share the same name and comment into one column whose type is wide
     * enough for both.
     *
     * @param lColumn the left column; the result is a copy of this column with the merged type
     * @param rColumn the right column
     * @return a copy of {@code lColumn} carrying the type computed by {@link
     *     #inferWiderType(DataType, DataType)}
     * @throws IllegalStateException if the names or comments differ, or the types are incompatible
     */
    @VisibleForTesting
    public static Column inferWiderColumn(Column lColumn, Column rColumn) {
        boolean sameName = Objects.equals(lColumn.getName(), rColumn.getName());
        if (!sameName) {
            String message =
                    String.format(
                            "Unable to merge column %s and %s with different name.",
                            lColumn, rColumn);
            throw new IllegalStateException(message);
        }

        boolean sameComment = Objects.equals(lColumn.getComment(), rColumn.getComment());
        if (!sameComment) {
            String message =
                    String.format(
                            "Unable to merge column %s and %s with different comments.",
                            lColumn, rColumn);
            throw new IllegalStateException(message);
        }

        DataType widerType = inferWiderType(lColumn.getType(), rColumn.getType());
        return lColumn.copy(widerType);
    }

    /**
     * Try to combine given data types to a compatible wider data type.
     *
     * <p>Nullability is set aside while merging and re-applied to the result: the merged type is
     * nullable if either input is nullable, and NOT NULL otherwise. This holds for every branch,
     * including the timestamp ones.
     *
     * <p>Merge rules, checked in this order:
     *
     * <ul>
     *   <li>identical types merge to themselves;
     *   <li>two timestamp types of the same kind merge to that kind with the larger precision;
     *   <li>two timestamp-family types of different kinds merge to {@code TIMESTAMP} with maximum
     *       precision;
     *   <li>two integer numeric types merge to {@code BIGINT};
     *   <li>two character string types merge to {@code STRING};
     *   <li>two approximate numeric types merge to {@code DOUBLE};
     *   <li>two decimals merge to a decimal keeping the larger integer-digit count and the larger
     *       scale;
     *   <li>a decimal and another exact numeric type merge to a decimal wide enough for both.
     * </ul>
     *
     * @param lType the left type
     * @param rType the right type
     * @return the merged type with the combined nullability
     * @throws IllegalStateException if the two types match none of the rules above
     * @throws IllegalArgumentException if a merged decimal would need more than {@link
     *     DecimalType#MAX_PRECISION} digits (assuming {@code Preconditions.checkArgument} throws
     *     this type, as is conventional)
     */
    @VisibleForTesting
    public static DataType inferWiderType(DataType lType, DataType rType) {
        // Ignore nullability during data type merge
        boolean nullable = lType.isNullable() || rType.isNullable();
        lType = lType.notNull();
        rType = rType.notNull();

        DataType mergedType;
        if (lType.equals(rType)) {
            // identical type
            mergedType = rType;
        } else if (lType instanceof TimestampType && rType instanceof TimestampType) {
            // Assign instead of returning, so the nullability handling at the end still applies.
            mergedType =
                    DataTypes.TIMESTAMP(
                            Math.max(
                                    ((TimestampType) lType).getPrecision(),
                                    ((TimestampType) rType).getPrecision()));
        } else if (lType instanceof ZonedTimestampType && rType instanceof ZonedTimestampType) {
            mergedType =
                    DataTypes.TIMESTAMP_TZ(
                            Math.max(
                                    ((ZonedTimestampType) lType).getPrecision(),
                                    ((ZonedTimestampType) rType).getPrecision()));
        } else if (lType instanceof LocalZonedTimestampType
                && rType instanceof LocalZonedTimestampType) {
            mergedType =
                    DataTypes.TIMESTAMP_LTZ(
                            Math.max(
                                    ((LocalZonedTimestampType) lType).getPrecision(),
                                    ((LocalZonedTimestampType) rType).getPrecision()));
        } else if (lType.is(DataTypeFamily.TIMESTAMP) && rType.is(DataTypeFamily.TIMESTAMP)) {
            // Different kinds of timestamp: fall back to the widest plain TIMESTAMP.
            mergedType = DataTypes.TIMESTAMP(TimestampType.MAX_PRECISION);
        } else if (lType.is(DataTypeFamily.INTEGER_NUMERIC)
                && rType.is(DataTypeFamily.INTEGER_NUMERIC)) {
            mergedType = DataTypes.BIGINT();
        } else if (lType.is(DataTypeFamily.CHARACTER_STRING)
                && rType.is(DataTypeFamily.CHARACTER_STRING)) {
            mergedType = DataTypes.STRING();
        } else if (lType.is(DataTypeFamily.APPROXIMATE_NUMERIC)
                && rType.is(DataTypeFamily.APPROXIMATE_NUMERIC)) {
            mergedType = DataTypes.DOUBLE();
        } else if (lType instanceof DecimalType && rType instanceof DecimalType) {
            // Merge two decimal types
            DecimalType lhsDecimal = (DecimalType) lType;
            DecimalType rhsDecimal = (DecimalType) rType;
            int resultIntDigits =
                    Math.max(
                            lhsDecimal.getPrecision() - lhsDecimal.getScale(),
                            rhsDecimal.getPrecision() - rhsDecimal.getScale());
            int resultScale = Math.max(lhsDecimal.getScale(), rhsDecimal.getScale());
            Preconditions.checkArgument(
                    resultIntDigits + resultScale <= DecimalType.MAX_PRECISION,
                    String.format(
                            "Failed to merge %s and %s type into DECIMAL. %d precision digits required, %d available",
                            lType,
                            rType,
                            resultIntDigits + resultScale,
                            DecimalType.MAX_PRECISION));
            mergedType = DataTypes.DECIMAL(resultIntDigits + resultScale, resultScale);
        } else if (lType instanceof DecimalType && rType.is(DataTypeFamily.EXACT_NUMERIC)) {
            // Merge decimal and int
            mergedType = mergeExactNumericsIntoDecimal((DecimalType) lType, rType);
        } else if (rType instanceof DecimalType && lType.is(DataTypeFamily.EXACT_NUMERIC)) {
            // Merge decimal and int
            mergedType = mergeExactNumericsIntoDecimal((DecimalType) rType, lType);
        } else {
            throw new IllegalStateException(
                    String.format("Incompatible types: \"%s\" and \"%s\"", lType, rType));
        }

        // Re-apply the combined nullability to whichever type was chosen.
        return nullable ? mergedType.nullable() : mergedType.notNull();
    }

    /**
     * Widens a decimal type so it can also hold values of another exact numeric type.
     *
     * <p>The scale of {@code decimalType} is kept. The precision becomes the larger of the
     * decimal's own precision and its scale plus the numeric precision of {@code otherType}.
     *
     * @param decimalType the decimal side of the merge
     * @param otherType the other exact numeric type
     * @return the widened decimal type
     */
    private static DataType mergeExactNumericsIntoDecimal(
            DecimalType decimalType, DataType otherType) {
        final int scale = decimalType.getScale();
        final int precisionForOther = scale + getNumericPrecision(otherType);
        final int resultPrecision = Math.max(decimalType.getPrecision(), precisionForOther);

        final String errorMessage =
                String.format(
                        "Failed to merge %s and %s type into DECIMAL. %d precision digits required, %d available",
                        decimalType, otherType, resultPrecision, DecimalType.MAX_PRECISION);
        Preconditions.checkArgument(resultPrecision <= DecimalType.MAX_PRECISION, errorMessage);

        return DataTypes.DECIMAL(resultPrecision, scale);
    }

    /**
     * Returns the number of decimal digits used for an exact numeric type: 3 for TINYINT, 5 for
     * SMALLINT, 10 for INTEGER, 19 for BIGINT, and the declared precision for DECIMAL.
     *
     * @param dataType the type to inspect
     * @return the precision in decimal digits
     * @throws IllegalArgumentException if {@code dataType} is not one of the types listed above
     */
    @VisibleForTesting
    public static int getNumericPrecision(DataType dataType) {
        final boolean exactNumeric = dataType.is(DataTypeFamily.EXACT_NUMERIC);

        if (exactNumeric && dataType.is(DataTypeRoot.TINYINT)) {
            return 3;
        }
        if (exactNumeric && dataType.is(DataTypeRoot.SMALLINT)) {
            return 5;
        }
        if (exactNumeric && dataType.is(DataTypeRoot.INTEGER)) {
            return 10;
        }
        if (exactNumeric && dataType.is(DataTypeRoot.BIGINT)) {
            return 19;
        }
        if (exactNumeric && dataType.is(DataTypeRoot.DECIMAL)) {
            DecimalType decimal = (DecimalType) dataType;
            return decimal.getPrecision();
        }

        throw new IllegalArgumentException(
                "Failed to get precision of non-exact decimal type " + dataType);
    }

    /**
     * Applies a {@link SchemaChangeEvent} to a schema and returns the resulting schema.
     *
     * <p>Add-column, drop-column, rename-column and alter-column-type events are handled.
     *
     * @param schema the schema before the change
     * @param event the change to apply
     * @return the schema after the change
     * @throws UnsupportedOperationException if {@code event} is of any other type
     */
    public static Schema applySchemaChangeEvent(Schema schema, SchemaChangeEvent event) {
        if (event instanceof AddColumnEvent) {
            AddColumnEvent addColumn = (AddColumnEvent) event;
            return applyAddColumnEvent(addColumn, schema);
        }
        if (event instanceof DropColumnEvent) {
            DropColumnEvent dropColumn = (DropColumnEvent) event;
            return applyDropColumnEvent(dropColumn, schema);
        }
        if (event instanceof RenameColumnEvent) {
            RenameColumnEvent renameColumn = (RenameColumnEvent) event;
            return applyRenameColumnEvent(renameColumn, schema);
        }
        if (event instanceof AlterColumnTypeEvent) {
            AlterColumnTypeEvent alterColumnType = (AlterColumnTypeEvent) event;
            return applyAlterColumnTypeEvent(alterColumnType, schema);
        }

        String eventTypeName = event.getClass().getCanonicalName();
        throw new UnsupportedOperationException(
                String.format("Unsupported schema change event type \"%s\"", eventTypeName));
    }

    /**
     * Applies an {@link AddColumnEvent} to a schema.
     *
     * <p>Added columns are inserted one after another, so a later added column may be positioned
     * relative to a column added earlier by the same event.
     *
     * @param event the event describing the columns to add and where to put them
     * @param oldSchema the schema before the change
     * @return a copy of {@code oldSchema} with the new column list
     * @throws IllegalArgumentException if a BEFORE / AFTER position names a column that does not
     *     exist
     */
    private static Schema applyAddColumnEvent(AddColumnEvent event, Schema oldSchema) {
        LinkedList<Column> columns = new LinkedList<>(oldSchema.getColumns());
        for (AddColumnEvent.ColumnWithPosition columnWithPosition : event.getAddedColumns()) {
            switch (columnWithPosition.getPosition()) {
                case FIRST:
                    {
                        columns.addFirst(columnWithPosition.getAddColumn());
                        break;
                    }
                case LAST:
                    {
                        columns.addLast(columnWithPosition.getAddColumn());
                        break;
                    }
                case BEFORE:
                    {
                        Preconditions.checkNotNull(
                                columnWithPosition.getExistedColumnName(),
                                "existedColumnName could not be null in BEFORE type AddColumnEvent");
                        int index =
                                indexOfExistedColumn(
                                        columns, columnWithPosition.getExistedColumnName());
                        columns.add(index, columnWithPosition.getAddColumn());
                        break;
                    }
                case AFTER:
                    {
                        Preconditions.checkNotNull(
                                columnWithPosition.getExistedColumnName(),
                                "existedColumnName could not be null in AFTER type AddColumnEvent");
                        int index =
                                indexOfExistedColumn(
                                        columns, columnWithPosition.getExistedColumnName());
                        columns.add(index + 1, columnWithPosition.getAddColumn());
                        break;
                    }
            }
        }
        return oldSchema.copy(columns);
    }

    /**
     * Returns the index of the first column named {@code existedColumnName}, or throws an {@link
     * IllegalArgumentException} if no such column exists.
     */
    private static int indexOfExistedColumn(List<Column> columns, String existedColumnName) {
        // Iterate rather than index: the caller passes a LinkedList, where get(i) is linear.
        int index = 0;
        for (Column column : columns) {
            if (existedColumnName.equals(column.getName())) {
                return index;
            }
            index++;
        }
        throw new IllegalArgumentException(
                existedColumnName + " of AddColumnEvent is not existed");
    }

    /**
     * Applies a {@link DropColumnEvent} to a schema by removing every column whose name is listed
     * in the event.
     *
     * @param event the event carrying the names of the columns to drop
     * @param oldSchema the schema before the change
     * @return a copy of {@code oldSchema} containing only the remaining columns, in their original
     *     order
     */
    private static Schema applyDropColumnEvent(DropColumnEvent event, Schema oldSchema) {
        // Read the dropped names once instead of once per column inside the filter.
        List<String> droppedColumnNames = event.getDroppedColumnNames();
        List<Column> columns =
                oldSchema.getColumns().stream()
                        .filter(column -> !droppedColumnNames.contains(column.getName()))
                        .collect(Collectors.toList());
        return oldSchema.copy(columns);
    }

    /**
     * Applies a {@link RenameColumnEvent} to a schema.
     *
     * <p>Each column whose name is a key of the event's name mapping is replaced by a copy
     * carrying the mapped name. All other columns are kept as they are. Column order is preserved.
     *
     * @param event the event carrying the old-name to new-name mapping
     * @param oldSchema the schema before the change
     * @return a copy of {@code oldSchema} with the renamed columns
     */
    private static Schema applyRenameColumnEvent(RenameColumnEvent event, Schema oldSchema) {
        Map<String, String> nameMapping = event.getNameMapping();
        List<Column> columns =
                oldSchema.getColumns().stream()
                        .map(
                                column ->
                                        nameMapping.containsKey(column.getName())
                                                ? column.copy(nameMapping.get(column.getName()))
                                                : column)
                        .collect(Collectors.toList());
        return oldSchema.copy(columns);
    }

    /**
     * Applies an {@link AlterColumnTypeEvent} to a schema.
     *
     * <p>Each column whose name is a key of the event's type mapping is replaced by a copy
     * carrying the mapped type. All other columns are kept as they are. Column order is preserved.
     *
     * @param event the event carrying the column-name to new-type mapping
     * @param oldSchema the schema before the change
     * @return a copy of {@code oldSchema} with the retyped columns
     */
    private static Schema applyAlterColumnTypeEvent(AlterColumnTypeEvent event, Schema oldSchema) {
        Map<String, DataType> typeMapping = event.getTypeMapping();
        List<Column> columns =
                oldSchema.getColumns().stream()
                        .map(
                                column ->
                                        typeMapping.containsKey(column.getName())
                                                ? column.copy(typeMapping.get(column.getName()))
                                                : column)
                        .collect(Collectors.toList());
        return oldSchema.copy(columns);
    }

    /**
     * This function determines if the given schema change event {@code event} should be sent to
     * downstream based on if the given transform rule has asterisk, and what columns are
     * referenced.
     *
     * <p>If {@code hasAsterisk} is false, every {@code AddColumnEvent}, {@code DropColumnEvent}
     * and {@code RenameColumnEvent} is ignored, since an asterisk-less transform should not emit
     * schema change events that change the downstream columns. An {@code AlterColumnTypeEvent} is
     * reduced to the entries whose column is listed in {@code referencedColumns}, and is dropped
     * entirely if no entry remains.
     *
     * <p>If {@code hasAsterisk} is true, no event is filtered out. The only change is to {@code
     * AddColumnEvent}: `FIRST` / `LAST` positions are replaced with column-relative positions
     * (`BEFORE` the first referenced column / `AFTER` the last referenced column). This is
     * necessary since extra calculated columns might be added, and `FIRST` / `LAST` position might
     * differ.
     *
     * <p>Events of any other type are always passed through unchanged.
     *
     * @param hasAsterisk whether the transform rule contains an asterisk
     * @param referencedColumns names of the columns referenced by the transform rule; must be
     *     non-empty when a `FIRST` / `LAST` add-column position has to be rewritten
     * @param event the upstream schema change event
     * @return the event to emit downstream, or {@link Optional#empty()} if nothing should be
     *     emitted
     * @throws IndexOutOfBoundsException if a `FIRST` / `LAST` position must be rewritten while
     *     {@code referencedColumns} is empty
     */
    public static Optional<SchemaChangeEvent> transformSchemaChangeEvent(
            boolean hasAsterisk, List<String> referencedColumns, SchemaChangeEvent event) {
        Optional<SchemaChangeEvent> evolvedSchemaChangeEvent = Optional.empty();
        if (event instanceof AddColumnEvent) {
            // Send add column events to downstream iff there's an asterisk
            if (hasAsterisk) {
                List<AddColumnEvent.ColumnWithPosition> addedColumns =
                        ((AddColumnEvent) event)
                                .getAddedColumns().stream()
                                        .map(e -> toRelativePosition(e, referencedColumns))
                                        .collect(Collectors.toList());
                evolvedSchemaChangeEvent =
                        Optional.of(new AddColumnEvent(event.tableId(), addedColumns));
            }
        } else if (event instanceof AlterColumnTypeEvent) {
            AlterColumnTypeEvent alterColumnTypeEvent = (AlterColumnTypeEvent) event;
            if (hasAsterisk) {
                // In wildcard mode, all alter column type events should be sent to downstream
                evolvedSchemaChangeEvent = Optional.of(event);
            } else {
                // Or, we need to filter out those referenced columns and reconstruct
                // SchemaChangeEvents
                Map<String, DataType> newDataTypeMap =
                        alterColumnTypeEvent.getTypeMapping().entrySet().stream()
                                .filter(e -> referencedColumns.contains(e.getKey()))
                                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
                if (!newDataTypeMap.isEmpty()) {
                    evolvedSchemaChangeEvent =
                            Optional.of(
                                    new AlterColumnTypeEvent(
                                            alterColumnTypeEvent.tableId(), newDataTypeMap));
                }
            }
        } else if (event instanceof RenameColumnEvent) {
            if (hasAsterisk) {
                evolvedSchemaChangeEvent = Optional.of(event);
            }
        } else if (event instanceof DropColumnEvent) {
            if (hasAsterisk) {
                evolvedSchemaChangeEvent = Optional.of(event);
            }
        } else {
            evolvedSchemaChangeEvent = Optional.of(event);
        }
        return evolvedSchemaChangeEvent;
    }

    /**
     * Rewrites a `LAST` position to `AFTER` the last referenced column and a `FIRST` position to
     * `BEFORE` the first referenced column; any other position is returned unchanged.
     */
    private static AddColumnEvent.ColumnWithPosition toRelativePosition(
            AddColumnEvent.ColumnWithPosition columnWithPosition, List<String> referencedColumns) {
        if (AddColumnEvent.ColumnPosition.LAST.equals(columnWithPosition.getPosition())) {
            return new AddColumnEvent.ColumnWithPosition(
                    columnWithPosition.getAddColumn(),
                    AddColumnEvent.ColumnPosition.AFTER,
                    referencedColumns.get(referencedColumns.size() - 1));
        } else if (AddColumnEvent.ColumnPosition.FIRST.equals(columnWithPosition.getPosition())) {
            return new AddColumnEvent.ColumnWithPosition(
                    columnWithPosition.getAddColumn(),
                    AddColumnEvent.ColumnPosition.BEFORE,
                    referencedColumns.get(0));
        } else {
            return columnWithPosition;
        }
    }
}