All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.internal.schema.utils.SchemaChangeUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.internal.schema.utils;

import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.Type;
import org.apache.hudi.internal.schema.Types;
import org.apache.hudi.internal.schema.action.TableChanges;
import org.apache.hudi.internal.schema.action.TableChangesHelper;

import java.util.ArrayList;
import java.util.List;

/**
 * Helper methods for schema Change.
 */
public class SchemaChangeUtils {
  private SchemaChangeUtils() {

  }

  /**
   * Whether to allow the column type to be updated.
   * now only support:
   * int => long/float/double/String/Decimal
   * long => float/double/String/Decimal
   * float => double/String/Decimal
   * double => String/Decimal
   * Decimal => Decimal/String
   * String => date/decimal
   * date => String
   * TODO: support more type update.
   *
   * @param src origin column type.
   * @param dsr new column type.
   * @return whether to allow the column type to be updated.
   */
  public static boolean isTypeUpdateAllow(Type src, Type dsr) {
    if (src.isNestedType() || dsr.isNestedType()) {
      throw new IllegalArgumentException("only support update primitive type");
    }
    if (src.equals(dsr)) {
      return true;
    }
    return isTypeUpdateAllowInternal(src, dsr);
  }

  public static boolean shouldPromoteType(Type src, Type dsr) {
    if (src.equals(dsr) || src.isNestedType() || dsr.isNestedType()) {
      return false;
    }
    return isTypeUpdateAllowInternal(src, dsr);
  }

  private static boolean isTypeUpdateAllowInternal(Type src, Type dsr) {
    switch (src.typeId()) {
      case INT:
        return dsr == Types.LongType.get() || dsr == Types.FloatType.get()
            || dsr == Types.DoubleType.get() || dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL;
      case LONG:
        return dsr == Types.FloatType.get() || dsr == Types.DoubleType.get() || dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL;
      case FLOAT:
        return dsr == Types.DoubleType.get() || dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL;
      case DOUBLE:
        return dsr == Types.StringType.get() || dsr.typeId() == Type.TypeID.DECIMAL;
      case DATE:
      case BINARY:
        return dsr == Types.StringType.get();
      case DECIMAL:
        if (dsr.typeId() == Type.TypeID.DECIMAL) {
          Types.DecimalType decimalSrc = (Types.DecimalType)src;
          Types.DecimalType decimalDsr = (Types.DecimalType)dsr;
          if (decimalDsr.isWiderThan(decimalSrc)) {
            return true;
          }
          if (decimalDsr.precision() >= decimalSrc.precision() && decimalDsr.scale() == decimalSrc.scale()) {
            return true;
          }
        } else if (dsr.typeId() == Type.TypeID.STRING) {
          return true;
        }
        break;
      case STRING:
        return dsr == Types.DateType.get() || dsr.typeId() == Type.TypeID.DECIMAL || dsr == Types.BinaryType.get();
      default:
        return false;
    }
    return false;
  }

  /**
   * Apply all the DDL add operations to internalSchema to produce a new internalSchema.
   *
   * @param internalSchema origin internalSchema.
   * @param adds a wrapper class for all the DDL add operations.
   * @return a new internalSchema.
   */
  public static InternalSchema applyTableChanges2Schema(InternalSchema internalSchema, TableChanges.ColumnAddChange adds) {
    Types.RecordType newType = (Types.RecordType)applyTableChange2Type(internalSchema.getRecord(), adds);
    // deal with root level changes
    List newFields = TableChangesHelper.applyAddChange2Fields(newType.fields(),
        adds.getParentId2AddCols().get(-1), adds.getPositionChangeMap().get(-1));
    return new InternalSchema(Types.RecordType.get(newFields, newType.name()));
  }

  /**
   * Apply all the DDL add operations to Type to produce a new internalSchema.
   * do not call this method directly. expose this method only for UT.
   *
   * @param type origin hudi Type.
   * @param adds a wrapper class for all the DDL add operations.
   * @return a new internalSchema.
   */
  public static Type applyTableChange2Type(Type type, TableChanges.ColumnAddChange adds) {
    switch (type.typeId()) {
      case RECORD:
        Types.RecordType record = (Types.RecordType) type;
        List newTypes = new ArrayList<>();
        for (Types.Field f : record.fields()) {
          Type newType = applyTableChange2Type(f.type(), adds);
          // try to apply add
          newTypes.add(newType.isNestedType() ? adds.applyAdd(f, newType) : newType);
        }
        List newFields = new ArrayList<>();
        boolean hasChanged = false;
        for (int i = 0; i < newTypes.size(); i++) {
          Type newType = newTypes.get(i);
          Types.Field oldfield = record.fields().get(i);
          if (oldfield.type() == newType) {
            newFields.add(oldfield);
          } else {
            hasChanged = true;
            newFields.add(Types.Field.get(oldfield.fieldId(), oldfield.isOptional(), oldfield.name(), newType, oldfield.doc()));
          }
        }
        return hasChanged ? Types.RecordType.get(newFields, record.name()) : record;
      case ARRAY:
        Types.ArrayType array = (Types.ArrayType) type;
        Type newElementType;
        Types.Field elementField = array.field(array.elementId());
        newElementType = applyTableChange2Type(array.elementType(), adds);
        // try to apply add
        newElementType = adds.applyAdd(elementField, newElementType);
        if (newElementType == array.elementType()) {
          return array;
        }
        return Types.ArrayType.get(array.elementId(), array.isElementOptional(), newElementType);
      case MAP:
        Types.MapType map = (Types.MapType) type;
        Type newValueType;
        Types.Field valueField = map.field(map.valueId());
        if (adds.getParentId2AddCols().containsKey(map.keyId())) {
          throw new IllegalArgumentException("Cannot add fields to map keys: " + map);
        }
        newValueType = applyTableChange2Type(map.valueType(), adds);
        // try to apply add
        newValueType = adds.applyAdd(valueField, newValueType);
        if (newValueType == map.valueType()) {
          return map;
        }
        return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, map.isValueOptional());
      default:
        return type;
    }
  }

  /**
   * Apply all the DDL delete operations to internalSchema to produce a new internalSchema.
   *
   * @param internalSchema origin internalSchema.
   * @param deletes a wrapper class for all the DDL delete operations.
   * @return a new internalSchema.
   */
  public static InternalSchema applyTableChanges2Schema(InternalSchema internalSchema, TableChanges.ColumnDeleteChange deletes) {
    return new InternalSchema((Types.RecordType)applyTableChange2Type(internalSchema.getRecord(), deletes));
  }

  /**
   * Apply all the DDL delete operations to Type to produce a new internalSchema.
   * do not call this method directly. expose this method only for UT.
   *
   * @param type origin type.
   * @param deletes a wrapper class for all the DDL delete operations.
   * @return a new internalSchema.
   */
  private static Type applyTableChange2Type(Type type, TableChanges.ColumnDeleteChange deletes) {
    switch (type.typeId()) {
      case RECORD:
        Types.RecordType record = (Types.RecordType) type;
        List fields = new ArrayList<>();
        for (Types.Field f : record.fields()) {
          Type newType = applyTableChange2Type(f.type(), deletes);
          // apply delete
          newType = deletes.applyDelete(f.fieldId(), newType);
          if (newType != null) {
            fields.add(Types.Field.get(f.fieldId(), f.isOptional(), f.name(), newType, f.doc()));
          }
        }
        if (fields.isEmpty()) {
          throw new UnsupportedOperationException("cannot support delete all columns from Struct");
        }
        return Types.RecordType.get(fields, record.name());
      case ARRAY:
        Types.ArrayType array = (Types.ArrayType) type;
        Type newElementType = applyTableChange2Type(array.elementType(), deletes);
        newElementType = deletes.applyDelete(array.elementId(), newElementType);
        if (newElementType == null) {
          throw new IllegalArgumentException(String.format("cannot delete element from arrayType: %s", array));
        }
        return Types.ArrayType.get(array.elementId(), array.isElementOptional(), newElementType);
      case MAP:
        Types.MapType map = (Types.MapType) type;
        int keyId = map.fields().get(0).fieldId();
        if (deletes.getDeletes().contains(keyId)) {
          throw new IllegalArgumentException(String.format("cannot delete key from mapType: %s", map));
        }
        Type newValueType = applyTableChange2Type(map.valueType(), deletes);
        newValueType = deletes.applyDelete(map.valueId(), newValueType);
        if (newValueType == null) {
          throw new IllegalArgumentException(String.format("cannot delete value from mapType: %s", map));
        }
        return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, map.isValueOptional());
      default:
        return type;
    }
  }

  /**
   * Apply all the DDL update operations to internalSchema to produce a new internalSchema.
   *
   * @param internalSchema origin internalSchema.
   * @param updates a wrapper class for all the DDL update operations.
   * @return a new internalSchema.
   */
  public static InternalSchema applyTableChanges2Schema(InternalSchema internalSchema, TableChanges.ColumnUpdateChange updates) {
    Types.RecordType newType = (Types.RecordType)applyTableChange2Type(internalSchema.getRecord(), updates);
    // deal with root level changes
    List newFields = TableChangesHelper.applyAddChange2Fields(newType.fields(),
        new ArrayList<>(), updates.getPositionChangeMap().get(-1));
    return new InternalSchema(Types.RecordType.get(newFields, newType.name()));
  }

  /**
   * Apply all the DDL update operations to type to produce a new internalSchema.
   * do not call this method directly. expose this method only for UT.
   *
   * @param type origin internalSchema.
   * @param updates a wrapper class for all the DDL update operations.
   * @return a new internalSchema.
   */
  private static Type applyTableChange2Type(Type type, TableChanges.ColumnUpdateChange updates) {
    switch (type.typeId()) {
      case RECORD:
        Types.RecordType record = (Types.RecordType) type;
        List newTypes = new ArrayList<>();
        for (Types.Field f : record.fields()) {
          Type newType = applyTableChange2Type(f.type(), updates);
          newTypes.add(updates.applyUpdates(f, newType));
        }
        List newFields = new ArrayList<>();
        for (int i = 0; i < newTypes.size(); i++) {
          Type newType = newTypes.get(i);
          Types.Field oldField = record.fields().get(i);
          Types.Field updateField = updates.getUpdates().get(oldField.fieldId());
          if (updateField != null) {
            newFields.add(Types.Field.get(oldField.fieldId(), updateField.isOptional(), updateField.name(), newType, updateField.doc()));
          } else if (!oldField.type().equals(newType)) {
            newFields.add(Types.Field.get(oldField.fieldId(), oldField.isOptional(), oldField.name(), newType, oldField.doc()));
          } else {
            newFields.add(oldField);
          }
        }
        return Types.RecordType.get(newFields, record.name());
      case ARRAY:
        Types.ArrayType array = (Types.ArrayType) type;
        Type newElementType;
        Types.Field elementField = array.fields().get(0);
        newElementType = applyTableChange2Type(array.elementType(), updates);
        newElementType = updates.applyUpdates(elementField, newElementType);
        Types.Field elementUpdate = updates.getUpdates().get(elementField.fieldId());
        boolean optional = elementUpdate == null ? array.isElementOptional() : elementUpdate.isOptional();
        if (optional == elementField.isOptional() && array.elementType() == newElementType) {
          return array;
        }
        return Types.ArrayType.get(array.elementId(), optional, newElementType);
      case MAP:
        Types.MapType map = (Types.MapType) type;
        Types.Field valueFiled = map.fields().get(1);
        Type newValueType;
        newValueType = applyTableChange2Type(map.valueType(), updates);
        newValueType = updates.applyUpdates(valueFiled, newValueType);
        Types.Field valueUpdate = updates.getUpdates().get(valueFiled.fieldId());
        boolean valueOptional = valueUpdate == null ? map.isValueOptional() : valueUpdate.isOptional();
        if (valueOptional == map.isValueOptional() && map.valueType() == newValueType) {
          return map;
        }
        return Types.MapType.get(map.keyId(), map.valueId(), map.keyType(), newValueType, valueOptional);
      default:
        return type;
    }
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy