All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netease.arctic.op.KeyedSchemaUpdate Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netease.arctic.op;

import com.netease.arctic.table.KeyedTable;
import com.netease.arctic.table.PrimaryKeySpec;
import com.netease.arctic.shade.org.apache.commons.lang3.StringUtils;
import com.netease.arctic.shade.org.apache.iceberg.Schema;
import com.netease.arctic.shade.org.apache.iceberg.UpdateSchema;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import com.netease.arctic.shade.org.apache.iceberg.types.Type;
import com.netease.arctic.shade.org.apache.iceberg.types.Types;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import java.io.Serializable;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.PriorityQueue;

/**
 * Schema evolution API implementation for {@link KeyedTable}.
 */
public class KeyedSchemaUpdate implements UpdateSchema {
  private static final Logger LOG = LoggerFactory.getLogger(KeyedSchemaUpdate.class);

  public static final String DOT = ".";

  private final KeyedTable keyedTable;
  private final UpdateSchema baseTableUpdateSchema;
  private final UpdateSchema changeTableUpdateSchema;

  public KeyedSchemaUpdate(KeyedTable keyedTable) {
    this.keyedTable = keyedTable;
    baseTableUpdateSchema = keyedTable.baseTable().updateSchema();
    changeTableUpdateSchema = keyedTable.changeTable().updateSchema();
  }

  @Override
  public KeyedSchemaUpdate allowIncompatibleChanges() {
    baseTableUpdateSchema.allowIncompatibleChanges();
    changeTableUpdateSchema.allowIncompatibleChanges();
    return this;
  }

  @Override
  public UpdateSchema addColumn(String name, Type type, String doc) {
    baseTableUpdateSchema.addColumn(name, type, doc);
    changeTableUpdateSchema.addColumn(name, type, doc);
    return this;
  }

  @Override
  public UpdateSchema addColumn(String parent, String name, Type type, String doc) {
    baseTableUpdateSchema.addColumn(parent, name, type, doc);
    changeTableUpdateSchema.addColumn(parent, name, type, doc);
    return this;
  }

  @Override
  public UpdateSchema addRequiredColumn(String name, Type type, String doc) {
    baseTableUpdateSchema.addRequiredColumn(name, type, doc);
    changeTableUpdateSchema.addRequiredColumn(name, type, doc);
    return this;
  }

  @Override
  public UpdateSchema addRequiredColumn(String parent, String name, Type type, String doc) {
    baseTableUpdateSchema.addRequiredColumn(parent, name, type, doc);
    changeTableUpdateSchema.addRequiredColumn(parent, name, type, doc);
    return this;
  }

  @Override
  public UpdateSchema deleteColumn(String name) {
    Preconditions.checkArgument(!containsPk(name), "Cannot delete primary key. %s", name);
    baseTableUpdateSchema.deleteColumn(name);
    changeTableUpdateSchema.deleteColumn(name);
    return this;
  }

  @Override
  public UpdateSchema renameColumn(String name, String newName) {
    Preconditions.checkArgument(!containsPk(name), "Cannot rename primary key %s", name);
    baseTableUpdateSchema.renameColumn(name, newName);
    changeTableUpdateSchema.renameColumn(name, newName);
    return this;
  }

  @Override
  public UpdateSchema requireColumn(String name) {
    baseTableUpdateSchema.requireColumn(name);
    changeTableUpdateSchema.requireColumn(name);
    return this;
  }

  @Override
  public UpdateSchema makeColumnOptional(String name) {
    Preconditions.checkArgument(!containsPk(name), "Cannot make primary key optional. %s", name);
    baseTableUpdateSchema.makeColumnOptional(name);
    changeTableUpdateSchema.makeColumnOptional(name);
    return this;
  }

  @Override
  public UpdateSchema updateColumn(String name, Type.PrimitiveType newType) {
    baseTableUpdateSchema.updateColumn(name, newType);
    changeTableUpdateSchema.updateColumn(name, newType);
    return this;
  }

  @Override
  public UpdateSchema updateColumnDoc(String name, String doc) {
    baseTableUpdateSchema.updateColumnDoc(name, doc);
    changeTableUpdateSchema.updateColumnDoc(name, doc);
    return this;
  }

  @Override
  public UpdateSchema moveFirst(String name) {
    baseTableUpdateSchema.moveFirst(name);
    changeTableUpdateSchema.moveFirst(name);
    return this;
  }

  @Override
  public UpdateSchema moveBefore(String name, String beforeName) {
    baseTableUpdateSchema.moveBefore(name, beforeName);
    changeTableUpdateSchema.moveBefore(name, beforeName);
    return this;
  }

  @Override
  public UpdateSchema moveAfter(String name, String afterName) {
    baseTableUpdateSchema.moveAfter(name, afterName);
    changeTableUpdateSchema.moveAfter(name, afterName);
    return this;
  }

  @Override
  public UpdateSchema unionByNameWith(Schema newSchema) {
    baseTableUpdateSchema.unionByNameWith(newSchema);
    changeTableUpdateSchema.unionByNameWith(newSchema);
    return this;
  }

  @Override
  public UpdateSchema setIdentifierFields(Collection names) {
    throw new UnsupportedOperationException("unsupported setIdentifierFields arctic table.");
  }

  /**
   * Apply the pending changes to the original schema and returns the result.
   * 

* This does not result in a permanent update. * * @return the result Schema when all pending updates are applied */ @Override public Schema apply() { syncSchema(keyedTable); Schema newSchema = baseTableUpdateSchema.apply(); changeTableUpdateSchema.apply(); return newSchema; } @Override public void commit() { baseTableUpdateSchema.commit(); try { changeTableUpdateSchema.commit(); } catch (Exception e) { LOG.warn("change table schema commit exception", e); } } private boolean containsPk(String name) { if (!keyedTable.primaryKeySpec().primaryKeyExisted()) { return false; } return keyedTable.primaryKeySpec().fieldNames().contains(name); } public static void syncSchema(KeyedTable keyedTable) { if (PrimaryKeySpec.noPrimaryKey().equals(keyedTable.primaryKeySpec())) { return; } int baseSchemaSize = keyedTable.baseTable().schemas().size(); int changeSchemaSize = keyedTable.changeTable().schemas().size(); if (baseSchemaSize <= changeSchemaSize) { return; } if (baseSchemaSize == changeSchemaSize + 1) { Schema newer = keyedTable.baseTable().schema(); syncSchema(newer, keyedTable.changeTable().schema(), keyedTable.changeTable().updateSchema()); return; } // just allow base table schema's versions are one more than change table's throw new IllegalStateException("base table have two more versions than change table"); } private static void syncSchema(Schema newer, Schema old, UpdateSchema changeTableUs) { // To keep the order of adding columns with base table's PriorityQueue adds = new PriorityQueue<>(); for (Types.NestedField newField : newer.columns()) { Types.NestedField oldField = old.findField(newField.fieldId()); syncField(newField, oldField, changeTableUs, null, adds); } old.columns().forEach((c) -> { if (newer.findField(c.fieldId()) == null) { syncField(null, c, changeTableUs, null, adds); } }); doAddColumns(adds, changeTableUs); LOG.info("sync schema to changeTable. from: {}, base: {}, actual: {}", old, newer, changeTableUs.apply()); changeTableUs.commit(); } private static void syncField(Types.NestedField newField, Types.NestedField oldField, UpdateSchema us, String fieldPrefix, Collection adds) { if (oldField == null && newField == null) { return; } if (oldField == null) { addColumnInternal(newField, fieldPrefix, adds); return; } if (newField == null) { deleteColumnInternal(oldField.name(), us, fieldPrefix); return; } if (Objects.equals(newField, oldField)) { return; } updateField(newField, oldField, us, fieldPrefix, adds); } private static void doAddColumns(PriorityQueue adds, UpdateSchema us) { while (!adds.isEmpty()) { Add add = adds.poll(); if (StringUtils.isBlank(add.parent)) { us.addColumn(add.field, add.type, add.doc); } else { if (add.parent.contains(DOT)) { LOG.error("field: {}", add); throw new UnsupportedOperationException("do not support add deeper than two nested field"); } us.addColumn(add.parent, add.field, add.type, add.doc); } } } private static void addColumnInternal(Types.NestedField field, String fieldPrefix, Collection adds) { adds.add(new Add(field, fieldPrefix)); } private static void deleteColumnInternal(String field, UpdateSchema changeTableUs, String fieldPrefix) { changeTableUs.deleteColumn(getFullName(fieldPrefix, field)); } private static String getFullName(String fieldPrefix, String field) { return StringUtils.isBlank(fieldPrefix) ? field : String.join(DOT, fieldPrefix, field); } private static void updateField(Types.NestedField newField, Types.NestedField oldField, UpdateSchema us, String fieldPrefix, Collection adds) { String oldFullFieldName = getFullName(fieldPrefix, oldField.name()); if (!Objects.equals(newField.doc(), oldField.doc())) { us.updateColumnDoc(oldFullFieldName, newField.doc()); } if (!Objects.equals(newField.isRequired(), oldField.isRequired())) { if (newField.isRequired()) { us.requireColumn(oldFullFieldName); } else { us.makeColumnOptional(oldFullFieldName); } } if (!Objects.equals(newField.name(), oldField.name())) { us.renameColumn(oldFullFieldName, newField.name()); } if (newField.type().isPrimitiveType()) { updatePrimativeFieldType(newField, oldField, us, fieldPrefix); } else { updateNestedField(newField, oldField, us, fieldPrefix, adds); } } private static void updateNestedField(Types.NestedField newField, Types.NestedField oldField, UpdateSchema us, String fieldPrefix, Collection adds) { if (oldField.type().isMapType()) { updateMapField(newField, oldField, us, fieldPrefix, adds); return; } Type.NestedType newType = newField.type().asNestedType(); Type.NestedType oldType = oldField.type().asNestedType(); String prefix = getFullName(fieldPrefix, oldField.name()); updateNestedField(newType, oldType, us, prefix, adds); } private static void updateNestedField(Type.NestedType newType, Type.NestedType oldType, UpdateSchema us, String fieldPrefix, Collection adds) { if (Objects.equals(newType, oldType)) { return; } newType.fields().forEach((field -> { Types.NestedField old = oldType.field(field.fieldId()); syncField(field, old, us, fieldPrefix, adds); })); oldType.fields().forEach((o -> { // won't sync repeatedly if (newType.field(o.fieldId()) == null) { syncField(null, o, us, fieldPrefix, adds); } })); } private static void updateMapField(Types.NestedField newField, Types.NestedField oldField, UpdateSchema us, String fieldPrefix, Collection adds) { Types.MapType newType = newField.type().asMapType(); Types.MapType oldType = oldField.type().asMapType(); List newFields = newType.fields(); List oldFields = oldType.fields(); String crtPrefix = getFullName(fieldPrefix, oldField.name()); for (int i = 0; i < newFields.size(); i++) { Types.NestedField newF = newFields.get(i); Types.NestedField oldF = oldFields.get(i); Type t = newF.type(); // just support same type update if (t.isPrimitiveType()) { syncField(newF, oldF, us, crtPrefix, adds); } else { updateNestedField(newF.type().asNestedType(), oldF.type().asNestedType(), us, crtPrefix, adds); } } } private static void updatePrimativeFieldType(Types.NestedField newField, Types.NestedField oldField, UpdateSchema us, String fieldPrefix) { String fullName = getFullName(fieldPrefix, oldField.name()); if (!Objects.equals(newField.type(), oldField.type())) { us.updateColumn(fullName, newField.type().asPrimitiveType()); } } static class Add implements Comparable, Serializable { private final int baseFieldId; private final String parent; private final String field; private final Type type; private final String doc; public Add(Types.NestedField field, String parent) { this(field.fieldId(), parent, field.name(), field.type(), field.doc()); } public Add(int baseFieldId, String parent, String field, Type type, String doc) { this.baseFieldId = baseFieldId; this.parent = parent; this.field = field; this.type = type; this.doc = doc; } @Override public int compareTo(@Nonnull Add o) { return this.baseFieldId - o.baseFieldId; } @Override public String toString() { return "Add{" + "baseFieldId=" + baseFieldId + ", parent='" + parent + '\'' + ", field='" + field + '\'' + ", type=" + type + ", doc='" + doc + '\'' + '}'; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy