All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.iceberg.SchemaUpdate Maven / Gradle / Ivy

There is a newer version: 1.7.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;

/**
 * Schema evolution API implementation.
 */
class SchemaUpdate implements UpdateSchema {
  private static final Logger LOG = LoggerFactory.getLogger(SchemaUpdate.class);
  private static final int TABLE_ROOT_ID = -1;

  private final TableOperations ops;
  private final TableMetadata base;
  private final Schema schema;
  private final List deletes = Lists.newArrayList();
  private final Map updates = Maps.newHashMap();
  private final Multimap adds =
      Multimaps.newListMultimap(Maps.newHashMap(), Lists::newArrayList);
  private int lastColumnId;

  SchemaUpdate(TableOperations ops) {
    this.ops = ops;
    this.base = ops.current();
    this.schema = base.schema();
    this.lastColumnId = base.lastColumnId();
  }

  /**
   * For testing only.
   */
  SchemaUpdate(Schema schema, int lastColumnId) {
    this.ops = null;
    this.base = null;
    this.schema = schema;
    this.lastColumnId = lastColumnId;
  }

  @Override
  public UpdateSchema addColumn(String name, Type type, String doc) {
    Preconditions.checkArgument(!name.contains("."),
        "Cannot add column with ambiguous name: %s, use addColumn(parent, name, type)", name);
    return addColumn(null, name, type, doc);
  }

  @Override
  public UpdateSchema addColumn(String parent, String name, Type type, String doc) {
    int parentId = TABLE_ROOT_ID;
    if (parent != null) {
      Types.NestedField parentField = schema.findField(parent);
      Preconditions.checkArgument(parentField != null, "Cannot find parent struct: %s", parent);
      Type parentType = parentField.type();
      if (parentType.isNestedType()) {
        Type.NestedType nested = parentType.asNestedType();
        if (nested.isMapType()) {
          // fields are added to the map value type
          parentField = nested.asMapType().fields().get(1);
        } else if (nested.isListType()) {
          // fields are added to the element type
          parentField = nested.asListType().fields().get(0);
        }
      }
      Preconditions.checkArgument(
          parentField.type().isNestedType() && parentField.type().asNestedType().isStructType(),
          "Cannot add to non-struct column: %s: %s", parent, parentField.type());
      parentId = parentField.fieldId();
      Preconditions.checkArgument(!deletes.contains(parentId),
          "Cannot add to a column that will be deleted: %s", parent);
      Preconditions.checkArgument(schema.findField(parent + "." + name) == null,
          "Cannot add column, name already exists: %s.%s", parent, name);
    } else {
      Preconditions.checkArgument(schema.findField(name) == null,
          "Cannot add column, name already exists: %s", name);
    }

    // assign new IDs in order
    int newId = assignNewColumnId();
    adds.put(parentId, optional(newId, name,
        TypeUtil.assignFreshIds(type, this::assignNewColumnId), doc));

    return this;
  }

  @Override
  public UpdateSchema deleteColumn(String name) {
    Types.NestedField field = schema.findField(name);
    Preconditions.checkArgument(field != null, "Cannot delete missing column: %s", name);
    Preconditions.checkArgument(!adds.containsKey(field.fieldId()),
        "Cannot delete a column that has additions: %s", name);
    Preconditions.checkArgument(!updates.containsKey(field.fieldId()),
        "Cannot delete a column that has updates: %s", name);

    deletes.add(field.fieldId());

    return this;
  }

  @Override
  public UpdateSchema renameColumn(String name, String newName) {
    Types.NestedField field = schema.findField(name);
    Preconditions.checkArgument(field != null, "Cannot rename missing column: %s", name);
    Preconditions.checkArgument(newName != null, "Cannot rename a column to null");
    Preconditions.checkArgument(!deletes.contains(field.fieldId()),
        "Cannot rename a column that will be deleted: %s", field.name());

    // merge with an update, if present
    int fieldId = field.fieldId();
    Types.NestedField update = updates.get(fieldId);
    if (update != null) {
      updates.put(fieldId, required(fieldId, newName, update.type(), update.doc()));
    } else {
      updates.put(fieldId, required(fieldId, newName, field.type(), field.doc()));
    }

    return this;
  }

  @Override
  public UpdateSchema updateColumn(String name, Type.PrimitiveType newType) {
    Types.NestedField field = schema.findField(name);
    Preconditions.checkArgument(field != null, "Cannot update missing column: %s", name);
    Preconditions.checkArgument(!deletes.contains(field.fieldId()),
        "Cannot update a column that will be deleted: %s", field.name());
    Preconditions.checkArgument(TypeUtil.isPromotionAllowed(field.type(), newType),
        "Cannot change column type: %s: %s -> %s", name, field.type(), newType);

    // merge with a rename, if present
    int fieldId = field.fieldId();
    Types.NestedField rename = updates.get(fieldId);
    if (rename != null) {
      updates.put(fieldId, required(fieldId, rename.name(), newType, rename.doc()));
    } else {
      updates.put(fieldId, required(fieldId, field.name(), newType, field.doc()));
    }

    return this;
  }

  @Override
  public UpdateSchema updateColumnDoc(String name, String doc) {
    Types.NestedField field = schema.findField(name);
    Preconditions.checkArgument(field != null, "Cannot update missing column: %s", name);
    Preconditions.checkArgument(!deletes.contains(field.fieldId()),
        "Cannot update a column that will be deleted: %s", field.name());

    // merge with a rename or update, if present
    int fieldId = field.fieldId();
    Types.NestedField update = updates.get(fieldId);
    if (update != null) {
      updates.put(fieldId, required(fieldId, update.name(), update.type(), doc));
    } else {
      updates.put(fieldId, required(fieldId, field.name(), field.type(), doc));
    }

    return this;
  }

  /**
   * Apply the pending changes to the original schema and returns the result.
   * 

* This does not result in a permanent update. * * @return the result Schema when all pending updates are applied */ @Override public Schema apply() { return applyChanges(schema, deletes, updates, adds); } @Override public void commit() { TableMetadata update = applyChangesToMapping(base.updateSchema(apply(), lastColumnId)); ops.commit(base, update); } private int assignNewColumnId() { int next = lastColumnId + 1; this.lastColumnId = next; return next; } private TableMetadata applyChangesToMapping(TableMetadata metadata) { String mappingJson = metadata.property(TableProperties.DEFAULT_NAME_MAPPING, null); if (mappingJson != null) { try { // parse and update the mapping NameMapping mapping = NameMappingParser.fromJson(mappingJson); NameMapping updated = MappingUtil.update(mapping, updates, adds); // replace the table property Map updatedProperties = Maps.newHashMap(); updatedProperties.putAll(metadata.properties()); updatedProperties.put(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(updated)); return metadata.replaceProperties(updatedProperties); } catch (RuntimeException e) { // log the error, but do not fail the update LOG.warn("Failed to update external schema mapping: {}", mappingJson, e); } } return metadata; } private static Schema applyChanges(Schema schema, List deletes, Map updates, Multimap adds) { Types.StructType struct = TypeUtil .visit(schema, new ApplyChanges(deletes, updates, adds)) .asNestedType().asStructType(); return new Schema(struct.fields()); } private static class ApplyChanges extends TypeUtil.SchemaVisitor { private final List deletes; private final Map updates; private final Multimap adds; private ApplyChanges(List deletes, Map updates, Multimap adds) { this.deletes = deletes; this.updates = updates; this.adds = adds; } @Override public Type schema(Schema schema, Type structResult) { Collection newColumns = adds.get(TABLE_ROOT_ID); if (newColumns != null) { return addFields(structResult.asNestedType().asStructType(), 
newColumns); } return structResult; } @Override public Type struct(Types.StructType struct, List fieldResults) { boolean hasChange = false; List newFields = Lists.newArrayListWithExpectedSize(fieldResults.size()); for (int i = 0; i < fieldResults.size(); i += 1) { Type resultType = fieldResults.get(i); if (resultType == null) { hasChange = true; continue; } Types.NestedField field = struct.fields().get(i); String name = field.name(); String doc = field.doc(); Types.NestedField update = updates.get(field.fieldId()); if (update != null) { name = update.name(); doc = update.doc(); } if (!name.equals(field.name()) || field.type() != resultType || !Objects.equals(doc, field.doc())) { hasChange = true; if (field.isOptional()) { newFields.add(optional(field.fieldId(), name, resultType, doc)); } else { newFields.add(required(field.fieldId(), name, resultType, doc)); } } else { newFields.add(field); } } if (hasChange) { // TODO: What happens if there are no fields left? return Types.StructType.of(newFields); } return struct; } @Override public Type field(Types.NestedField field, Type fieldResult) { // the API validates deletes, updates, and additions don't conflict // handle deletes int fieldId = field.fieldId(); if (deletes.contains(fieldId)) { return null; } // handle updates Types.NestedField update = updates.get(field.fieldId()); if (update != null && update.type() != field.type()) { // rename is handled in struct, but struct needs the correct type from the field result return update.type(); } // handle adds Collection newFields = adds.get(fieldId); if (newFields != null && !newFields.isEmpty()) { return addFields(fieldResult.asNestedType().asStructType(), newFields); } return fieldResult; } @Override public Type list(Types.ListType list, Type result) { // use field to apply updates Type elementResult = field(list.fields().get(0), result); if (elementResult == null) { throw new IllegalArgumentException("Cannot delete element type from list: " + list); } if 
(list.elementType() == elementResult) { return list; } if (list.isElementOptional()) { return Types.ListType.ofOptional(list.elementId(), elementResult); } else { return Types.ListType.ofRequired(list.elementId(), elementResult); } } @Override public Type map(Types.MapType map, Type kResult, Type vResult) { // if any updates are intended for the key, throw an exception int keyId = map.fields().get(0).fieldId(); if (deletes.contains(keyId)) { throw new IllegalArgumentException("Cannot delete map keys: " + map); } else if (updates.containsKey(keyId)) { throw new IllegalArgumentException("Cannot update map keys: " + map); } else if (adds.containsKey(keyId)) { throw new IllegalArgumentException("Cannot add fields to map keys: " + map); } else if (!map.keyType().equals(kResult)) { throw new IllegalArgumentException("Cannot alter map keys: " + map); } // use field to apply updates to the value Type valueResult = field(map.fields().get(1), vResult); if (valueResult == null) { throw new IllegalArgumentException("Cannot delete value type from map: " + map); } if (map.valueType() == valueResult) { return map; } if (map.isValueOptional()) { return Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult); } else { return Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult); } } @Override public Type primitive(Type.PrimitiveType primitive) { return primitive; } } private static Types.StructType addFields(Types.StructType struct, Collection adds) { List newFields = Lists.newArrayList(struct.fields()); newFields.addAll(adds); return Types.StructType.of(newFields); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy