All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.iceberg.UpdateSchema Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import java.util.Collection;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;

/**
 * API for schema evolution.
 * 

* When committing, these changes will be applied to the current table metadata. Commit conflicts * will not be resolved and will result in a {@link CommitFailedException}. */ public interface UpdateSchema extends PendingUpdate { /** * Allow incompatible changes to the schema. *

* Incompatible changes can cause failures when attempting to read older data files. For example, adding a required * column and attempting to read data files without that column will cause a failure. However, if there are no data * files that are not compatible with the change, it can be allowed. *

* This option allows incompatible changes to be made to a schema. This should be used when the caller has validated * that the change will not break. For example, if a column is added as optional but always populated and data older * than the column addition has been deleted from the table, this can be used with {@link #requireColumn(String)} to * mark the column required. * * @return this for method chaining */ UpdateSchema allowIncompatibleChanges(); /** * Add a new top-level column. *

* Because "." may be interpreted as a column path separator or may be used in field names, it is * not allowed in names passed to this method. To add to nested structures or to add fields with * names that contain ".", use {@link #addColumn(String, String, Type)}. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param name name for the new column * @param type type for the new column * @return this for method chaining * @throws IllegalArgumentException If name contains "." */ default UpdateSchema addColumn(String name, Type type) { return addColumn(name, type, null); } /** * Add a new top-level column. *

* Because "." may be interpreted as a column path separator or may be used in field names, it is * not allowed in names passed to this method. To add to nested structures or to add fields with * names that contain ".", use {@link #addColumn(String, String, Type)}. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param name name for the new column * @param type type for the new column * @param doc documentation string for the new column * @return this for method chaining * @throws IllegalArgumentException If name contains "." */ UpdateSchema addColumn(String name, Type type, String doc); /** * Add a new column to a nested struct. *

* The parent name is used to find the parent using {@link Schema#findField(String)}. If the * parent name is null, the new column will be added to the root as a top-level column. If parent * identifies a struct, a new column is added to that struct. If it identifies a list, the column * is added to the list element struct, and if it identifies a map, the new column is added to * the map's value struct. *

* The given name is used to name the new column and names containing "." are not handled * differently. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param parent name of the parent struct to the column will be added to * @param name name for the new column * @param type type for the new column * @return this for method chaining * @throws IllegalArgumentException If parent doesn't identify a struct */ default UpdateSchema addColumn(String parent, String name, Type type) { return addColumn(parent, name, type, null); } /** * Add a new column to a nested struct. *

* The parent name is used to find the parent using {@link Schema#findField(String)}. If the * parent name is null, the new column will be added to the root as a top-level column. If parent * identifies a struct, a new column is added to that struct. If it identifies a list, the column * is added to the list element struct, and if it identifies a map, the new column is added to * the map's value struct. *

* The given name is used to name the new column and names containing "." are not handled * differently. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param parent name of the parent struct to the column will be added to * @param name name for the new column * @param type type for the new column * @param doc documentation string for the new column * @return this for method chaining * @throws IllegalArgumentException If parent doesn't identify a struct */ UpdateSchema addColumn(String parent, String name, Type type, String doc); /** * Add a new required top-level column. *

* This is an incompatible change that can break reading older data. This method will result in an exception unless * {@link #allowIncompatibleChanges()} has been called. *

* Because "." may be interpreted as a column path separator or may be used in field names, it is * not allowed in names passed to this method. To add to nested structures or to add fields with * names that contain ".", use {@link #addRequiredColumn(String, String, Type)}. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param name name for the new column * @param type type for the new column * @return this for method chaining * @throws IllegalArgumentException If name contains "." */ default UpdateSchema addRequiredColumn(String name, Type type) { return addRequiredColumn(name, type, null); } /** * Add a new required top-level column. *

* This is an incompatible change that can break reading older data. This method will result in an exception unless * {@link #allowIncompatibleChanges()} has been called. *

* Because "." may be interpreted as a column path separator or may be used in field names, it is * not allowed in names passed to this method. To add to nested structures or to add fields with * names that contain ".", use {@link #addRequiredColumn(String, String, Type)}. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param name name for the new column * @param type type for the new column * @param doc documentation string for the new column * @return this for method chaining * @throws IllegalArgumentException If name contains "." */ UpdateSchema addRequiredColumn(String name, Type type, String doc); /** * Add a new required top-level column. *

* This is an incompatible change that can break reading older data. This method will result in an exception unless * {@link #allowIncompatibleChanges()} has been called. *

* The parent name is used to find the parent using {@link Schema#findField(String)}. If the * parent name is null, the new column will be added to the root as a top-level column. If parent * identifies a struct, a new column is added to that struct. If it identifies a list, the column * is added to the list element struct, and if it identifies a map, the new column is added to * the map's value struct. *

* The given name is used to name the new column and names containing "." are not handled * differently. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param parent name of the parent struct to the column will be added to * @param name name for the new column * @param type type for the new column * @return this for method chaining * @throws IllegalArgumentException If parent doesn't identify a struct */ default UpdateSchema addRequiredColumn(String parent, String name, Type type) { return addRequiredColumn(parent, name, type, null); } /** * Add a new required top-level column. *

* This is an incompatible change that can break reading older data. This method will result in an exception unless * {@link #allowIncompatibleChanges()} has been called. *

* The parent name is used to find the parent using {@link Schema#findField(String)}. If the * parent name is null, the new column will be added to the root as a top-level column. If parent * identifies a struct, a new column is added to that struct. If it identifies a list, the column * is added to the list element struct, and if it identifies a map, the new column is added to * the map's value struct. *

* The given name is used to name the new column and names containing "." are not handled * differently. *

* If type is a nested type, its field IDs are reassigned when added to the existing schema. * * @param parent name of the parent struct to the column will be added to * @param name name for the new column * @param type type for the new column * @param doc documentation string for the new column * @return this for method chaining * @throws IllegalArgumentException If parent doesn't identify a struct */ UpdateSchema addRequiredColumn(String parent, String name, Type type, String doc); /** * Rename a column in the schema. *

* The name is used to find the column to rename using {@link Schema#findField(String)}. *

* The new name may contain "." and such names are not parsed or handled differently. *

* Columns may be updated and renamed in the same schema update. * * @param name name of the column to rename * @param newName replacement name for the column * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change conflicts with other additions, renames, or updates. */ UpdateSchema renameColumn(String name, String newName); /** * Update a column in the schema to a new primitive type. *

* The name is used to find the column to update using {@link Schema#findField(String)}. *

* Only updates that widen types are allowed. *

* Columns may be updated and renamed in the same schema update. * * @param name name of the column to rename * @param newType replacement type for the column * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change introduces a type incompatibility or if it conflicts * with other additions, renames, or updates. */ UpdateSchema updateColumn(String name, Type.PrimitiveType newType); /** * Update a column in the schema to a new primitive type. *

* The name is used to find the column to update using {@link Schema#findField(String)}. *

* Only updates that widen types are allowed. *

* Columns may be updated and renamed in the same schema update. * * @param name name of the column to rename * @param newType replacement type for the column * @param newDoc replacement documentation string for the column * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change introduces a type incompatibility or if it conflicts * with other additions, renames, or updates. */ default UpdateSchema updateColumn(String name, Type.PrimitiveType newType, String newDoc) { return updateColumn(name, newType).updateColumnDoc(name, newDoc); } /** * Update a column in the schema to a new primitive type. *

* The name is used to find the column to update using {@link Schema#findField(String)}. *

* Columns may be updated and renamed in the same schema update. * * @param name name of the column to rename * @param newDoc replacement documentation string for the column * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change introduces a type incompatibility or if it conflicts * with other additions, renames, or updates. */ UpdateSchema updateColumnDoc(String name, String newDoc); /** * Update a column to optional. * * @param name name of the column to mark optional * @return this for method chaining */ UpdateSchema makeColumnOptional(String name); /** * Update a column to required. *

* This is an incompatible change that can break reading older data. This method will result in an exception unless * {@link #allowIncompatibleChanges()} has been called. * * @param name name of the column to mark required * @return this for method chaining */ UpdateSchema requireColumn(String name); /** * Delete a column in the schema. *

* The name is used to find the column to delete using {@link Schema#findField(String)}. * * @param name name of the column to delete * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change conflicts with other additions, renames, or updates. */ UpdateSchema deleteColumn(String name); /** * Move a column from its current position to the start of the schema or its parent struct. * @param name name of the column to move * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change conflicts with other changes. */ UpdateSchema moveFirst(String name); /** * Move a column from its current position to directly before a reference column. *

* The name is used to find the column to move using {@link Schema#findField(String)}. If the name identifies a nested * column, it can only be moved within the nested struct that contains it. * * @param name name of the column to move * @param beforeName name of the reference column * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change conflicts with other changes. */ UpdateSchema moveBefore(String name, String beforeName); /** * Move a column from its current position to directly after a reference column. *

* The name is used to find the column to move using {@link Schema#findField(String)}. If the name identifies a nested * column, it can only be moved within the nested struct that contains it. * * @param name name of the column to move * @param afterName name of the reference column * @return this for method chaining * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change conflicts with other changes. */ UpdateSchema moveAfter(String name, String afterName); /** * Applies all field additions and updates from the provided new schema to the existing schema so * to create a union schema. *

* For fields with same canonical names in both schemas it is required that the widen types is * supported using {@link UpdateSchema#updateColumn(String, Type.PrimitiveType)} *

* Only supports turning a previously required field into an optional one if it is marked * optional in the provided new schema using {@link UpdateSchema#makeColumnOptional(String)} *

* Only supports updating existing field docs with fields docs from the provided new schema using * {@link UpdateSchema#updateColumnDoc(String, String)} * * @param newSchema a schema used in conjunction with the existing schema to create a union schema * @return this for method chaining * @throws IllegalStateException If it encounters errors during provided schema traversal * @throws IllegalArgumentException If name doesn't identify a column in the schema or if this * change introduces a type incompatibility or if it conflicts * with other additions, renames, or updates. */ UpdateSchema unionByNameWith(Schema newSchema); /** * Set the identifier fields given a set of field names. *

* Because identifier fields are unique, duplicated names will be ignored. * See {@link Schema#identifierFieldIds()} to learn more about Iceberg identifier. * * @param names names of the columns to set as identifier fields * @return this for method chaining */ UpdateSchema setIdentifierFields(Collection names); /** * Set the identifier fields given some field names. * See {@link UpdateSchema#setIdentifierFields(Collection)} for more details. * * @param names names of the columns to set as identifier fields * @return this for method chaining */ default UpdateSchema setIdentifierFields(String... names) { return setIdentifierFields(Sets.newHashSet(names)); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy