/*
 * Copyright Debezium Authors.
 *
 * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 */
package io.debezium.relational;

import java.sql.Types;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.debezium.annotation.Immutable;
import io.debezium.annotation.ThreadSafe;
import io.debezium.data.Envelope;
import io.debezium.data.SchemaUtil;
import io.debezium.relational.Key.KeyMapper;
import io.debezium.relational.Tables.ColumnNameFilter;
import io.debezium.relational.mapping.ColumnMapper;
import io.debezium.relational.mapping.ColumnMappers;
import io.debezium.schema.FieldNameSelector;
import io.debezium.schema.FieldNameSelector.FieldNamer;
import io.debezium.util.SchemaNameAdjuster;
import io.debezium.util.Strings;

/**
 * Builder that constructs {@link TableSchema} instances for {@link Table} definitions.
 * <p>
 * This builder is responsible for mapping {@link Column table columns} to {@link Field fields} in Kafka Connect {@link Schema}s,
 * and this is necessarily dependent upon the database's supported types. Although mappings are defined for standard types,
 * this class may need to be subclassed for each DBMS to add support for DBMS-specific types by overriding any of the
 * "{@code add*Field}" methods.
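 * <p>
 * A rough sketch of such a subclass (the class name and the special-casing of {@link Types#OTHER} below are purely
 * illustrative); it handles one DBMS-specific JDBC type itself and delegates everything else to this class:
 * <pre>{@code
 * public class MyDbmsTableSchemaBuilder extends TableSchemaBuilder {
 *     // constructor omitted; it would simply delegate to super(...)
 *
 *     protected void addField(SchemaBuilder builder, Table table, Column column, ColumnMapper mapper) {
 *         if (column.jdbcType() == Types.OTHER) {
 *             // expose the DBMS-specific type as a plain string field
 *             builder.field(column.name(), column.isOptional() ? Schema.OPTIONAL_STRING_SCHEMA : Schema.STRING_SCHEMA);
 *         }
 *         else {
 *             super.addField(builder, table, column, mapper);
 *         }
 *     }
 * }
 * }</pre>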
 * <p>
 * See the Java SE Mapping SQL and Java Types guide for details about how JDBC {@link Types types} map to Java value types.
 *
 * @author Randall Hauch
 */
@ThreadSafe
@Immutable
public class TableSchemaBuilder {

    private static final Logger LOGGER = LoggerFactory.getLogger(TableSchemaBuilder.class);

    private final SchemaNameAdjuster schemaNameAdjuster;
    private final ValueConverterProvider valueConverterProvider;
    private final Schema sourceInfoSchema;
    private final FieldNamer fieldNamer;
    private final CustomConverterRegistry customConverterRegistry;

    /**
     * Create a new instance of the builder.
     *
     * @param valueConverterProvider the provider for obtaining {@link ValueConverter}s and {@link SchemaBuilder}s; may not be
     *            null
     * @param schemaNameAdjuster the adjuster for schema names; may not be null
     */
    public TableSchemaBuilder(ValueConverterProvider valueConverterProvider, SchemaNameAdjuster schemaNameAdjuster,
                              CustomConverterRegistry customConverterRegistry, Schema sourceInfoSchema, boolean sanitizeFieldNames) {
        this.schemaNameAdjuster = schemaNameAdjuster;
        this.valueConverterProvider = valueConverterProvider;
        this.sourceInfoSchema = sourceInfoSchema;
        this.fieldNamer = FieldNameSelector.defaultSelector(sanitizeFieldNames);
        this.customConverterRegistry = customConverterRegistry;
    }

    /**
     * Create a {@link TableSchema} from the given {@link Table table definition}. The resulting TableSchema will have a
     * {@link TableSchema#keySchema() key schema} that contains all of the columns that make up the table's primary key,
     * and a {@link TableSchema#valueSchema() value schema} that contains only those columns that are not in the table's primary
     * key.
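     * <p>
     * A minimal usage sketch (the variable names, the {@code "myconnector."} prefix and the envelope name below are
     * illustrative assumptions; {@code null} is passed wherever this documentation allows it, and a {@code null} key
     * mapper is assumed to fall back to the table's primary key):
     * <pre>{@code
     * TableSchemaBuilder builder = new TableSchemaBuilder(valueConverters, schemaNameAdjuster,
     *         customConverterRegistry, sourceInfoSchema, false);
     * TableSchema tableSchema = builder.create("myconnector.", "myconnector.inventory.customers.Envelope",
     *         table, null, null, null);
     * Schema keySchema = tableSchema.keySchema();     // one field per primary key column; null if the table has no key
     * Schema valueSchema = tableSchema.valueSchema(); // one field per included column
     * }</pre>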
     * <p>
     * This is equivalent to calling {@code create(table,false)}.
     *
     * @param schemaPrefix the prefix added to the table identifier to construct the schema names; may be null if there is no
     *            prefix
     * @param envelopSchemaName the name of the schema of the built table's envelope
     * @param table the table definition; may not be null
     * @param filter the filter that specifies whether columns in the table should be included; may be null if all columns
     *            are to be included
     * @param mappers the mapping functions for columns; may be null if none of the columns are to be mapped to different values
     * @return the table schema that can be used for sending rows of data for this table to Kafka Connect; never null
     */
    public TableSchema create(String schemaPrefix, String envelopSchemaName, Table table, ColumnNameFilter filter, ColumnMappers mappers, KeyMapper keysMapper) {
        if (schemaPrefix == null) {
            schemaPrefix = "";
        }

        // Build the schemas ...
        final TableId tableId = table.id();
        final String tableIdStr = tableSchemaName(tableId);
        final String schemaNamePrefix = schemaPrefix + tableIdStr;
        LOGGER.debug("Mapping table '{}' to schemas under '{}'", tableId, schemaNamePrefix);
        SchemaBuilder valSchemaBuilder = SchemaBuilder.struct().name(schemaNameAdjuster.adjust(schemaNamePrefix + ".Value"));
        SchemaBuilder keySchemaBuilder = SchemaBuilder.struct().name(schemaNameAdjuster.adjust(schemaNamePrefix + ".Key"));
        AtomicBoolean hasPrimaryKey = new AtomicBoolean(false);

        Key tableKey = new Key.Builder(table).customKeyMapper(keysMapper).build();
        tableKey.keyColumns().forEach(column -> {
            addField(keySchemaBuilder, table, column, null);
            hasPrimaryKey.set(true);
        });

        table.columns()
                .stream()
                .filter(column -> filter == null || filter.matches(tableId.catalog(), tableId.schema(), tableId.table(), column.name()))
                .forEach(column -> {
                    ColumnMapper mapper = mappers == null ? null : mappers.mapperFor(tableId, column);
                    addField(valSchemaBuilder, table, column, mapper);
                });

        Schema valSchema = valSchemaBuilder.optional().build();
        Schema keySchema = hasPrimaryKey.get() ? keySchemaBuilder.build() : null;

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Mapped primary key for table '{}' to schema: {}", tableId, SchemaUtil.asDetailedString(keySchema));
            LOGGER.debug("Mapped columns for table '{}' to schema: {}", tableId, SchemaUtil.asDetailedString(valSchema));
        }

        Envelope envelope = Envelope.defineSchema()
                .withName(schemaNameAdjuster.adjust(envelopSchemaName))
                .withRecord(valSchema)
                .withSource(sourceInfoSchema)
                .build();

        // Create the generators ...
        StructGenerator keyGenerator = createKeyGenerator(keySchema, tableId, tableKey.keyColumns());
        StructGenerator valueGenerator = createValueGenerator(valSchema, tableId, table.columns(), filter, mappers);

        // And the table schema ...
        return new TableSchema(tableId, keySchema, keyGenerator, envelope, valSchema, valueGenerator);
    }

    /**
     * Returns the type schema name for the given table.
     */
    private String tableSchemaName(TableId tableId) {
        if (Strings.isNullOrEmpty(tableId.catalog())) {
            if (Strings.isNullOrEmpty(tableId.schema())) {
                return tableId.table();
            }
            else {
                return tableId.schema() + "." + tableId.table();
            }
        }
        else if (Strings.isNullOrEmpty(tableId.schema())) {
            return tableId.catalog() + "." + tableId.table();
        }
        // When both catalog and schema are present, only the schema is used
        else {
            return tableId.schema() + "." + tableId.table();
        }
    }

    /**
     * Creates the function that produces a Kafka Connect key object for a row of data.
     *
     * @param schema the Kafka Connect schema for the key; may be null if there is no known schema, in which case the generator
     *            will be null
     * @param columnSetName the name for the set of columns, used in error messages; may not be null
     * @param columns the column definitions for the table that defines the row; may not be null
     * @return the key-generating function, or null if there is no key schema
     */
    protected StructGenerator createKeyGenerator(Schema schema, TableId columnSetName, List<Column> columns) {
        if (schema != null) {
            int[] recordIndexes = indexesForColumns(columns);
            Field[] fields = fieldsForColumns(schema, columns);
            int numFields = recordIndexes.length;
            ValueConverter[] converters = convertersForColumns(schema, columnSetName, columns, null);
            return (row) -> {
                Struct result = new Struct(schema);
                for (int i = 0; i != numFields; ++i) {
                    validateIncomingRowToInternalMetadata(recordIndexes, fields, converters, row, i);
                    Object value = row[recordIndexes[i]];

                    ValueConverter converter = converters[i];
                    if (converter != null) {
                        // A component of a primary key must be not-null.
                        // It is possible for some databases and values (MySQL and an all-zero datetime)
                        // to be reported as null by JDBC or the streaming reader.
                        // It thus makes sense to convert them to a sensible default replacement value.
                        value = converter.convert(value);
                        try {
                            result.put(fields[i], value);
                        }
                        catch (DataException e) {
                            Column col = columns.get(i);
                            LOGGER.error("Failed to properly convert key value for '{}.{}' of type {} for row {}:",
                                    columnSetName, col.name(), col.typeName(), row, e);
                        }
                    }
                }
                return result;
            };
        }
        return null;
    }

    private void validateIncomingRowToInternalMetadata(int[] recordIndexes, Field[] fields, ValueConverter[] converters, Object[] row, int position) {
        if (position >= converters.length) {
            LOGGER.error("Error requesting a converter, converters: {}, requested index: {}", converters.length, position);
            throw new ConnectException(
                    "Column indexing array is larger than number of converters, internal schema representation is probably out of sync with real database schema");
        }
        if (position >= fields.length) {
            LOGGER.error("Error requesting a field, fields: {}, requested index: {}", fields.length, position);
            throw new ConnectException("Too few schema fields, internal schema representation is probably out of sync with real database schema");
        }
        if (recordIndexes[position] >= row.length) {
            LOGGER.error("Error requesting a row value, row: {}, requested index: {} at position {}", row.length, recordIndexes[position], position);
            throw new ConnectException("Data row is smaller than a column index, internal schema representation is probably out of sync with real database schema");
        }
    }

    /**
     * Creates the function that produces a Kafka Connect value object for a row of data.
     *
     * @param schema the Kafka Connect schema for the value; may be null if there is no known schema, in which case the generator
     *            will be null
     * @param tableId the table identifier; may not be null
     * @param columns the column definitions for the table that defines the row; may not be null
     * @param filter the filter that specifies whether columns in the table should be included; may be null if all columns
     *            are to be included
     * @param mappers the mapping functions for columns; may be null if none of the columns are to be mapped to different values
     * @return the value-generating function, or null if there is no value schema
     */
    protected StructGenerator createValueGenerator(Schema schema, TableId tableId, List<Column> columns, ColumnNameFilter filter, ColumnMappers mappers) {
        if (schema != null) {
            List<Column> columnsThatShouldBeAdded = columns.stream()
                    .filter(column -> filter == null || filter.matches(tableId.catalog(), tableId.schema(), tableId.table(), column.name()))
                    .collect(Collectors.toList());
            int[] recordIndexes = indexesForColumns(columnsThatShouldBeAdded);
            Field[] fields = fieldsForColumns(schema, columnsThatShouldBeAdded);
            int numFields = recordIndexes.length;
            ValueConverter[] converters = convertersForColumns(schema, tableId, columnsThatShouldBeAdded, mappers);
            return (row) -> {
                Struct result = new Struct(schema);
                for (int i = 0; i != numFields; ++i) {
                    validateIncomingRowToInternalMetadata(recordIndexes, fields, converters, row, i);
                    Object value = row[recordIndexes[i]];

                    ValueConverter converter = converters[i];
                    if (converter != null) {
                        LOGGER.trace("converter for value object: *** {} ***", converter);
                    }
                    else {
                        LOGGER.trace("converter is null...");
                    }
                    if (converter != null) {
                        try {
                            value = converter.convert(value);
                            result.put(fields[i], value);
                        }
                        catch (DataException | IllegalArgumentException e) {
                            Column col = columns.get(i);
                            LOGGER.error("Failed to properly convert data value for '{}.{}' of type {} for row {}:",
                                    tableId, col.name(), col.typeName(), row, e);
                        }
                        // catch-all so that a single unexpected conversion error does not fail the whole row
                        catch (final Exception e) {
                            Column col = columns.get(i);
                            LOGGER.error("Failed to properly convert data value for '{}.{}' of type {} for row {}:",
                                    tableId, col.name(), col.typeName(), row, e);
                        }
                    }
                }
                return result;
            };
        }
        return null;
    }

    protected int[] indexesForColumns(List<Column> columns) {
        int[] recordIndexes = new int[columns.size()];
        AtomicInteger i = new AtomicInteger(0);
        columns.forEach(column -> {
            recordIndexes[i.getAndIncrement()] = column.position() - 1; // position is 1-based, indexes are 0-based
        });
        return recordIndexes;
    }

    protected Field[] fieldsForColumns(Schema schema, List<Column> columns) {
        Field[] fields = new Field[columns.size()];
        AtomicInteger i = new AtomicInteger(0);
        columns.forEach(column -> {
            Field field = schema.field(fieldNamer.fieldNameFor(column)); // may be null if the field is unused ...
            fields[i.getAndIncrement()] = field;
        });
        return fields;
    }

    /**
     * Obtain the array of converters for each column in a row. A converter might be null if the column is not to be included in
     * the records.
     *
     * @param schema the schema; may not be null
     * @param tableId the identifier of the table that contains the columns
     * @param columns the columns in the row; may not be null
     * @param mappers the mapping functions for columns; may be null if none of the columns are to be mapped to different values
     * @return the converters for each column in the rows; never null
     */
    protected ValueConverter[] convertersForColumns(Schema schema, TableId tableId, List<Column> columns, ColumnMappers mappers) {
        ValueConverter[] converters = new ValueConverter[columns.size()];

        for (int i = 0; i < columns.size(); i++) {
            Column column = columns.get(i);

            ValueConverter converter = createValueConverterFor(tableId, column, schema.field(fieldNamer.fieldNameFor(column)));
            converter = wrapInMappingConverterIfNeeded(mappers, tableId, column, converter);

            if (converter == null) {
                LOGGER.warn(
                        "No converter found for column {}.{} of type {}. The column will not be part of change events for that table.",
                        tableId, column.name(), column.typeName());
            }

            // may be null if no converter found
            converters[i] = converter;
        }

        return converters;
    }

    private ValueConverter wrapInMappingConverterIfNeeded(ColumnMappers mappers, TableId tableId, Column column, ValueConverter converter) {
        if (mappers == null || converter == null) {
            return converter;
        }

        ValueConverter mappingConverter = mappers.mappingConverterFor(tableId, column);
        if (mappingConverter == null) {
            return converter;
        }

        // apply the basic column converter first, then the column mapping converter
        return (value) -> mappingConverter.convert(converter.convert(value));
    }

    /**
     * Add to the supplied {@link SchemaBuilder} a field for the column with the given information.
     *
     * @param builder the schema builder; never null
     * @param table the table definition; never null
     * @param column the column definition
     * @param mapper the mapping function for the column; may be null if the column is not to be mapped to different values
     */
    protected void addField(SchemaBuilder builder, Table table, Column column, ColumnMapper mapper) {
        final SchemaBuilder fieldBuilder = customConverterRegistry.registerConverterFor(table.id(), column)
                .orElse(valueConverterProvider.schemaBuilder(column));

        if (fieldBuilder != null) {
            if (mapper != null) {
                // Let the mapper add properties to the schema ...
                mapper.alterFieldSchema(column, fieldBuilder);
            }

            if (column.isOptional()) {
                fieldBuilder.optional();
            }

            // if the default value is provided
            if (column.hasDefaultValue()) {
                fieldBuilder
                        .defaultValue(customConverterRegistry.getValueConverter(table.id(), column).orElse(ValueConverter.passthrough()).convert(column.defaultValue()));
            }

            builder.field(fieldNamer.fieldNameFor(column), fieldBuilder.build());
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("- field '{}' ({}{}) from column {}", column.name(), builder.isOptional() ? "OPTIONAL " : "", fieldBuilder.type(), column);
            }
        }
        else {
            LOGGER.warn("Unexpected JDBC type '{}' for column '{}' that will be ignored", column.jdbcType(), column.name());
        }
    }

    /**
     * Create a {@link ValueConverter} that can be used to convert row values for the given column into the Kafka Connect value
     * object described by the {@link Field field definition}. This uses the supplied {@link ValueConverterProvider} object.
     *
     * @param tableId the id of the table containing the column; never null
     * @param column the column describing the input values; never null
     * @param fieldDefn the definition for the field in a Kafka Connect {@link Schema} describing the output of the function;
     *            never null
     * @return the value conversion function; may not be null
     */
    protected ValueConverter createValueConverterFor(TableId tableId, Column column, Field fieldDefn) {
        return customConverterRegistry.getValueConverter(tableId, column).orElse(valueConverterProvider.converter(column, fieldDefn));
    }
}