/*-
* -\-\-
* DBeam Core
* --
* Copyright (C) 2016 - 2018 Spotify AB
* --
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* -/-/-
*/
package com.spotify.dbeam.avro;

import static java.sql.Types.ARRAY;
import static java.sql.Types.BIGINT;
import static java.sql.Types.BINARY;
import static java.sql.Types.BIT;
import static java.sql.Types.BLOB;
import static java.sql.Types.BOOLEAN;
import static java.sql.Types.CHAR;
import static java.sql.Types.CLOB;
import static java.sql.Types.DATE;
import static java.sql.Types.DOUBLE;
import static java.sql.Types.FLOAT;
import static java.sql.Types.INTEGER;
import static java.sql.Types.LONGNVARCHAR;
import static java.sql.Types.LONGVARBINARY;
import static java.sql.Types.LONGVARCHAR;
import static java.sql.Types.NCHAR;
import static java.sql.Types.REAL;
import static java.sql.Types.SMALLINT;
import static java.sql.Types.TIME;
import static java.sql.Types.TIMESTAMP;
import static java.sql.Types.TIME_WITH_TIMEZONE;
import static java.sql.Types.TINYINT;
import static java.sql.Types.VARBINARY;
import static java.sql.Types.VARCHAR;

import com.spotify.dbeam.args.QueryBuilderArgs;
import java.sql.Connection;
import java.sql.JDBCType;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Optional;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class JdbcAvroSchema {
private static final Logger LOGGER = LoggerFactory.getLogger(JdbcAvroSchema.class);
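/**
 * Creates the Avro schema by executing the configured query limited to a single row (via
 * {@code queryBuilderArgs.sqlQueryWithLimitOne()}) and inspecting the JDBC metadata of the result.
 */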
public static Schema createSchemaByReadingOneRow(
final Connection connection,
final QueryBuilderArgs queryBuilderArgs,
final String avroSchemaNamespace,
final Optional<String> schemaName,
final String avroDoc,
final boolean useLogicalTypes)
throws SQLException {
LOGGER.debug("Creating Avro schema based on the first read row from the database");
try (Statement statement = connection.createStatement()) {
final ResultSet resultSet = statement.executeQuery(queryBuilderArgs.sqlQueryWithLimitOne());
resultSet.next();
final Schema schema =
createAvroSchema(
resultSet,
avroSchemaNamespace,
connection.getMetaData().getURL(),
schemaName,
avroDoc,
useLogicalTypes);
LOGGER.info("Schema created successfully. Generated schema: {}", schema.toString());
return schema;
}
}
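/**
 * Creates an Avro record schema from the {@link ResultSetMetaData} of the given result set,
 * using the table name (or the provided schema name) as the record name and annotating the
 * record with the table name and connection URL as custom properties.
 */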
public static Schema createAvroSchema(
final ResultSet resultSet,
final String avroSchemaNamespace,
final String connectionUrl,
final Optional<String> maybeSchemaName,
final String avroDoc,
final boolean useLogicalTypes)
throws SQLException {
final ResultSetMetaData meta = resultSet.getMetaData();
final String tableName = getDatabaseTableName(meta);
final String schemaName = maybeSchemaName.orElse(tableName);
final SchemaBuilder.FieldAssembler<Schema> builder =
SchemaBuilder.record(schemaName)
.namespace(avroSchemaNamespace)
.doc(avroDoc)
.prop("tableName", tableName)
.prop("connectionUrl", connectionUrl)
.fields();
return createAvroFields(resultSet, builder, useLogicalTypes).endRecord();
}
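/**
 * Returns the first non-empty table name reported by the column metadata, normalized for Avro
 * naming rules, or {@code "no_table_name"} when no column reports one.
 */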
static String getDatabaseTableName(final ResultSetMetaData meta) throws SQLException {
final String defaultTableName = "no_table_name";
for (int i = 1; i <= meta.getColumnCount(); i++) {
String metaTableName = meta.getTableName(i);
if (metaTableName != null && !metaTableName.isEmpty()) {
return normalizeForAvro(metaTableName);
}
}
return defaultTableName;
}
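/**
 * Adds one nullable field per column of the result set to the given record builder, carrying
 * the JDBC type information as documentation and custom properties on each field.
 */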
private static SchemaBuilder.FieldAssembler<Schema> createAvroFields(
final ResultSet resultSet,
final SchemaBuilder.FieldAssembler<Schema> builder,
final boolean useLogicalTypes)
throws SQLException {
ResultSetMetaData meta = resultSet.getMetaData();
for (int i = 1; i <= meta.getColumnCount(); i++) {
final String columnName;
if (meta.getColumnName(i).isEmpty()) {
columnName = meta.getColumnLabel(i);
} else {
columnName = meta.getColumnName(i);
}
final int columnType = meta.getColumnType(i);
final String typeName = JDBCType.valueOf(columnType).getName();
final String columnClassName = meta.getColumnClassName(i);
final String columnTypeName = meta.getColumnTypeName(i);
SchemaBuilder.FieldBuilder<Schema> field =
builder
.name(normalizeForAvro(columnName))
.doc(String.format("From sqlType %d %s (%s)", columnType, typeName, columnClassName))
.prop("columnName", columnName)
.prop("sqlCode", String.valueOf(columnType))
.prop("typeName", typeName)
.prop("columnClassName", columnClassName);
if (columnTypeName != null) {
field = field.prop("columnTypeName", columnTypeName);
}
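// Every field is declared as a union of null and the mapped Avro type, with null as the
// default, so missing database values can be represented.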
final SchemaBuilder.BaseTypeBuilder<
SchemaBuilder.UnionAccumulator<SchemaBuilder.NullDefault<Schema>>>
fieldSchemaBuilder = field.type().unionOf().nullBuilder().endNull().and();
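// For ARRAY columns the element type is only available from an actual java.sql.Array value,
// so it is read from the first row; for all other columns it stays null.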
Integer arrayItemType = resultSet.isFirst() && columnType == ARRAY
? resultSet.getArray(i).getBaseType() : null;
final SchemaBuilder.UnionAccumulator<SchemaBuilder.NullDefault<Schema>> schemaFieldAssembler =
setAvroColumnType(
columnType,
arrayItemType,
meta.getPrecision(i),
columnClassName,
useLogicalTypes,
fieldSchemaBuilder);
schemaFieldAssembler.endUnion().nullDefault();
}
return builder;
}
/**
 * Creates the Avro field schema based on JDBC MetaData.
 *
 * <p>For database-specific type implementations, check the following:
 *
 * <ul>
 *   <li>{@link org.postgresql.jdbc.TypeInfoCache}
 *   <li>{@link com.mysql.cj.MysqlType}
 *   <li>org.h2.value.Value
 * </ul>
 */
private static SchemaBuilder.UnionAccumulator<SchemaBuilder.NullDefault<Schema>>
setAvroColumnType(
final int columnType,
final Integer arrayItemType,
final int precision,
final String columnClassName,
final boolean useLogicalTypes,
final SchemaBuilder.BaseTypeBuilder<
SchemaBuilder.UnionAccumulator<SchemaBuilder.NullDefault<Schema>>>
field) {
switch (columnType) {
case VARCHAR:
case CHAR:
case CLOB:
case LONGNVARCHAR:
case LONGVARCHAR:
case NCHAR:
return field.stringType();
case BIGINT:
return field.longType();
case INTEGER:
case SMALLINT:
case TINYINT:
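// Some drivers report java.lang.Long for unsigned integer columns (e.g. MySQL INT UNSIGNED),
// in which case the field is widened to an Avro long.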
if (Long.class.getCanonicalName().equals(columnClassName)) {
return field.longType();
} else {
return field.intType();
}
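// Temporal types are encoded as epoch milliseconds; when logical types are enabled the
// long is annotated with the timestamp-millis logical type.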
case TIMESTAMP:
case DATE:
case TIME:
case TIME_WITH_TIMEZONE:
if (useLogicalTypes) {
return field.longBuilder().prop("logicalType", "timestamp-millis").endLong();
} else {
return field.longType();
}
case BOOLEAN:
return field.booleanType();
case BIT:
// Note that bit types can take a length parameter (typemod) qualifying their width;
// further docs:
// https://www.postgresql.org/docs/8.2/datatype-bit.html
if (precision <= 1) {
return field.booleanType();
} else {
return field.bytesType();
}
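// Arrays are mapped recursively: the Avro array's item schema reuses the scalar mapping
// of the array's JDBC base type.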
case ARRAY:
return setAvroColumnType(arrayItemType, null, precision, columnClassName,
useLogicalTypes, field.array().items());
case BINARY:
case VARBINARY:
case LONGVARBINARY:
case BLOB:
return field.bytesType();
case DOUBLE:
return field.doubleType();
case FLOAT:
case REAL:
return field.floatType();
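// Types not handled above (e.g. NUMERIC, DECIMAL, SQLXML) fall back to string.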
default:
return field.stringType();
}
}
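/** Replaces every character that is not a letter, digit or underscore with an underscore. */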
private static String normalizeForAvro(final String input) {
return input.replaceAll("[^A-Za-z0-9_]", "_");
}
}