// Source artifact: org.apache.beam.sdk.io.jdbc.SchemaUtil (from the org.apache.beam:beam-sdks-java-io-jdbc Maven artifact).

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.io.jdbc;

import static java.sql.JDBCType.BINARY;
import static java.sql.JDBCType.CHAR;
import static java.sql.JDBCType.LONGNVARCHAR;
import static java.sql.JDBCType.LONGVARBINARY;
import static java.sql.JDBCType.LONGVARCHAR;
import static java.sql.JDBCType.NCHAR;
import static java.sql.JDBCType.NVARCHAR;
import static java.sql.JDBCType.VARBINARY;
import static java.sql.JDBCType.VARCHAR;
import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull;
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;

import java.io.Serializable;
import java.sql.Array;
import java.sql.JDBCType;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Timestamp;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.EnumMap;
import java.util.List;
import java.util.Objects;
import java.util.TimeZone;
import java.util.UUID;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.logicaltypes.FixedPrecisionNumeric;
import org.apache.beam.sdk.schemas.logicaltypes.FixedString;
import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes;
import org.apache.beam.sdk.schemas.logicaltypes.VariableString;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.joda.time.DateTime;
import org.joda.time.LocalDate;
import org.joda.time.chrono.ISOChronology;

/** Provides utility functions for working with Beam {@link Schema} types. */
public class SchemaUtil {
  /**
   * Interface implemented by functions that extract values of different types from a JDBC
   * ResultSet.
   *
   * <p>Implementations read the column at the given 1-based {@code index} from the current row of
   * {@code rs}. The result may be {@code null}; callers use {@link ResultSet#wasNull()} to
   * distinguish SQL NULL from primitive default values. Extends {@link Serializable} so extractors
   * can be shipped inside Beam transforms.
   */
  @FunctionalInterface
  interface ResultSetFieldExtractor extends Serializable {
    @Nullable
    Object extract(ResultSet rs, Integer index) throws SQLException;
  }
  // ResultSetExtractors for primitive schema types (excluding arrays, structs and logical types).
  // Restored the type arguments (stripped in this copy): the raw builder cannot accept method
  // references, so the map must be typed EnumMap<Schema.TypeName, ResultSetFieldExtractor>.
  private static final EnumMap<Schema.TypeName, ResultSetFieldExtractor>
      RESULTSET_FIELD_EXTRACTORS =
          new EnumMap<>(
              ImmutableMap.<Schema.TypeName, ResultSetFieldExtractor>builder()
                  .put(Schema.TypeName.BOOLEAN, ResultSet::getBoolean)
                  .put(Schema.TypeName.BYTE, ResultSet::getByte)
                  .put(Schema.TypeName.BYTES, ResultSet::getBytes)
                  .put(Schema.TypeName.DATETIME, ResultSet::getTimestamp)
                  .put(Schema.TypeName.DECIMAL, ResultSet::getBigDecimal)
                  .put(Schema.TypeName.DOUBLE, ResultSet::getDouble)
                  .put(Schema.TypeName.FLOAT, ResultSet::getFloat)
                  .put(Schema.TypeName.INT16, ResultSet::getShort)
                  .put(Schema.TypeName.INT32, ResultSet::getInt)
                  .put(Schema.TypeName.INT64, ResultSet::getLong)
                  .put(Schema.TypeName.STRING, ResultSet::getString)
                  .build());

  // Shared extractors for date/time-flavored and opaque (OTHER/JAVA_OBJECT) columns; these need
  // timezone-normalizing conversions that the primitive extractors above do not.
  private static final ResultSetFieldExtractor DATE_EXTRACTOR = createDateExtractor();
  private static final ResultSetFieldExtractor TIME_EXTRACTOR = createTimeExtractor();
  private static final ResultSetFieldExtractor TIMESTAMP_EXTRACTOR = createTimestampExtractor();
  private static final ResultSetFieldExtractor OBJECT_EXTRACTOR = createObjectExtractor();

  /**
   * Interface implemented by functions that create Beam {@link
   * org.apache.beam.sdk.schemas.Schema.Field} corresponding to JDBC field metadata.
   *
   * <p>{@code index} is the 1-based column index into {@code md}. Extends {@link Serializable} so
   * converters can be captured by Beam transforms.
   */
  @FunctionalInterface
  interface BeamFieldConverter extends Serializable {
    Schema.Field create(int index, ResultSetMetaData md) throws SQLException;
  }

  /**
   * Maps a {@link JDBCType} to a {@link BeamFieldConverter} producing the corresponding Beam
   * schema field.
   *
   * <p>Sized character/binary types (CHAR, VARCHAR, BINARY, ...) map to logical types that carry
   * the declared length; NUMERIC maps to a fixed-precision logical type; OTHER/JAVA_OBJECT columns
   * are special-cased for UUID via {@code className}.
   *
   * @param jdbcType JDBC column type reported by the driver
   * @param className JDBC column class name (from {@link ResultSetMetaData#getColumnClassName}),
   *     used only to recognize UUID columns reported as OTHER/JAVA_OBJECT
   * @throws UnsupportedOperationException for JDBC types with no Beam mapping
   */
  private static BeamFieldConverter jdbcTypeToBeamFieldConverter(
      JDBCType jdbcType, String className) {
    switch (jdbcType) {
      case ARRAY:
        return beamArrayField();
      case BIGINT:
        return beamFieldOfType(Schema.FieldType.INT64);
      case BINARY:
        return beamLogicalField(BINARY.getName(), LogicalTypes::fixedOrVariableBytes);
      case BIT:
        return beamFieldOfType(LogicalTypes.JDBC_BIT_TYPE);
      case BOOLEAN:
        return beamFieldOfType(Schema.FieldType.BOOLEAN);
      case CHAR:
        return beamLogicalField(CHAR.getName(), FixedString::of);
      case DATE:
        return beamFieldOfType(LogicalTypes.JDBC_DATE_TYPE);
      case DECIMAL:
        return beamFieldOfType(Schema.FieldType.DECIMAL);
      case DOUBLE:
        return beamFieldOfType(Schema.FieldType.DOUBLE);
      case FLOAT:
        // JDBC FLOAT is double-precision; a dedicated logical type records the JDBC origin.
        return beamFieldOfType(LogicalTypes.JDBC_FLOAT_TYPE);
      case INTEGER:
        return beamFieldOfType(Schema.FieldType.INT32);
      case LONGNVARCHAR:
        return beamLogicalField(LONGNVARCHAR.getName(), VariableString::of);
      case LONGVARBINARY:
        return beamLogicalField(LONGVARBINARY.getName(), VariableBytes::of);
      case LONGVARCHAR:
        return beamLogicalField(LONGVARCHAR.getName(), VariableString::of);
      case NCHAR:
        return beamLogicalField(NCHAR.getName(), FixedString::of);
      case NUMERIC:
        // NUMERIC keeps precision/scale (or falls back to DECIMAL when unspecified).
        return beamLogicalNumericField();
      case NVARCHAR:
        return beamLogicalField(NVARCHAR.getName(), VariableString::of);
      case REAL:
        return beamFieldOfType(Schema.FieldType.FLOAT);
      case SMALLINT:
        return beamFieldOfType(Schema.FieldType.INT16);
      case TIME:
        return beamFieldOfType(LogicalTypes.JDBC_TIME_TYPE);
      case TIMESTAMP:
        return beamFieldOfType(Schema.FieldType.DATETIME);
      case TIMESTAMP_WITH_TIMEZONE:
        return beamFieldOfType(LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE);
      case TINYINT:
        return beamFieldOfType(Schema.FieldType.BYTE);
      case VARBINARY:
        return beamLogicalField(VARBINARY.getName(), VariableBytes::of);
      case VARCHAR:
        return beamLogicalField(VARCHAR.getName(), VariableString::of);
      case BLOB:
        return beamFieldOfType(FieldType.BYTES);
      case CLOB:
        return beamFieldOfType(FieldType.STRING);
      case OTHER:
      case JAVA_OBJECT:
        // Vendor-specific types: recognize UUID columns, otherwise store as string.
        if (UUID.class.getName().equals(className)) {
          return beamFieldOfType(LogicalTypes.JDBC_UUID_TYPE);
        }
        return beamFieldOfType(LogicalTypes.OTHER_AS_STRING_TYPE);
      default:
        throw new UnsupportedOperationException(
            "Converting " + jdbcType + " to Beam schema type is not supported");
    }
  }

  /**
   * Infers the Beam {@link Schema} from {@link ResultSetMetaData}.
   *
   * <p>Walks every column (JDBC columns are 1-based) and delegates the per-column mapping to
   * {@link #jdbcTypeToBeamFieldConverter}.
   */
  public static Schema toBeamSchema(ResultSetMetaData md) throws SQLException {
    Schema.Builder builder = Schema.builder();
    for (int column = 1; column <= md.getColumnCount(); column++) {
      JDBCType columnType = JDBCType.valueOf(md.getColumnType(column));
      BeamFieldConverter converter =
          jdbcTypeToBeamFieldConverter(columnType, md.getColumnClassName(column));
      builder.addField(converter.create(column, md));
    }
    return builder.build();
  }

  /**
   * Converts a primitive JDBC field to corresponding Beam schema field.
   *
   * <p>The field name is the column label and nullability mirrors the column's declared
   * nullability.
   */
  private static BeamFieldConverter beamFieldOfType(Schema.FieldType fieldType) {
    return (index, md) -> {
      boolean nullable = md.isNullable(index) == ResultSetMetaData.columnNullable;
      return Schema.Field.of(md.getColumnLabel(index), fieldType).withNullable(nullable);
    };
  }

  /** Converts logical types with arguments such as VARCHAR(25). */
  private static  BeamFieldConverter beamLogicalField(
      String identifier,
      BiFunction> constructor) {
    return (index, md) -> {
      int size = md.getPrecision(index);
      Schema.FieldType fieldType =
          Schema.FieldType.logicalType(constructor.apply(identifier, size));
      return beamFieldOfType(fieldType).create(index, md);
    };
  }

  /**
   * Converts numeric fields with specified precision and scale to {@link FixedPrecisionNumeric}. If
   * a precision of numeric field is not specified, then converts such field to {@link
   * FieldType#DECIMAL}.
   */
  private static BeamFieldConverter beamLogicalNumericField() {
    return (index, md) -> {
      int precision = md.getPrecision(index);
      boolean precisionUnspecified = precision == Integer.MAX_VALUE || precision == -1;
      if (precisionUnspecified) {
        // If a precision is not specified, the column stores values as given (e.g. in Oracle DB),
        // so fall back to an unconstrained DECIMAL field.
        boolean nullable = md.isNullable(index) == ResultSetMetaData.columnNullable;
        return Schema.Field.of(md.getColumnLabel(index), FieldType.DECIMAL).withNullable(nullable);
      }
      Schema.FieldType numericType =
          Schema.FieldType.logicalType(FixedPrecisionNumeric.of(precision, md.getScale(index)));
      return beamFieldOfType(numericType).create(index, md);
    };
  }

  /**
   * Converts array fields.
   *
   * <p>The element type is derived from the column's type name, then wrapped in an array field
   * carrying the column label and nullability.
   */
  private static BeamFieldConverter beamArrayField() {
    return (index, md) -> {
      JDBCType elementType = JDBCType.valueOf(md.getColumnTypeName(index));
      BeamFieldConverter elementConverter =
          jdbcTypeToBeamFieldConverter(elementType, md.getColumnClassName(index));
      Schema.FieldType elementBeamType = elementConverter.create(index, md).getType();

      boolean nullable = md.isNullable(index) == ResultSetMetaData.columnNullable;
      return Schema.Field.of(md.getColumnLabel(index), Schema.FieldType.array(elementBeamType))
          .withNullable(nullable);
    };
  }

  /**
   * Creates a {@link ResultSetFieldExtractor} for the given type.
   *
   * <p>Arrays/iterables recurse on the element type; DATETIME and logical types use dedicated
   * extractors; all remaining primitives are looked up in {@link #RESULTSET_FIELD_EXTRACTORS}.
   */
  private static ResultSetFieldExtractor createFieldExtractor(Schema.FieldType fieldType) {
    Schema.TypeName typeName = fieldType.getTypeName();
    switch (typeName) {
      case ARRAY:
      case ITERABLE:
        Schema.FieldType elementType = checkArgumentNotNull(fieldType.getCollectionElementType());
        return createArrayExtractor(createFieldExtractor(elementType));
      case DATETIME:
        return TIMESTAMP_EXTRACTOR;
      case LOGICAL_TYPE:
        return createLogicalTypeExtractor(checkArgumentNotNull(fieldType.getLogicalType()));
      default:
        ResultSetFieldExtractor extractor = RESULTSET_FIELD_EXTRACTORS.get(typeName);
        if (extractor == null) {
          throw new UnsupportedOperationException(
              "BeamRowMapper does not have support for fields of type " + fieldType);
        }
        return extractor;
    }
  }

  /**
   * Creates a {@link ResultSetFieldExtractor} for array types.
   *
   * <p>Materializes the SQL ARRAY through its {@link ResultSet} view; per JDBC, the element value
   * of each row sits in column 2, but the element extractor reads column 1 of the per-element
   * result set as the original code did.
   */
  private static ResultSetFieldExtractor createArrayExtractor(
      ResultSetFieldExtractor elementExtractor) {
    return (rs, index) -> {
      Array sqlArray = rs.getArray(index);
      if (sqlArray == null) {
        return null;
      }

      ResultSet elementRs = checkArgumentNotNull(sqlArray.getResultSet());
      List<@Nullable Object> elements = new ArrayList<>();
      while (elementRs.next()) {
        elements.add(elementExtractor.extract(elementRs, 1));
      }
      return elements;
    };
  }

  /**
   * Creates a {@link ResultSetFieldExtractor} for logical types.
   *
   * <p>UUID and the date/time logical types get dedicated extractors; any other logical type is
   * read via its base type's extractor and converted back with {@link
   * Schema.LogicalType#toInputType}.
   *
   * <p>Fixes in this revision: (1) restored the {@code <InputT, BaseT>} type parameters stripped
   * from this copy ({@code BaseT} was referenced but undeclared, a compile error); (2) the
   * timestamp branch compared the identifier against {@code "TIMESTAMP_EXTRACTOR"} — the name of a
   * Java constant, not a logical-type identifier — making it unreachable; it now matches the
   * {@code "TIMESTAMP_WITH_TIMEZONE"} identifier used by {@code
   * LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE}.
   */
  private static <InputT, BaseT> ResultSetFieldExtractor createLogicalTypeExtractor(
      final Schema.LogicalType<InputT, BaseT> fieldType) {
    String logicalTypeName = fieldType.getIdentifier();

    if (Objects.equals(fieldType, LogicalTypes.JDBC_UUID_TYPE.getLogicalType())) {
      return OBJECT_EXTRACTOR;
    } else if (logicalTypeName.equals("DATE")) {
      return DATE_EXTRACTOR;
    } else if (logicalTypeName.equals("TIME")) {
      return TIME_EXTRACTOR;
    } else if (logicalTypeName.equals("TIMESTAMP_WITH_TIMEZONE")) {
      return TIMESTAMP_EXTRACTOR;
    } else {
      ResultSetFieldExtractor extractor = createFieldExtractor(fieldType.getBaseType());
      return (rs, index) -> {
        @SuppressWarnings("unchecked") // base-type extractor yields the logical type's base value
        BaseT v = (BaseT) extractor.extract(rs, index);
        if (v == null) {
          return null;
        }
        return fieldType.toInputType(v);
      };
    }
  }

  /** Convert SQL date type to Beam DateTime (midnight UTC of the stored date). */
  private static ResultSetFieldExtractor createDateExtractor() {
    return (rs, i) -> {
      // TODO(https://github.com/apache/beam/issues/19215) import when joda LocalDate is removed.
      java.time.LocalDate localDate = rs.getObject(i, java.time.LocalDate.class);
      if (localDate == null) {
        return null;
      }
      long epochMillis = localDate.atStartOfDay(ZoneOffset.UTC).toInstant().toEpochMilli();
      return new DateTime(epochMillis, ISOChronology.getInstanceUTC());
    };
  }

  /** Convert SQL time type to Beam DateTime (time-of-day on the epoch date, in UTC). */
  private static ResultSetFieldExtractor createTimeExtractor() {
    return (rs, i) -> {
      Calendar utcCalendar = Calendar.getInstance(TimeZone.getTimeZone(ZoneOffset.UTC));
      Time sqlTime = rs.getTime(i, utcCalendar);
      if (sqlTime == null) {
        return null;
      }
      // Pin the date portion to 1970-01-01 so only the time-of-day is significant.
      DateTime utcInstant = new DateTime(sqlTime.getTime(), ISOChronology.getInstanceUTC());
      return utcInstant.withDate(new LocalDate(0L));
    };
  }

  /** Convert SQL timestamp type to Beam DateTime (interpreted in UTC). */
  private static ResultSetFieldExtractor createTimestampExtractor() {
    return (rs, i) -> {
      Calendar utcCalendar = Calendar.getInstance(TimeZone.getTimeZone(ZoneOffset.UTC));
      Timestamp sqlTimestamp = rs.getTimestamp(i, utcCalendar);
      if (sqlTimestamp == null) {
        return null;
      }
      long epochMillis = sqlTimestamp.toInstant().toEpochMilli();
      return new DateTime(epochMillis, ISOChronology.getInstanceUTC());
    };
  }

  /** Convert SQL OTHER type to Beam Object (driver-native value, no conversion). */
  private static ResultSetFieldExtractor createObjectExtractor() {
    return (rs, index) -> rs.getObject(index);
  }

  /**
   * A {@link org.apache.beam.sdk.io.jdbc.JdbcIO.RowMapper} implementation that converts JDBC
   * results into Beam {@link Row} objects.
   *
   * <p>Restored the type arguments stripped from this copy ({@code JdbcIO.RowMapper<Row>} and
   * {@code List<ResultSetFieldExtractor>} were raw types). Extractors are built once per schema in
   * {@link #of} and reused for every row.
   */
  public static final class BeamRowMapper implements JdbcIO.RowMapper<Row> {
    private final Schema schema;
    private final List<ResultSetFieldExtractor> fieldExtractors;

    /** Creates a mapper for {@code schema}, pre-building one extractor per schema field. */
    public static BeamRowMapper of(Schema schema) {
      List<ResultSetFieldExtractor> fieldExtractors =
          IntStream.range(0, schema.getFieldCount())
              .mapToObj(i -> createFieldExtractor(schema.getField(i).getType()))
              .collect(Collectors.toList());

      return new BeamRowMapper(schema, fieldExtractors);
    }

    private BeamRowMapper(Schema schema, List<ResultSetFieldExtractor> fieldExtractors) {
      this.schema = schema;
      this.fieldExtractors = fieldExtractors;
    }

    /** Maps the current row of {@code rs} to a Beam {@link Row}; SQL NULLs become null values. */
    @Override
    public Row mapRow(ResultSet rs) throws Exception {
      Row.Builder rowBuilder = Row.withSchema(schema);
      for (int i = 0; i < schema.getFieldCount(); i++) {
        // JDBC columns are 1-based while schema fields are 0-based.
        Object value = fieldExtractors.get(i).extract(rs, i + 1);
        // Primitive getters return type defaults (0/false) for SQL NULL; wasNull() disambiguates.
        if (rs.wasNull()) {
          rowBuilder.addValue(null);
        } else {
          rowBuilder.addValue(value);
        }
      }
      return rowBuilder.build();
    }
  }

  /**
   * Compares two fields. Does not compare nullability of field types. Field names are compared
   * case-insensitively.
   *
   * @param a field 1
   * @param b field 2
   * @return TRUE if fields are equal. Otherwise FALSE
   */
  public static boolean compareSchemaField(Schema.Field a, Schema.Field b) {
    return a.getName().equalsIgnoreCase(b.getName())
        && compareSchemaFieldType(a.getType(), b.getType());
  }

  /**
   * Checks nullability for fields.
   *
   * <p>Restored the element type stripped from this copy: with a raw {@code List} the lambda
   * parameter is {@code Object} and {@code field.getType()} does not compile.
   *
   * @param fields list of fields
   * @return TRUE if any field is not nullable
   */
  static boolean checkNullabilityForFields(List<Schema.Field> fields) {
    return fields.stream().anyMatch(field -> !field.getType().getNullable());
  }

  /**
   * Compares two FieldType. Modified from FieldType.equals to ignore nullability.
   *
   * <p>Logical types are compared through their base types, so a logical type matches either
   * another logical type with the same base type or a plain field of that base type name. Restored
   * the wildcard type arguments on {@code Schema.LogicalType} that were stripped from this copy
   * (raw types).
   *
   * @param a FieldType 1
   * @param b FieldType 2
   * @return TRUE if FieldType are equal. Otherwise FALSE
   */
  static boolean compareSchemaFieldType(Schema.FieldType a, Schema.FieldType b) {
    if (a.getTypeName().equals(b.getTypeName())) {
      if (!a.getTypeName().isLogicalType()) {
        return true;
      } else {
        // Both logical: recurse on base types (identifiers/arguments intentionally ignored).
        Schema.LogicalType<?, ?> aLogicalType = checkArgumentNotNull(a.getLogicalType());
        Schema.LogicalType<?, ?> bLogicalType = checkArgumentNotNull(b.getLogicalType());
        return compareSchemaFieldType(aLogicalType.getBaseType(), bLogicalType.getBaseType());
      }
    } else if (a.getTypeName().isLogicalType()) {
      Schema.LogicalType<?, ?> aLogicalType = checkArgumentNotNull(a.getLogicalType());
      return aLogicalType.getBaseType().getTypeName().equals(b.getTypeName());
    } else if (b.getTypeName().isLogicalType()) {
      Schema.LogicalType<?, ?> bLogicalType = checkArgumentNotNull(b.getLogicalType());
      return bLogicalType.getBaseType().getTypeName().equals(a.getTypeName());
    }
    return false;
  }

  /** Immutable pairing of a schema {@link Schema.Field} with its positional index. */
  static class FieldWithIndex implements Serializable {
    private final Schema.Field schemaField;
    private final Integer fieldIndex;

    /** Factory method; both arguments must be non-null. */
    static FieldWithIndex of(Schema.Field field, Integer index) {
      checkArgument(field != null);
      checkArgument(index != null);
      return new FieldWithIndex(field, index);
    }

    private FieldWithIndex(Schema.Field field, Integer index) {
      this.schemaField = field;
      this.fieldIndex = index;
    }

    public Schema.Field getField() {
      return schemaField;
    }

    public Integer getIndex() {
      return fieldIndex;
    }
  }
}



