All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.gobblin.source.jdbc.TeradataExtractor Maven / Gradle / Ivy

Go to download

A distributed data integration framework for streaming and batch data ecosystems.

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.source.jdbc;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import lombok.extern.slf4j.Slf4j;

import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.source.extractor.DataRecordException;
import org.apache.gobblin.source.extractor.exception.HighWatermarkException;
import org.apache.gobblin.source.extractor.exception.RecordCountException;
import org.apache.gobblin.source.extractor.exception.SchemaException;
import org.apache.gobblin.source.extractor.extract.Command;
import org.apache.gobblin.source.extractor.extract.CommandOutput;
import org.apache.gobblin.source.extractor.schema.Schema;
import org.apache.gobblin.source.extractor.utils.Utils;
import org.apache.gobblin.source.extractor.watermark.Predicate;
import org.apache.gobblin.source.extractor.watermark.WatermarkType;
import org.apache.gobblin.source.workunit.WorkUnit;

/**
 * Teradata extractor using JDBC protocol
 *
 * @author ypopov
 */
@Slf4j
public class TeradataExtractor extends JdbcExtractor {
  private static final String TERADATA_TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss";
  private static final String TERADATA_DATE_FORMAT = "yyyy-MM-dd";
  private static final String TERADATA_HOUR_FORMAT = "HH";
  private static final long SAMPLE_RECORD_COUNT = -1;
  private static final String ELEMENT_DATA_TYPE = "string";

  private static final String TERADATA_SAMPLE_CLAUSE = " sample ";

  private static final Gson gson = new Gson();

  public TeradataExtractor(WorkUnitState workUnitState) {
    super(workUnitState);
  }

  @Override
  public List getSchemaMetadata(String schema, String entity) throws SchemaException {
    log.debug("Build query to get schema");

    List commands = new ArrayList();

    String inputQuery = this.workUnit.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY);
    String metadataSql, predicate = "1=0";
    if(isNullOrEmpty(inputQuery)) {
      metadataSql = "select * from " + schema + "." + entity;
    } else {
      metadataSql = this.removeSampleClauseFromQuery(inputQuery);
    }

    metadataSql = SqlQueryUtils.addPredicate(metadataSql, predicate);
    commands.add(JdbcExtractor.getCommand(metadataSql, JdbcCommand.JdbcCommandType.QUERY));
    return commands;
  }

  @Override
  public List getHighWatermarkMetadata(String schema, String entity, String watermarkColumn,
      List predicateList) throws HighWatermarkException {
    log.debug("Build query to get high watermark");
    List commands = new ArrayList();

    String columnProjection = "max(" + Utils.getCoalesceColumnNames(watermarkColumn) + ")";
    String watermarkFilter = this.concatPredicates(predicateList);
    String query = this.getExtractSql();

    if (isNullOrEmpty(watermarkFilter)) {
      watermarkFilter = "1=1";
    }
    query = query.replace(this.getOutputColumnProjection(), columnProjection)
        .replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);

    commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
    return commands;
  }

  @Override
  public List getCountMetadata(String schema, String entity, WorkUnit workUnit, List predicateList)
      throws RecordCountException {
    log.debug("Build query to get source record count");
    List commands = new ArrayList();

    String columnProjection = "CAST(COUNT(1) AS BIGINT)";
    String watermarkFilter = this.concatPredicates(predicateList);
    String query = this.getExtractSql();

    if (isNullOrEmpty(watermarkFilter)) {
      watermarkFilter = "1=1";
    }
    query = query.replace(this.getOutputColumnProjection(), columnProjection)
        .replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);
    String sampleFilter = this.constructSampleClause();
    query = query + sampleFilter;

    if (!isNullOrEmpty(sampleFilter)) {
      query = "SELECT " + columnProjection + " FROM (" + query.replace(columnProjection, "1 as t") + ") temp";
    }
    commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
    return commands;
  }

  @Override
  public List getDataMetadata(String schema, String entity, WorkUnit workUnit, List predicateList)
      throws DataRecordException {
    log.debug("Build query to extract data");
    List commands = new ArrayList();
    int fetchSize = this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_JDBC_RESULTSET_FETCH_SIZE,
        ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_JDBC_RESULTSET_FETCH_SIZE);

    String watermarkFilter = this.concatPredicates(predicateList);
    String query = this.getExtractSql();
    if (isNullOrEmpty(watermarkFilter)) {
      watermarkFilter = "1=1";
    }

    query = query.replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);
    String sampleFilter = this.constructSampleClause();
    query = query + sampleFilter;

    commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
    commands.add(JdbcExtractor.getCommand(fetchSize, JdbcCommand.JdbcCommandType.FETCHSIZE));
    return commands;
  }

  @Override
  public Map getDataTypeMap() {
    Map dataTypeMap =
        ImmutableMap.builder()
            .put("byteint", "int")
            .put("smallint", "int")
            .put("integer", "int")
            .put("bigint", "long")
            .put("float", "float")
            .put("decimal", "double")
            .put("char", "string")
            .put("varchar", "string")
            .put("byte", "bytes")
            .put("varbyte", "bytes")
            .put("date", "date")
            .put("time", "time")
            .put("timestamp", "timestamp")
            .put("clob", "string")
            .put("blob", "string")
            .put("structured udt", "array")
            .put("double precision", "float")
            .put("numeric", "double")
            .put("real", "float")
            .put("character", "string")
            .put("char varying", "string")
            .put("character varying", "string")
            .put("long varchar", "string")
            .put("interval", "string")
            .build();
    return dataTypeMap;
  }

  @Override
  public Iterator getRecordSetFromSourceApi(String schema, String entity, WorkUnit workUnit,
      List predicateList) throws IOException {
    return null;
  }

  @Override
  public String getConnectionUrl() {
    String urlPrefix = "jdbc:teradata://";
    String host = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
    checkArgument(!isNullOrEmpty(host), "Connectionn host cannot be null or empty at %s", ConfigurationKeys.SOURCE_CONN_HOST_NAME);

    String port = this.workUnit.getProp(ConfigurationKeys.SOURCE_CONN_PORT,"1025");
    String database = this.workUnit.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
    String defaultUrl = urlPrefix + host.trim() + "/TYPE=FASTEXPORT,DATABASE=" + database.trim() + ",DBS_PORT=" + port.trim() ;
    // use custom url from source.conn.host if Teradata jdbc url available
    return host.contains(urlPrefix) ? host.trim() : defaultUrl;
  }

  @Override
  public long extractSampleRecordCountFromQuery(String query) {
    if (isNullOrEmpty(query)) {
      return SAMPLE_RECORD_COUNT;
    }

    long recordcount = SAMPLE_RECORD_COUNT;

    String limit = null;
    String inputQuery = query.toLowerCase();
    int limitIndex = inputQuery.indexOf(TERADATA_SAMPLE_CLAUSE);
    if (limitIndex > 0) {
      limit = query.substring(limitIndex + TERADATA_SAMPLE_CLAUSE.length()).trim();
    }

    if (!isNullOrEmpty(limit)) {
      try {
        recordcount = Long.parseLong(limit);
      } catch (Exception e) {
        log.error("Ignoring incorrect limit value in input query: {}", limit);
      }
    }
    return recordcount;
  }

  @Override
  public String removeSampleClauseFromQuery(String query) {
    if (isNullOrEmpty(query)) {
      return null;
    }
    String limitString = "";
    String inputQuery = query.toLowerCase();
    int limitIndex = inputQuery.indexOf(TERADATA_SAMPLE_CLAUSE);
    if (limitIndex > 0) {
      limitString = query.substring(limitIndex);
    }
    return query.replace(limitString, "");
  }

  @Override
  public String constructSampleClause() {
    long sampleRowCount = this.getSampleRecordCount();
    if (sampleRowCount >= 0) {
      return TERADATA_SAMPLE_CLAUSE + sampleRowCount;
    }
    return "";
  }

  @Override
  public String getWatermarkSourceFormat(WatermarkType watermarkType) {
    String columnFormat = null;
    switch (watermarkType) {
      case TIMESTAMP:
        columnFormat = TERADATA_TIMESTAMP_FORMAT;
        break;
      case DATE:
        columnFormat = TERADATA_DATE_FORMAT;
        break;
      case HOUR:
        columnFormat = TERADATA_HOUR_FORMAT;
        break;
      case SIMPLE:
        break;
      default:
        log.error("Watermark type {} not recognized", watermarkType.toString());
    }
    return columnFormat;
  }

  @Override
  public String getHourPredicateCondition(String column, long value, String valueFormat, String operator) {
    log.debug("Getting hour predicate for Teradata");
    String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TERADATA_HOUR_FORMAT);
    return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
  }

  @Override
  public String getDatePredicateCondition(String column, long value, String valueFormat, String operator) {
    log.debug("Getting date predicate for Teradata");
    String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TERADATA_DATE_FORMAT);
    return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
  }

  @Override
  public String getTimestampPredicateCondition(String column, long value, String valueFormat, String operator) {
    log.debug("Getting timestamp predicate for Teradata");
    String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TERADATA_TIMESTAMP_FORMAT);
    return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
  }

  @Override
  public JsonArray getSchema(CommandOutput response) throws SchemaException, IOException {
    log.debug("Extract schema from resultset");
    ResultSet resultset = null;
    Iterator itr = (Iterator) response.getResults().values().iterator();
    if (itr.hasNext()) {
      resultset = itr.next();
    } else {
      throw new SchemaException("Failed to get schema from Teradata - empty schema resultset");
    }

    JsonArray fieldJsonArray = new JsonArray();
    try {
        Schema schema = new Schema();
        ResultSetMetaData rsmd = resultset.getMetaData();

        String columnName, columnTypeName;
        for (int i = 1; i <= rsmd.getColumnCount(); i++) {
          columnName = rsmd.getColumnName(i);
          columnTypeName = rsmd.getColumnTypeName(i);

          schema.setColumnName(columnName);

          List mapSymbols = null;
          JsonObject newDataType = this.convertDataType(columnName, columnTypeName, ELEMENT_DATA_TYPE, mapSymbols);

          schema.setDataType(newDataType);
          schema.setLength(rsmd.getColumnDisplaySize(i));
          schema.setPrecision(rsmd.getPrecision(i));
          schema.setScale(rsmd.getScale(i));
          schema.setNullable(rsmd.isNullable(i) == ResultSetMetaData.columnNullable);
          schema.setComment(rsmd.getColumnLabel(i));

          String jsonStr = gson.toJson(schema);
          JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject();
          fieldJsonArray.add(obj);
        }

    } catch (Exception e) {
      throw new SchemaException("Failed to get schema from Teradaa; error - " + e.getMessage(), e);
    }

    return fieldJsonArray;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy