All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sqoop.manager.oracle.OraOopConnManager Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.manager.oracle;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.sql.Types;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.sqoop.manager.OracleManager;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ExportJobContext;
import com.cloudera.sqoop.manager.GenericJdbcManager;
import com.cloudera.sqoop.manager.ImportJobContext;
import com.cloudera.sqoop.mapreduce.JdbcExportJob;
import com.cloudera.sqoop.mapreduce.JdbcUpdateExportJob;
import com.cloudera.sqoop.util.ExportException;
import com.cloudera.sqoop.util.ImportException;

/**
 * OraOop manager for high performance Oracle import / export.
 * NOTES:
 *   Escaping Column Names:
 *   ----------------------
 *   There are 3 main queries that occur during a Sqoop import.
 *   (1) Selecting columns to obtain their data-type via getColTypesQuery();
 *   (2) selecting column names via getColNamesQuery(); and
 *   (3) getting the data during the import via
 *       OraOopDBRecordReader.getSelectQuery();
 *   In each of these queries, we'd ideally escape the column names so that
 *   Oracle columns that require this work okay.
 *   Unfortunately we can't do this, because if the user specifies column
 *   names via the "--columns" clause, these names will be used (verbatim)
 *   during OraOopDBRecordReader.getSelectQuery(). This means that we could
 *   only escape the column names during OraOopDBRecordReader.getSelectQuery()
 *   if the user entered them in the correct case.
 *   Therefore, escapeColName() in this class does not actually do anything so
 *   that OraOopDBRecordReader.getSelectQuery() generates a valid SQL statement
 *   when the user utilises the "--columns" clause.
 *   However, getColTypesQuery() and getColNamesQuery() do escape column names
 *   via the method escapeOracleColumnName(). We also get getColumnTypes() to
 *   unescape the column names so that Sqoop has the most accurate column
 *   name strings.
 */
public class OraOopConnManager extends GenericJdbcManager {

  public static final OraOopLog LOG = OraOopLogFactory
      .getLog(OraOopConnManager.class.getName());
  private List columnNamesInOracleTable = null;
  private Map columnTypesInOracleTable = null;
  private final String timestampJavaType;

  public OraOopConnManager(final SqoopOptions sqoopOptions) {
    super(OraOopConstants.ORACLE_JDBC_DRIVER_CLASS, sqoopOptions);
    if (this.options.getConf().getBoolean(
        OraOopConstants.ORAOOP_MAP_TIMESTAMP_AS_STRING,
        OraOopConstants.ORAOOP_MAP_TIMESTAMP_AS_STRING_DEFAULT)) {
      timestampJavaType = "String";
    } else {
      timestampJavaType = super.toJavaType(Types.TIMESTAMP);
    }
  }

  @Override
  protected Connection makeConnection() throws SQLException {

    String connectStr = this.options.getConnectString();
    String username = this.options.getUsername();
    String password = this.options.getPassword();
    Properties additionalProps = this.options.getConnectionParams();

    Connection connection =
        OracleConnectionFactory.createOracleJdbcConnection(this
            .getDriverClass(), connectStr, username, password, additionalProps);
    if (username == null) {
      username = OracleManager.getSessionUser(connection);
    }
    OraOopUtilities.setCurrentSessionUser(username);
    return connection;
  }

  @Override
  public void close() throws SQLException {

    super.close();
  }

  private List getColumnNamesInOracleTable(String tableName) {

    if (this.columnNamesInOracleTable == null) {

      OracleTable tableContext = null;

      try {
        tableContext = getOracleTableContext();

        Configuration conf = this.options.getConf();

        this.columnNamesInOracleTable =
          OraOopOracleQueries.getTableColumnNames(
            getConnection(),
            tableContext,
            OraOopUtilities
              .omitLobAndLongColumnsDuringImport(conf),
            OraOopUtilities
              .recallSqoopJobType(conf),
            true, // <- onlyOraOopSupportedTypes
            true, // <- omitOraOopPseudoColumns
            options.isOracleEscapingDisabled()
          );
      } catch (SQLException ex) {
        throw new RuntimeException(ex);
      }
    }

    // Return a copy of our list, so the original will not be inadvertently
    // altered...
    return OraOopUtilities.copyStringList(this.columnNamesInOracleTable);
  }

  protected List getSelectedColumnNamesInOracleTable(String tableName) {

    List colNamesInTable = getColumnNamesInOracleTable(tableName);

    String[] selectedColumns = this.options.getColumns();
    if (selectedColumns != null && selectedColumns.length > 0) {

      for (int idx = 0; idx < selectedColumns.length; idx++) {

        String selectedColumn = selectedColumns[idx];
        // If the user did not escape this column name, then we should
        // uppercase it...
        if (!OracleUtils.isEscaped(selectedColumn)) {
          selectedColumns[idx] = selectedColumn.toUpperCase();
        } else {
          // If the user escaped this column name, then we should
          // retain its case...
          selectedColumns[idx] = OracleUtils.unescapeIdentifier(selectedColumn);
        }
      }

      // Ensure there are no duplicated column names...
      String[] duplicates =
          OraOopUtilities
              .getDuplicatedStringArrayValues(selectedColumns, false);
      if (duplicates.length > 0) {
        StringBuilder msg = new StringBuilder();
        msg.append("The following column names have been duplicated in the ");
        msg.append("\"--columns\" clause:\n");

        for (String duplicate : duplicates) {
          msg.append("\t" + duplicate + "\n");
        }

        throw new RuntimeException(msg.toString());
      }

      // Ensure the user selected column names that actually exist...
      for (String selectedColumn : selectedColumns) {
        if (!colNamesInTable.contains(selectedColumn)) {
          OracleTable tableContext = getOracleTableContext();
          throw new RuntimeException(String.format(
              "The column named \"%s\" does not exist within the table"
                  + "%s (or is of an unsupported data-type).", selectedColumn,
              tableContext.toString()));
        }
      }

      // Remove any columns (that exist in the table) that were not
      // selected by the user...
      for (int idx = colNamesInTable.size() - 1; idx >= 0; idx--) {
        String colName = colNamesInTable.get(idx);
        if (!OraOopUtilities.stringArrayContains(selectedColumns, colName,
            false)) {
          colNamesInTable.remove(idx);
        }
      }
    }

    // To assist development/testing of Oracle data-types, you can use this
    // to limit the number of columns from the table...
    int columnNameLimit =
        this.options.getConf().getInt("oraoop.column.limit", 0);
    if (columnNameLimit > 0) {
      columnNameLimit = Math.min(columnNameLimit, colNamesInTable.size());
      colNamesInTable = colNamesInTable.subList(0, columnNameLimit);
    }

    return colNamesInTable;
  }

  @Override
  protected String getColTypesQuery(String tableName) {

    List colNames = getSelectedColumnNamesInOracleTable(tableName);

    StringBuilder sb = new StringBuilder();
    sb.append("SELECT ");
    for (int idx = 0; idx < colNames.size(); idx++) {
      if (idx > 0) {
        sb.append(",");
      }
      sb.append(escapeColName(colNames.get(idx))); // <- See notes at
                                                            // top about escaped
                                                            // column names
    }
    sb.append(String.format(" FROM %s WHERE 0=1", tableName));

    return sb.toString();
  }

  @Override
  protected String getColNamesQuery(String tableName) {

    // NOTE: This code is similar to getColTypesQuery() - except the
    // escaping of column names and table name differs.

    List colNames = getSelectedColumnNamesInOracleTable(tableName);

    StringBuilder sb = new StringBuilder();
    sb.append("SELECT ");
    for (int idx = 0; idx < colNames.size(); idx++) {
      if (idx > 0) {
        sb.append(",");
      }
      sb.append(escapeColName(colNames.get(idx))); // <- See notes at top about
                                                   // escaped column names
    }
    sb.append(String.format(" FROM %s WHERE 1=0", escapeTableName(tableName)));

    return sb.toString();
  }

  @Override
  protected String getSplitColumn(SqoopOptions opts, String tableName) {

    // If we're importing an Oracle table and will be generating
    // "splits" based on its Oracle data-files, we don't actually require
    // a primary key to exist, or for the user to identify the split-column.
    // As a consequence, return "NotRequired" to prevent sqoop code
    // such as SqlManager.importTable() from throwing an exception.
    //
    // NB: The tableName parameter will be null if no table is involved,
    // such as when importing data via an (arbitrary) SQL query.
    if (tableName != null) {
      return OraOopConstants.TABLE_SPLIT_COLUMN_NOT_REQUIRED;
    } else {
      return super.getSplitColumn(opts, tableName);
    }
  }

  @Override
  public void importTable(ImportJobContext context) throws IOException,
      ImportException {

    logImportTableDetails(context);

    context.setConnManager(this);

    // Specify the Oracle-specific DBInputFormat for import.
    context.setInputFormat(OraOopDataDrivenDBInputFormat.class);

    super.importTable(context);
  }

  @Override
  public void exportTable(ExportJobContext context) throws IOException,
      ExportException {

    logExportTableDetails(context);

    if (this.columnTypesInOracleTable == null) {
      throw new ExportException("The column-types for the table are not"
          + "known.");
    }
    if (this.columnTypesInOracleTable.containsValue(OraOopOracleQueries
        .getOracleType("BINARY_DOUBLE"))) {
      context.getOptions().getConf().setBoolean(
          OraOopConstants.TABLE_CONTAINS_BINARY_DOUBLE_COLUMN, true);
    }
    if (this.columnTypesInOracleTable.containsValue(OraOopOracleQueries
        .getOracleType("BINARY_FLOAT"))) {
      context.getOptions().getConf().setBoolean(
          OraOopConstants.TABLE_CONTAINS_BINARY_FLOAT_COLUMN, true);
    }

    context.setConnManager(this);

    @SuppressWarnings("rawtypes")
    Class oraOopOutputFormatClass;
    try {
      oraOopOutputFormatClass = OraOopOutputFormatInsert.class;
    } catch (NoClassDefFoundError ex) {
      explainWhyExportClassCannotBeLoaded(ex, "OraOopOutputFormatInsert");
      throw ex;
    }
    JdbcExportJob exportJob =
        new JdbcExportJob(context, null, null, oraOopOutputFormatClass);
    exportJob.runExport();
  }

  @Override
  public void updateTable(ExportJobContext context) throws IOException,
      ExportException {

    logExportTableDetails(context);

    context.setConnManager(this);

    @SuppressWarnings("rawtypes")
    Class oraOopOutputFormatClass;
    try {
      oraOopOutputFormatClass = OraOopOutputFormatUpdate.class;
    } catch (NoClassDefFoundError ex) {
      explainWhyExportClassCannotBeLoaded(ex, "OraOopOutputFormatUpdate");
      throw ex;
    }

    JdbcUpdateExportJob exportJob =
        new JdbcUpdateExportJob(context, null, null, oraOopOutputFormatClass);
    exportJob.runExport();
  }

  @Override
  protected void finalize() throws Throwable {

    close();
    super.finalize();
  }

  @Override
  public String toHiveType(int sqlType) {

    String hiveType = super.toHiveType(sqlType);

    if (hiveType == null) {

      // http://wiki.apache.org/hadoop/Hive/Tutorial#Primitive_Types

      if (sqlType == OraOopOracleQueries.getOracleType("BFILE")
          || sqlType == OraOopOracleQueries.getOracleType("INTERVALYM")
          || sqlType == OraOopOracleQueries.getOracleType("INTERVALDS")
          || sqlType == OraOopOracleQueries.getOracleType("NCLOB")
          || sqlType == OraOopOracleQueries.getOracleType("NCHAR")
          || sqlType == OraOopOracleQueries.getOracleType("NVARCHAR")
          || sqlType == OraOopOracleQueries.getOracleType("OTHER")
          || sqlType == OraOopOracleQueries.getOracleType("ROWID")
          || sqlType == OraOopOracleQueries.getOracleType("TIMESTAMPTZ")
          || sqlType == OraOopOracleQueries.getOracleType("TIMESTAMPLTZ")
          || sqlType == OraOopOracleQueries.getOracleType("STRUCT")) {
        hiveType = "STRING";
      }

      if (sqlType == OraOopOracleQueries.getOracleType("BINARY_FLOAT")) {
        hiveType = "FLOAT";
      }

      if (sqlType == OraOopOracleQueries.getOracleType("BINARY_DOUBLE")) {
        hiveType = "DOUBLE";
      }
    }

    if (hiveType == null) {
      LOG.warn(String.format("%s should be updated to cater for data-type: %d",
          OraOopUtilities.getCurrentMethodName(), sqlType));
    }

    return hiveType;
  }

  @Override
  public String toJavaType(int sqlType) {

    String javaType = super.toJavaType(sqlType);

    if (sqlType == OraOopOracleQueries.getOracleType("TIMESTAMP")) {
      // Get the Oracle JDBC driver to convert this value to a string
      // instead of the generic JDBC driver.
      // If the generic JDBC driver is used, it will take into account the
      // timezone of the client machine's locale. The problem with this is that
      // timestamp data should not be associated with a timezone. In practice,
      // this
      // leads to problems, for example, the time '2010-10-03 02:01:00' being
      // changed to '2010-10-03 03:01:00' if the client machine's locale is
      // Melbourne.
      // (This is in response to daylight saving starting in Melbourne on
      // this date at 2am.)
      javaType = timestampJavaType;
    }

    if (sqlType == OraOopOracleQueries.getOracleType("TIMESTAMPTZ")) {
      // Returning "String" produces: "2010-08-08 09:00:00.0 +10:00"
      // Returning "java.sql.Timestamp" produces: "2010-08-08 09:00:00.0"

      // If we use "java.sql.Timestamp", the field's value will not
      // contain the timezone when converted to a string and written to the HDFS
      // CSV file.
      // I.e. Get the Oracle JDBC driver to convert this value to a string
      // instead of the generic JDBC driver...
      javaType = timestampJavaType;
    }

    if (sqlType == OraOopOracleQueries.getOracleType("TIMESTAMPLTZ")) {
      // Returning "String" produces:
      // "2010-08-08 09:00:00.0 Australia/Melbourne"
      // Returning "java.sql.Timestamp" produces: "2010-08-08 09:00:00.0"
      javaType = timestampJavaType;
    }

    /*
     * http://www.oracle.com/technology/sample_code/tech/java/sqlj_jdbc/files
     * /oracle10g/ieee/Readme.html
     *
     * BINARY_DOUBLE is a 64-bit, double-precision floating-point number
     * datatype. (IEEE 754) Each BINARY_DOUBLE value requires 9 bytes, including
     * a length byte. A 64-bit double format number X is divided as sign s 1-bit
     * exponent e 11-bits fraction f 52-bits
     *
     * BINARY_FLOAT is a 32-bit, single-precision floating-point number
     * datatype. (IEEE 754) Each BINARY_FLOAT value requires 5 bytes, including
     * a length byte. A 32-bit single format number X is divided as sign s 1-bit
     * exponent e 8-bits fraction f 23-bits
     */
    if (sqlType == OraOopOracleQueries.getOracleType("BINARY_FLOAT")) {
      // http://people.uncw.edu/tompkinsj/133/numbers/Reals.htm
      javaType = "Float";
    }

    if (sqlType == OraOopOracleQueries.getOracleType("BINARY_DOUBLE")) {
      // http://people.uncw.edu/tompkinsj/133/numbers/Reals.htm
      javaType = "Double";
    }

    if (sqlType == OraOopOracleQueries.getOracleType("STRUCT")) {
      // E.g. URITYPE
      javaType = "String";
    }

    if (javaType == null) {

      // For constant values, refer to:
      // http://oracleadvisor.com/documentation/oracle/database/11.2/
      //   appdev.112/e13995/constant-values.html#oracle_jdbc

      if (sqlType == OraOopOracleQueries.getOracleType("BFILE")
          || sqlType == OraOopOracleQueries.getOracleType("NCLOB")
          || sqlType == OraOopOracleQueries.getOracleType("NCHAR")
          || sqlType == OraOopOracleQueries.getOracleType("NVARCHAR")
          || sqlType == OraOopOracleQueries.getOracleType("ROWID")
          || sqlType == OraOopOracleQueries.getOracleType("INTERVALYM")
          || sqlType == OraOopOracleQueries.getOracleType("INTERVALDS")
          || sqlType == OraOopOracleQueries.getOracleType("OTHER")) {
        javaType = "String";
      }

    }

    if (javaType == null) {
      LOG.warn(String.format("%s should be updated to cater for data-type: %d",
          OraOopUtilities.getCurrentMethodName(), sqlType));
    }

    return javaType;
  }

  @Override
  public String timestampToQueryString(Timestamp ts) {

    return "TO_TIMESTAMP('" + ts + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
  }

  public OracleTable getOracleTableContext() {

    return OraOopUtilities.decodeOracleTableName(this.options.getUsername(),
        this.options.getTableName(), this.options.getConf());
  }

  @Override
  public Map getColumnTypes(String tableName) {

    if (this.columnTypesInOracleTable == null) {

      Map columnTypes = super.getColumnTypes(tableName);
      this.columnTypesInOracleTable = new HashMap();

      List colNames = getColumnNamesInOracleTable(tableName);

      for (int idx = 0; idx < colNames.size(); idx++) {

        String columnNameInTable = colNames.get(idx);
        if (columnTypes.containsKey(columnNameInTable)) {

          // Unescape the column names being returned...
          int colType = columnTypes.get(columnNameInTable);
          String key = OracleUtils.unescapeIdentifier(columnNameInTable); // <- See
                                                                    // notes at
                                                                    // top about
                                                                    // escaped
                                                                    // column
                                                                    // names
          this.columnTypesInOracleTable.put(key, colType);
        }
      }
    }

    return this.columnTypesInOracleTable;
  }

  @Override
  public String escapeColName(String colName) {
    return OracleUtils.escapeIdentifier(colName, options.isOracleEscapingDisabled()); // <- See notes at top about escaped
                                                                                // column names
  }

  @Override
  public String escapeTableName(String tableName) {
    return OracleUtils.escapeIdentifier(tableName, options.isOracleEscapingDisabled());
  }

   @Override
   public boolean escapeTableNameOnExport() {
        return true;
    }

  private void logImportTableDetails(ImportJobContext context) {

    Path outputDirectory = context.getDestination();
    if (outputDirectory != null) {
      LOG.debug("The output directory for the sqoop table import is : "
          + outputDirectory.getName());
    }

    // Indicate whether we can load the class named: OraOopOraStats
    showUserWhetherOraOopOraStatsIsAvailable(context.getOptions().getConf());
  }

  private void logExportTableDetails(ExportJobContext context) {

    // Indicate whether we can load the class named: OraOopOraStats
    showUserWhetherOraOopOraStatsIsAvailable(context.getOptions().getConf());

    // Indicate what the update/merge columns are...
    String[] updateKeyColumns =
        OraOopUtilities.getExportUpdateKeyColumnNames(context.getOptions());
    if (updateKeyColumns.length > 0) {
      LOG.info(String.format(
          "The column%s used to match rows in the HDFS file with rows in "
              + "the Oracle table %s: %s", updateKeyColumns.length > 1 ? "s"
              : "", updateKeyColumns.length > 1 ? "are" : "is", OraOopUtilities
              .stringArrayToCSV(updateKeyColumns)));
    }
  }

  private void showUserWhetherOraOopOraStatsIsAvailable(Configuration conf) {

    if (OraOopUtilities.userWantsOracleSessionStatisticsReports(conf)) {

      LOG.info(String.format("%s=true",
          OraOopConstants.ORAOOP_REPORT_SESSION_STATISTICS));

      // This will log a warning if it's unable to load the OraOopOraStats
      // class...
      OraOopUtilities.startSessionSnapshot(null);
    }
  }

  @Override
  protected String getCurTimestampQuery() {

    return "SELECT SYSTIMESTAMP FROM DUAL";
  }

  @Override
  protected void checkTableImportOptions(ImportJobContext context)
      throws IOException, ImportException {

    // Update the unit-test code if you modify this method.
    super.checkTableImportOptions(context);
  }

  private void explainWhyExportClassCannotBeLoaded(NoClassDefFoundError ex,
      String exportClassName) {

    String msg =
        String.format("Unable to load class %s.\n"
            + "This is most likely caused by the Cloudera Shim Jar "
            + "not being included in the Java Classpath.\n" + "Either:\n"
            + "\tUse \"-libjars\" on the Sqoop command-line to "
            + "include the Cloudera shim jar in the Java Classpath; or"
            + "\n\tCopy the Cloudera shim jar into the Sqoop/lib "
            + "directory so that it is automatically included in the "
            + "Java Classpath; or\n"
            + "\tObtain an updated version of Sqoop that addresses "
            + "the Sqoop Jira \"SQOOP-127\".\n" + "\n"
            + "The Java Classpath is:\n%s", exportClassName, OraOopUtilities
            .getJavaClassPath());
    LOG.fatal(msg, ex);
  }
  /**
   * Determine if HCat integration from direct mode of the connector is
   * allowed.  By default direct mode is not compatible with HCat
   * @return Whether direct mode is allowed.
   */
  @Override
  public boolean isDirectModeHCatSupported() {
    return true;
  }

  /**
   * Determine if HBase operations from direct mode of the connector is
   * allowed.  By default direct mode is not compatible with HBase
   * @return Whether direct mode is allowed.
   */
  public boolean isDirectModeHBaseSupported() {
    return true;
  }

  /**
   * Determine if Accumulo operations from direct mode of the connector is
   * allowed.  By default direct mode is not compatible with HBase
   * @return Whether direct mode is allowed.
   */
  public boolean isDirectModeAccumuloSupported() {
    return true;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy