org.apache.sqoop.manager.MySQLManager Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sqoop Show documentation
Bandwidth controlled sqoop for network aware data migration
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.manager;

import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.avro.Schema.Type;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.util.ImportException;
import com.cloudera.sqoop.util.ExportException;
import com.cloudera.sqoop.mapreduce.JdbcUpsertExportJob;
import org.apache.sqoop.mapreduce.mysql.MySQLUpsertOutputFormat;
import org.apache.sqoop.util.LoggingUtils;

/**
 * Manages connections to MySQL databases.
 */
public class MySQLManager
    extends com.cloudera.sqoop.manager.InformationSchemaManager {

  public static final Log LOG = LogFactory.getLog(MySQLManager.class.getName());

  // driver class to ensure is loaded when making db connection.
  private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";

  // set to true after we warn the user that we can use direct fastpath.
  private static boolean warningPrinted = false;

  private static final String EXPORT_OPERATION = "export";

  public MySQLManager(final SqoopOptions opts) {
    super(DRIVER_CLASS, opts);
  }

  @Override
  protected void initOptionDefaults() {
    if (options.getFetchSize() == null) {
      LOG.info("Preparing to use a MySQL streaming resultset.");
        String operation = options.getToolName();
      if (StringUtils.isNotBlank(operation) && operation.equalsIgnoreCase(EXPORT_OPERATION)) {
          // Set fetch size to zero for export operation, see SQOOP-2787 for more details. Setting fetchsize to 0
          // would not overwhelm the JVM as the queries during export are primarily metadata queries
          options.setFetchSize(0);
      } else {
          options.setFetchSize(Integer.MIN_VALUE);
      }
    } else if (
        !options.getFetchSize().equals(Integer.MIN_VALUE)
        && !options.getFetchSize().equals(0)) {
      LOG.info("Argument '--fetch-size " + options.getFetchSize()
          + "' will probably get ignored by MySQL JDBC driver.");
      // see also
      // http://dev.mysql.com/doc/refman/5.5/en
      //                       /connector-j-reference-implementation-notes.html
    }
  }

  @Override
  protected String getPrimaryKeyQuery(String tableName) {
    return "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS "
         + "WHERE TABLE_SCHEMA = (" + getSchemaQuery() + ") "
         + "AND TABLE_NAME = '"+tableName+"' "
         + "AND COLUMN_KEY = 'PRI'";
  }

  @Override
  protected String getColNamesQuery(String tableName) {
    // Use mysql-specific hints and LIMIT to return fast
    return "SELECT t.* FROM " + escapeTableName(tableName) + " AS t LIMIT 1";
  }

  @Override
  public void importTable(com.cloudera.sqoop.manager.ImportJobContext context)
      throws IOException, ImportException {

    // Check that we're not doing a MapReduce from localhost. If we are, point
    // out that we could use mysqldump.
    if (!MySQLManager.warningPrinted) {
      String connectString = context.getOptions().getConnectString();

      if (null != connectString) {
        // DirectMySQLManager will probably be faster.
        LOG.warn("It looks like you are importing from mysql.");
        LOG.warn("This transfer can be faster! Use the --direct");
        LOG.warn("option to exercise a MySQL-specific fast path.");

        MySQLManager.markWarningPrinted(); // don't display this twice.
      }
    }

    checkDateTimeBehavior(context);

    // Then run the normal importTable() method.
    super.importTable(context);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void upsertTable(com.cloudera.sqoop.manager.ExportJobContext context)
      throws IOException, ExportException {
    context.setConnManager(this);
    LOG.warn("MySQL Connector upsert functionality is using INSERT ON");
    LOG.warn("DUPLICATE KEY UPDATE clause that relies on table's unique key.");
    LOG.warn("Insert/update distinction is therefore independent on column");
    LOG.warn("names specified in --update-key parameter. Please see MySQL");
    LOG.warn("documentation for additional limitations.");

    JdbcUpsertExportJob exportJob =
      new JdbcUpsertExportJob(context, MySQLUpsertOutputFormat.class);
    exportJob.runExport();
  }

  @Override
  /**
   * {@inheritDoc}
   */
  public void configureDbOutputColumns(SqoopOptions options) {
    // In case that we're running upsert, we do not want to change column order
    // as we're actually going to use INSERT INTO ... ON DUPLICATE KEY UPDATE
    // clause.
    if (options.getUpdateMode() == SqoopOptions.UpdateMode.AllowInsert) {
      return;
    }

    super.configureDbOutputColumns(options);
  }

  /**
   * Set a flag to prevent printing the --direct warning twice.
   */
  protected static void markWarningPrinted() {
    MySQLManager.warningPrinted = true;
  }

  /**
   * MySQL allows TIMESTAMP fields to have the value '0000-00-00 00:00:00',
   * which causes errors in import. If the user has not set the
   * zeroDateTimeBehavior property already, we set it for them to coerce
   * the type to null.
   */
  private void checkDateTimeBehavior(ImportJobContext context) {
    final String ZERO_BEHAVIOR_STR = "zeroDateTimeBehavior";
    final String CONVERT_TO_NULL = "=convertToNull";

    String connectStr = context.getOptions().getConnectString();
    if (connectStr.indexOf("jdbc:") != 0) {
      // This connect string doesn't have the prefix we expect.
      // We can't parse the rest of it here.
      return;
    }

    // This starts with 'jdbc:mysql://' ... let's remove the 'jdbc:'
    // prefix so that java.net.URI can parse the rest of the line.
    String uriComponent = connectStr.substring(5);
    try {
      URI uri = new URI(uriComponent);
      String query = uri.getQuery(); // get the part after a '?'

      // If they haven't set the zeroBehavior option, set it to
      // squash-null for them.
      if (null == query) {
        connectStr = connectStr + "?" + ZERO_BEHAVIOR_STR + CONVERT_TO_NULL;
        LOG.info("Setting zero DATETIME behavior to convertToNull (mysql)");
      } else if (query.length() == 0) {
        connectStr = connectStr + ZERO_BEHAVIOR_STR + CONVERT_TO_NULL;
        LOG.info("Setting zero DATETIME behavior to convertToNull (mysql)");
      } else if (query.indexOf(ZERO_BEHAVIOR_STR) == -1) {
        if (!connectStr.endsWith("&")) {
          connectStr = connectStr + "&";
        }
        connectStr = connectStr + ZERO_BEHAVIOR_STR + CONVERT_TO_NULL;
        LOG.info("Setting zero DATETIME behavior to convertToNull (mysql)");
      }

      LOG.debug("Rewriting connect string to " + connectStr);
      context.getOptions().setConnectString(connectStr);
    } catch (URISyntaxException use) {
      // Just ignore this. If we can't parse the URI, don't attempt
      // to add any extra flags to it.
      LOG.debug("mysql: Couldn't parse connect str in checkDateTimeBehavior: "
          + use);
    }
  }

  @Override
  public void execAndPrint(String s) {
    // Override default execAndPrint() with a special version that forces
    // use of fully-buffered ResultSets (MySQLManager uses streaming ResultSets
    // in the default execute() method; but the execAndPrint() method needs to
    // issue overlapped queries for metadata.)

    ResultSet results = null;
    try {
      // Explicitly setting fetchSize to zero disables streaming.
      results = super.execute(s, 0);
    } catch (SQLException sqlE) {
      LoggingUtils.logAll(LOG, "Error executing statement: ", sqlE);
      release();
      return;
    }

    PrintWriter pw = new PrintWriter(System.out, true);
    try {
      formatAndPrintResultSet(results, pw);
    } finally {
      pw.close();
    }
  }

  /**
   * When using a column name in a generated SQL query, how (if at all)
   * should we escape that column name? e.g., a column named "table"
   * may need to be quoted with backtiks: "`table`".
   *
   * @param colName the column name as provided by the user, etc.
   * @return how the column name should be rendered in the sql text.
   */
  public String escapeColName(String colName) {
    if (null == colName) {
      return null;
    }
    return "`" + colName + "`";
  }

  /**
   * When using a table name in a generated SQL query, how (if at all)
   * should we escape that column name? e.g., a table named "table"
   * may need to be quoted with backtiks: "`table`".
   *
   * @param tableName the table name as provided by the user, etc.
   * @return how the table name should be rendered in the sql text.
   */
  public String escapeTableName(String tableName) {
    if (null == tableName) {
      return null;
    }
    return "`" + tableName + "`";
  }

  @Override
  public boolean supportsStagingForExport() {
    return true;
  }

  @Override
  public String[] getColumnNamesForProcedure(String procedureName) {
    List ret = new ArrayList();
    try {
      DatabaseMetaData metaData = this.getConnection().getMetaData();
      ResultSet results = metaData.getProcedureColumns(null, null,
        procedureName, null);
      if (null == results) {
        LOG.debug("Get Procedure Columns returns null");
        return null;
      }

      try {
        while (results.next()) {
          if (results.getInt("COLUMN_TYPE")
          != DatabaseMetaData.procedureColumnReturn) {
            String name = results.getString("COLUMN_NAME");
            ret.add(name);
          }
        }
        String[] result = ret.toArray(new String[ret.size()]);
        LOG.debug("getColumnsNamesForProcedure returns "
          + StringUtils.join(ret, ","));
        return result;
      } finally {
        results.close();
        getConnection().commit();
      }
    } catch (SQLException e) {
      LoggingUtils.logAll(LOG, "Error reading procedure metadata: ", e);
      throw new RuntimeException("Can't fetch column names for procedure.", e);
    }
  }

  @Override
  public Map getColumnTypesForProcedure(String procedureName) {
    Map ret = new TreeMap();
    try {
      DatabaseMetaData metaData = this.getConnection().getMetaData();
      ResultSet results = metaData.getProcedureColumns(null, null,
        procedureName, null);
      if (null == results) {
        LOG.debug("getColumnTypesForProcedure returns null");
        return null;
      }

      try {
        while (results.next()) {
          if (results.getInt("COLUMN_TYPE")
          != DatabaseMetaData.procedureColumnReturn) {
            // we don't care if we get several rows for the
            // same ORDINAL_POSITION (e.g. like H2 gives us)
            // as we'll just overwrite the entry in the map:
            ret.put(
              results.getString("COLUMN_NAME"),
              results.getInt("DATA_TYPE"));
          }
        }

        LOG.debug("Columns returned = " + StringUtils.join(ret.keySet(), ","));
        LOG.debug("Types returned = " + StringUtils.join(ret.values(), ","));

        return ret.isEmpty() ? null : ret;
      } finally {
        if (results != null) {
          results.close();
        }
        getConnection().commit();
      }
    } catch (SQLException sqlException) {
      LoggingUtils.logAll(LOG, "Error reading primary key metadata: "
        + sqlException.toString(), sqlException);
      return null;
    }
  }

  @Override
  public Map
    getColumnTypeNamesForProcedure(String procedureName) {
    Map ret = new TreeMap();
    try {
      DatabaseMetaData metaData = this.getConnection().getMetaData();
      ResultSet results = metaData.getProcedureColumns(null, null,
        procedureName, null);
      if (null == results) {
        LOG.debug("getColumnTypesForProcedure returns null");
        return null;
      }

      try {
        while (results.next()) {
          if (results.getInt("COLUMN_TYPE")
          != DatabaseMetaData.procedureColumnReturn) {
            // we don't care if we get several rows for the
            // same ORDINAL_POSITION (e.g. like H2 gives us)
            // as we'll just overwrite the entry in the map:
            ret.put(
              results.getString("COLUMN_NAME"),
              results.getString("TYPE_NAME"));
          }
        }

        LOG.debug("Columns returned = " + StringUtils.join(ret.keySet(), ","));
        LOG.debug(
          "Type names returned = " + StringUtils.join(ret.values(), ","));

        return ret.isEmpty() ? null : ret;
      } finally {
        if (results != null) {
          results.close();
        }
        getConnection().commit();
      }
    } catch (SQLException sqlException) {
      LoggingUtils.logAll(LOG, "Error reading primary key metadata: "
        + sqlException.toString(), sqlException);
      return null;
    }
  }

  @Override
  protected String getListDatabasesQuery() {
    return "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA";
  }

  @Override
  protected String getSchemaQuery() {
    return "SELECT SCHEMA()";
  }

  private Map colTypeNames;
  private static final int YEAR_TYPE_OVERWRITE = Types.SMALLINT;

  private int overrideSqlType(String tableName, String columnName,
      int sqlType) {

    if (colTypeNames == null) {
      colTypeNames = getColumnTypeNames(tableName, options.getCall(),
        options.getSqlQuery());
    }

    if ("YEAR".equalsIgnoreCase(colTypeNames.get(columnName))) {
      sqlType = YEAR_TYPE_OVERWRITE;
    }
    return sqlType;
  }

  @Override
  public String toJavaType(String tableName, String columnName, int sqlType) {
    sqlType = overrideSqlType(tableName, columnName, sqlType);
    return super.toJavaType(tableName, columnName, sqlType);
  }

  @Override
  public String toHiveType(String tableName, String columnName, int sqlType) {
    sqlType = overrideSqlType(tableName, columnName, sqlType);
    return super.toHiveType(tableName, columnName, sqlType);
  }

  @Override
  public Type toAvroType(String tableName, String columnName, int sqlType) {
    sqlType = overrideSqlType(tableName, columnName, sqlType);
    return super.toAvroType(tableName, columnName, sqlType);
  }
}