All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.snowflake.client.loader.ProcessQueue Maven / Gradle / Ivy

There is a newer version: 3.21.0
Show newest version
/*
 * Copyright (c) 2012-2019 Snowflake Computing Inc. All rights reserved.
 */

package net.snowflake.client.loader;

import static java.lang.Math.toIntExact;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.LinkedList;
import java.util.List;
import net.snowflake.client.log.SFLogger;
import net.snowflake.client.log.SFLoggerFactory;

/**
 * This class is responsible for processing a collection of uploaded data files represented by
 * BufferStage class
 */
public class ProcessQueue implements Runnable {
  private static final SFLogger LOGGER = SFLoggerFactory.getLogger(ProcessQueue.class);

  private final Thread _thread;

  private final StreamLoader _loader;

  public ProcessQueue(StreamLoader loader) {
    LOGGER.debug("", false);

    _loader = loader;
    _thread = new Thread(this);
    _thread.setName("ProcessQueueThread");
    _thread.start();
  }

  @Override
  public void run() {

    while (true) {

      BufferStage stage = null;

      Connection conn = _loader.getProcessConnection();
      State currentState = State.INITIALIZE;
      String currentCommand = null;
      try {
        stage = _loader.takeProcess();

        if (stage.getRowCount() == 0) {
          // Nothing was written to that stage
          if (stage.isTerminate()) {
            break;
          } else {
            continue;
          }
        }

        // Place where the files are.
        // No double quote is added _loader.getRemoteStage(), since
        // it is mostly likely to be "~". If not, we may need to double quote
        // them.
        String remoteStage = "@" + _loader.getRemoteStage() + "/" + stage.getRemoteLocation();
        // process uploaded files
        // Loader.abort() and finish() are also synchronized on this
        synchronized (_loader) {
          String updateKeys = getOn(_loader.getKeys(), "T", "S");
          if (stage.getOp() != Operation.INSERT && updateKeys.isEmpty()) {
            _loader.abort(new RuntimeException("No update key column is specified for the job."));
          }

          if (_loader.isAborted()) {
            if (!_loader._preserveStageFile) {
              currentCommand = "RM '" + remoteStage + "'";
              LOGGER.debug(currentCommand, true);
              conn.createStatement().execute(currentCommand);
            } else {
              LOGGER.debug(
                  "Error occurred. The remote stage is preserved for "
                      + "further investigation: {}",
                  remoteStage);
            }
            if (stage.isTerminate()) {
              break;
            } else {
              continue;
            }
            // Do not do anything to this stage.
            // Everything was rolled back upon abort() call
          }
          // Create a temporary table to hold all uploaded data

          long loaded = 0;
          long parsed = 0;
          int errorCount = 0;
          String lastErrorRow = "";

          // Create temp table to load data (may have a subset of columns)
          LOGGER.debug("Creating Temporary Table: name={}", stage.getId());
          currentState = State.CREATE_TEMP_TABLE;
          List allColumns = getAllColumns(conn);

          // use like to make sure columns in temporary table
          // contains properties (e.g., NOT NULL) from the source table
          currentCommand =
              "CREATE TEMPORARY TABLE \"" + stage.getId() + "\" LIKE " + _loader.getFullTableName();
          List selectedColumns = _loader.getColumns();
          conn.createStatement().execute(currentCommand);

          // In case clustering key exists, drop it from the temporary table so that unused
          // columns can be dropped from the table without errors.
          String dropClusteringKey = "alter table \"" + stage.getId() + "\" drop clustering key";
          conn.createStatement().execute(dropClusteringKey);

          // the temp table can contain only a subset of columns
          // so remove unselected columns
          for (String col : allColumns) {
            if (!selectedColumns.contains(col)) {
              String dropUnSelectedColumn =
                  "alter table \"" + stage.getId() + "\" drop column \"" + col + "\"";
              conn.createStatement().execute(dropUnSelectedColumn);
            }
          }

          // Load data there
          LOGGER.debug(
              "COPY data in the stage to table:" + " stage={}," + " name={}",
              remoteStage,
              stage.getId());
          currentState = State.COPY_INTO_TABLE;
          currentCommand =
              "COPY INTO \""
                  + stage.getId()
                  + "\" FROM '"
                  + remoteStage
                  + "' on_error='"
                  + _loader._onError
                  + "'"
                  + " file_format=("
                  + " field_optionally_enclosed_by='\"'"
                  + " empty_field_as_null="
                  + Boolean.toString(!_loader._copyEmptyFieldAsEmpty)
                  + ")";
          ResultSet rs = conn.createStatement().executeQuery(currentCommand);

          while (rs.next()) {
            // Get the number of rows actually loaded
            loaded += rs.getLong("rows_loaded");
            // Get the number of rows parsed
            parsed += rs.getLong("rows_parsed");
          }

          int errorRecordCount = toIntExact(parsed - loaded);
          LOGGER.debug(
              "errorRecordCount=[{}]," + " parsed=[{}]," + " loaded=[{}]",
              errorRecordCount,
              parsed,
              loaded);

          LoadResultListener listener = _loader.getListener();
          listener.addErrorRecordCount(errorRecordCount);

          if (loaded == stage.getRowCount()) {
            // successfully loaded everything
            LOGGER.debug(
                "COPY command successfully finished:" + " stage={}," + " name={}",
                remoteStage,
                stage.getId());
            listener.addErrorCount(0);
          } else {
            LOGGER.debug(
                "Found errors in COPY command:" + " stage={}," + " name={}",
                remoteStage,
                stage.getId());
            if (listener.needErrors()) {
              currentState = State.COPY_INTO_TABLE_ERROR;
              currentCommand =
                  "COPY INTO \""
                      + stage.getId()
                      + "\" FROM '"
                      + remoteStage
                      + "' validation_mode='return_all_errors'"
                      + " file_format=("
                      + "field_optionally_enclosed_by='\"'"
                      + "empty_field_as_null="
                      + Boolean.toString(!_loader._copyEmptyFieldAsEmpty)
                      + ")";
              ResultSet errorsSet = conn.createStatement().executeQuery(currentCommand);

              Loader.DataError dataError = null;

              while (errorsSet.next()) {
                errorCount++;
                String rn = errorsSet.getString(LoadingError.ErrorProperty.ROW_NUMBER.name());
                if (rn != null && !lastErrorRow.equals(rn)) {
                  // de-duping records with multiple errors
                  lastErrorRow = rn;
                }
                LoadingError loadError = new LoadingError(errorsSet, stage, _loader);

                listener.addError(loadError);
                if (dataError == null) {
                  dataError = loadError.getException();
                }
              }
              LOGGER.debug("errorCount: {}", errorCount);

              listener.addErrorCount(errorCount);
              if (listener.throwOnError()) {
                // stop operation and raise the error
                _loader.abort(dataError);

                if (!_loader._preserveStageFile) {
                  LOGGER.debug("RM: {}", remoteStage);
                  conn.createStatement().execute("RM '" + remoteStage + "'");
                } else {
                  LOGGER.error(
                      "Error occurred. The remote stage is preserved for "
                          + "further investigation: {}",
                      remoteStage);
                }
                if (stage.isTerminate()) {
                  break;
                } else {
                  continue;
                }
              }
            }
          }

          stage.setState(BufferStage.State.VALIDATED);

          // Generate set and values statement
          StringBuilder setStatement = null;
          StringBuilder valueStatement = null;
          if (stage.getOp() != Operation.INSERT && stage.getOp() != Operation.DELETE) {

            setStatement = new StringBuilder(" ");
            valueStatement = new StringBuilder("(");

            for (int c = 0; c < _loader.getColumns().size(); ++c) {
              String column = _loader.getColumns().get(c);
              if (c > 0) {
                setStatement.append(", ");
                valueStatement.append(" , ");
              }
              setStatement
                  .append("T.\"")
                  .append(column)
                  .append("\"=")
                  .append("S.\"")
                  .append(column)
                  .append("\"");
              valueStatement.append("S.\"").append(column).append("\"");
            }
            valueStatement.append(")");
          }

          // generate statement for processing
          currentState = State.INGEST_DATA;
          String loadStatement;
          switch (stage.getOp()) {
            case INSERT:
              {
                loadStatement =
                    "INSERT INTO "
                        + _loader.getFullTableName()
                        + "("
                        + _loader.getColumnsAsString()
                        + ")"
                        + " SELECT * FROM \""
                        + stage.getId()
                        + "\"";
                break;
              }
            case DELETE:
              {
                loadStatement =
                    "DELETE FROM "
                        + _loader.getFullTableName()
                        + " T USING \""
                        + stage.getId()
                        + "\" AS S WHERE "
                        + updateKeys;
                break;
              }
            case MODIFY:
              {
                loadStatement =
                    "MERGE INTO "
                        + _loader.getFullTableName()
                        + " T USING \""
                        + stage.getId()
                        + "\" AS S ON "
                        + updateKeys
                        + " WHEN MATCHED THEN UPDATE SET "
                        + setStatement;
                break;
              }
            case UPSERT:
              {
                loadStatement =
                    "MERGE INTO "
                        + _loader.getFullTableName()
                        + " T USING \""
                        + stage.getId()
                        + "\" AS S ON "
                        + updateKeys
                        + " WHEN MATCHED THEN UPDATE SET "
                        + setStatement
                        + " WHEN NOT MATCHED THEN INSERT("
                        + _loader.getColumnsAsString()
                        + ") VALUES"
                        + valueStatement;
                break;
              }
            default:
              loadStatement = "";
          }
          currentCommand = loadStatement;

          LOGGER.debug("Load Statement: {}", loadStatement);
          Statement s = conn.createStatement();
          s.execute(loadStatement);

          stage.setState(BufferStage.State.PROCESSED);
          currentState = State.FINISH;
          currentCommand = null;
          switch (stage.getOp()) {
            case INSERT:
            case UPSERT:
              {
                _loader.getListener().addProcessedRecordCount(stage.getOp(), stage.getRowCount());

                _loader.getListener().addOperationRecordCount(stage.getOp(), s.getUpdateCount());
                break;
              }
            case DELETE:
            case MODIFY:
              {
                // the number of successful DELETE is the number
                // of processed rows and not the number of given
                // rows.
                _loader.getListener().addProcessedRecordCount(stage.getOp(), s.getUpdateCount());

                _loader.getListener().addOperationRecordCount(stage.getOp(), s.getUpdateCount());
                break;
              }
          }

          // delete stage file if all success
          conn.createStatement().execute("RM '" + remoteStage + "'");

          if (stage.isTerminate()) {
            break;
          }
        }
      } catch (InterruptedException ex) {
        LOGGER.error("Interrupted", ex);
        break;
      } catch (Exception ex) {
        String msg =
            String.format("State: %s, %s, %s", currentState, currentCommand, ex.getMessage());
        _loader.abort(new Loader.ConnectionError(msg, Utils.getCause(ex)));
        LOGGER.error(msg, true);
        if (stage == null || stage.isTerminate()) {
          break;
        }
      }
    }
  }

  private List getAllColumns(final Connection conn) throws SQLException {
    List columns = new LinkedList<>();
    ResultSet result =
        conn.createStatement()
            .executeQuery("show " + "columns" + " in " + _loader.getFullTableName());
    while (result.next()) {
      String col = result.getString("column_name");
      columns.add(col);
    }
    return columns;
  }

  private String getOn(List keys, String L, String R) {
    if (keys == null) {
      return "";
    }
    // L and R don't need to be quoted.
    StringBuilder sb = keys.size() > 1 ? new StringBuilder(64) : new StringBuilder();
    for (int i = 0; i < keys.size(); i++) {
      if (i > 0) {
        sb.append("AND ");
      }
      sb.append(L);
      sb.append(".\"");
      sb.append(keys.get(i));
      sb.append("\" = ");
      sb.append(R);
      sb.append(".\"");
      sb.append(keys.get(i));
      sb.append("\" ");
    }
    return sb.toString();
  }

  public void join() {
    LOGGER.debug("", false);
    try {
      _thread.join(0);
    } catch (InterruptedException ex) {
      LOGGER.debug("Exception: ", ex);
    }
  }

  private enum State {
    INITIALIZE,
    CREATE_TEMP_TABLE,
    COPY_INTO_TABLE,
    COPY_INTO_TABLE_ERROR,
    INGEST_DATA,
    FINISH
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy