/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2018 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.tableoutput;

import org.pentaho.di.core.Const;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseInterface;
import org.pentaho.di.core.exception.KettleDatabaseBatchException;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;

/**
 * Writes rows to a database table.
 *
 * @author Matt Casters
 * @since 6-apr-2003
 */
public class TableOutput extends BaseStep implements StepInterface {
  private static Class<?> PKG = TableOutputMeta.class; // for i18n purposes, needed by Translator2!!

  private TableOutputMeta meta;
  private TableOutputData data;

  public TableOutput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

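  /**
   * Reads a row from the previous step(s) and hands it to {@link #writeToTable(RowMetaInterface, Object[])}.
   * On the first row the output and insert row metadata and the stream-to-database field mapping are
   * resolved; if no rows arrive at all, the target table is still truncated when the step is set up to do so.
   *
   * @return true while rows are being processed, false when the input is exhausted or an error stops the step
   */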
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    meta = (TableOutputMeta) smi;
    data = (TableOutputData) sdi;

    Object[] r = getRow(); // this also waits for a previous step to be finished.
    if ( r == null ) { // no more input to be expected...
      // truncate the table if there are no rows at all coming into this step
      if ( first && meta.truncateTable() ) {
        truncateTable();
      }
      return false;
    }

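    // One-time initialisation on the first row: optionally truncate the target table and resolve the
    // output/insert row metadata and the stream-to-database field mapping.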
    if ( first ) {
      first = false;
      if ( meta.truncateTable() ) {
        truncateTable();
      }
      data.outputRowMeta = getInputRowMeta().clone();
      meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

      if ( !meta.specifyFields() ) {
        // Just take the input row
        data.insertRowMeta = getInputRowMeta().clone();
      } else {

        data.insertRowMeta = new RowMeta();

        //
        // Cache the positions of the selected stream fields in the input row
        //
        data.valuenrs = new int[meta.getFieldDatabase().length];
        for ( int i = 0; i < meta.getFieldDatabase().length; i++ ) {
          data.valuenrs[i] = getInputRowMeta().indexOfValue( meta.getFieldStream()[i] );
          if ( data.valuenrs[i] < 0 ) {
            throw new KettleStepException( BaseMessages.getString(
              PKG, "TableOutput.Exception.FieldRequired", meta.getFieldStream()[i] ) );
          }
        }

        for ( int i = 0; i < meta.getFieldDatabase().length; i++ ) {
          ValueMetaInterface insValue = getInputRowMeta().searchValueMeta( meta.getFieldStream()[i] );
          if ( insValue != null ) {
            ValueMetaInterface insertValue = insValue.clone();
            insertValue.setName( meta.getFieldDatabase()[i] );
            data.insertRowMeta.addValueMeta( insertValue );
          } else {
            throw new KettleStepException( BaseMessages.getString(
              PKG, "TableOutput.Exception.FailedToFindField", meta.getFieldStream()[i] ) );
          }
        }
      }
    }

    try {
      Object[] outputRowData = writeToTable( getInputRowMeta(), r );
      if ( outputRowData != null ) {
        putRow( data.outputRowMeta, outputRowData ); // in case we want it go further...
        incrementLinesOutput();
      }

      if ( checkFeedback( getLinesRead() ) ) {
        if ( log.isBasic() ) {
          logBasic( "linenr " + getLinesRead() );
        }
      }
    } catch ( KettleException e ) {
      logError( "Because of an error, this step can't continue: ", e );
      setErrors( 1 );
      stopAll();
      setOutputDone(); // signal end to receiver(s)
      return false;
    }

    return true;
  }

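  /**
   * Resolves the target table for the given row, binds the row values to a (cached) prepared INSERT
   * statement and executes it, either directly or through JDBC batching. Commits are performed every
   * commitSize rows; generated keys and error rows are handled according to the step settings.
   *
   * @return the row to pass downstream, or null if the row was buffered or sent to error handling
   */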
  protected Object[] writeToTable( RowMetaInterface rowMeta, Object[] r ) throws KettleException {

    if ( r == null ) { // Stop: last line or error encountered
      if ( log.isDetailed() ) {
        logDetailed( "Last line inserted: stop" );
      }
      return null;
    }

    PreparedStatement insertStatement = null;
    Object[] insertRowData;
    Object[] outputRowData = r;

    String tableName = null;

    boolean sendToErrorRow = false;
    String errorMessage = null;
    boolean rowIsSafe = false;
    int[] updateCounts = null;
    List<Exception> exceptionsList = null;
    boolean batchProblem = false;
    Object generatedKey = null;

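    // Work out the target table name: it can come from a field in the row, be derived from the
    // partitioning date (tableName_yyyyMMdd or tableName_yyyyMM), or simply be the configured table name.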
    if ( meta.isTableNameInField() ) {
      // Cache the position of the table name field
      if ( data.indexOfTableNameField < 0 ) {
        String realTablename = environmentSubstitute( meta.getTableNameField() );
        data.indexOfTableNameField = rowMeta.indexOfValue( realTablename );
        if ( data.indexOfTableNameField < 0 ) {
          String message = "Unable to find table name field [" + realTablename + "] in input row";
          logError( message );
          throw new KettleStepException( message );
        }
        if ( !meta.isTableNameInTable() && !meta.specifyFields() ) {
          data.insertRowMeta.removeValueMeta( data.indexOfTableNameField );
        }
      }
      tableName = rowMeta.getString( r, data.indexOfTableNameField );
      if ( !meta.isTableNameInTable() && !meta.specifyFields() ) {
        // If the name of the table should not be inserted itself, remove the table name
        // from the input row data as well. This forcibly creates a copy of r
        //
        insertRowData = RowDataUtil.removeItem( rowMeta.cloneRow( r ), data.indexOfTableNameField );
      } else {
        insertRowData = r;
      }
    } else if ( meta.isPartitioningEnabled()
      && ( meta.isPartitioningDaily() || meta.isPartitioningMonthly() )
      && ( meta.getPartitioningField() != null && meta.getPartitioningField().length() > 0 ) ) {
      // Initialize some stuff!
      if ( data.indexOfPartitioningField < 0 ) {
        data.indexOfPartitioningField =
          rowMeta.indexOfValue( environmentSubstitute( meta.getPartitioningField() ) );
        if ( data.indexOfPartitioningField < 0 ) {
          throw new KettleStepException( "Unable to find field ["
            + meta.getPartitioningField() + "] in the input row!" );
        }

        if ( meta.isPartitioningDaily() ) {
          data.dateFormater = new SimpleDateFormat( "yyyyMMdd" );
        } else {
          data.dateFormater = new SimpleDateFormat( "yyyyMM" );
        }
      }

      ValueMetaInterface partitioningValue = rowMeta.getValueMeta( data.indexOfPartitioningField );
      if ( !partitioningValue.isDate() || r[data.indexOfPartitioningField] == null ) {
        throw new KettleStepException(
          "Sorry, the partitioning field needs to contain a data value and can't be empty!" );
      }

      Object partitioningValueData = rowMeta.getDate( r, data.indexOfPartitioningField );
      tableName =
        environmentSubstitute( meta.getTableName() )
          + "_" + data.dateFormater.format( (Date) partitioningValueData );
      insertRowData = r;
    } else {
      tableName = data.tableName;
      insertRowData = r;
    }

    if ( meta.specifyFields() ) {
      //
      // The values to insert are those in the fields sections
      //
      insertRowData = new Object[data.valuenrs.length];
      for ( int idx = 0; idx < data.valuenrs.length; idx++ ) {
        insertRowData[idx] = r[data.valuenrs[idx]];
      }
    }

    if ( Utils.isEmpty( tableName ) ) {
      throw new KettleStepException( "The tablename is not defined (empty)" );
    }

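    // Prepared INSERT statements are cached per resolved table name, so rows destined for different
    // tables (or partitions) each reuse their own statement.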
    insertStatement = data.preparedStatements.get( tableName );
    if ( insertStatement == null ) {
      String sql =
        data.db
          .getInsertStatement( environmentSubstitute( meta.getSchemaName() ), tableName, data.insertRowMeta );
      if ( log.isDetailed() ) {
        logDetailed( "Prepared statement : " + sql );
      }
      insertStatement = data.db.prepareSQL( sql, meta.isReturningGeneratedKeys() );
      data.preparedStatements.put( tableName, insertStatement );
    }

    try {
      // For PG & GP, we add a savepoint before the row.
      // Then revert to the savepoint afterwards... (not a transaction, so hopefully still fast)
      //
      if ( data.useSafePoints ) {
        data.savepoint = data.db.setSavepoint();
      }
      data.db.setValues( data.insertRowMeta, insertRowData, insertStatement );
      data.db.insertRow( insertStatement, data.batchMode, false ); // false: no commit, it is handled in this step differently
      if ( isRowLevel() ) {
        logRowlevel( "Written row: " + data.insertRowMeta.getString( insertRowData ) );
      }

      // Get a commit counter per prepared statement to keep track of separate tables, etc.
      //
      Integer commitCounter = data.commitCounterMap.get( tableName );
      if ( commitCounter == null ) {
        commitCounter = Integer.valueOf( 1 );
      } else {
        commitCounter++;
      }
      data.commitCounterMap.put( tableName, Integer.valueOf( commitCounter.intValue() ) );

      // Release the savepoint if needed
      //
      if ( data.useSafePoints ) {
        if ( data.releaseSavepoint ) {
          data.db.releaseSavepoint( data.savepoint );
        }
      }

      // Perform a commit if needed
      //

      if ( ( data.commitSize > 0 ) && ( ( commitCounter % data.commitSize ) == 0 ) ) {
        if ( data.db.getUseBatchInsert( data.batchMode ) ) {
          try {
            insertStatement.executeBatch();
            data.db.commit();
            insertStatement.clearBatch();
          } catch ( SQLException ex ) {
            throw Database.createKettleDatabaseBatchException( "Error updating batch", ex );
          } catch ( Exception ex ) {
            throw new KettleDatabaseException( "Unexpected error inserting row", ex );
          }
        } else {
          // insertRow normal commit
          data.db.commit();
        }
        // Clear the batch/commit counter...
        //
        data.commitCounterMap.put( tableName, Integer.valueOf( 0 ) );
        rowIsSafe = true;
      } else {
        rowIsSafe = false;
      }

      // See if we need to get back the keys as well...
      if ( meta.isReturningGeneratedKeys() ) {
        RowMetaAndData extraKeys = data.db.getGeneratedKeys( insertStatement );

        if ( extraKeys.getRowMeta().size() > 0 ) {
          // Send out the good word!
          // Only 1 key at the moment. (should be enough for now :-)
          generatedKey = extraKeys.getRowMeta().getInteger( extraKeys.getData(), 0 );
        } else {
          // we have to throw something here, else we don't know what the
          // type is of the returned key(s) and we would violate our own rule
          // that a hop should always contain rows of the same type.
          throw new KettleStepException( "No generated keys while \"return generated keys\" is active!" );
        }
      }
    } catch ( KettleDatabaseBatchException be ) {
      errorMessage = be.toString();
      batchProblem = true;
      sendToErrorRow = true;
      updateCounts = be.getUpdateCounts();
      exceptionsList = be.getExceptionsList();

      if ( getStepMeta().isDoingErrorHandling() ) {
        data.db.clearBatch( insertStatement );
        data.db.commit( true );
      } else {
        data.db.clearBatch( insertStatement );
        data.db.rollback();
        StringBuilder msg = new StringBuilder( "Error batch inserting rows into table [" + tableName + "]." );
        msg.append( Const.CR );
        msg.append( "Errors encountered (first 10):" ).append( Const.CR );
        for ( int x = 0; x < be.getExceptionsList().size() && x < 10; x++ ) {
          Exception exception = be.getExceptionsList().get( x );
          if ( exception.getMessage() != null ) {
            msg.append( exception.getMessage() ).append( Const.CR );
          }
        }
        throw new KettleException( msg.toString(), be );
      }
    } catch ( KettleDatabaseException dbe ) {
      if ( getStepMeta().isDoingErrorHandling() ) {
        if ( isRowLevel() ) {
          logRowlevel( "Written row to error handling : " + getInputRowMeta().getString( r ) );
        }

        if ( data.useSafePoints ) {
          data.db.rollback( data.savepoint );
          if ( data.releaseSavepoint ) {
            data.db.releaseSavepoint( data.savepoint );
          }
          // data.db.commit(true); // force a commit on the connection too.
        }

        sendToErrorRow = true;
        errorMessage = dbe.toString();
      } else {
        if ( meta.ignoreErrors() ) {
          if ( data.warnings < 20 ) {
            if ( log.isBasic() ) {
              logBasic( "WARNING: Couldn't insert row into table: "
                + rowMeta.getString( r ) + Const.CR + dbe.getMessage() );
            }
          } else if ( data.warnings == 20 ) {
            if ( log.isBasic() ) {
              logBasic( "FINAL WARNING (no more then 20 displayed): Couldn't insert row into table: "
                + rowMeta.getString( r ) + Const.CR + dbe.getMessage() );
            }
          }
          data.warnings++;
        } else {
          setErrors( getErrors() + 1 );
          data.db.rollback();
          throw new KettleException( "Error inserting row into table ["
            + tableName + "] with values: " + rowMeta.getString( r ), dbe );
        }
      }
    }

    // We need to add a key
    if ( generatedKey != null ) {
      outputRowData = RowDataUtil.addValueData( outputRowData, rowMeta.size(), generatedKey );
    }

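    // In batch mode rows are buffered until a commit marks them as safely stored and they can be passed
    // downstream; a failed batch is routed to the error handling path instead.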
    if ( data.batchMode ) {
      if ( sendToErrorRow ) {
        if ( batchProblem ) {
          data.batchBuffer.add( outputRowData );
          outputRowData = null;

          processBatchException( errorMessage, updateCounts, exceptionsList );
        } else {
          // Simply add this row to the error row
          putError( rowMeta, r, 1L, errorMessage, null, "TOP001" );
          outputRowData = null;
        }
      } else {
        data.batchBuffer.add( outputRowData );
        outputRowData = null;

        if ( rowIsSafe ) { // A commit was done and the rows are all safe (no error)
          for ( int i = 0; i < data.batchBuffer.size(); i++ ) {
            Object[] row = data.batchBuffer.get( i );
            putRow( data.outputRowMeta, row );
            incrementLinesOutput();
          }
          // Clear the buffer
          data.batchBuffer.clear();
        }
      }
    } else {
      if ( sendToErrorRow ) {
        putError( rowMeta, r, 1, errorMessage, null, "TOP001" );
        outputRowData = null;
      }
    }

    return outputRowData;
  }

  public boolean isRowLevel() {
    return log.isRowLevel();
  }

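  /**
   * Splits up the rows of a failed batch using the JDBC update counts: rows with a positive update count
   * are passed downstream, the others are sent to the error stream with the matching SQLException message.
   * Without update counts, all buffered rows are treated as error rows. The batch buffer is cleared afterwards.
   */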
  private void processBatchException( String errorMessage, int[] updateCounts, List<Exception> exceptionsList ) throws KettleException {
    // There was an error with the commit
    // We should put all the failing rows out there...
    //
    if ( updateCounts != null ) {
      int errNr = 0;
      for ( int i = 0; i < updateCounts.length; i++ ) {
        Object[] row = data.batchBuffer.get( i );
        if ( updateCounts[i] > 0 ) {
          // the row was inserted successfully, so send it forward
          putRow( data.outputRowMeta, row );
          incrementLinesOutput();
        } else {
          String exMessage = errorMessage;
          if ( errNr < exceptionsList.size() ) {
            SQLException se = (SQLException) exceptionsList.get( errNr );
            errNr++;
            exMessage = se.toString();
          }
          putError( data.outputRowMeta, row, 1L, exMessage, null, "TOP0002" );
        }
      }
    } else {
      // If we don't have update counts, it probably means the DB doesn't support it.
      // In this case we don't have a choice but to consider all inserted rows to be error rows.
      //
      for ( int i = 0; i < data.batchBuffer.size(); i++ ) {
        Object[] row = data.batchBuffer.get( i );
        putError( data.outputRowMeta, row, 1L, errorMessage, null, "TOP0003" );
      }
    }

    // Clear the buffer afterwards...
    data.batchBuffer.clear();
  }

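  /**
   * Opens the database connection and works out the commit size, batch mode and savepoint usage for this
   * step copy. Batch mode is disabled for an unlimited commit size, when generated keys are returned, when
   * the transformation uses a single (unique) connection, and when savepoints are used for error handling.
   *
   * @return true if the step initialised correctly, false otherwise
   */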
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (TableOutputMeta) smi;
    data = (TableOutputData) sdi;

    if ( super.init( smi, sdi ) ) {
      try {
        data.commitSize = Integer.parseInt( environmentSubstitute( meta.getCommitSize() ) );

        data.databaseMeta = meta.getDatabaseMeta();
        DatabaseInterface dbInterface = data.databaseMeta.getDatabaseInterface();

        // Batch updates are not supported on PostgreSQL (and look-alikes)
        // together with error handling (PDI-366).
        // For these situations we can use savepoints to help out.
        //
        data.useSafePoints =
          data.databaseMeta.getDatabaseInterface().useSafePoints() && getStepMeta().isDoingErrorHandling();

        // Get the boolean that indicates whether or not we can/should release
        // savepoints during data load.
        //
        data.releaseSavepoint = dbInterface.releaseSavepoint();

        // Disable batch mode in case
        // - we use an unlimited commit size
        // - if we need to pick up auto-generated keys
        // - if you are running the transformation as a single database transaction (unique connections)
        // - if we are reverting to save-points
        //
        data.batchMode =
          meta.useBatchUpdate()
            && data.commitSize > 0 && !meta.isReturningGeneratedKeys()
            && !getTransMeta().isUsingUniqueConnections() && !data.useSafePoints;

        // Per PDI-6211 : give a warning that batch mode operation in combination with step error handling can lead to
        // incorrectly processed rows.
        //
        if ( getStepMeta().isDoingErrorHandling() && !dbInterface.supportsErrorHandlingOnBatchUpdates() ) {
          log.logMinimal( BaseMessages.getString(
            PKG, "TableOutput.Warning.ErrorHandlingIsNotFullySupportedWithBatchProcessing" ) );
        }

        if ( meta.getDatabaseMeta() == null ) {
          throw new KettleException( BaseMessages.getString(
            PKG, "TableOutput.Exception.DatabaseNeedsToBeSelected" ) );
        }

        if ( !dbInterface.supportsStandardTableOutput() ) {
          throw new KettleException( dbInterface.getUnsupportedTableOutputMessage() );
        }

        data.db = new Database( this, meta.getDatabaseMeta() );
        data.db.shareVariablesWith( this );

        if ( getTransMeta().isUsingUniqueConnections() ) {
          synchronized ( getTrans() ) {
            data.db.connect( getTrans().getTransactionId(), getPartitionID() );
          }
        } else {
          data.db.connect( getPartitionID() );
        }

        if ( log.isBasic() ) {
          logBasic( "Connected to database [" + meta.getDatabaseMeta() + "] (commit=" + data.commitSize + ")" );
        }

        // Postpone commit as long as possible. PDI-2091
        //
        if ( data.commitSize == 0 ) {
          data.commitSize = Integer.MAX_VALUE;
        }
        data.db.setCommit( data.commitSize );

        if ( !meta.isPartitioningEnabled() && !meta.isTableNameInField() ) {
          data.tableName = environmentSubstitute( meta.getTableName() );
        }

        return true;
      } catch ( KettleException e ) {
        logError( "An error occurred intialising this step: " + e.getMessage() );
        stopAll();
        setErrors( 1 );
      }
    }
    return false;
  }

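  /**
   * Truncates the target table when truncation is enabled and the table name is fixed (not taken from a
   * field and not date-partitioned). Only the first step copy on the first slave truncates, unless the
   * copy runs against its own database partition.
   */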
  void truncateTable() throws KettleDatabaseException {
    if ( !meta.isPartitioningEnabled() && !meta.isTableNameInField() ) {
      // Only the first one truncates in a non-partitioned step copy
      //
      if ( meta.truncateTable()
        && ( ( getCopy() == 0 && getUniqueStepNrAcrossSlaves() == 0 ) || !Utils.isEmpty( getPartitionID() ) ) ) {
        data.db.truncateTable( environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta
          .getTableName() ) );

      }
    }
  }

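  /**
   * Flushes and commits any outstanding (batched) inserts, passes the remaining buffered rows downstream,
   * rolls back if errors were registered, and disconnects from the database.
   */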
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (TableOutputMeta) smi;
    data = (TableOutputData) sdi;

    if ( data.db != null ) {
      try {
        for ( String schemaTable : data.preparedStatements.keySet() ) {
          // Get a commit counter per prepared statement to keep track of separate tables, etc.
          //
          Integer batchCounter = data.commitCounterMap.get( schemaTable );
          if ( batchCounter == null ) {
            batchCounter = 0;
          }

          PreparedStatement insertStatement = data.preparedStatements.get( schemaTable );

          data.db.emptyAndCommit( insertStatement, data.batchMode, batchCounter );
        }
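        // The remaining rows in the batch buffer were committed by emptyAndCommit() above,
        // so they can now be passed downstream.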
        for ( int i = 0; i < data.batchBuffer.size(); i++ ) {
          Object[] row = data.batchBuffer.get( i );
          putRow( data.outputRowMeta, row );
          incrementLinesOutput();
        }
        // Clear the buffer
        data.batchBuffer.clear();
      } catch ( KettleDatabaseBatchException be ) {
        if ( getStepMeta().isDoingErrorHandling() ) {
          // Right at the end of the step we hit a batch commit problem...
          // OK, we have the numbers...
          try {
            processBatchException( be.toString(), be.getUpdateCounts(), be.getExceptionsList() );
          } catch ( KettleException e ) {
            logError( "Unexpected error processing batch error", e );
            setErrors( 1 );
            stopAll();
          }
        } else {
          logError( "Unexpected batch update error committing the database connection.", be );
          setErrors( 1 );
          stopAll();
        }
      } catch ( Exception dbe ) {
        logError( "Unexpected error committing the database connection.", dbe );
        logError( Const.getStackTracker( dbe ) );
        setErrors( 1 );
        stopAll();
      } finally {
        setOutputDone();

        if ( getErrors() > 0 ) {
          try {
            data.db.rollback();
          } catch ( KettleDatabaseException e ) {
            logError( "Unexpected error rolling back the database connection.", e );
          }
        }

        data.db.disconnect();
      }
      super.dispose( smi, sdi );
    }
  }

  /**
   * Allows subclasses of TableOutput to get hold of the step meta.
   *
   * @return the step metadata
   */
  protected TableOutputMeta getMeta() {
    return meta;
  }

  /**
   * Allows subclasses of TableOutput to get hold of the data object
   *
   * @return the step data object
   */
  protected TableOutputData getData() {
    return data;
  }

  protected void setMeta( TableOutputMeta meta ) {
    this.meta = meta;
  }

  protected void setData( TableOutputData data ) {
    this.data = data;
  }
}