All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pentaho.di.trans.steps.orabulkloader.OraBulkLoader Maven / Gradle / Ivy

The newest version!
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.orabulkloader;

//
// The "designer" notes of the Oracle bulkloader:
// ----------------------------------------------
//
// - "Enclosed" is used in the loader instead of "optionally enclosed", as optional
//   enclosing kind of destroys the escaping.
// - A Boolean is output as Y and N (as in the text output step e.g.). If people don't
//   like this they can first convert the boolean value to something else before loading
//   it.
// - Filters (besides date and datetime) are not supported, as they slow down the load.
//
//

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.vfs2.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.encryption.Encr;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

/**
 * Performs a bulk load to an oracle table.
 *
 * @author Sven Boden
 * @since 20-feb-2007
 */
public class OraBulkLoader extends BaseStep implements StepInterface {
  // Class handed to BaseMessages.getString( PKG, ... ) when looking up translated messages.
  private static Class PKG = OraBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!!

  /** sqlldr exit value that {@link #checkExitVal(int)} accepts as success. */
  public static final int EX_SUCC = 0;

  /** sqlldr exit value that {@link #checkExitVal(int)} treats as a warning rather than an error. */
  public static final int EX_WARN = 2;

  // The sqlldr process started by execute(); reset to null once its exit value has been collected.
  Process sqlldrProcess = null;

  // Step metadata and step data, re-assigned from the arguments of init(), processRow() and dispose().
  private OraBulkLoaderMeta meta;
  protected OraBulkLoaderData data;
  // Writer for the row data; created on the first row in processRow() and closed at end of input or in dispose().
  private OraBulkDataOutput output = null;

  /*
   * Local copy of the transformation "preview" property. We only forward the rows upon previewing, we don't do any of
   * the real stuff.
   */
  private boolean preview = false;

  //
  // This class continually reads from the stream, and sends it to the log
  // if the logging level is at least basic level.
  //
  private final class StreamLogger extends Thread {
    private InputStream input;
    private String type;

    StreamLogger( InputStream is, String type ) {
      this.input = is;
      this.type = type + ">";
    }

    public void run() {
      try {
        final BufferedReader br = new BufferedReader( new InputStreamReader( input ) );
        String line;
        while ( ( line = br.readLine() ) != null ) {
          // Only perform the concatenation if at basic level. Otherwise,
          // this just reads from the stream.
          if ( log.isBasic() ) {
            logBasic( type + line );
          }
        }
      } catch ( IOException ioe ) {
        ioe.printStackTrace();
      }

    }

  }

  /**
   * Creates the step instance; all arguments are passed unchanged to the superclass constructor.
   */
  public OraBulkLoader( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * Resolves variables in the given record terminator and expands its backslash escapes: {@code \n} becomes a
   * line feed, {@code \r} a carriage return, and any other escaped character stands for itself. A backslash that
   * is the very last character is dropped.
   *
   * @param terminator
   *          the record terminator as configured (may contain variables and escapes)
   * @return the terminator with variables and escapes resolved
   */
  private String substituteRecordTerminator( String terminator ) {
    final String resolved = environmentSubstitute( terminator );
    final StringBuilder expanded = new StringBuilder();

    boolean pendingEscape = false;
    for ( final char ch : resolved.toCharArray() ) {
      if ( !pendingEscape ) {
        if ( ch == '\\' ) {
          // Remember the backslash; the next character decides what is emitted.
          pendingEscape = true;
        } else {
          expanded.append( ch );
        }
        continue;
      }

      // This character directly follows a backslash.
      if ( ch == 'n' ) {
        expanded.append( '\n' );
      } else if ( ch == 'r' ) {
        expanded.append( '\r' );
      } else {
        expanded.append( ch );
      }
      pendingEscape = false;
    }

    return expanded.toString();
  }

  /**
   * Converts the record terminator to the hexadecimal notation used in the control file's
   * {@code STR x'...'} clause. Variables and backslash escapes are resolved first, then the terminator is
   * converted to bytes and every byte is written as exactly two lowercase hex digits.
   *
   * @param terminator
   *          the record terminator as configured
   * @param encoding
   *          character set of the data file; when empty the platform default character set is used
   * @return the terminator bytes as a hex string, two digits per byte
   * @throws KettleException
   *           when the given encoding is not supported
   */
  private String encodeRecordTerminator( String terminator, String encoding ) throws KettleException {
    final String in = substituteRecordTerminator( terminator );
    final StringBuilder out = new StringBuilder();
    byte[] bytes;

    try {
      // use terminator in hex representation due to character set
      // terminator in hex representation must be in character set
      // of data file
      if ( Utils.isEmpty( encoding ) ) {
        bytes = in.getBytes();
      } else {
        bytes = in.getBytes( encoding );
      }
      for ( byte aByte : bytes ) {
        // Mask to the unsigned value: Integer.toHexString() on a negative byte would sign-extend it
        // to eight digits ("ffffffXX") and corrupt the hex literal for any byte >= 0x80.
        final int unsigned = aByte & 0xFF;

        if ( unsigned < 0x10 ) {
          out.append( '0' );
        }
        out.append( Integer.toHexString( unsigned ) );
      }
    } catch ( UnsupportedEncodingException e ) {
      throw new KettleException( "Unsupported character encoding: " + encoding, e );
    }

    return out.toString();
  }

  /**
   * Builds the text of the sqlldr control file from the step settings: the OPTIONS clause, the optional
   * CHARACTERSET and INFILE clauses, the target table with its load action, and one column entry per mapped
   * field.
   *
   * @param meta
   *          the meta object to model the control file after
   * @param rm
   *          row metadata in which the mapped stream fields are looked up
   * @param r
   *          a row of data; not used when building the contents
   *
   * @return a string containing the control file contents
   * @throws KettleException
   *           when no fields are mapped, when a mapped stream field is missing from {@code rm}, or when the
   *           data file or record terminator cannot be resolved
   */
  public String getControlFileContents( OraBulkLoaderMeta meta, RowMetaInterface rm, Object[] r ) throws KettleException {
    final DatabaseMeta dbMeta = meta.getDatabaseMeta();
    final String quotedDataFile = "'" + getFilename( getFileObject( meta.getDataFile(), getTransMeta() ) ) + "'";
    final String loadAction = meta.getLoadAction();

    final StringBuilder ctl = new StringBuilder( 500 );

    // --- OPTIONS clause ---
    ctl.append( "OPTIONS(" ).append( Const.CR );
    ctl.append( "  ERRORS=\'" ).append( meta.getMaxErrors() ).append( "\'" ).append( Const.CR );

    // ROWS is not supported by sqlldr in parallel mode, i.e. direct path with more than one step copy.
    if ( meta.getCommitSizeAsInt( this ) != 0 && ( !meta.isDirectPath() || getStepMeta().getCopies() <= 1 ) ) {
      ctl.append( "  , ROWS=\'" ).append( meta.getCommitSize() ).append( "\'" ).append( Const.CR );
    }
    if ( meta.getBindSizeAsInt( this ) != 0 ) {
      ctl.append( "  , BINDSIZE=\'" ).append( meta.getBindSize() ).append( "\'" ).append( Const.CR );
    }
    if ( meta.getReadSizeAsInt( this ) != 0 ) {
      ctl.append( "  , READSIZE=\'" ).append( meta.getReadSize() ).append( "\'" ).append( Const.CR );
    }
    ctl.append( ")" ).append( Const.CR );

    // --- LOAD DATA header ---
    ctl.append( "LOAD DATA" ).append( Const.CR );
    final String characterSet = meta.getCharacterSetName();
    if ( !Utils.isEmpty( characterSet ) ) {
      ctl.append( "CHARACTERSET " ).append( characterSet ).append( Const.CR );
    }

    // --- INFILE clause ---
    final boolean concurrent = OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() );
    final String altRecordTerm = meta.getAltRecordTerm();
    final boolean hasAltRecordTerm = !Utils.isEmpty( altRecordTerm );
    if ( !concurrent || hasAltRecordTerm ) {
      // For concurrent input, data command line argument must be specified
      ctl.append( "INFILE " ).append( concurrent ? "''" : quotedDataFile );
      if ( hasAltRecordTerm ) {
        final String hexTerminator = encodeRecordTerminator( altRecordTerm, meta.getEncoding() );
        ctl.append( " \"STR x'" ).append( hexTerminator ).append( "'\"" );
      }
      ctl.append( Const.CR );
    }

    // --- Target table, load action and field format ---
    final String schemaTable = dbMeta.getQuotedSchemaTableCombination(
      environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta.getTableName() ) );
    ctl.append( "INTO TABLE " ).append( schemaTable ).append( Const.CR );
    ctl.append( loadAction ).append( Const.CR );
    ctl.append( "FIELDS TERMINATED BY ',' ENCLOSED BY '\"'" ).append( Const.CR );
    ctl.append( "(" );

    // --- Column list ---
    final String[] streamFields = meta.getFieldStream();
    final String[] tableFields = meta.getFieldTable();
    final String[] dateMask = meta.getDateMask();

    if ( streamFields == null || streamFields.length == 0 ) {
      throw new KettleException( "No fields defined to load to database" );
    }

    for ( int idx = 0; idx < streamFields.length; idx++ ) {
      if ( idx > 0 ) {
        ctl.append( ", " ).append( Const.CR );
      }
      ctl.append( dbMeta.quoteField( tableFields[idx] ) );

      final int streamPos = rm.indexOfValue( streamFields[idx] );
      if ( streamPos < 0 ) {
        throw new KettleException( "Could not find field " + streamFields[idx] + " in stream" );
      }

      final ValueMetaInterface valueMeta = rm.getValueMeta( streamPos );
      final int type = valueMeta.getType();
      if ( type == ValueMetaInterface.TYPE_STRING ) {
        final int length = valueMeta.getLength();
        ctl.append( " CHAR" );
        if ( length > 255 ) {
          ctl.append( "(" ).append( length ).append( ")" );
        }
      } else if ( type == ValueMetaInterface.TYPE_DATE ) {
        ctl.append( dateClause( dateMask[idx] ) );
      } else if ( type == ValueMetaInterface.TYPE_BINARY ) {
        ctl.append( " ENCLOSED BY '' AND ''" );
      } else if ( type == ValueMetaInterface.TYPE_TIMESTAMP ) {
        ctl.append( " TIMESTAMP 'yyyy-mm-dd hh24:mi:ss.ff'" );
      }
      // Integer, number, big number and every other type get no datatype clause.
    }
    ctl.append( ")" );

    return ctl.toString();
  }

  /**
   * Picks the control file datatype clause for a date field: a TIMESTAMP clause for the "datetime" mask, and a
   * DATE clause for the "date" mask or when no known mask is given.
   */
  private static String dateClause( String mask ) {
    if ( !OraBulkLoaderMeta.DATE_MASK_DATE.equals( mask ) && OraBulkLoaderMeta.DATE_MASK_DATETIME.equals( mask ) ) {
      return " TIMESTAMP 'yyyy-mm-dd hh24:mi:ss.ff'";
    }
    // If not specified the default is date.
    return " DATE 'yyyy-mm-dd'";
  }

  /**
   * Create a control file. Missing parent directories are created first, then the contents returned by
   * {@link #getControlFileContents(OraBulkLoaderMeta, RowMetaInterface, Object[])} are written to the file.
   *
   * @param filename path to control file
   * @param row a row of data, handed on to the control file content generation
   * @param meta step meta
   * @throws KettleException
   *           when the file cannot be created or written, or when the contents cannot be generated
   */
  public void createControlFile( String filename, Object[] row, OraBulkLoaderMeta meta ) throws KettleException {
    FileWriter fw = null;

    try {
      File controlFile = new File( getFileObject( filename, getTransMeta() ).getURL().getFile() );
      // Need to ensure that the parent directory they set exists for the control file.
      // A path without a directory component has no parent; there is nothing to create in that case.
      File parentDir = controlFile.getParentFile();
      if ( parentDir != null ) {
        parentDir.mkdirs();
      }
      controlFile.createNewFile();
      fw = new FileWriter( controlFile );
      fw.write( getControlFileContents( meta, getInputRowMeta(), row ) );
      // Flush inside the guarded section so a failed write surfaces as a KettleException
      // instead of being swallowed by the best-effort close below.
      fw.flush();
    } catch ( IOException ex ) {
      throw new KettleException( ex.getMessage(), ex );
    } finally {
      try {
        if ( fw != null ) {
          fw.close();
        }
      } catch ( Exception ignored ) {
        // Contents were already flushed (or the failure was already reported); closing is best effort.
      }
    }
  }

  /**
   * Assembles the sqlldr command line from the step settings: the sqlldr executable, the control file, the
   * optional log, bad and discard files, the {@code userid} connect string and the direct path / parallel
   * switches.
   *
   * @param meta
   *          The meta data to create the command line from
   * @param password
   *          {@code true} to put the real password into the command line, {@code false} to put
   *          {@code ******} in its place
   *
   * @return The string to execute.
   *
   * @throws KettleException
   *           when the sqlldr executable, the control file or the connection is not specified, or when one of
   *           the file names cannot be resolved
   */
  public String createCommandLine( OraBulkLoaderMeta meta, boolean password ) throws KettleException {
    final StringBuilder cmd = new StringBuilder( 300 );

    // The sqlldr executable itself.
    if ( meta.getSqlldr() == null ) {
      throw new KettleException( "No sqlldr application specified" );
    }
    try {
      cmd.append( getFilename( getFileObject( meta.getSqlldr(), getTransMeta() ) ) );
    } catch ( KettleFileException ex ) {
      throw new KettleException( "Error retrieving sqlldr string", ex );
    }

    // The control file is mandatory.
    if ( meta.getControlFile() == null ) {
      throw new KettleException( "No control file specified" );
    }
    appendQuotedFile( cmd, " control=\'", meta.getControlFile(), "Error retrieving controlfile string" );

    if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
      cmd.append( " data=\'-\'" );
    }

    // Log, bad and discard files are only added when configured.
    if ( meta.getLogFile() != null ) {
      appendQuotedFile( cmd, " log=\'", meta.getLogFile(), "Error retrieving logfile string" );
    }
    if ( meta.getBadFile() != null ) {
      appendQuotedFile( cmd, " bad=\'", meta.getBadFile(), "Error retrieving badfile string" );
    }
    if ( meta.getDiscardFile() != null ) {
      appendQuotedFile( cmd, " discard=\'", meta.getDiscardFile(), "Error retrieving discardfile string" );
    }

    // Connect string: userid=<user>/<password>@<database>
    final DatabaseMeta dbMeta = meta.getDatabaseMeta();
    if ( dbMeta == null ) {
      throw new KettleException( "No connection specified" );
    }
    final String user = Const.NVL( dbMeta.getUsername(), "" );
    final String realPass =
      Const.NVL( Encr.decryptPasswordOptionallyEncrypted( environmentSubstitute( dbMeta.getPassword() ) ), "" );
    final String shownPass = password ? realPass : "******";
    final String dns = Const.NVL( dbMeta.getDatabaseName(), "" );
    cmd.append( " userid=" ).append( environmentSubstitute( user ) );
    cmd.append( "/" ).append( environmentSubstitute( shownPass ) );
    cmd.append( "@" );

    // if the database name override is filled in, do that one.
    final String overrideName = meta.getDbNameOverride();
    final String databaseName = Utils.isEmpty( Const.rtrim( overrideName ) ) ? dns : overrideName;
    cmd.append( environmentSubstitute( databaseName ) );

    if ( meta.isDirectPath() ) {
      cmd.append( " DIRECT=TRUE" );

      if ( getStepMeta().getCopies() > 1 || meta.isParallel() ) {
        cmd.append( " PARALLEL=TRUE" );
      }
    }

    return cmd.toString();
  }

  /**
   * Appends one file argument to the command line: the given prefix, the resolved file name and a closing
   * single quote. A failure to resolve the file is reported with the given error message.
   */
  private void appendQuotedFile( StringBuilder cmd, String prefix, String vfsFilename, String errorMessage )
    throws KettleException {
    try {
      final FileObject fileObject = getFileObject( vfsFilename, getTransMeta() );

      cmd.append( prefix );
      cmd.append( getFilename( fileObject ) );
      cmd.append( "\'" );
    } catch ( KettleFileException ex ) {
      throw new KettleException( errorMessage, ex );
    }
  }

  /**
   * Evaluates the exit value of sqlldr against the step settings.
   *
   * @param exitVal
   *          the exit value returned by the sqlldr process
   * @throws KettleException
   *           when the exit value is {@link #EX_WARN} and the step is set to fail on warnings, or when it is
   *           any other non-success value and the step is set to fail on errors
   */
  public void checkExitVal( int exitVal ) throws KettleException {
    switch ( exitVal ) {
      case EX_SUCC:
        return;

      case EX_WARN:
        if ( meta.isFailOnWarning() ) {
          throw new KettleException( "sqlldr returned warning" );
        }
        return;

      default:
        if ( meta.isFailOnError() ) {
          throw new KettleException( "sqlldr returned an error (exit code " + exitVal + ")" );
        }
    }
  }

  /**
   * Starts sqlldr with the command line built by {@link #createCommandLine(OraBulkLoaderMeta, boolean)} and
   * starts two logger threads that drain its error and output streams.
   *
   * @param meta
   *          the step settings to build the command line from
   * @param wait
   *          {@code true} to wait for sqlldr to finish and check its exit value; {@code false} to return right
   *          after the process has been started
   * @return always {@code true}; failures are reported through the exception
   * @throws KettleException
   *           when the command line cannot be built, the process cannot be started, the wait is interrupted,
   *           or {@link #checkExitVal(int)} rejects the exit value
   */
  public boolean execute( OraBulkLoaderMeta meta, boolean wait ) throws KettleException {
    Runtime rt = Runtime.getRuntime();

    try {
      sqlldrProcess = rt.exec( createCommandLine( meta, true ) );
      // any error message?
      StreamLogger errorLogger = new StreamLogger( sqlldrProcess.getErrorStream(), "ERROR" );

      // any output?
      StreamLogger outputLogger = new StreamLogger( sqlldrProcess.getInputStream(), "OUTPUT" );

      // kick them off
      errorLogger.start();
      outputLogger.start();

      if ( wait ) {
        // any error???
        int exitVal = sqlldrProcess.waitFor();
        sqlldrProcess = null;
        logBasic( BaseMessages.getString( PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal ) );
        checkExitVal( exitVal );
      }
    } catch ( KettleException ke ) {
      // Raised by createCommandLine() or checkExitVal(): these messages name the actual problem
      // (missing setting, warning, exit code) and are built without the command line, so pass them on
      // instead of hiding them behind the generic message below.
      throw ke;
    } catch ( InterruptedException ie ) {
      // Keep the interrupt visible to the calling thread.
      Thread.currentThread().interrupt();
      throw new KettleException( "Error while executing sqlldr \'" + createCommandLine( meta, false ) + "\'" );
    } catch ( Exception ex ) {
      // Don't throw the message upwards, the message contains the password.
      throw new KettleException( "Error while executing sqlldr \'" + createCommandLine( meta, false ) + "\'" );
    }

    return true;
  }

  /**
   * Processes one input row: writes it to the bulk loader output and forwards it to the next step(s). On the
   * first row the control file is created and the output is opened; when the input is exhausted the output is
   * closed and, depending on the load method, sqlldr is run or awaited. In preview mode rows are only forwarded.
   *
   * @param smi
   *          the step metadata, cast to {@link OraBulkLoaderMeta}
   * @param sdi
   *          the step data, cast to {@link OraBulkLoaderData}
   * @return {@code true} when a row was processed; {@code false} when there is no more input or an error
   *         stopped the step
   * @throws KettleException
   *           declared by the signature; every KettleException raised inside the body is caught and logged here
   */
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    meta = (OraBulkLoaderMeta) smi;
    data = (OraBulkLoaderData) sdi;

    try {
      Object[] r = getRow(); // Get row from input rowset & set row busy!
      if ( r == null ) {
        // no more input to be expected...

        setOutputDone();

        if ( !preview ) {
          if ( output != null ) {
            // Close the output
            try {
              output.close();
            } catch ( IOException e ) {
              throw new KettleException( "Error while closing output", e );
            }

            output = null;
          }

          String loadMethod = meta.getLoadMethod();
          if ( OraBulkLoaderMeta.METHOD_AUTO_END.equals( loadMethod ) ) {
            // if this is the first line, we do not need to execute loader
            // control file may not exists
            // NOTE(review): "first" is not declared in this class; presumably the inherited first-row flag.
            if ( !first ) {
              // Run sqlldr now and wait for it to finish.
              execute( meta, true );
              sqlldrProcess = null;
            }
          } else if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
            // sqlldr was already started on the first row; collect its exit value.
            try {
              if ( sqlldrProcess != null ) {
                int exitVal = sqlldrProcess.waitFor();
                sqlldrProcess = null;
                logBasic( BaseMessages.getString( PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal ) );
                checkExitVal( exitVal );
              } else if ( !first ) {
                // At least one row was processed, so a process should have been started.
                throw new KettleException( "Internal error: no sqlldr process running" );
              }
            } catch ( Exception ex ) {
              throw new KettleException( "Error while executing sqlldr", ex );
            }
          }
        }
        return false;
      }

      if ( !preview ) {
        if ( first ) {
          // First row: prepare control file and output before writing anything.
          first = false;

          // Default record terminator is the platform line separator unless an alternative is configured.
          String recTerm = Const.CR;
          if ( !Utils.isEmpty( meta.getAltRecordTerm() ) ) {
            recTerm = substituteRecordTerminator( meta.getAltRecordTerm() );
          }

          createControlFile( environmentSubstitute( meta.getControlFile() ), r, meta );
          output = new OraBulkDataOutput( meta, recTerm );

          if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
            // Start sqlldr without waiting; the output is opened with this process below.
            execute( meta, false );
          }
          output.open( this, sqlldrProcess );
        }
        output.writeLine( getInputRowMeta(), r );
      }
      // The row is always passed on unchanged, also in preview mode.
      putRow( getInputRowMeta(), r );
      incrementLinesOutput();

    } catch ( KettleException e ) {
      // Log the failure, flag the step as failed and stop the transformation.
      logError( BaseMessages.getString( PKG, "OraBulkLoader.Log.ErrorInStep" ) + e.getMessage() );
      setErrors( 1 );
      stopAll();
      setOutputDone(); // signal end to receiver(s)
      return false;
    }

    return true;
  }

  /**
   * Initializes the step: stores the metadata and data objects, takes a local copy of the transformation's
   * preview flag and then delegates to the superclass.
   *
   * @param smi
   *          the step metadata, cast to {@link OraBulkLoaderMeta}
   * @param sdi
   *          the step data, cast to {@link OraBulkLoaderData}
   * @return the result of the superclass initialization
   */
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    this.meta = (OraBulkLoaderMeta) smi;
    this.data = (OraBulkLoaderData) sdi;
    this.preview = getTrans().isPreview();

    return super.init( smi, sdi );
  }

  /**
   * Releases the step's resources: closes the data output if it is still open, waits for a still running
   * sqlldr process (destroying it when the wait is interrupted) and, when requested and not previewing, erases
   * the control and data files created for the "automatic load at end" method.
   *
   * @param smi
   *          the step metadata, cast to {@link OraBulkLoaderMeta}
   * @param sdi
   *          the step data, cast to {@link OraBulkLoaderData}
   */
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (OraBulkLoaderMeta) smi;
    data = (OraBulkLoaderData) sdi;

    super.dispose( smi, sdi );

    // close output stream (may terminate running sqlldr)
    if ( output != null ) {
      // Close the output
      try {
        output.close();
      } catch ( IOException e ) {
        logError( "Error while closing output", e );
      }

      output = null;
    }

    // A still running sqlldr process is awaited; it is only destroyed when the wait is interrupted.
    final Process running = sqlldrProcess;
    if ( running != null ) {
      try {
        int exitVal = running.waitFor();
        logBasic( BaseMessages.getString( PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal ) );
      } catch ( InterruptedException e ) {
        logError( "Interrupted while waiting for sqlldr to finish, destroying the process", e );
        running.destroy();
        // Keep the interrupt visible to the calling thread.
        Thread.currentThread().interrupt();
      } finally {
        sqlldrProcess = null;
      }
    }

    if ( !preview && meta.isEraseFiles() ) {
      // Erase the created cfg/dat files if requested. We don't erase
      // the rest of the files because it would be "stupid" to erase them
      // right after creation. If you don't want them, don't fill them in.
      String method = meta.getLoadMethod();

      // Files are only erased for the "automatic load at end" method.
      if ( OraBulkLoaderMeta.METHOD_AUTO_END.equals( method ) ) {
        if ( meta.getControlFile() != null ) {
          eraseFile( meta.getControlFile(), "control file" );
        }
        if ( meta.getDataFile() != null ) {
          eraseFile( meta.getDataFile(), "data file" );
        }
      }

      if ( OraBulkLoaderMeta.METHOD_MANUAL.equals( method ) ) {
        logBasic( "Deletion of files is not compatible with \'manual load method\'" );
      }
    }
  }

  /**
   * Deletes one file and logs, rather than throws, any failure. The file object is always closed, and the
   * error message names the file that was actually being deleted even when it could not be resolved.
   */
  private void eraseFile( String vfsFilename, String description ) {
    FileObject fileObject = null;
    try {
      fileObject = getFileObject( vfsFilename, getTransMeta() );
      fileObject.delete();
    } catch ( Exception ex ) {
      // When resolving failed there is no file object to ask for a name; fall back to the configured name.
      String shownName = fileObject != null ? getFilename( fileObject ) : environmentSubstitute( vfsFilename );
      logError( "Error deleting " + description + " \'" + shownName + "\': " + ex.getMessage(), ex );
    } finally {
      if ( fileObject != null ) {
        try {
          fileObject.close();
        } catch ( Exception ex ) {
          logError( "Error closing " + description + " \'" + getFilename( fileObject ) + "\': " + ex.getMessage(),
            ex );
        }
      }
    }
  }
  /**
   * Returns the file name of the given file object as reported by {@link KettleVFS#getFilename(FileObject)}.
   * Package-visible so that tests can replace it.
   */
  @VisibleForTesting
  String getFilename( FileObject fileObject ) {
    return KettleVFS.getFilename( fileObject );
  }

  /**
   * Resolves variables in the given file name and obtains the matching file object from {@link KettleVFS}.
   * Package-visible so that tests can replace it.
   *
   * @param vfsFilename
   *          the file name, possibly containing variables
   * @param space
   *          the variable space passed on to KettleVFS
   * @throws KettleFileException
   *           as thrown by KettleVFS when obtaining the file object
   */
  @VisibleForTesting
  FileObject getFileObject( String vfsFilename, VariableSpace space ) throws KettleFileException {
    return KettleVFS.getFileObject( environmentSubstitute( vfsFilename ), space );
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy