/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.orabulkloader;
//
// The "designer" notes of the Oracle bulkloader:
// ----------------------------------------------
//
// - "Enclosed" is used in the loader instead of "optionally enclosed" as optionally
// encloses kind of destroys the escaping.
// - A Boolean is output as Y and N (as in the text output step e.g.). If people don't
// like this they can first convert the boolean value to something else before loading
// it.
// - Filters (besides data and datetime) are not supported as it slows down.
//
//
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.vfs2.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.encryption.Encr;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
/**
* Performs a bulk load to an Oracle table.
*
* @author Sven Boden
* @since 20-feb-2007
*/
public class OraBulkLoader extends BaseStep implements StepInterface {
private static Class<?> PKG = OraBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!!
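// sqlldr exit codes: on Unix, Oracle documents 0 (EX_SUCC), 1 (EX_FAIL), 2 (EX_WARN)
// and 3 (EX_FTL); only success and warning are special-cased in checkExitVal() below.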
public static final int EX_SUCC = 0;
public static final int EX_WARN = 2;
Process sqlldrProcess = null;
private OraBulkLoaderMeta meta;
protected OraBulkLoaderData data;
private OraBulkDataOutput output = null;
/*
 * Local copy of the transformation "preview" property. When previewing, we only forward
 * the rows; we don't do any of the real work.
 */
private boolean preview = false;
//
// This class continually reads from the stream and sends it to the log
// if the logging level is at least Basic.
//
private final class StreamLogger extends Thread {
private InputStream input;
private String type;
StreamLogger( InputStream is, String type ) {
this.input = is;
this.type = type + ">";
}
public void run() {
try {
final BufferedReader br = new BufferedReader( new InputStreamReader( input ) );
String line;
while ( ( line = br.readLine() ) != null ) {
// Only perform the concatenation if at basic level. Otherwise,
// this just reads from the stream.
if ( log.isBasic() ) {
logBasic( type + line );
}
}
} catch ( IOException ioe ) {
ioe.printStackTrace();
}
}
}
public OraBulkLoader( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
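/**
 * Resolve variables in the record terminator and unescape backslash sequences
 * ({@code \n} and {@code \r}; any other escaped character is taken literally).
 * A minimal illustration (inputs are hypothetical):
 *
 * <pre>{@code
 * substituteRecordTerminator( "\\r\\n" ); // returns "\r\n" (CR LF)
 * substituteRecordTerminator( "|" );      // returns "|"
 * }</pre>
 */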
private String substituteRecordTerminator( String terminator ) {
final StringBuilder in = new StringBuilder();
int length;
boolean escaped = false;
terminator = environmentSubstitute( terminator );
length = terminator.length();
for ( int i = 0; i < length; i++ ) {
final char c = terminator.charAt( i );
if ( escaped ) {
switch ( c ) {
case 'n':
in.append( '\n' );
break;
case 'r':
in.append( '\r' );
break;
default:
in.append( c );
break;
}
escaped = false;
} else if ( c == '\\' ) {
escaped = true;
} else {
in.append( c );
}
}
return in.toString();
}
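/**
 * Encode the (unescaped) record terminator as a lowercase hex string in the character
 * set of the data file, for use in the control file's {@code "STR x'...'"} clause.
 * A minimal illustration, assuming UTF-8 is available:
 *
 * <pre>{@code
 * encodeRecordTerminator( "\\r\\n", "UTF-8" ); // returns "0d0a"
 * }</pre>
 */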
private String encodeRecordTerminator( String terminator, String encoding ) throws KettleException {
final String in = substituteRecordTerminator( terminator );
final StringBuilder out = new StringBuilder();
byte[] bytes;
try {
// Emit the terminator in hex because it must match the character set
// of the data file.
if ( Utils.isEmpty( encoding ) ) {
bytes = in.getBytes();
} else {
bytes = in.getBytes( encoding );
}
for ( byte aByte : bytes ) {
// Mask to the unsigned byte value; otherwise negative bytes sign-extend
// to eight hex digits.
final String hex = Integer.toHexString( aByte & 0xFF );
if ( hex.length() == 1 ) {
out.append( '0' );
}
out.append( hex );
}
} catch ( UnsupportedEncodingException e ) {
throw new KettleException( "Unsupported character encoding: " + encoding, e );
}
return out.toString();
}
/**
 * Get the contents of the control file as specified in the meta object.
 *
 * @param meta
 * the meta object to model the control file after
 * @param rm
 * the metadata of the incoming rows
 * @param r
 * the first row of data (currently unused)
 *
 * @return a string containing the control file contents
 * @throws KettleException
 * when no fields are defined or a stream field cannot be found
 */
public String getControlFileContents( OraBulkLoaderMeta meta, RowMetaInterface rm, Object[] r ) throws KettleException {
DatabaseMeta dm = meta.getDatabaseMeta();
String inputName = "'" + getFilename( getFileObject( meta.getDataFile(), getTransMeta() ) ) + "'";
String loadAction = meta.getLoadAction();
StringBuilder contents = new StringBuilder( 500 );
contents.append( "OPTIONS(" ).append( Const.CR );
contents.append( " ERRORS=\'" ).append( meta.getMaxErrors() ).append( "\'" ).append( Const.CR );
if ( meta.getCommitSizeAsInt( this ) != 0 && !( meta.isDirectPath() && getStepMeta().getCopies() > 1 ) ) {
// For the second part of the expression above: sqlldr does not support ROWS
// in parallel mode.
contents.append( " , ROWS=\'" ).append( meta.getCommitSize() ).append( "\'" ).append( Const.CR );
}
if ( meta.getBindSizeAsInt( this ) != 0 ) {
contents.append( " , BINDSIZE=\'" ).append( meta.getBindSize() ).append( "\'" ).append( Const.CR );
}
if ( meta.getReadSizeAsInt( this ) != 0 ) {
contents.append( " , READSIZE=\'" ).append( meta.getReadSize() ).append( "\'" ).append( Const.CR );
}
contents.append( ")" ).append( Const.CR );
contents.append( "LOAD DATA" ).append( Const.CR );
if ( !Utils.isEmpty( meta.getCharacterSetName() ) ) {
contents.append( "CHARACTERSET " ).append( meta.getCharacterSetName() ).append( Const.CR );
}
if ( !OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() )
|| !Utils.isEmpty( meta.getAltRecordTerm() ) ) {
String infile = inputName;
if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
infile = "''";
}
// For concurrent input, the data command-line argument must be specified
contents.append( "INFILE " ).append( infile );
if ( !Utils.isEmpty( meta.getAltRecordTerm() ) ) {
contents.append( " \"STR x'" ).append(
encodeRecordTerminator( meta.getAltRecordTerm(), meta.getEncoding() ) ).append( "'\"" );
}
contents.append( Const.CR );
}
contents
.append( "INTO TABLE " ).append(
dm.getQuotedSchemaTableCombination(
environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta.getTableName() ) ) )
.append( Const.CR ).append( loadAction ).append( Const.CR ).append(
"FIELDS TERMINATED BY ',' ENCLOSED BY '\"'" ).append( Const.CR ).append( "(" );
String[] streamFields = meta.getFieldStream();
String[] tableFields = meta.getFieldTable();
String[] dateMask = meta.getDateMask();
if ( streamFields == null || streamFields.length == 0 ) {
throw new KettleException( "No fields defined to load to database" );
}
for ( int i = 0; i < streamFields.length; i++ ) {
if ( i != 0 ) {
contents.append( ", " ).append( Const.CR );
}
contents.append( dm.quoteField( tableFields[i] ) );
int pos = rm.indexOfValue( streamFields[i] );
if ( pos < 0 ) {
throw new KettleException( "Could not find field " + streamFields[i] + " in stream" );
}
ValueMetaInterface v = rm.getValueMeta( pos );
switch ( v.getType() ) {
case ValueMetaInterface.TYPE_STRING:
if ( v.getLength() > 255 ) {
contents.append( " CHAR(" ).append( v.getLength() ).append( ")" );
} else {
contents.append( " CHAR" );
}
break;
case ValueMetaInterface.TYPE_INTEGER:
case ValueMetaInterface.TYPE_NUMBER:
case ValueMetaInterface.TYPE_BIGNUMBER:
break;
case ValueMetaInterface.TYPE_DATE:
if ( OraBulkLoaderMeta.DATE_MASK_DATE.equals( dateMask[i] ) ) {
contents.append( " DATE 'yyyy-mm-dd'" );
} else if ( OraBulkLoaderMeta.DATE_MASK_DATETIME.equals( dateMask[i] ) ) {
contents.append( " TIMESTAMP 'yyyy-mm-dd hh24:mi:ss.ff'" );
} else {
// If not specified, the default is DATE.
contents.append( " DATE 'yyyy-mm-dd'" );
}
break;
case ValueMetaInterface.TYPE_BINARY:
contents.append( " ENCLOSED BY '' AND ''" );
break;
case ValueMetaInterface.TYPE_TIMESTAMP:
contents.append( " TIMESTAMP 'yyyy-mm-dd hh24:mi:ss.ff'" );
break;
default:
break;
}
}
contents.append( ")" );
return contents.toString();
}
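// For reference, the generated control file looks roughly like this (schema, table,
// file name and field list are illustrative):
//
// OPTIONS(
//  ERRORS='50'
//  , ROWS='1000'
// )
// LOAD DATA
// INFILE '/tmp/load.dat'
// INTO TABLE MYSCHEMA.MYTABLE
// APPEND
// FIELDS TERMINATED BY ',' ENCLOSED BY '"'
// (ID,
// NAME CHAR,
// CREATED DATE 'yyyy-mm-dd')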
/**
 * Create a control file.
 *
 * @param filename path to the control file
 * @param row the first row of data, used to model the control file
 * @param meta step metadata
 * @throws KettleException when the control file cannot be written
 */
public void createControlFile( String filename, Object[] row, OraBulkLoaderMeta meta ) throws KettleException {
FileWriter fw = null;
try {
File controlFile = new File( getFileObject( filename, getTransMeta() ).getURL().getFile() );
// Ensure that the parent directory configured for the control file exists.
controlFile.getParentFile().mkdirs();
controlFile.createNewFile();
fw = new FileWriter( controlFile );
fw.write( getControlFileContents( meta, getInputRowMeta(), row ) );
} catch ( IOException ex ) {
throw new KettleException( ex.getMessage(), ex );
} finally {
try {
if ( fw != null ) {
fw.close();
}
} catch ( Exception ex ) {
// Ignore errors
}
}
}
/**
 * Create the command line for an sqlldr process depending on the meta information supplied.
 *
 * @param meta
 * The metadata to create the command line from
 * @param password
 * Whether to include the real password; when false, the password is masked
 *
 * @return The string to execute.
 *
 * @throws KettleException
 * Upon any exception
 */
public String createCommandLine( OraBulkLoaderMeta meta, boolean password ) throws KettleException {
StringBuilder sb = new StringBuilder( 300 );
if ( meta.getSqlldr() != null ) {
try {
FileObject fileObject =
getFileObject( meta.getSqlldr(), getTransMeta() );
String sqlldr = getFilename( fileObject );
sb.append( sqlldr );
} catch ( KettleFileException ex ) {
throw new KettleException( "Error retrieving sqlldr string", ex );
}
} else {
throw new KettleException( "No sqlldr application specified" );
}
if ( meta.getControlFile() != null ) {
try {
FileObject fileObject =
getFileObject( meta.getControlFile(), getTransMeta() );
sb.append( " control=\'" );
sb.append( getFilename( fileObject ) );
sb.append( "\'" );
} catch ( KettleFileException ex ) {
throw new KettleException( "Error retrieving controlfile string", ex );
}
} else {
throw new KettleException( "No control file specified" );
}
if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
sb.append( " data=\'-\'" );
}
if ( meta.getLogFile() != null ) {
try {
FileObject fileObject =
getFileObject( meta.getLogFile(), getTransMeta() );
sb.append( " log=\'" );
sb.append( getFilename( fileObject ) );
sb.append( "\'" );
} catch ( KettleFileException ex ) {
throw new KettleException( "Error retrieving logfile string", ex );
}
}
if ( meta.getBadFile() != null ) {
try {
FileObject fileObject =
getFileObject( meta.getBadFile(), getTransMeta() );
sb.append( " bad=\'" );
sb.append( getFilename( fileObject ) );
sb.append( "\'" );
} catch ( KettleFileException ex ) {
throw new KettleException( "Error retrieving badfile string", ex );
}
}
if ( meta.getDiscardFile() != null ) {
try {
FileObject fileObject =
getFileObject( meta.getDiscardFile(), getTransMeta() );
sb.append( " discard=\'" );
sb.append( getFilename( fileObject ) );
sb.append( "\'" );
} catch ( KettleFileException ex ) {
throw new KettleException( "Error retrieving discardfile string", ex );
}
}
DatabaseMeta dm = meta.getDatabaseMeta();
if ( dm != null ) {
String user = Const.NVL( dm.getUsername(), "" );
String pass =
Const.NVL( Encr.decryptPasswordOptionallyEncrypted( environmentSubstitute( dm.getPassword() ) ), "" );
if ( !password ) {
pass = "******";
}
String dns = Const.NVL( dm.getDatabaseName(), "" );
sb.append( " userid=" ).append( environmentSubstitute( user ) ).append( "/" ).append(
environmentSubstitute( pass ) ).append( "@" );
String overrideName = meta.getDbNameOverride();
if ( Utils.isEmpty( Const.rtrim( overrideName ) ) ) {
sb.append( environmentSubstitute( dns ) );
} else {
// If the database name override is filled in, use that instead.
sb.append( environmentSubstitute( overrideName ) );
}
} else {
throw new KettleException( "No connection specified" );
}
if ( meta.isDirectPath() ) {
sb.append( " DIRECT=TRUE" );
if ( getStepMeta().getCopies() > 1 || meta.isParallel() ) {
sb.append( " PARALLEL=TRUE" );
}
}
return sb.toString();
}
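// For reference, a generated command line looks roughly like this (paths, credentials
// and TNS name are illustrative; the password is masked when 'password' is false):
//
// /oracle/bin/sqlldr control='/tmp/load.ctl' log='/tmp/load.log' userid=scott/tiger@ORCL DIRECT=TRUE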
public void checkExitVal( int exitVal ) throws KettleException {
if ( exitVal == EX_SUCC ) {
return;
}
if ( meta.isFailOnWarning() && ( exitVal == EX_WARN ) ) {
throw new KettleException( "sqlldr returned warning" );
} else if ( meta.isFailOnError() && ( exitVal != EX_WARN ) ) {
throw new KettleException( "sqlldr returned an error (exit code " + exitVal + ")" );
}
}
public boolean execute( OraBulkLoaderMeta meta, boolean wait ) throws KettleException {
Runtime rt = Runtime.getRuntime();
try {
sqlldrProcess = rt.exec( createCommandLine( meta, true ) );
// any error message?
StreamLogger errorLogger = new StreamLogger( sqlldrProcess.getErrorStream(), "ERROR" );
// any output?
StreamLogger outputLogger = new StreamLogger( sqlldrProcess.getInputStream(), "OUTPUT" );
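// Draining stdout and stderr on separate threads keeps sqlldr from blocking
// once the OS pipe buffers fill up.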
// kick them off
errorLogger.start();
outputLogger.start();
if ( wait ) {
// any error???
int exitVal = sqlldrProcess.waitFor();
sqlldrProcess = null;
logBasic( BaseMessages.getString( PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal ) );
checkExitVal( exitVal );
}
} catch ( Exception ex ) {
// Don't throw the message upwards, the message contains the password.
throw new KettleException( "Error while executing sqlldr \'" + createCommandLine( meta, false ) + "\'" );
}
return true;
}
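/**
 * Standard Kettle step contract: called once per incoming row. Returns {@code true}
 * while more rows are expected and {@code false} when the step is done. The first row
 * triggers creation of the control file (and, for the auto-concurrent method, starts
 * sqlldr); a {@code null} row signals end of input, closing the output and
 * running/awaiting sqlldr as appropriate.
 */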
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (OraBulkLoaderMeta) smi;
data = (OraBulkLoaderData) sdi;
try {
Object[] r = getRow(); // Get row from input rowset & set row busy!
if ( r == null ) {
// no more input to be expected...
setOutputDone();
if ( !preview ) {
if ( output != null ) {
// Close the output
try {
output.close();
} catch ( IOException e ) {
throw new KettleException( "Error while closing output", e );
}
output = null;
}
String loadMethod = meta.getLoadMethod();
if ( OraBulkLoaderMeta.METHOD_AUTO_END.equals( loadMethod ) ) {
// If this is still the first row, no data was received, so there is no need
// to run the loader; the control file may not even exist.
if ( !first ) {
execute( meta, true );
sqlldrProcess = null;
}
} else if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
try {
if ( sqlldrProcess != null ) {
int exitVal = sqlldrProcess.waitFor();
sqlldrProcess = null;
logBasic( BaseMessages.getString( PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal ) );
checkExitVal( exitVal );
} else if ( !first ) {
throw new KettleException( "Internal error: no sqlldr process running" );
}
} catch ( Exception ex ) {
throw new KettleException( "Error while executing sqlldr", ex );
}
}
}
return false;
}
if ( !preview ) {
if ( first ) {
first = false;
String recTerm = Const.CR;
if ( !Utils.isEmpty( meta.getAltRecordTerm() ) ) {
recTerm = substituteRecordTerminator( meta.getAltRecordTerm() );
}
createControlFile( environmentSubstitute( meta.getControlFile() ), r, meta );
output = new OraBulkDataOutput( meta, recTerm );
if ( OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals( meta.getLoadMethod() ) ) {
execute( meta, false );
}
output.open( this, sqlldrProcess );
}
output.writeLine( getInputRowMeta(), r );
}
putRow( getInputRowMeta(), r );
incrementLinesOutput();
} catch ( KettleException e ) {
logError( BaseMessages.getString( PKG, "OraBulkLoader.Log.ErrorInStep" ) + e.getMessage() );
setErrors( 1 );
stopAll();
setOutputDone(); // signal end to receiver(s)
return false;
}
return true;
}
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (OraBulkLoaderMeta) smi;
data = (OraBulkLoaderData) sdi;
Trans trans = getTrans();
preview = trans.isPreview();
return super.init( smi, sdi );
}
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (OraBulkLoaderMeta) smi;
data = (OraBulkLoaderData) sdi;
super.dispose( smi, sdi );
// close output stream (may terminate running sqlldr)
if ( output != null ) {
// Close the output
try {
output.close();
} catch ( IOException e ) {
logError( "Error while closing output", e );
}
output = null;
}
// Wait for any running sqlldr process to finish.
if ( sqlldrProcess != null ) {
try {
int exitVal = sqlldrProcess.waitFor();
sqlldrProcess = null;
logBasic( BaseMessages.getString( PKG, "OraBulkLoader.Log.ExitValueSqlldr", "" + exitVal ) );
} catch ( InterruptedException e ) {
/* process should be destroyed */
e.printStackTrace();
if ( sqlldrProcess != null ) {
sqlldrProcess.destroy();
}
}
}
if ( !preview && meta.isEraseFiles() ) {
// Erase the created cfg/dat files if requested. We don't erase the other
// files because it would make no sense to erase them right after creation.
// If you don't want them, don't fill them in.
FileObject fileObject = null;
String method = meta.getLoadMethod();
// OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(method) ||
if ( OraBulkLoaderMeta.METHOD_AUTO_END.equals( method ) ) {
if ( meta.getControlFile() != null ) {
try {
fileObject = getFileObject( meta.getControlFile(), getTransMeta() );
fileObject.delete();
fileObject.close();
} catch ( Exception ex ) {
logError( "Error deleting control file \'"
+ getFilename( fileObject ) + "\': " + ex.getMessage(), ex );
}
}
}
if ( OraBulkLoaderMeta.METHOD_AUTO_END.equals( method ) ) {
// In concurrent mode the data is piped to sqlldr directly, so only the
// auto-end method produces a data file to delete.
if ( meta.getDataFile() != null ) {
try {
fileObject = getFileObject( meta.getDataFile(), getTransMeta() );
fileObject.delete();
fileObject.close();
} catch ( Exception ex ) {
logError( "Error deleting data file \'"
+ getFilename( fileObject ) + "\': " + ex.getMessage(), ex );
}
}
}
if ( OraBulkLoaderMeta.METHOD_MANUAL.equals( method ) ) {
logBasic( "Deletion of files is not compatible with \'manual load method\'" );
}
}
}
@VisibleForTesting
String getFilename( FileObject fileObject ) {
return KettleVFS.getFilename( fileObject );
}
@VisibleForTesting
FileObject getFileObject( String vfsFilename, VariableSpace space ) throws KettleFileException {
return KettleVFS.getFileObject( environmentSubstitute( vfsFilename ), space );
}
}