org.pentaho.di.trans.steps.terafast.TeraFast Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kettle-engine Show documentation
Show all versions of kettle-engine Show documentation
Container pom for Pentaho Data Integration modules
The newest version!
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.terafast;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.vfs2.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.AbstractStep;
import org.pentaho.di.core.util.ConfigurableStreamLogger;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* @author Michael Gugerell(asc145)
*
*/
public class TeraFast extends AbstractStep implements StepInterface {
private static Class> PKG = TeraFastMeta.class; // for i18n purposes, needed by Translator2!!
private TeraFastMeta meta;
private Process process;
private OutputStream fastload;
private OutputStream dataFile;
private PrintStream dataFilePrintStream;
private List columnSortOrder;
private RowMetaInterface tableRowMeta;
private SimpleDateFormat simpleDateFormat;
/**
* Constructor.
*
* @param stepMeta
* the stepMeta.
* @param stepDataInterface
* the stepDataInterface.
* @param copyNr
* the copyNr.
* @param transMeta
* the transMeta.
* @param trans
* the trans.
*/
public TeraFast( final StepMeta stepMeta, final StepDataInterface stepDataInterface, final int copyNr,
final TransMeta transMeta, final Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
/**
* Create the command line for a fastload process depending on the meta information supplied.
*
* @return The string to execute.
*
* @throws KettleException
* Upon any exception
*/
public String createCommandLine() throws KettleException {
if ( StringUtils.isBlank( this.meta.getFastloadPath().getValue() ) ) {
throw new KettleException( "Fastload path not set" );
}
final StringBuilder builder = new StringBuilder();
try {
final FileObject fileObject =
KettleVFS.getFileObject( environmentSubstitute( this.meta.getFastloadPath().getValue() ) );
final String fastloadExec = KettleVFS.getFilename( fileObject );
builder.append( fastloadExec );
} catch ( Exception e ) {
throw new KettleException( "Error retrieving fastload application string", e );
}
// Add log error log, if set.
if ( StringUtils.isNotBlank( this.meta.getLogFile().getValue() ) ) {
try {
FileObject fileObject =
KettleVFS.getFileObject( environmentSubstitute( this.meta.getLogFile().getValue() ) );
builder.append( " -e " );
builder.append( "\"" + KettleVFS.getFilename( fileObject ) + "\"" );
} catch ( Exception e ) {
throw new KettleException( "Error retrieving logfile string", e );
}
}
return builder.toString();
}
protected void verifyDatabaseConnection() throws KettleException {
// Confirming Database Connection is defined.
if ( this.meta.getDbMeta() == null ) {
throw new KettleException( BaseMessages.getString( PKG, "TeraFastDialog.GetSQL.NoConnectionDefined" ) );
}
}
/**
* {@inheritDoc}
*
* @see org.pentaho.di.trans.step.BaseStep#init(org.pentaho.di.trans.step.StepMetaInterface,
* org.pentaho.di.trans.step.StepDataInterface)
*/
@Override
public boolean init( final StepMetaInterface smi, final StepDataInterface sdi ) {
this.meta = (TeraFastMeta) smi;
// this.data = (GenericStepData) sdi;
simpleDateFormat = new SimpleDateFormat( FastloadControlBuilder.DEFAULT_DATE_FORMAT );
if ( super.init( smi, sdi ) ) {
try {
verifyDatabaseConnection();
} catch ( KettleException ex ) {
logError( ex.getMessage() );
return false;
}
return true;
}
return false;
}
/**
* {@inheritDoc}
*
* @see org.pentaho.di.trans.step.BaseStep#processRow(org.pentaho.di.trans.step.StepMetaInterface,
* org.pentaho.di.trans.step.StepDataInterface)
*/
@Override
public boolean processRow( final StepMetaInterface smi, final StepDataInterface sdi ) throws KettleException {
this.meta = (TeraFastMeta) smi;
// this.data = (GenericStepData) sdi;
Object[] row = getRow();
if ( row == null ) {
/* In case we have no data, we need to ensure that the printstream was ever initialized. It will if there is
* data. So we check for a null printstream, then we close the dataFile and execute only if it existed.
*/
if ( this.dataFilePrintStream != null ) {
this.dataFilePrintStream.close();
IOUtils.closeQuietly( this.dataFile );
this.execute();
}
setOutputDone();
try {
logBasic( BaseMessages.getString( PKG, "TeraFast.Log.WatingForFastload" ) );
if ( this.process != null ) {
final int exitVal = this.process.waitFor();
if ( exitVal != 0 ) {
setErrors( DEFAULT_ERROR_CODE );
}
logBasic( BaseMessages.getString( PKG, "TeraFast.Log.ExitValueFastloadPath", "" + exitVal ) );
}
} catch ( Exception e ) {
logError( BaseMessages.getString( PKG, "TeraFast.Log.ErrorInStep" ), e );
this.setDefaultError();
stopAll();
}
return false;
}
if ( this.first ) {
this.first = false;
try {
final File tempDataFile = new File( resolveFileName( this.meta.getDataFile().getValue() ) );
this.dataFile = FileUtils.openOutputStream( tempDataFile );
this.dataFilePrintStream = new PrintStream( dataFile );
} catch ( IOException e ) {
throw new KettleException( "Cannot open data file [path=" + this.dataFile + "]", e );
}
// determine column sort order according to field mapping
// thus the columns in the generated datafile are always in the same order and have the same size as in the
// targetTable
this.tableRowMeta = this.meta.getRequiredFields( this.getTransMeta() );
RowMetaInterface streamRowMeta = this.getTransMeta().getPrevStepFields( this.getStepMeta() );
this.columnSortOrder = new ArrayList( this.tableRowMeta.size() );
for ( int i = 0; i < this.tableRowMeta.size(); i++ ) {
ValueMetaInterface column = this.tableRowMeta.getValueMeta( i );
int tableIndex = this.meta.getTableFieldList().getValue().indexOf( column.getName() );
if ( tableIndex >= 0 ) {
String streamField = this.meta.getStreamFieldList().getValue().get( tableIndex );
this.columnSortOrder.add( streamRowMeta.indexOfValue( streamField ) );
}
}
}
writeToDataFile( getInputRowMeta(), row );
return true;
}
/**
* Write a single row to the temporary data file.
*
* @param rowMetaInterface
* describe the row of data
*
* @param row
* row entries
* @throws KettleException
* ...
*/
@SuppressWarnings( "ArrayToString" )
public void writeToDataFile( RowMetaInterface rowMetaInterface, Object[] row ) throws KettleException {
// Write the data to the output
ValueMetaInterface valueMeta = null;
for ( int i = 0; i < row.length; i++ ) {
if ( row[i] == null ) {
break; // no more rows
}
valueMeta = rowMetaInterface.getValueMeta( i );
if ( row[i] != null ) {
switch ( valueMeta.getType() ) {
case ValueMetaInterface.TYPE_STRING:
String s = rowMetaInterface.getString( row, i );
dataFilePrintStream.print( pad( valueMeta, s.toString() ) );
break;
case ValueMetaInterface.TYPE_INTEGER:
Long l = rowMetaInterface.getInteger( row, i );
dataFilePrintStream.print( pad( valueMeta, l.toString() ) );
break;
case ValueMetaInterface.TYPE_NUMBER:
Double d = rowMetaInterface.getNumber( row, i );
dataFilePrintStream.print( pad( valueMeta, d.toString() ) );
break;
case ValueMetaInterface.TYPE_BIGNUMBER:
BigDecimal bd = rowMetaInterface.getBigNumber( row, i );
dataFilePrintStream.print( pad( valueMeta, bd.toString() ) );
break;
case ValueMetaInterface.TYPE_DATE:
Date dt = rowMetaInterface.getDate( row, i );
dataFilePrintStream.print( simpleDateFormat.format( dt ) );
break;
case ValueMetaInterface.TYPE_BOOLEAN:
Boolean b = rowMetaInterface.getBoolean( row, i );
if ( b.booleanValue() ) {
dataFilePrintStream.print( "Y" );
} else {
dataFilePrintStream.print( "N" );
}
break;
case ValueMetaInterface.TYPE_BINARY:
byte[] byt = rowMetaInterface.getBinary( row, i );
// REVIEW - this does an implicit byt.toString, which can't be what was intended.
dataFilePrintStream.print( byt );
break;
default:
throw new KettleException( BaseMessages.getString(
PKG, "TeraFast.Exception.TypeNotSupported", valueMeta.getType() ) );
}
}
dataFilePrintStream.print( FastloadControlBuilder.DATAFILE_COLUMN_SEPERATOR );
}
dataFilePrintStream.print( Const.CR );
}
private String pad( ValueMetaInterface valueMetaInterface, String data ) {
StringBuilder padding = new StringBuilder( data );
int padLength = valueMetaInterface.getLength() - data.length();
int currentPadLength = 0;
while ( currentPadLength < padLength ) {
padding.append( " " );
currentPadLength++;
}
return padding.toString();
}
/**
* Execute fastload.
*
* @throws KettleException
* ...
*/
public void execute() throws KettleException {
if ( this.meta.getTruncateTable().getValue() ) {
Database db = new Database( this, this.meta.getDbMeta() );
db.connect();
db.truncateTable( this.meta.getTargetTable().getValue() );
db.commit();
db.disconnect();
}
startFastLoad();
if ( this.meta.getUseControlFile().getValue() ) {
this.invokeLoadingControlFile();
} else {
this.invokeLoadingCommand();
}
}
/**
* Start fastload command line tool and initialize streams.
*
* @throws KettleException
* ...
*/
private void startFastLoad() throws KettleException {
final String command = this.createCommandLine();
this.logBasic( "About to execute: " + command );
try {
this.process = Runtime.getRuntime().exec( command );
new Thread( new ConfigurableStreamLogger(
getLogChannel(), this.process.getErrorStream(), LogLevel.ERROR, "ERROR" ) ).start();
new Thread( new ConfigurableStreamLogger(
getLogChannel(), this.process.getInputStream(), LogLevel.DETAILED, "OUTPUT" ) ).start();
this.fastload = this.process.getOutputStream();
} catch ( Exception e ) {
throw new KettleException( "Error while setup: " + command, e );
}
}
/**
* Invoke loading with control file.
*
* @throws KettleException
* ...
*/
private void invokeLoadingControlFile() throws KettleException {
File controlFile = null;
final InputStream control;
final String controlContent;
try {
controlFile = new File( resolveFileName( this.meta.getControlFile().getValue() ) );
control = FileUtils.openInputStream( controlFile );
controlContent = environmentSubstitute( FileUtils.readFileToString( controlFile ) );
} catch ( IOException e ) {
throw new KettleException( "Cannot open control file [path=" + controlFile + "]", e );
}
try {
IOUtils.write( controlContent, this.fastload );
this.fastload.flush();
} catch ( IOException e ) {
throw new KettleException( "Cannot pipe content of control file to fastload [path=" + controlFile + "]", e );
} finally {
IOUtils.closeQuietly( control );
IOUtils.closeQuietly( this.fastload );
}
}
/**
* Invoke loading with loading commands.
*
* @throws KettleException
* ...
*/
private void invokeLoadingCommand() throws KettleException {
final FastloadControlBuilder builder = new FastloadControlBuilder();
builder.setSessions( this.meta.getSessions().getValue() );
builder.setErrorLimit( this.meta.getErrorLimit().getValue() );
builder.logon( this.meta.getDbMeta().getHostname(), this.meta.getDbMeta().getUsername(), this.meta
.getDbMeta().getPassword() );
builder.setRecordFormat( FastloadControlBuilder.RECORD_VARTEXT );
try {
builder.define(
this.meta.getRequiredFields( this.getTransMeta() ), meta.getTableFieldList(), resolveFileName( this.meta
.getDataFile().getValue() ) );
} catch ( Exception ex ) {
throw new KettleException( "Error defining data file!", ex );
}
builder.show();
builder.beginLoading( this.meta.getDbMeta().getPreferredSchemaName(), this.meta.getTargetTable().getValue() );
builder.insert( this.meta.getRequiredFields( this.getTransMeta() ), meta.getTableFieldList(), this.meta
.getTargetTable().getValue() );
builder.endLoading();
builder.logoff();
final String control = builder.toString();
try {
logDetailed( "Control file: " + control );
IOUtils.write( control, this.fastload );
// this.fastload.flush();
} catch ( IOException e ) {
throw new KettleException( "Error while execution control command [controlCommand=" + control + "]", e );
} finally {
IOUtils.closeQuietly( this.fastload );
}
}
/**
* {@inheritDoc}
*
* @see org.pentaho.di.trans.step.BaseStep#dispose(org.pentaho.di.trans.step.StepMetaInterface,
* org.pentaho.di.trans.step.StepDataInterface)
*/
@Override
public void dispose( final StepMetaInterface smi, final StepDataInterface sdi ) {
this.meta = (TeraFastMeta) smi;
// this.data = (GenericStepData) sdi;
try {
if ( this.fastload != null ) {
IOUtils.write( new FastloadControlBuilder().endLoading().toString(), this.fastload );
}
} catch ( IOException e ) {
logError( "Unexpected error encountered while issuing END LOADING", e );
}
IOUtils.closeQuietly( this.dataFile );
IOUtils.closeQuietly( this.fastload );
try {
if ( this.process != null ) {
int exitValue = this.process.waitFor();
logDetailed( "Exit value for the fastload process was : " + exitValue );
if ( exitValue != 0 ) {
logError( "Exit value for the fastload process was : " + exitValue );
setErrors( DEFAULT_ERROR_CODE );
}
}
} catch ( InterruptedException e ) {
setErrors( DEFAULT_ERROR_CODE );
logError( "Unexpected error encountered while finishing the fastload process", e );
}
super.dispose( smi, sdi );
}
/**
* @param fileName
* the filename to resolve. may contain Kettle Environment variables.
* @return the data file name.
* @throws IOException
* ...
*/
private String resolveFileName( final String fileName ) throws KettleException {
final FileObject fileObject = KettleVFS.getFileObject( environmentSubstitute( fileName ) );
return KettleVFS.getFilename( fileObject );
}
}