org.pentaho.di.trans.steps.databaselookup.DatabaseLookup (kettle-engine)
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.databaselookup;
import java.util.Arrays;
import java.util.List;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.databaselookup.readallcache.ReadAllCache;
import com.google.common.annotations.VisibleForTesting;
/**
* Looks up values in a database using keys from input streams.
*
* @author Matt
* @since 26-apr-2003
*/
public class DatabaseLookup extends BaseStep implements StepInterface {
  private static Class<?> PKG = DatabaseLookupMeta.class; // for i18n purposes, needed by Translator2!!
private DatabaseLookupMeta meta;
private DatabaseLookupData data;
public DatabaseLookup( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
/**
* Performs the lookup based on the meta-data and the input row.
*
 * @param inputRowMeta the metadata of the incoming row
 * @param row          the row to use as lookup data and to which the returned lookup fields are added
 * @return the resulting row after the lookup values were added
* @throws KettleException In case something goes wrong.
*/
@VisibleForTesting
synchronized Object[] lookupValues( RowMetaInterface inputRowMeta, Object[] row ) throws KettleException {
Object[] outputRow = RowDataUtil.resizeArray( row, data.outputRowMeta.size() );
Object[] lookupRow = new Object[ data.lookupMeta.size() ];
int lookupIndex = 0;
for ( int i = 0; i < meta.getStreamKeyField1().length; i++ ) {
if ( data.keynrs[ i ] >= 0 ) {
ValueMetaInterface input = inputRowMeta.getValueMeta( data.keynrs[ i ] );
ValueMetaInterface value = data.lookupMeta.getValueMeta( lookupIndex );
lookupRow[ lookupIndex ] = row[ data.keynrs[ i ] ];
// Try to convert type if needed
if ( input.getType() != value.getType()
|| ValueMetaInterface.STORAGE_TYPE_BINARY_STRING == input.getStorageType() ) {
lookupRow[ lookupIndex ] = value.convertData( input, lookupRow[ lookupIndex ] );
value.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
}
lookupIndex++;
}
if ( data.keynrs2[ i ] >= 0 ) {
ValueMetaInterface input = inputRowMeta.getValueMeta( data.keynrs2[ i ] );
ValueMetaInterface value = data.lookupMeta.getValueMeta( lookupIndex );
lookupRow[ lookupIndex ] = row[ data.keynrs2[ i ] ];
// Try to convert type if needed
if ( input.getType() != value.getType()
|| ValueMetaInterface.STORAGE_TYPE_BINARY_STRING == input.getStorageType() ) {
lookupRow[ lookupIndex ] = value.convertData( input, lookupRow[ lookupIndex ] );
value.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
}
lookupIndex++;
}
}
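    // "add" will receive the return-field values, either from the cache or from the database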
Object[] add;
    boolean cacheNow = false;
boolean cacheHit = false;
// First, check if we looked up before
if ( meta.isCached() ) {
add = data.cache.getRowFromCache( data.lookupMeta, lookupRow );
if ( add != null ) {
cacheHit = true;
}
} else {
add = null;
}
if ( add == null ) {
      // Do not go to the database when all rows are already loaded in the cache
      // (exception: the LIKE operator still requires a database query).
      if ( !( meta.isCached() && meta.isLoadingAllDataInCache() ) || data.hasDBCondition ) {
if ( log.isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DatabaseLookup.Log.AddedValuesToLookupRow1" )
+ meta.getStreamKeyField1().length
+ BaseMessages.getString( PKG, "DatabaseLookup.Log.AddedValuesToLookupRow2" )
+ data.lookupMeta.getString( lookupRow ) );
}
data.db.setValuesLookup( data.lookupMeta, lookupRow );
add = data.db.getLookup( meta.isFailingOnMultipleResults() );
        cacheNow = true;
}
}
    if ( add == null ) { // nothing was found for this key: add the configured default values
if ( meta.isEatingRowOnLookupFailure() ) {
return null;
}
if ( getStepMeta().isDoingErrorHandling() ) {
putError( getInputRowMeta(), row, 1L, "No lookup found", null, "DBL001" );
        // return null, otherwise the row would still be processed further
return null;
}
if ( log.isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DatabaseLookup.Log.NoResultsFoundAfterLookup" ) );
}
add = new Object[ data.returnMeta.size() ];
for ( int i = 0; i < meta.getReturnValueField().length; i++ ) {
if ( data.nullif[ i ] != null ) {
add[ i ] = data.nullif[ i ];
} else {
add[ i ] = null;
}
}
} else {
if ( log.isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DatabaseLookup.Log.FoundResultsAfterLookup" )
+ Arrays.toString( add ) );
}
// Only verify the data types if the data comes from the DB, NOT when we have a cache hit
// In that case, we already know the data type is OK.
if ( !cacheHit ) {
incrementLinesInput();
int[] types = meta.getReturnValueDefaultType();
// The assumption here is that the types are in the same order
// as the returned lookup row, but since we make the lookup row
// that should not be a problem.
//
for ( int i = 0; i < types.length; i++ ) {
ValueMetaInterface returned = data.db.getReturnRowMeta().getValueMeta( i );
ValueMetaInterface expected = data.returnMeta.getValueMeta( i );
if ( returned != null && types[ i ] > 0 && types[ i ] != returned.getType() ) {
// Set the type to the default return type
add[ i ] = expected.convertData( returned, add[ i ] );
}
}
}
}
// Store in cache if we need to!
// If we already loaded all data into the cache, storing more makes no sense.
//
    if ( meta.isCached() && cacheNow && !meta.isLoadingAllDataInCache() && data.allEquals ) {
data.cache.storeRowInCache( meta, data.lookupMeta, lookupRow, add );
}
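    // Append the return values after the original input fields of the row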
for ( int i = 0; i < data.returnMeta.size(); i++ ) {
outputRow[ inputRowMeta.size() + i ] = add[ i ];
}
return outputRow;
}
// visible for testing purposes
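  // Queries the physical table metadata to resolve the key column types (data.keytypes)
  // and to realign the output row metadata with the actual database column types.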
void determineFieldsTypesQueryingDb() throws KettleException {
final String[] keyFields = meta.getTableKeyField();
data.keytypes = new int[ keyFields.length ];
String schemaTable =
meta.getDatabaseMeta().getQuotedSchemaTableCombination(
environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta.getTablename() ) );
RowMetaInterface fields = data.db.getTableFields( schemaTable );
if ( fields != null ) {
// Fill in the types...
for ( int i = 0; i < keyFields.length; i++ ) {
ValueMetaInterface key = fields.searchValueMeta( keyFields[ i ] );
if ( key != null ) {
data.keytypes[ i ] = key.getType();
} else {
throw new KettleStepException( BaseMessages.getString(
PKG, "DatabaseLookup.ERROR0001.FieldRequired5.Exception" )
+ keyFields[ i ]
+ BaseMessages.getString( PKG, "DatabaseLookup.ERROR0001.FieldRequired6.Exception" ) );
}
}
final String[] returnFields = meta.getReturnValueField();
final int returnFieldsOffset = getInputRowMeta().size();
for ( int i = 0; i < returnFields.length; i++ ) {
ValueMetaInterface returnValueMeta = fields.searchValueMeta( returnFields[ i ] );
if ( returnValueMeta != null ) {
ValueMetaInterface v = data.outputRowMeta.getValueMeta( returnFieldsOffset + i );
if ( v.getType() != returnValueMeta.getType() ) {
ValueMetaInterface clone = returnValueMeta.clone();
clone.setName( v.getName() );
data.outputRowMeta.setValueMeta( returnFieldsOffset + i, clone );
}
}
}
} else {
throw new KettleStepException( BaseMessages.getString(
PKG, "DatabaseLookup.ERROR0002.UnableToDetermineFieldsOfTable" )
+ schemaTable + "]" );
}
}
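  // Pre-converts the configured default values (returned on a lookup miss) from
  // String to the type of the corresponding return field.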
private void initNullIf() throws KettleException {
final String[] returnFields = meta.getReturnValueField();
data.nullif = new Object[ returnFields.length ];
for ( int i = 0; i < returnFields.length; i++ ) {
if ( !Utils.isEmpty( meta.getReturnValueDefault()[ i ] ) ) {
ValueMetaInterface stringMeta = new ValueMetaString( "string" );
ValueMetaInterface returnMeta = data.outputRowMeta.getValueMeta( i + getInputRowMeta().size() );
data.nullif[ i ] = returnMeta.convertData( stringMeta, meta.getReturnValueDefault()[ i ] );
} else {
data.nullif[ i ] = null;
}
}
}
private void initLookupMeta() throws KettleException {
// Count the number of values in the lookup as well as the metadata to send along with it.
//
data.lookupMeta = new RowMeta();
for ( int i = 0; i < meta.getStreamKeyField1().length; i++ ) {
if ( data.keynrs[ i ] >= 0 ) {
ValueMetaInterface inputValueMeta = getInputRowMeta().getValueMeta( data.keynrs[ i ] );
// Try to convert type if needed in a clone, we don't want to
// change the type in the original row
//
ValueMetaInterface value = ValueMetaFactory.cloneValueMeta( inputValueMeta, data.keytypes[ i ] );
data.lookupMeta.addValueMeta( value );
}
if ( data.keynrs2[ i ] >= 0 ) {
ValueMetaInterface inputValueMeta = getInputRowMeta().getValueMeta( data.keynrs2[ i ] );
// Try to convert type if needed in a clone, we don't want to
// change the type in the original row
//
ValueMetaInterface value = ValueMetaFactory.cloneValueMeta( inputValueMeta, data.keytypes[ i ] );
data.lookupMeta.addValueMeta( value );
}
}
}
private void initReturnMeta() {
// We also want to know the metadata of the return values beforehand (null handling)
data.returnMeta = new RowMeta();
for ( int i = 0; i < meta.getReturnValueField().length; i++ ) {
ValueMetaInterface v = data.outputRowMeta.getValueMeta( getInputRowMeta().size() + i ).clone();
data.returnMeta.addValueMeta( v );
}
}
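  // Per-row entry point: the first row triggers one-time setup (field indexes, lookup
  // statement, optional cache preload); every row is then enriched via lookupValues()
  // and passed on, honoring the step's error handling settings.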
@Override
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
Object[] r = getRow(); // Get row from input rowset & set row busy!
if ( r == null ) { // no more input to be expected...
setOutputDone();
return false;
}
meta = (DatabaseLookupMeta) smi;
data = (DatabaseLookupData) sdi;
if ( first ) {
first = false;
// create the output metadata
data.outputRowMeta = getInputRowMeta().clone();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
data.db.setLookup(
environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta.getTablename() ),
meta.getTableKeyField(), meta.getKeyCondition(), meta.getReturnValueField(),
meta.getReturnValueNewName(), meta.getOrderByClause(), meta.isFailingOnMultipleResults()
);
// lookup the values!
if ( log.isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "DatabaseLookup.Log.CheckingRow" )
+ getInputRowMeta().getString( r ) );
}
data.keynrs = new int[ meta.getStreamKeyField1().length ];
data.keynrs2 = new int[ meta.getStreamKeyField1().length ];
for ( int i = 0; i < meta.getStreamKeyField1().length; i++ ) {
data.keynrs[ i ] = getInputRowMeta().indexOfValue( meta.getStreamKeyField1()[ i ] );
if ( data.keynrs[ i ] < 0 && // couldn't find field!
!"IS NULL".equalsIgnoreCase( meta.getKeyCondition()[ i ] ) && // No field needed!
!"IS NOT NULL".equalsIgnoreCase( meta.getKeyCondition()[ i ] ) // No field needed!
) {
throw new KettleStepException( BaseMessages.getString(
PKG, "DatabaseLookup.ERROR0001.FieldRequired1.Exception" )
+ meta.getStreamKeyField1()[ i ]
+ BaseMessages.getString( PKG, "DatabaseLookup.ERROR0001.FieldRequired2.Exception" ) );
}
data.keynrs2[ i ] = getInputRowMeta().indexOfValue( meta.getStreamKeyField2()[ i ] );
if ( data.keynrs2[ i ] < 0 && // couldn't find field!
"BETWEEN".equalsIgnoreCase( meta.getKeyCondition()[ i ] ) // 2 fields needed!
) {
throw new KettleStepException( BaseMessages.getString(
PKG, "DatabaseLookup.ERROR0001.FieldRequired3.Exception" )
+ meta.getStreamKeyField2()[ i ]
+ BaseMessages.getString( PKG, "DatabaseLookup.ERROR0001.FieldRequired4.Exception" ) );
}
if ( log.isDebug() ) {
logDebug( BaseMessages.getString( PKG, "DatabaseLookup.Log.FieldHasIndex1" )
+ meta.getStreamKeyField1()[ i ] + BaseMessages.getString( PKG, "DatabaseLookup.Log.FieldHasIndex2" )
+ data.keynrs[ i ] );
}
}
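      // Set up the in-memory lookup cache when caching is enabled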
if ( meta.isCached() ) {
data.cache = DefaultCache.newCache( data, meta.getCacheSize() );
}
determineFieldsTypesQueryingDb();
initNullIf();
initLookupMeta();
initReturnMeta();
// If the user selected to load all data into the cache at startup, that's what we do now...
//
if ( meta.isCached() && meta.isLoadingAllDataInCache() ) {
loadAllTableDataIntoTheCache();
}
}
if ( log.isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DatabaseLookup.Log.GotRowFromPreviousStep" )
+ getInputRowMeta().getString( r ) );
}
try {
// add new lookup values to the row
Object[] outputRow = lookupValues( getInputRowMeta(), r );
if ( outputRow != null ) {
// copy row to output rowset(s);
putRow( data.outputRowMeta, outputRow );
if ( log.isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DatabaseLookup.Log.WroteRowToNextStep" )
+ getInputRowMeta().getString( r ) );
}
if ( checkFeedback( getLinesRead() ) ) {
logBasic( "linenr " + getLinesRead() );
}
}
} catch ( KettleException e ) {
if ( getStepMeta().isDoingErrorHandling() ) {
putError( getInputRowMeta(), r, 1, e.getMessage(), null, "DBLOOKUPD001" );
} else {
logError( BaseMessages.getString( PKG, "DatabaseLookup.ERROR003.UnexpectedErrorDuringProcessing" )
+ e.getMessage() );
setErrors( 1 );
stopAll();
setOutputDone(); // signal end to receiver(s)
return false;
}
}
return true;
}
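  // "Load all data in cache" option: selects only the key and return columns and pulls
  // the whole lookup table into memory once, so subsequent lookups are served from
  // memory (except for conditions such as LIKE, which still query the database).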
private void loadAllTableDataIntoTheCache() throws KettleException {
DatabaseMeta dbMeta = meta.getDatabaseMeta();
Database db = getDatabase( dbMeta );
connectDatabase( db );
try {
// We only want to get the used table fields...
//
String sql = "SELECT ";
for ( int i = 0; i < meta.getStreamKeyField1().length; i++ ) {
if ( i > 0 ) {
sql += ", ";
}
sql += dbMeta.quoteField( meta.getTableKeyField()[ i ] );
}
// Also grab the return field...
//
for ( int i = 0; i < meta.getReturnValueField().length; i++ ) {
sql += ", " + dbMeta.quoteField( meta.getReturnValueField()[ i ] );
}
// The schema/table
//
sql += " FROM "
+ dbMeta.getQuotedSchemaTableCombination(
environmentSubstitute( meta.getSchemaName() ),
environmentSubstitute( meta.getTablename() ) );
// order by?
if ( meta.getOrderByClause() != null && meta.getOrderByClause().length() != 0 ) {
sql += " ORDER BY " + meta.getOrderByClause();
}
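      // For example (hypothetical column and table names), the generated statement
      // could look like: SELECT "id", "code" FROM "dwh"."lookup_table" ORDER BY "id"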
// Now that we have the SQL constructed, let's store the rows...
//
      List<Object[]> rows = db.getRows( sql, 0 );
      if ( rows != null && !rows.isEmpty() ) {
        if ( data.allEquals ) {
          // all key conditions are "=": the rows fit the keyed default cache
          putToDefaultCache( db, rows );
        } else {
          // ranged conditions are present: use the index-based ReadAllCache
          putToReadAllCache( db, rows );
        }
      }
    } finally {
      if ( db != null ) {
        db.disconnect();
      }
    }
  }
  // (the remaining members of the original class, including the putToDefaultCache,
  // putToReadAllCache, init and dispose helpers, are truncated in this listing)
}