/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.databaselookup;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.TimedRow;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
/**
* Old code, copied from {@linkplain DatabaseLookup}
*
* @author Andrey Khayrutdinov
*/
public class DefaultCache implements DatabaseLookupData.Cache {
public static DefaultCache newCache( DatabaseLookupData data, int cacheSize ) {
if ( cacheSize > 0 ) {
cacheSize = (int) ( cacheSize * 1.5 );
} else {
cacheSize = 16;
}
return new DefaultCache( data, cacheSize );
}
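// Sizing note (editorial): newCache() over-allocates by 1.5x so that, with
// LinkedHashMap's default load factor of 0.75, cacheSize entries fit without
// rehashing (e.g. cacheSize = 1000 -> initial capacity 1500, and
// 1500 * 0.75 > 1000); 16 matches the JDK's default initial capacity.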
private final DatabaseLookupData data;
private final LinkedHashMap<RowMetaAndData, TimedRow> map;
DefaultCache( DatabaseLookupData data, int capacity ) {
this.data = data;
map = new LinkedHashMap<>( capacity );
}
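// LinkedHashMap (rather than HashMap) keeps keys in insertion order, so the
// condition scan in getRowFromCache() and the eviction pass in
// storeRowInCache() visit entries in the order they were cached, presumably
// also preserving row order when the step loads all data into the cache.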
@Override
public Object[] getRowFromCache( RowMetaInterface lookupMeta, Object[] lookupRow ) throws KettleException {
if ( data.allEquals ) {
// Only do the direct map lookup when all conditions are "=";
// otherwise conditions like >, < and <> would give wrong results.
TimedRow timedRow = map.get( new RowMetaAndData( data.lookupMeta, lookupRow ) );
if ( timedRow != null ) {
return timedRow.getRow();
}
} else { // special handling of conditions <, >, <>, etc.
if ( !data.hasDBCondition ) { // e.g. LIKE is not handled by this routine yet
// TODO: find an alternative way to look up the data based on the condition.
// Not all conditions are "=" so we are going to have to evaluate row by row
// A sorted list or index might be a good solution here...
//
for ( RowMetaAndData key : map.keySet() ) {
// Now verify that the key is matching our conditions...
//
boolean match = true;
int lookupIndex = 0;
for ( int i = 0; i < data.conditions.length && match; i++ ) {
ValueMetaInterface cmpMeta = lookupMeta.getValueMeta( lookupIndex );
Object cmpData = lookupRow[ lookupIndex ];
ValueMetaInterface keyMeta = key.getValueMeta( i );
Object keyData = key.getData()[ i ];
switch ( data.conditions[ i ] ) {
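// Note: the comparisons below are written from the lookup-row side
// ( cmpData vs. keyData ), so the operators look inverted relative to the
// condition names: e.g. CONDITION_LT matches when keyData < cmpData,
// which is expressed here as compare( cmpData, keyData ) > 0.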
case DatabaseLookupMeta.CONDITION_EQ:
match = ( cmpMeta.compare( cmpData, keyMeta, keyData ) == 0 );
break;
case DatabaseLookupMeta.CONDITION_NE:
match = ( cmpMeta.compare( cmpData, keyMeta, keyData ) != 0 );
break;
case DatabaseLookupMeta.CONDITION_LT:
match = ( cmpMeta.compare( cmpData, keyMeta, keyData ) > 0 );
break;
case DatabaseLookupMeta.CONDITION_LE:
match = ( cmpMeta.compare( cmpData, keyMeta, keyData ) >= 0 );
break;
case DatabaseLookupMeta.CONDITION_GT:
match = ( cmpMeta.compare( cmpData, keyMeta, keyData ) < 0 );
break;
case DatabaseLookupMeta.CONDITION_GE:
match = ( cmpMeta.compare( cmpData, keyMeta, keyData ) <= 0 );
break;
case DatabaseLookupMeta.CONDITION_IS_NULL:
match = keyMeta.isNull( keyData );
break;
case DatabaseLookupMeta.CONDITION_IS_NOT_NULL:
match = !keyMeta.isNull( keyData );
break;
case DatabaseLookupMeta.CONDITION_BETWEEN:
// Between key >= cmp && key <= cmp2
ValueMetaInterface cmpMeta2 = lookupMeta.getValueMeta( lookupIndex + 1 );
Object cmpData2 = lookupRow[ lookupIndex + 1 ];
match = ( keyMeta.compare( keyData, cmpMeta, cmpData ) >= 0 );
if ( match ) {
match = ( keyMeta.compare( keyData, cmpMeta2, cmpData2 ) <= 0 );
}
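// BETWEEN consumes two values from the lookup row, so advance one extra
// slot here; the shared lookupIndex++ at the end of the loop skips the first.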
lookupIndex++;
break;
// TODO: add LIKE operator (think of changing the hasDBCondition logic then)
default:
match = false;
// Flag the unsupported condition so we skip this scan for subsequent rows;
// this is also a safety net in case a new condition type is introduced.
data.hasDBCondition = true;
break;
}
lookupIndex++;
}
if ( match ) {
TimedRow timedRow = map.get( key );
if ( timedRow != null ) {
return timedRow.getRow();
}
}
}
}
}
return null;
}
@Override
public void storeRowInCache( DatabaseLookupMeta meta, RowMetaInterface lookupMeta, Object[] lookupRow,
Object[] add ) {
RowMetaAndData rowMetaAndData = new RowMetaAndData( lookupMeta, lookupRow );
// DEinspanjer 2009-02-01 XXX: I want to write a test case to prove this point before checking in.
// /* Don't insert a row with a duplicate key into the cache. It doesn't seem
// * to serve a useful purpose and can potentially cause the step to return
// * different values over the life of the transformation (if the source DB rows change)
// * Additionally, if using the load all data feature, re-inserting would reverse the order
// * specified in the step.
// */
// if (!data.look.containsKey(rowMetaAndData)) {
// data.look.put(rowMetaAndData, new TimedRow(add));
// }
map.put( rowMetaAndData, new TimedRow( add ) );
// See if we have to limit the cache_size.
// Sample 10% of the rows in the cache.
// Remove everything below the second lowest date.
// That should on average remove more than 10% of the entries
// It's not exact science, but it will be faster than the old algorithm
// DEinspanjer 2009-02-01: If you had previously set a cache size and then turned on load all, this
// method would throw out entries if the previous cache size wasn't big enough.
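// Worked example (illustrative): with cacheSize = 100 and 110 cached rows,
// incr = 11, so 10 log dates are sampled; after sorting, every entry older
// than the second-lowest sampled date is evicted in one O(n) pass.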
if ( !meta.isLoadingAllDataInCache() && meta.getCacheSize() > 0 && map.size() > meta.getCacheSize() ) {
List<RowMetaAndData> keys = new ArrayList<>( map.keySet() );
List<Date> samples = new ArrayList<>();
int incr = keys.size() / 10;
if ( incr == 0 ) {
incr = 1;
}
for ( int k = 0; k < keys.size(); k += incr ) {
RowMetaAndData key = keys.get( k );
TimedRow timedRow = map.get( key );
samples.add( timedRow.getLogDate() );
}
Collections.sort( samples );
if ( samples.size() > 1 ) {
Date cutoff = samples.get( 1 );
// Everything strictly below the second-lowest sampled date goes away...
for ( RowMetaAndData key : keys ) {
TimedRow timedRow = map.get( key );
if ( timedRow.getLogDate().compareTo( cutoff ) < 0 ) {
map.remove( key );
}
}
}
}
}
}
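// Usage sketch (hypothetical; not part of the original file). The lookup step
// is expected to drive the cache roughly like this, assuming a populated
// DatabaseLookupData "data", a DatabaseLookupMeta "meta", and the step's
// lookup row metadata "lookupMeta":
//
//   DatabaseLookupData.Cache cache = DefaultCache.newCache( data, meta.getCacheSize() );
//   Object[] cached = cache.getRowFromCache( lookupMeta, lookupRow );
//   if ( cached == null ) {
//     Object[] add = lookupValuesFromDatabase( lookupRow ); // hypothetical helper
//     cache.storeRowInCache( meta, lookupMeta, lookupRow, add );
//   }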