All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pentaho.di.trans.steps.databaselookup.readallcache.ReadAllCache Maven / Gradle / Ivy

The newest version!
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.databaselookup.readallcache;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.trans.steps.databaselookup.DatabaseLookupData;
import org.pentaho.di.trans.steps.databaselookup.DatabaseLookupMeta;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.PriorityQueue;

/**
 * This is a read-only array-based cache to be used in
 * {@linkplain org.pentaho.di.trans.steps.databaselookup.DatabaseLookup DatabaseLookup} when "Load All Data In Cache"
 * checkbox is selected. Internally, it uses some optimizations to reduce memory consumption.
 *
 * @author Andrey Khayrutdinov
 */
public class ReadAllCache implements DatabaseLookupData.Cache {

  private final DatabaseLookupData stepData;

  private final Object[][] keys;
  private final RowMetaInterface keysMeta;

  private final Object[][] data;

  private final Index[] indexes;

  // this is an array of pairs (column; condition),
  // such structure was chosen not to introduce separate data-container class
  private final int[][] otherConditions;

  ReadAllCache( DatabaseLookupData stepData, Object[][] keys, RowMetaInterface keysMeta, Object[][] data ) {
    this.stepData = stepData;
    this.keys = keys;
    this.keysMeta = keysMeta;
    this.data = data;

    Object[] pair = createIndexes( stepData, keysMeta, keys );
    indexes = (Index[]) pair[ 0 ];
    otherConditions = (int[][]) pair[ 1 ];
  }


  private static Object[] createIndexes( DatabaseLookupData stepData, RowMetaInterface keysMeta, Object[][] keys ) {
    final int rowsAmount = keys.length;
    final int[] conditions = stepData.conditions;

    // it makes sense to apply restrictions in the specific order, namely, to use those, that can filter more elements
    // Index.restrictionComparator() uses heuristic "restriction power" of each index
    PriorityQueue indexes = new PriorityQueue<>( conditions.length, Index.restrictionComparator() );
    List otherConditions = new ArrayList<>();
    for ( int i = 0, len = conditions.length; i < len; i++ ) {
      int condition = conditions[ i ];
      Index index = null;
      switch ( condition ) {
        case DatabaseLookupMeta.CONDITION_EQ:
          index = new EqIndex( i, keysMeta.getValueMeta( i ), rowsAmount );
          break;
        case DatabaseLookupMeta.CONDITION_NE:
          index = EqIndex.nonEqualityIndex( i, keysMeta.getValueMeta( i ), rowsAmount );
          break;
        case DatabaseLookupMeta.CONDITION_LT:
          index = new LtIndex( i, keysMeta.getValueMeta( i ), rowsAmount );
          break;
        case DatabaseLookupMeta.CONDITION_LE:
          index = GtIndex.lessOrEqualCache( i, keysMeta.getValueMeta( i ), rowsAmount );
          break;
        case DatabaseLookupMeta.CONDITION_GT:
          index = new GtIndex( i, keysMeta.getValueMeta( i ), rowsAmount );
          break;
        case DatabaseLookupMeta.CONDITION_GE:
          index = LtIndex.greaterOrEqualCache( i, keysMeta.getValueMeta( i ), rowsAmount );
          break;
        case DatabaseLookupMeta.CONDITION_IS_NULL:
          index = new IsNullIndex( i, keysMeta.getValueMeta( i ), rowsAmount, true );
          break;
        case DatabaseLookupMeta.CONDITION_IS_NOT_NULL:
          index = new IsNullIndex( i, keysMeta.getValueMeta( i ), rowsAmount, false );
          break;
      }
      if ( index == null ) {
        otherConditions.add( new int[] { i, condition } );
      } else {
        index.performIndexingOf( keys );
        indexes.add( index );
      }
    }

    return new Object[] {
      indexes.toArray( new Index[ indexes.size() ] ),
      otherConditions.toArray( new int[ otherConditions.size() ][] )
    };
  }


  @Override
  public Object[] getRowFromCache( RowMetaInterface lookupMeta, Object[] lookupRow ) throws KettleException {
    if ( stepData.hasDBCondition ) {
      // actually, there was no sense in executing SELECT from db in this case,
      // should be reported as improvement
      return null;
    }

    SearchingContext context = new SearchingContext();
    context.init( keys.length );

    for ( Index index : indexes ) {
      int column = index.getColumn();
      // IS (NOT) NULL operation does not require second argument
      // hence, lookupValue can be absent
      // basically, the index ignores both meta and value, so we can pass everything there
      Object lookupValue = ( column < lookupRow.length ) ? lookupRow[ column ] : null;
      index.applyRestrictionsTo( context, lookupMeta.getValueMeta( column ), lookupValue );
      if ( context.isEmpty() ) {
        // if nothing matches, break the search
        return null;
      }
    }

    // iterate through all elements survived after filtering stage
    // and find the first matching
    BitSet candidates = context.getCandidates();
    int candidate = candidates.nextSetBit( 0 );
    while ( candidate != -1 ) {
      Object[] dataKeys = keys[ candidate ];

      boolean matches = true;
      int lookupShift = 0;
      for ( int i = 0, len = otherConditions.length; i < len && matches; i++ ) {
        int[] columnConditionPair = otherConditions[ i ];

        final int column = columnConditionPair[ 0 ];
        Object keyData = dataKeys[ column ];
        ValueMetaInterface keyMeta = keysMeta.getValueMeta( column );

        int lookupIndex = column + lookupShift;
        Object cmpData = lookupRow[ lookupIndex ];
        ValueMetaInterface cmpMeta = lookupMeta.getValueMeta( lookupIndex );

        int condition = columnConditionPair[ 1 ];
        if ( condition == DatabaseLookupMeta.CONDITION_BETWEEN ) {
          // BETWEEN is a special condition demanding two arguments
          // technically there are no obstacles to implement it,
          // as it is just a short form of: (a <= b) && (b <= c)
          // however, let it be so for now
          matches = ( keyMeta.compare( keyData, cmpMeta, cmpData ) >= 0 );
          if ( matches ) {
            lookupShift++;
            lookupIndex++;
            ValueMetaInterface cmpMeta2 = lookupMeta.getValueMeta( lookupIndex );
            Object cmpData2 = lookupRow[ lookupIndex ];
            matches = ( keyMeta.compare( keyData, cmpMeta2, cmpData2 ) <= 0 );
          }
        } else {
          // if not BETWEEN, than it is LIKE (or some new operator)
          // for now, LIKE is not supported here
          matches = false;
          stepData.hasDBCondition = true;
        }
      }
      if ( matches ) {
        return data[ candidate ];
      } else {
        candidate = candidates.nextSetBit( candidate + 1 );
      }
    }
    return null;
  }

  @Override
  public void storeRowInCache( DatabaseLookupMeta meta, RowMetaInterface lookupMeta, Object[] lookupRow,
                               Object[] add ) {
    throw new UnsupportedOperationException( "This cache is read-only" );
  }


  /**
   * Builder class for {@linkplain ReadAllCache}. Note, it does no checks or verifications!
   */
  public static class Builder {
    private final DatabaseLookupData stepData;
    private final Object[][] keys;
    private final Object[][] data;

    private RowMetaInterface keysMeta;

    private int current;

    public Builder( DatabaseLookupData stepData, int amount ) {
      this.stepData = stepData;
      keys = new Object[ amount ][];
      data = new Object[ amount ][];
    }

    public void setKeysMeta( RowMetaInterface keysMeta ) {
      this.keysMeta = keysMeta;
    }

    public void add( Object[] keys, Object[] data ) {
      this.keys[ current ] = keys;
      this.data[ current ] = data;
      current++;
    }

    public ReadAllCache build() {
      return new ReadAllCache( stepData, keys, keysMeta, data );
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy