All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pentaho.di.trans.steps.dimensionlookup.DimensionCache Maven / Gradle / Ivy

The newest version!
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.dimensionlookup;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.List;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;

/**
 * This class acts as a special purpose dimension cache. The idea here is to not only cache the last version of a
 * dimension entry, but all versions. So basically, the entry key is the natural key as well as the from-to date range.
 *
 * The way to achieve that result is to keep a sorted list of rows in memory. Because we want as few conversion errors
 * as possible, the rows are stored in the same layout as described by the row metadata handed to the constructor.
 *
 * The class is its own {@link Comparator}: the same instance is used both to sort the cached rows
 * ({@link #sortRows()}) and to binary-search them ({@link #lookupRow(Object[])}).
 *
 * @author matt
 *
 */
public class DimensionCache implements Comparator<Object[]> {
  private RowMetaInterface rowMeta;
  private List<Object[]> rowCache;
  private int[] keyIndexes;
  private int fromDateIndex;
  private int toDateIndex;

  /**
   * Create a new, empty dimension cache object
   *
   * @param rowMeta
   *          the description of the rows to store
   * @param keyIndexes
   *          the indexes of the natural key (in that order)
   * @param fromDateIndex
   *          the field index where the start of the date range can be found
   * @param toDateIndex
   *          the field index where the end of the date range can be found
   */
  public DimensionCache( RowMetaInterface rowMeta, int[] keyIndexes, int fromDateIndex, int toDateIndex ) {
    this.rowMeta = rowMeta;
    this.keyIndexes = keyIndexes;
    this.fromDateIndex = fromDateIndex;
    this.toDateIndex = toDateIndex;

    // Start with an empty list so that addRow(), getRow(), sortRows() and lookupRow() can be used
    // without first having to call setRowCache().
    //
    this.rowCache = new ArrayList<Object[]>();
  }

  /**
   * Add a row to the back of the list. The list is not re-sorted by this call.
   *
   * @param row
   *          the row to add
   */
  public void addRow( Object[] row ) {
    rowCache.add( row );
  }

  /**
   * Get a row from the cache on a certain index
   *
   * @param index
   *          the index to look for
   * @return the row on the specified index
   */
  public Object[] getRow( int index ) {
    return rowCache.get( index );
  }

  /**
   * Insert a row into the list on a certain index
   *
   * @param index
   *          the index on which the row should be inserted
   * @param row
   *          the row to add
   */
  public void addRow( int index, Object[] row ) {
    rowCache.add( index, row );
  }

  /**
   * Looks up a row in the (sorted) cache using a binary search with this object as the comparator.
   *
   * @param lookupRowData
   *          The data of the lookup row. Make sure that on the index of the from date, you put the lookup date.
   * @return the index of the matching row in the cache if one was found (zero or higher); otherwise the negative
   *         value returned by the binary search, being <code>-(insertion point) - 1</code>
   * @throws KettleException
   *           in case there are conversion errors during the lookup of the row, in case a sorting problem is
   *           detected, or in case a runtime exception occurs during the lookup
   */
  public int lookupRow( Object[] lookupRowData ) throws KettleException {
    try {
      // First perform the lookup!
      //
      int index = Collections.binarySearch( rowCache, lookupRowData, this );
      if ( index >= 0 ) {
        return index; // direct hit
      }

      // What we have now is the insertion point.
      // The comparator (see compare() below) works on the natural key and the date range, so a negative
      // result normally means there is no matching version. As a safety net we still verify the row
      // located at the insertion point.
      //
      int insertionPoint = -( index + 1 );

      // NOTE(review): this bound leaves the last row of the cache out of the verification below.
      // It is kept as-is to preserve the existing behavior - confirm whether that is intended.
      //
      if ( insertionPoint >= rowCache.size() - 1 ) {
        return index;
      }

      // Get the row in question
      //
      Object[] row = rowCache.get( insertionPoint );

      // See if the natural key matches...
      //
      if ( rowMeta.compare( row, lookupRowData, keyIndexes ) != 0 ) {
        return index;
      }

      // The natural keys match, now see if the lookup date (lookupRowData[fromDateIndex]) is between
      // row[fromDateIndex] and row[toDateIndex]
      //
      Date fromDate = rowMeta.getDate( row, fromDateIndex );
      Date toDate = rowMeta.getDate( row, toDateIndex );
      Date lookupDate = rowMeta.getDate( lookupRowData, fromDateIndex );

      if ( fromDate == null && toDate != null ) {
        // This is the case where the fromDate is null and the toDate is not.
        // This is a special case where null as a start date means -Infinity
        //
        if ( toDate.compareTo( lookupDate ) > 0 ) {
          return insertionPoint; // found the key!!
        }
        // This should never happen, it's a flaw in the data or the binary search algorithm...
        // TODO: print the row perhaps?
        //
        throw new KettleException(
          "Key sorting problem detected during row cache lookup: the lookup date of "
            + "the row retrieved is higher than or equal to the end of the date range." );
      } else if ( fromDate != null && toDate == null ) {
        // This is the case where the toDate is null and the fromDate is not.
        // This is a special case where null as an end date means +Infinity
        //
        if ( fromDate.compareTo( lookupDate ) <= 0 ) {
          return insertionPoint; // found the key!!
        }
        // This should never happen, it's a flaw in the data or the binary search algorithm...
        // TODO: print the row perhaps?
        //
        throw new KettleException(
          "Key sorting problem detected during row cache lookup: the lookup date of the row "
            + "retrieved is lower than or equal to the start of the date range." );
      } else {
        // Either both dates are available or both are null.
        // With both available: simply see if the lookup date falls in between...
        // With both null the comparison below throws a NullPointerException, which is reported as a
        // KettleException by the catch block at the bottom.
        //
        if ( fromDate.compareTo( lookupDate ) <= 0 && toDate.compareTo( lookupDate ) > 0 ) {
          return insertionPoint;
        }
        // Else this is a cache miss.
      }

      return index;
    } catch ( RuntimeException e ) {
      throw new KettleException( e );
    }
  }

  /**
   * Sort the cached rows in place, using this object as the comparator.
   */
  public void sortRows() {
    Collections.sort( rowCache, this );
  }

  /**
   * Compare 2 rows of data using the natural keys and indexes specified.
   *
   * The comparison is deliberately asymmetric: for o1 both the start and the end of the date range are read, for o2
   * only the value on the from-date index is read (the lookup date in case of a lookup).
   *
   * @param o1
   *          the first row, of which the natural key and the complete date range are used
   * @param o2
   *          the second row, of which the natural key and the value on the from-date index are used
   * @return the result of the natural key comparison if the keys differ; otherwise 0 if the start date of o1 equals
   *         the date of o2 or if that date lies after the start and before the (non-null) end of the range of o1;
   *         otherwise the result of comparing the start date of o1 with the date of o2, where a null date sorts
   *         before a non-null date
   * @throws RuntimeException
   *           wrapping any exception that occurs while comparing or converting the values
   */
  @Override
  public int compare( Object[] o1, Object[] o2 ) {
    try {
      // First compare on the natural keys...
      //
      int cmp = rowMeta.compare( o1, o2, keyIndexes );
      if ( cmp != 0 ) {
        return cmp;
      }

      // Then see if the start of the date range of o2 falls between the start and end of o1
      //
      ValueMetaInterface fromDateMeta = rowMeta.getValueMeta( fromDateIndex );
      ValueMetaInterface toDateMeta = rowMeta.getValueMeta( toDateIndex );

      Date fromDate = fromDateMeta.getDate( o1[fromDateIndex] );
      Date toDate = toDateMeta.getDate( o1[toDateIndex] );
      Date lookupDate = fromDateMeta.getDate( o2[fromDateIndex] );

      // Compare the start of the range with the lookup date, a null date sorts before a non-null date.
      //
      int fromCmpLookup;
      if ( fromDate == null ) {
        fromCmpLookup = ( lookupDate == null ) ? 0 : -1;
      } else if ( lookupDate == null ) {
        fromCmpLookup = 1;
      } else {
        fromCmpLookup = fromDateMeta.compare( fromDate, lookupDate );
      }

      // The range starts before the lookup date: it is a match if the range also ends after the lookup date.
      // lookupDate can't be null here, otherwise fromCmpLookup would be 0 or 1.
      //
      if ( fromCmpLookup < 0 && toDate != null ) {
        int toCmpLookup = toDateMeta.compare( toDate, lookupDate );
        if ( toCmpLookup > 0 ) {
          return 0;
        }
      }
      return fromCmpLookup;
    } catch ( Exception e ) {
      // Comparator.compare() can't throw checked exceptions, lookupRow() converts this back to a KettleException
      //
      throw new RuntimeException( e );
    }
  }

  /**
   * @return the rowMeta
   */
  public RowMetaInterface getRowMeta() {
    return rowMeta;
  }

  /**
   * @param rowMeta
   *          the rowMeta to set
   */
  public void setRowMeta( RowMetaInterface rowMeta ) {
    this.rowMeta = rowMeta;
  }

  /**
   * @return the rowCache (the internal list itself, not a copy)
   */
  public List<Object[]> getRowCache() {
    return rowCache;
  }

  /**
   * @param rowCache
   *          the rowCache to set. The list is used as-is, it is not copied and not sorted by this call.
   */
  public void setRowCache( List<Object[]> rowCache ) {
    this.rowCache = rowCache;
  }

  /**
   * @return the keyIndexes
   */
  public int[] getKeyIndexes() {
    return keyIndexes;
  }

  /**
   * @param keyIndexes
   *          the keyIndexes to set
   */
  public void setKeyIndexes( int[] keyIndexes ) {
    this.keyIndexes = keyIndexes;
  }

  /**
   * @return the fromDateIndex
   */
  public int getFromDateIndex() {
    return fromDateIndex;
  }

  /**
   * @param fromDateIndex
   *          the fromDateIndex to set
   */
  public void setFromDateIndex( int fromDateIndex ) {
    this.fromDateIndex = fromDateIndex;
  }

  /**
   * @return the toDateIndex
   */
  public int getToDateIndex() {
    return toDateIndex;
  }

  /**
   * @param toDateIndex
   *          the toDateIndex to set
   */
  public void setToDateIndex( int toDateIndex ) {
    this.toDateIndex = toDateIndex;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy