All downloads are FREE. The search and download features use the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.pentaho.di.trans.steps.dimensionlookup.DimensionCache Maven / Gradle / Ivy
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.dimensionlookup;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.List;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
/**
* This class will act as a special purpose dimension Cache. The idea here is to not only cache the last version of a
* dimension entry, but all versions. So basically, the entry key is the natural key as well as the from-to date range.
*
* The way to achieve that result is to keep a sorted list in memory. Because we want as few conversion errors as
* possible, we'll use the same row as we get from the database.
*
* @author matt
*
*/
public class DimensionCache implements Comparator<Object[]> {
  /** Describes the layout of the rows held in {@link #rowCache}. */
  private RowMetaInterface rowMeta;
  /** The cached rows; {@link #lookupRow(Object[])} binary-searches this list, so it must be sorted first. */
  private List<Object[]> rowCache;
  /** Field indexes of the natural key, in comparison order. */
  private int[] keyIndexes;
  /** Field index of the start of the date range. */
  private int fromDateIndex;
  /** Field index of the end of the date range. */
  private int toDateIndex;

  /**
   * Create a new dimension cache object with an empty row cache.
   *
   * @param rowMeta
   *          the description of the rows to store
   * @param keyIndexes
   *          the indexes of the natural key (in that order)
   * @param fromDateIndex
   *          the field index where the start of the date range can be found
   * @param toDateIndex
   *          the field index where the end of the date range can be found
   */
  public DimensionCache( RowMetaInterface rowMeta, int[] keyIndexes, int fromDateIndex, int toDateIndex ) {
    this.rowMeta = rowMeta;
    this.keyIndexes = keyIndexes;
    this.fromDateIndex = fromDateIndex;
    this.toDateIndex = toDateIndex;

    // Initialize the cache so that addRow() works without a prior setRowCache() call.
    //
    this.rowCache = new ArrayList<Object[]>();
  }

  /**
   * Add a row to the back of the list
   *
   * @param row
   *          the row to add
   */
  public void addRow( Object[] row ) {
    rowCache.add( row );
  }

  /**
   * Get a row from the cache on a certain index
   *
   * @param index
   *          the index to look for
   * @return the row on the specified index
   */
  public Object[] getRow( int index ) {
    return rowCache.get( index );
  }

  /**
   * Insert a row into the list on a certain index
   *
   * @param index
   *          the index on which the row should be inserted
   * @param row
   *          the row to add
   */
  public void addRow( int index, Object[] row ) {
    rowCache.add( index, row );
  }

  /**
   * Looks up a row in the (sorted) cache.
   *
   * @param lookupRowData
   *          The data of the lookup row. Make sure that on the index of the from date, you put the lookup date.
   * @return the index of the matching row in the cache, or the negative value returned by
   *         {@link Collections#binarySearch(List, Object, Comparator)} (that is,
   *         <code>-(insertion point) - 1</code>) in case of a cache miss
   * @throws KettleException
   *           in case there are conversion errors during the lookup of the row, when a key sorting problem is
   *           detected, or when a runtime exception occurs (for example a NullPointerException because the lookup
   *           date or both range dates of the inspected row are null)
   */
  public int lookupRow( Object[] lookupRowData ) throws KettleException {
    try {
      // First perform the lookup!
      //
      int index = Collections.binarySearch( rowCache, lookupRowData, this );
      if ( index >= 0 ) {
        return index;
      }

      // What we have now is the insertion point.
      // The binary search did not report a match, but the row that currently sits at the insertion point can
      // still be the one we want: that is the case when its natural key matches and the lookup date falls inside
      // its date range.
      //
      int insertionPoint = -( index + 1 );
      if ( insertionPoint >= rowCache.size() ) {
        // The insertion point is past the end of the list: there is no row to inspect.
        // Every index up to and including size() - 1 is valid, so the last row must be inspected as well.
        //
        return index;
      }

      // Get the row in question and see if the natural key matches...
      //
      Object[] row = rowCache.get( insertionPoint );
      if ( rowMeta.compare( row, lookupRowData, keyIndexes ) != 0 ) {
        return index;
      }

      // The natural keys match, now see if the lookup date (lookupRowData[fromDateIndex]) is between
      // row[fromDateIndex] and row[toDateIndex]
      //
      Date fromDate = rowMeta.getDate( row, fromDateIndex );
      Date toDate = rowMeta.getDate( row, toDateIndex );
      Date lookupDate = rowMeta.getDate( lookupRowData, fromDateIndex );

      if ( fromDate == null && toDate != null ) {
        // This is the case where the fromDate is null and the toDate is not.
        // This is a special case where null as a start date means -Infinity
        //
        if ( toDate.compareTo( lookupDate ) > 0 ) {
          return insertionPoint; // found the key!!
        }
        // This should never happen, it's a flaw in the data or the binary search algorithm...
        // TODO: print the row perhaps?
        //
        throw new KettleException(
          "Key sorting problem detected during row cache lookup: the lookup date of "
            + "the row retrieved is higher than or equal to the end of the date range." );
      }

      if ( fromDate != null && toDate == null ) {
        // This is the case where the toDate is null and the fromDate is not.
        // This is a special case where null as an end date means +Infinity
        //
        if ( fromDate.compareTo( lookupDate ) <= 0 ) {
          return insertionPoint; // found the key!!
        }
        // This should never happen, it's a flaw in the data or the binary search algorithm...
        // TODO: print the row perhaps?
        //
        throw new KettleException(
          "Key sorting problem detected during row cache lookup: the lookup date of the row "
            + "retrieved is lower than or equal to the start of the date range." );
      }

      // Remaining case: simply see if the lookup date falls in between (start inclusive, end exclusive).
      // NOTE: if both dates are null, the dereference below throws a NullPointerException, which the catch
      // block wraps in a KettleException.
      //
      if ( fromDate.compareTo( lookupDate ) <= 0 && toDate.compareTo( lookupDate ) > 0 ) {
        return insertionPoint;
      }

      // Else this is a cache miss.
      //
      return index;
    } catch ( RuntimeException e ) {
      throw new KettleException( e );
    }
  }

  /**
   * Sorts the cached rows using this object as the comparator (see {@link #compare(Object[], Object[])}).
   */
  public void sortRows() {
    Collections.sort( rowCache, this );
  }

  /**
   * Compare 2 rows of data using the natural keys and indexes specified.
   * <p>
   * The rows are first compared on the natural key. If the keys are equal, the from-date of <code>o1</code> is
   * compared with the from-date of <code>o2</code> (a null date sorts before any non-null date). If the from-date
   * of <code>o1</code> is lower and the to-date of <code>o1</code> is not null and higher than the from-date of
   * <code>o2</code>, the rows are considered equal.
   *
   * @param o1
   *          the row whose date range (from-date and to-date) is used
   * @param o2
   *          the row whose from-date field is used as the lookup date
   * @return a negative value, zero or a positive value as described above
   * @throws RuntimeException
   *           wrapping any exception raised during the comparison or data conversion
   */
  @Override
  public int compare( Object[] o1, Object[] o2 ) {
    try {
      // First compare on the natural keys...
      //
      int cmp = rowMeta.compare( o1, o2, keyIndexes );
      if ( cmp != 0 ) {
        return cmp;
      }

      // Then see if the start of the date range of o2 falls between the start and end of o1
      //
      ValueMetaInterface fromDateMeta = rowMeta.getValueMeta( fromDateIndex );
      ValueMetaInterface toDateMeta = rowMeta.getValueMeta( toDateIndex );

      Date fromDate = fromDateMeta.getDate( o1[fromDateIndex] );
      Date toDate = toDateMeta.getDate( o1[toDateIndex] );
      Date lookupDate = fromDateMeta.getDate( o2[fromDateIndex] );

      int fromCmpLookup;
      if ( fromDate == null ) {
        fromCmpLookup = ( lookupDate == null ) ? 0 : -1;
      } else if ( lookupDate == null ) {
        fromCmpLookup = 1;
      } else {
        fromCmpLookup = fromDateMeta.compare( fromDate, lookupDate );
      }

      // The range of o1 starts before the lookup date: it is a match if the range also ends after it.
      // (lookupDate can not be null here, otherwise fromCmpLookup would be 0 or 1.)
      //
      if ( fromCmpLookup < 0 && toDate != null ) {
        int toCmpLookup = toDateMeta.compare( toDate, lookupDate );
        if ( toCmpLookup > 0 ) {
          return 0;
        }
      }

      return fromCmpLookup;
    } catch ( Exception e ) {
      throw new RuntimeException( e );
    }
  }

  /**
   * @return the rowMeta
   */
  public RowMetaInterface getRowMeta() {
    return rowMeta;
  }

  /**
   * @param rowMeta
   *          the rowMeta to set
   */
  public void setRowMeta( RowMetaInterface rowMeta ) {
    this.rowMeta = rowMeta;
  }

  /**
   * @return the rowCache
   */
  public List<Object[]> getRowCache() {
    return rowCache;
  }

  /**
   * @param rowCache
   *          the rowCache to set
   */
  public void setRowCache( List<Object[]> rowCache ) {
    this.rowCache = rowCache;
  }

  /**
   * @return the keyIndexes
   */
  public int[] getKeyIndexes() {
    return keyIndexes;
  }

  /**
   * @param keyIndexes
   *          the keyIndexes to set
   */
  public void setKeyIndexes( int[] keyIndexes ) {
    this.keyIndexes = keyIndexes;
  }

  /**
   * @return the fromDateIndex
   */
  public int getFromDateIndex() {
    return fromDateIndex;
  }

  /**
   * @param fromDateIndex
   *          the fromDateIndex to set
   */
  public void setFromDateIndex( int fromDateIndex ) {
    this.fromDateIndex = fromDateIndex;
  }

  /**
   * @return the toDateIndex
   */
  public int getToDateIndex() {
    return toDateIndex;
  }

  /**
   * @param toDateIndex
   *          the toDateIndex to set
   */
  public void setToDateIndex( int toDateIndex ) {
    this.toDateIndex = toDateIndex;
  }
}