// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.multimerge;
import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepIOMetaInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.step.errorhandling.StreamInterface;
/**
* Merge rows from 2 sorted streams and output joined rows with matched key fields. Use this instead of hash join if
* both your input streams are too big to fit in memory. Note that both the inputs must be sorted on the join key.
*
* This is a first prototype implementation that only handles two streams and inner join. It also always outputs all
* values from both streams. Ideally, we should: 1) Support any number of incoming streams 2) Allow user to choose the
* join type (inner, outer) for each stream 3) Allow user to choose which fields to push to next step 4) Have multiple
* output ports as follows: a) Containing matched records b) Unmatched records for each input port 5) Support incoming
* rows to be sorted in either ascending or descending order. The current implementation only supports ascending order.
*
* @author Biswapesh
* @since 24-nov-2006
*/
public class MultiMergeJoin extends BaseStep implements StepInterface {
private static Class> PKG = MultiMergeJoinMeta.class; // for i18n purposes, needed by Translator2!!
private MultiMergeJoinMeta meta;
private MultiMergeJoinData data;
/**
 * Creates the step instance by forwarding every argument, unchanged and in the same order, to the
 * {@link BaseStep} constructor. No other initialisation happens here.
 *
 * @param stepMeta          passed through to the superclass constructor
 * @param stepDataInterface passed through to the superclass constructor
 * @param copyNr            passed through to the superclass constructor; presumably the copy number of this
 *                          step instance - confirm against BaseStep
 * @param transMeta         passed through to the superclass constructor
 * @param trans             passed through to the superclass constructor
 */
public MultiMergeJoin( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
private boolean processFirstRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (MultiMergeJoinMeta) smi;
data = (MultiMergeJoinData) sdi;
TransMeta transMeta = getTransMeta();
TransHopMeta transHopMeta;
StepIOMetaInterface stepIOMeta = meta.getStepIOMeta();
List infoStreams = stepIOMeta.getInfoStreams();
StreamInterface stream;
StepMeta toStepMeta = meta.getParentStepMeta();
StepMeta fromStepMeta;
ArrayList inputStepNameList = new ArrayList();
String[] inputStepNames = meta.getInputSteps();
String inputStepName;
for ( int i = 0; i < infoStreams.size(); i++ ) {
inputStepName = inputStepNames[i];
stream = infoStreams.get( i );
fromStepMeta = stream.getStepMeta();
if ( fromStepMeta == null ) {
//should not arrive here, should typically have been caught by init.
throw new KettleException(
BaseMessages.getString( PKG, "MultiMergeJoin.Log.UnableToFindReferenceStream", inputStepName ) );
}
//check the hop
transHopMeta = transMeta.findTransHop( fromStepMeta, toStepMeta, true );
//there is no hop: this is unexpected.
if ( transHopMeta == null ) {
//should not arrive here, should typically have been caught by init.
throw new KettleException(
BaseMessages.getString( PKG, "MultiMergeJoin.Log.UnableToFindReferenceStream", inputStepName ) );
} else if ( transHopMeta.isEnabled() ) {
inputStepNameList.add( inputStepName );
} else {
logDetailed( BaseMessages.getString( PKG, "MultiMergeJoin.Log.IgnoringStep", inputStepName ) );
}
}
int streamSize = inputStepNameList.size();
if ( streamSize == 0 ) {
return false;
}
String keyField;
String[] keyFields;
data.rowSets = new RowSet[streamSize];
RowSet rowSet;
Object[] row;
data.rows = new Object[streamSize][];
data.metas = new RowMetaInterface[streamSize];
data.rowLengths = new int[streamSize];
MultiMergeJoinData.QueueComparator comparator = new MultiMergeJoinData.QueueComparator( data );
data.queue = new PriorityQueue( streamSize, comparator );
data.results = new ArrayList>( streamSize );
MultiMergeJoinData.QueueEntry queueEntry;
data.queueEntries = new MultiMergeJoinData.QueueEntry[streamSize];
data.drainIndices = new int[streamSize];
data.keyNrs = new int[streamSize][];
data.dummy = new Object[streamSize][];
RowMetaInterface rowMeta;
data.outputRowMeta = new RowMeta();
for ( int i = 0, j = 0; i < inputStepNames.length; i++ ) {
inputStepName = inputStepNames[i];
if ( !inputStepNameList.contains( inputStepName ) ) {
//ignore step with disabled hop.
continue;
}
queueEntry = new MultiMergeJoinData.QueueEntry();
queueEntry.index = j;
data.queueEntries[j] = queueEntry;
data.results.add( new ArrayList