org.pentaho.di.lineage.TransDataLineage Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kettle-engine Show documentation
Show all versions of kettle-engine Show documentation
Container pom for Pentaho Data Integration modules
The newest version!
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.lineage;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
/**
* This class will help calculate and contain the data lineage for all values in the transformation.
* What we will get is a List of ValueLineage objects for all the values of the steps in the transformation.
* Each of these ValueLineage objects contains a list of all the steps it passed through.
* As such, it's a hierarchical view of the transformation.
*
* This view will allow us to see immediately where a certain value is being manipulated.
*
* @author matt
*
*/
public class TransDataLineage {
private TransMeta transMeta;
private List valueLineages;
private Map> fieldStepsMap;
public TransDataLineage( TransMeta transMeta ) {
this.transMeta = transMeta;
this.valueLineages = new ArrayList();
}
public TransMeta getTransMeta() {
return transMeta;
}
public void setTransMeta( TransMeta transMeta ) {
this.transMeta = transMeta;
}
/**
* @return the valueLineages
*/
public List getValueLineages() {
return valueLineages;
}
/**
* @param valueLineages
* the valueLineages to set
*/
public void setValueLineages( List valueLineages ) {
this.valueLineages = valueLineages;
}
/**
* Using the transformation, we will calculate the data lineage for each field in each step.
*
* @throws KettleStepException
* In case there is an exception calculating the lineage. This is usually caused by unavailable data sources
* etc.
*/
public void calculateLineage() throws KettleStepException {
// After sorting the steps we get a map of all the previous steps of a certain step.
//
final Map> stepMap = transMeta.sortStepsNatural();
// However, the we need a sorted list of previous steps per step, not a map.
// So lets sort the maps, turn them into lists...
//
Map> previousStepListMap = new HashMap>();
for ( StepMeta stepMeta : stepMap.keySet() ) {
List previousSteps = new ArrayList();
previousStepListMap.put( stepMeta, previousSteps );
previousSteps.addAll( stepMap.get( stepMeta ).keySet() );
// Sort this list...
//
Collections.sort( previousSteps, new Comparator() {
public int compare( StepMeta o1, StepMeta o2 ) {
Map beforeMap = stepMap.get( o1 );
if ( beforeMap != null ) {
if ( beforeMap.get( o2 ) == null ) {
return -1;
} else {
return 1;
}
} else {
return o1.getName().compareToIgnoreCase( o2.getName() );
}
}
} );
System.out.println( "Step considered: " + stepMeta.getName() );
for ( StepMeta prev : previousSteps ) {
System.out.println( " --> previous step: " + prev.getName() );
}
}
fieldStepsMap = new HashMap>();
List usedSteps = transMeta.getUsedSteps();
for ( StepMeta stepMeta : usedSteps ) {
calculateLineage( stepMeta );
}
}
/**
* Calculate the lineage for the specified step only...
*
* @param stepMeta
* The step to calculate the lineage for.
* @throws KettleStepException
* In case there is an exception calculating the lineage. This is usually caused by unavailable data sources
* etc.
*/
private void calculateLineage( StepMeta stepMeta ) throws KettleStepException {
RowMetaInterface outputMeta = transMeta.getStepFields( stepMeta );
// The lineage is basically a calculation of origin for each output of a certain step.
//
for ( ValueMetaInterface valueMeta : outputMeta.getValueMetaList() ) {
StepMeta originStepMeta = transMeta.findStep( valueMeta.getOrigin(), stepMeta );
if ( originStepMeta != null ) {
/* List list = */fieldStepsMap.get( originStepMeta );
}
}
}
}