org.pentaho.di.trans.steps.sasinput.SasInput Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kettle-engine Show documentation
Show all versions of kettle-engine Show documentation
Container pom for Pentaho Data Integration modules
The newest version!
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.sasinput;
import java.io.File;
import java.util.ArrayList;
import org.eobjects.sassy.SasColumnType;
import org.eobjects.sassy.SasReader;
import org.eobjects.sassy.SasReaderCallback;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Reads data from a SAS file in SAS7BAT format.
*
* @author Matt
* @since 9-OCT-2011
* @version 4.3
*/
public class SasInput extends BaseStep implements StepInterface {
private static Class> PKG = SasInputMeta.class; // for i18n purposes, needed
// by Translator2!!
private SasInputMeta meta;
private SasInputData data;
public SasInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
@Override
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (SasInputMeta) smi;
data = (SasInputData) sdi;
final Object[] fileRowData = getRow();
if ( fileRowData == null ) {
// No more work to do...
//
setOutputDone();
return false;
}
// First we see if we need to get a list of files from input...
//
if ( first ) {
// The output row meta data, what does it look like?
//
data.outputRowMeta = new RowMeta();
// See if the input row contains the filename field...
//
int idx = getInputRowMeta().indexOfValue( meta.getAcceptingField() );
if ( idx < 0 ) {
throw new KettleException( BaseMessages.getString(
PKG, "SASInput.Log.Error.UnableToFindFilenameField", meta.getAcceptingField() ) );
}
// Determine the output row layout
//
data.outputRowMeta = getInputRowMeta().clone();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
}
String rawFilename = getInputRowMeta().getString( fileRowData, meta.getAcceptingField(), null );
final String filename = KettleVFS.getFilename( KettleVFS.getFileObject( rawFilename ) );
data.helper = new SasInputHelper( filename );
logBasic( BaseMessages.getString( PKG, "SASInput.Log.OpenedSASFile" ) + " : [" + data.helper + "]" );
// verify the row layout...
//
if ( data.fileLayout == null ) {
data.fileLayout = data.helper.getRowMeta();
} else {
// Verify that all files are of the same file format, this is a requirement...
//
if ( data.fileLayout.size() != data.helper.getRowMeta().size() ) {
throw new KettleException( "All input files need to have the same number of fields. File '"
+ filename + "' has " + data.helper.getRowMeta().size() + " fields while the first file only had "
+ data.fileLayout.size() );
}
for ( int i = 0; i < data.fileLayout.size(); i++ ) {
ValueMetaInterface first = data.fileLayout.getValueMeta( i );
ValueMetaInterface second = data.helper.getRowMeta().getValueMeta( i );
if ( !first.getName().equalsIgnoreCase( second.getName() ) ) {
throw new KettleException( "Field nr "
+ i + " in file '" + filename + "' is called '" + second.getName() + "' while it was called '"
+ first.getName() + "' in the first file" );
}
if ( first.getType() != second.getType() ) {
throw new KettleException( "Field nr "
+ i + " in file '" + filename + "' is of data type '" + second.getTypeDesc() + "' while it was '"
+ first.getTypeDesc() + "' in the first file" );
}
}
}
// Also make sure that we only read the specified fields, not any other...
//
if ( first ) {
first = false;
data.fieldIndexes = new ArrayList();
for ( SasInputField field : meta.getOutputFields() ) {
int fieldIndex = data.fileLayout.indexOfValue( field.getName() );
if ( fieldIndex < 0 ) {
throw new KettleException( "Selected field '"
+ field.getName() + "' couldn't be found in file '" + filename + "'" );
}
data.fieldIndexes.add( fieldIndex );
}
}
// Add this to the result file names...
//
ResultFile resultFile =
new ResultFile( ResultFile.FILE_TYPE_GENERAL, KettleVFS.getFileObject( filename ), getTransMeta()
.getName(), getStepname() );
resultFile.setComment( BaseMessages.getString( PKG, "SASInput.ResultFile.Comment" ) );
addResultFile( resultFile );
SasReader sasReader = new SasReader( new File( filename ) );
sasReader.read( new SasReaderCallback() {
private boolean firstRead = true;
@Override
public void column( int index, String name, String label, SasColumnType type, int length ) {
}
@Override
public boolean readData() {
return true;
}
@Override
public boolean row( int rowNumber, Object[] rowData ) {
try {
// Let's copy the data for safety
//
if ( firstRead ) {
firstRead = false;
} else {
if ( rowNumber == 1 ) {
return false;
}
}
Object[] row = RowDataUtil.createResizedCopy( fileRowData, data.outputRowMeta.size() );
// Only pick those fields that we're interested in.
//
int outputIndex = getInputRowMeta().size();
for ( int i = 0; i < data.fieldIndexes.size(); i++ ) {
int fieldIndex = data.fieldIndexes.get( i );
int type = data.fileLayout.getValueMeta( fieldIndex ).getType();
switch ( type ) {
case ValueMetaInterface.TYPE_STRING:
row[outputIndex++] = rowData[fieldIndex];
break;
case ValueMetaInterface.TYPE_NUMBER:
Double value = (Double) rowData[fieldIndex];
if ( value.equals( Double.NaN ) ) {
value = null;
}
row[outputIndex++] = value;
break;
default:
throw new RuntimeException( "Unhandled data type '" + ValueMetaFactory.getValueMetaName( type ) );
}
}
// Convert the data type of the new data to the requested data types
//
convertData( data.fileLayout, row, data.outputRowMeta );
// Pass along the row to further steps...
//
putRow( data.outputRowMeta, row );
// System.out.println(rowNumber+" ---- passed row : "+Arrays.toString(rowData));
return !isStopped();
} catch ( Exception e ) {
throw new RuntimeException( "There was an error reading from SAS7BAT file '" + filename + "'", e );
}
}
} );
return true;
}
protected void convertData( RowMetaInterface source, Object[] sourceData, RowMetaInterface target ) throws KettleException {
int targetIndex = getInputRowMeta().size();
for ( int i = 0; i < data.fieldIndexes.size(); i++ ) {
int fieldIndex = data.fieldIndexes.get( i );
ValueMetaInterface sourceValueMeta = source.getValueMeta( fieldIndex );
ValueMetaInterface targetValueMeta = target.getValueMeta( targetIndex );
sourceData[targetIndex] = targetValueMeta.convertData( sourceValueMeta, sourceData[targetIndex] );
targetIndex++;
}
}
@Override
public void stopRunning( StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface ) throws KettleException {
}
}