/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.yamlinput;
import java.util.List;
import org.apache.commons.vfs2.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.fileinput.FileInputList;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
 * Reads YAML files, parses them, converts them to rows, and writes these to one or more output streams.
*
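 * <p>
 * Illustrative sketch (hypothetical configuration, not taken from this file): a step configured
 * with one input field whose path is {@code name} and whose type is String would, for the YAML
 * stream {@code name: foo}, emit one output row with a single column {@code name = "foo"}. The
 * actual YAML-to-row mapping is delegated to {@link YamlReader}.
 * </p>
 *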
* @author Samatar
* @since 20-06-2007
*/
public class YamlInput extends BaseStep implements StepInterface {
private static Class<?> PKG = YamlInputMeta.class; // for i18n purposes, needed by Translator2!!
private YamlInputMeta meta;
private YamlInputData data;
public YamlInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
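/**
 * Checks the resolved file list for required files that are missing or not accessible,
 * logging them and throwing a {@link KettleException} if any are found.
 */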
private void handleMissingFiles() throws KettleException {
List<FileObject> nonExistantFiles = data.files.getNonExistantFiles();
if ( nonExistantFiles.size() != 0 ) {
String message = FileInputList.getRequiredFilesDescription( nonExistantFiles );
logError( BaseMessages.getString( PKG, "YamlInput.Log.RequiredFilesTitle" ), BaseMessages.getString(
PKG, "YamlInput.Log.RequiredFiles", message ) );
throw new KettleException( BaseMessages.getString( PKG, "YamlInput.Log.RequiredFilesMissing", message ) );
}
List<FileObject> nonAccessibleFiles = data.files.getNonAccessibleFiles();
if ( nonAccessibleFiles.size() != 0 ) {
String message = FileInputList.getRequiredFilesDescription( nonAccessibleFiles );
logError( BaseMessages.getString( PKG, "YamlInput.Log.RequiredFilesTitle" ), BaseMessages.getString(
PKG, "YamlInput.Log.RequiredNotAccessibleFiles", message ) );
throw new KettleException( BaseMessages.getString(
PKG, "YamlInput.Log.RequiredNotAccessibleFilesMissing", message ) );
}
}
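/**
 * Reads the next incoming row and prepares the YAML source it carries. On the first row, the
 * output row meta is derived from the input row meta and the position of the configured YAML
 * field is cached. Depending on the step configuration, the field value is treated either as a
 * file name to load or as the YAML content itself.
 *
 * @return true if a YAML source was prepared, false when the input is exhausted or an error
 * occurred
 */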
private boolean readNextString() {
try {
data.readrow = getRow(); // Grab another row ...
if ( data.readrow == null ) {
// finished processing!
if ( log.isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "YamlInput.Log.FinishedProcessing" ) );
}
return false;
}
if ( first ) {
first = false;
data.outputRowMeta = getInputRowMeta().clone();
// Get total previous fields
data.totalPreviousFields = data.outputRowMeta.size();
data.totalOutFields = data.totalPreviousFields + data.nrInputFields;
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
// Check if the Yaml field is provided
if ( Utils.isEmpty( meta.getYamlField() ) ) {
logError( BaseMessages.getString( PKG, "YamlInput.Log.NoField" ) );
throw new KettleException( BaseMessages.getString( PKG, "YamlInput.Log.NoField" ) );
}
// cache the position of the field
data.indexOfYamlField = getInputRowMeta().indexOfValue( meta.getYamlField() );
if ( data.indexOfYamlField < 0 ) {
// The field is unreachable !
logError( BaseMessages.getString( PKG, "YamlInput.Log.ErrorFindingField", meta.getYamlField() ) );
throw new KettleException( BaseMessages.getString( PKG, "YamlInput.Exception.CouldnotFindField", meta
.getYamlField() ) );
}
}
// get field value
String fieldValue = getInputRowMeta().getString( data.readrow, data.indexOfYamlField );
getLinesInput();
if ( log.isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "YamlInput.Log.YAMLStream", meta.getYamlField(), fieldValue ) );
}
if ( meta.getIsAFile() ) {
// source is a file.
data.yaml = new YamlReader();
data.yaml.loadFile( KettleVFS.getFileObject( fieldValue, getTransMeta() ) );
addFileToResultFilesname( data.yaml.getFile() );
} else {
// source is the YAML content itself.
data.yaml = new YamlReader();
data.yaml.loadString( fieldValue );
}
} catch ( Exception e ) {
logError( BaseMessages.getString( PKG, "YamlInput.Log.UnexpectedError", e.toString() ) );
stopAll();
logError( Const.getStackTracker( e ) );
setErrors( 1 );
return false;
}
return true;
}
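/**
 * Adds the given file to the transformation's result file list when the step is configured to
 * report processed files.
 */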
private void addFileToResultFilesname( FileObject file ) throws Exception {
if ( meta.addResultFile() ) {
// Add this to the result file names...
ResultFile resultFile =
new ResultFile( ResultFile.FILE_TYPE_GENERAL, file, getTransMeta().getName(), getStepname() );
resultFile.setComment( BaseMessages.getString( PKG, "YamlInput.Log.FileAddedResult" ) );
addResultFile( resultFile );
}
}
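/**
 * Opens the next file from the input file list and loads it into a fresh {@link YamlReader},
 * skipping empty files when the step is configured to ignore them.
 *
 * @return true if a file was opened, false when all files have been processed or an error
 * occurred
 */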
private boolean openNextFile() {
try {
if ( data.filenr >= data.files.nrOfFiles() ) {
// finished processing!
if ( log.isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "YamlInput.Log.FinishedProcessing" ) );
}
return false;
}
// Get file to process from list
data.file = data.files.getFile( data.filenr );
// Move file pointer ahead!
data.filenr++;
if ( meta.isIgnoreEmptyFile() && data.file.getContent().getSize() == 0 ) {
if ( isBasic() ) {
logBasic( BaseMessages.getString( PKG, "YamlInput.Error.FileSizeZero", data.file.getName() ) );
}
// Empty file: skip it and open the next one, propagating the result
return openNextFile();
} else {
if ( isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "YamlInput.Log.OpeningFile", data.file.toString() ) );
}
// We have a file
// define a Yaml reader and load file
data.yaml = new YamlReader();
data.yaml.loadFile( data.file );
addFileToResultFilesname( data.file );
if ( isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "YamlInput.Log.FileOpened", data.file.toString() ) );
}
}
} catch ( Exception e ) {
logError( BaseMessages.getString( PKG, "YamlInput.Log.UnableToOpenFile", "" + data.filenr, data.file
.toString(), e.toString() ) );
stopAll();
setErrors( 1 );
logError( Const.getStackTracker( e ) );
return false;
}
return true;
}
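/**
 * Called by the transformation engine for each output row. On the first call in file mode, the
 * input file list is resolved, missing files are handled and the output row meta is built.
 * Rows are then extracted and forwarded one at a time until the sources are exhausted or the
 * configured row limit is reached.
 */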
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
if ( first && !meta.isInFields() ) {
first = false;
data.files = meta.getFiles( this );
if ( !meta.isdoNotFailIfNoFile() && data.files.nrOfFiles() == 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "YamlInput.Log.NoFiles" ) );
}
handleMissingFiles();
// Create the output row meta-data
data.outputRowMeta = new RowMeta();
data.totalPreviousFields = 0;
data.totalOutFields = data.totalPreviousFields + data.nrInputFields;
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
data.totalOutStreamFields = data.outputRowMeta.size();
}
// Grab a row
Object[] r = getOneRow();
if ( r == null ) {
setOutputDone(); // signal end to receiver(s)
return false; // end of data or error.
}
if ( log.isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "YamlInput.Log.ReadRow", data.outputRowMeta.getString( r ) ) );
}
incrementLinesOutput();
data.rownr++;
putRow( data.outputRowMeta, r ); // copy row to output rowset(s);
if ( meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit() ) {
// limit has been reached: stop now.
setOutputDone();
return false;
}
return true;
}
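/**
 * Produces the next output row, opening files (file mode) or reading incoming rows (field mode)
 * as needed until a row can be extracted from the current YAML source.
 *
 * @return the next output row, or null when no more data is available
 */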
private Object[] getOneRow() throws KettleException {
Object[] row = null;
boolean rowAvailable = false;
boolean fileOpened = false;
if ( !meta.isInFields() ) {
while ( data.file == null || ( !fileOpened && !rowAvailable ) ) {
if ( data.file != null ) {
// We have opened a file
// read one row
row = getRowData();
if ( row == null ) {
// No row extracted,
// let's try the next file
if ( !openNextFile() ) {
return null;
}
fileOpened = true;
} else {
// We have extracted one row
rowAvailable = true;
}
} else {
// First time we get here
// we have to open a new file
if ( !openNextFile() ) {
return null;
}
fileOpened = true;
}
}
} else {
while ( data.readrow == null || ( !fileOpened && !rowAvailable ) ) {
if ( data.readrow != null ) {
// We have read the incoming Yaml value
// let's get one row
row = getRowData();
if ( row == null ) {
// No row extracted... read the next incoming value
if ( !readNextString() ) {
return null;
}
fileOpened = true;
} else {
// We have returned one row
rowAvailable = true;
}
} else {
// First time we get here
// We have to parse incoming Yaml value
if ( !readNextString() ) {
return null;
}
fileOpened = true;
}
if ( data.readrow == null ) {
return null;
}
}
}
if ( !rowAvailable ) {
row = getRowData();
}
return row;
}
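/**
 * Extracts one row from the current {@link YamlReader} using the configured field meta, merges
 * it with the incoming row data (field mode) or resizes it to the output width (file mode), and
 * appends the optional filename and row number columns. Errors are routed to the error stream
 * when the step is configured for error handling.
 */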
private Object[] getRowData() throws KettleException {
// Build an empty row based on the meta-data
Object[] outputRowData = null;
try {
// Create new row...
outputRowData = data.yaml.getRow( data.rowMeta );
if ( outputRowData == null ) {
return null;
}
if ( data.readrow != null ) {
outputRowData = RowDataUtil.addRowData( data.readrow, data.totalPreviousFields, outputRowData );
} else {
outputRowData = RowDataUtil.resizeArray( outputRowData, data.totalOutStreamFields );
}
int rowIndex = data.totalOutFields;
// See if we need to add the filename to the row...
if ( meta.includeFilename() && !Utils.isEmpty( meta.getFilenameField() ) ) {
outputRowData[rowIndex++] = KettleVFS.getFilename( data.file );
}
// See if we need to add the row number to the row...
if ( meta.includeRowNumber() && !Utils.isEmpty( meta.getRowNumberField() ) ) {
outputRowData[rowIndex++] = Long.valueOf( data.rownr );
}
} catch ( Exception e ) {
boolean sendToErrorRow = false;
String errorMessage = null;
if ( getStepMeta().isDoingErrorHandling() ) {
sendToErrorRow = true;
errorMessage = e.toString();
} else {
logError( BaseMessages.getString( PKG, "YamlInput.ErrorInStepRunning", e.toString() ) );
setErrors( 1 );
stopAll();
logError( Const.getStackTracker( e ) );
setOutputDone(); // signal end to receiver(s)
}
if ( sendToErrorRow ) {
// Simply add this row to the error row
putError( getInputRowMeta(), outputRowData, 1, errorMessage, null, "YamlInput001" );
}
}
return outputRowData;
}
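/**
 * Initializes the step: builds the row meta used to extract values from the YAML source, with
 * one value meta per configured input field, created from the environment-substituted path and
 * the declared type.
 *
 * <p>
 * For illustration (hypothetical field, not taken from this file): a String field with path
 * {@code name} leads to a call equivalent to
 * {@code ValueMetaFactory.createValueMeta( "name", ValueMetaInterface.TYPE_STRING )}.
 * </p>
 */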
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (YamlInputMeta) smi;
data = (YamlInputData) sdi;
if ( super.init( smi, sdi ) ) {
data.rownr = 1L;
data.nrInputFields = meta.getInputFields().length;
data.rowMeta = new RowMeta();
for ( int i = 0; i < data.nrInputFields; i++ ) {
YamlInputField field = meta.getInputFields()[i];
String path = environmentSubstitute( field.getPath() );
try {
ValueMetaInterface valueMeta = ValueMetaFactory.createValueMeta( path, field.getType() );
valueMeta.setTrimType( field.getTrimType() );
data.rowMeta.addValueMeta( valueMeta );
} catch ( Exception e ) {
log.logError( "Unable to create value meta", e );
return false;
}
}
return true;
}
return false;
}
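/**
 * Releases the YAML reader and the current file, ignoring errors on close, then delegates to
 * the base step cleanup.
 */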
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (YamlInputMeta) smi;
data = (YamlInputData) sdi;
if ( data.yaml != null ) {
try {
data.yaml.close();
} catch ( Exception e ) {
// Ignore
}
}
if ( data.file != null ) {
try {
data.file.close();
} catch ( Exception e ) {
// Ignore
}
}
super.dispose( smi, sdi );
}
}