// CHECKSTYLE:FileLength:OFF
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2018 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.step;

import java.io.Closeable;
import java.io.IOException;
import java.net.ServerSocket;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import org.apache.commons.lang.StringUtils;
import org.pentaho.di.core.BlockingRowSet;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.ExtensionDataInterface;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleRowException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.logging.KettleLogStore;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.logging.LoggingObjectInterface;
import org.pentaho.di.core.logging.LoggingObjectType;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaDate;
import org.pentaho.di.core.row.value.ValueMetaNumber;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.partition.PartitionSchema;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.ObjectRevision;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.repository.RepositoryDirectory;
import org.pentaho.di.trans.BasePartitioner;
import org.pentaho.di.trans.SlaveStepCopyPartitionDistribution;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.cluster.TransSplitter;
import org.pentaho.di.trans.step.BaseStepData.StepExecutionStatus;
import org.pentaho.di.trans.steps.mapping.Mapping;
import org.pentaho.di.trans.steps.mappinginput.MappingInput;
import org.pentaho.di.trans.steps.mappingoutput.MappingOutput;
import org.pentaho.di.www.SocketRepository;
import org.pentaho.metastore.api.IMetaStore;

/**
 * This class can be extended for the actual row processing of the implemented step.
 * <p>
 * The implementing class can rely mostly on the base class, and has only three important methods it implements itself.
 * The three methods implement the step lifecycle during transformation execution: initialization, row processing, and
 * clean-up.
 * <ol>
 * <li>Step Initialization<br/>
 * The init() method is called when a transformation is preparing to start execution.
 * <pre>public boolean init(...)</pre>
 * Every step is given the opportunity to do one-time initialization tasks like opening files or establishing database
 * connections. For any steps derived from BaseStep it is mandatory that super.init() is called to ensure correct
 * behavior. The method must return true if the step initialized correctly, and false if there was an initialization
 * error. PDI will abort the execution of a transformation in case any step returns false upon initialization.</li>
 * <li>Row Processing<br/>
 * Once the transformation starts execution it enters a tight loop calling processRow() on each step until the method
 * returns false. Each step typically reads a single row from the input stream, alters the row structure and fields and
 * passes the row on to next steps.
 * <pre>public boolean processRow(...)</pre>
 * A typical implementation queries for incoming input rows by calling getRow(), which blocks and returns a row object
 * or null in case there is no more input. If there was an input row, the step does the necessary row processing and
 * calls putRow() to pass the row on to the next step. If there are no more rows, the step must call setOutputDone()
 * and return false.
 * <p>
 * Formally the method must conform to the following rules:
 * <ul>
 * <li>If the step is done processing all rows, the method must call setOutputDone() and return false.</li>
 * <li>If the step is not done processing all rows, the method must return true. PDI will call processRow() again in
 * this case.</li>
 * </ul>
 * </li>
 * <li>Step Clean-Up<br/>
 * Once the transformation is complete, PDI calls dispose() on all steps.
 * <pre>public void dispose(...)</pre>
 * Steps are required to deallocate resources allocated during init() or subsequent row processing. This typically
 * means to clear all fields of the StepDataInterface object, and to ensure that all open files or connections are
 * properly closed. For any steps derived from BaseStep it is mandatory that super.dispose() is called to ensure
 * correct deallocation.</li>
 * </ol>
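 * <p>
 * For illustration only, a minimal custom step built on this lifecycle might look as follows (the class name and the
 * transformation logic are hypothetical, not part of this API; BaseStep's default init() and dispose() are inherited):
 * <pre>{@code
 * public class UpperCaseStep extends BaseStep implements StepInterface {
 *
 *   public UpperCaseStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr,
 *     TransMeta transMeta, Trans trans ) {
 *     super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
 *   }
 *
 *   public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
 *     Object[] row = getRow(); // blocks until a row is available; null means no more input
 *     if ( row == null ) {
 *       setOutputDone();
 *       return false;
 *     }
 *     if ( first ) {
 *       first = false;
 *       // resolve field indexes from getInputRowMeta() once, on the first row
 *     }
 *     // ... transform the row here ...
 *     putRow( getInputRowMeta(), row ); // hand the row to the next step(s)
 *     return true;
 *   }
 * }
 * }</pre>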
 */
public class BaseStep implements VariableSpace, StepInterface, LoggingObjectInterface, ExtensionDataInterface {
  private static Class<?> PKG = BaseStep.class; // for i18n purposes, needed by Translator2!!

  protected VariableSpace variables = new Variables();

  private TransMeta transMeta;

  private StepMeta stepMeta;

  private String stepname;

  protected LogChannelInterface log;

  private String containerObjectId;

  private Trans trans;

  private final Object statusCountersLock = new Object();

  /**
   * nr of lines read from previous step(s)
   *
   * @deprecated use {@link #getLinesRead()}, {@link #incrementLinesRead()}, or {@link #decrementLinesRead()}
   */
  @Deprecated
  public long linesRead;

  /**
   * nr of lines written to next step(s)
   *
   * @deprecated use {@link #getLinesWritten()}, {@link #incrementLinesWritten()}, or {@link #decrementLinesWritten()}
   */
  @Deprecated
  public long linesWritten;

  /**
   * nr of lines read from file or database
   *
   * @deprecated use {@link #getLinesInput()} or {@link #incrementLinesInput()}
   */
  @Deprecated
  public long linesInput;

  /**
   * nr of lines written to file or database
   *
   * @deprecated use {@link #getLinesOutput()} or {@link #incrementLinesOutput()}
   */
  @Deprecated
  public long linesOutput;

  /**
   * nr of updates in a database table or file
   *
   * @deprecated use {@link #getLinesUpdated()} or {@link #incrementLinesUpdated()}
   */
  @Deprecated
  public long linesUpdated;

  /**
   * nr of lines skipped
   *
   * @deprecated use {@link #getLinesSkipped()} or {@link #incrementLinesSkipped()}
   */
  @Deprecated
  public long linesSkipped;

  /**
   * nr of lines rejected to an error handling step
   *
   * @deprecated use {@link #getLinesRejected()} or {@link #incrementLinesRejected()}
   */
  @Deprecated
  public long linesRejected;

  private boolean distributed;

  private String rowDistributionCode;

  private RowDistributionInterface rowDistribution;

  private long errors;

  private StepMeta[] nextSteps;

  private StepMeta[] prevSteps;

  private int currentInputRowSetNr, currentOutputRowSetNr;

  /**
   * The rowsets on the input, size() == nr of source steps
   */
  private List<RowSet> inputRowSets;

  private final ReentrantReadWriteLock inputRowSetsLock = new ReentrantReadWriteLock();

  /**
   * the rowsets on the output, size() == nr of target steps
   */
  private List<RowSet> outputRowSets;

  private final ReadWriteLock outputRowSetsLock = new ReentrantReadWriteLock();

  /**
   * The remote input steps.
   */
  private List<RemoteStep> remoteInputSteps;

  /**
   * The remote output steps.
   */
  private List<RemoteStep> remoteOutputSteps;

  /**
   * the rowset for the error rows
   */
  private RowSet errorRowSet;

  private AtomicBoolean running;

  private AtomicBoolean stopped;

  protected AtomicBoolean safeStopped;

  private AtomicBoolean paused;

  private boolean init;

  /**
   * the copy number of this thread
   */
  private int stepcopy;

  private Date start_time, stop_time;

  /**
   * if true then the row being processed is the first row
   */
  public boolean first;

  /** */
  public boolean terminator;

  public List<Object[]> terminator_rows;

  private StepMetaInterface stepMetaInterface;

  private StepDataInterface stepDataInterface;

  /**
   * The list of RowListener interfaces
   */
  protected List<RowListener> rowListeners;

  /**
   * Map of files that are generated or used by this step. After execution, these can be added to result. The entry to
   * the map is the filename.
   */
  private final Map<String, ResultFile> resultFiles;
  private final ReentrantReadWriteLock resultFilesLock;
  /**
   * This contains the first row received and will be the reference row. We use it to perform extra checking: see if we
   * don't get rows with "mixed" contents.
   */
  private RowMetaInterface inputReferenceRow;

  /**
   * This field tells the putRow() method that we are in partitioned mode
   */
  private boolean partitioned;

  /**
   * The partition ID at which this step copy runs, or null if this step is not running partitioned.
   */
  private String partitionID;

  /**
   * This field tells the putRow() method to re-partition the incoming data. See also
   * StepPartitioningMeta.PARTITIONING_METHOD_*
   */
  private int repartitioning;

  /**
   * The partitionID to rowset mapping
   */
  private Map<String, BlockingRowSet> partitionTargets;

  private RowMetaInterface inputRowMeta;

  /**
   * step partitioning information of the NEXT step
   */
  private StepPartitioningMeta nextStepPartitioningMeta;

  /**
   * The metadata information of the error output row. There is only one per step so we cache it
   */
  private RowMetaInterface errorRowMeta = null;

  private RowMetaInterface previewRowMeta;

  private boolean checkTransRunning;

  private int slaveNr;

  private int clusterSize;

  private int uniqueStepNrAcrossSlaves;

  private int uniqueStepCountAcrossSlaves;

  private boolean remoteOutputStepsInitialized;

  private boolean remoteInputStepsInitialized;

  private RowSet[] partitionNrRowSetList;

  /**
   * A list of server sockets that need to be closed during transformation cleanup.
   */
  private List<ServerSocket> serverSockets;

  private static int NR_OF_ROWS_IN_BLOCK = 500;

  private int blockPointer;

  /**
   * A flag to indicate that clustered partitioning was not yet initialized
   */
  private boolean clusteredPartitioningFirst;

  /**
   * A flag to determine whether we are doing local or clustered (remote) partitioning
   */
  private boolean clusteredPartitioning;

  private boolean usingThreadPriorityManagment;

  private List<StepListener> stepListeners;

  /**
   * The socket repository to use when opening server side sockets in clustering mode
   */
  private SocketRepository socketRepository;

  /**
   * The upper buffer size boundary after which we manage the thread priority a little bit to prevent excessive locking
   */
  private int upperBufferBoundary;

  /**
   * The lower buffer size boundary after which we manage the thread priority a little bit to prevent excessive locking
   */
  private int lowerBufferBoundary;

  /**
   * maximum number of errors to allow
   */
  private Long maxErrors = -1L;

  /**
   * maximum percent of errors to allow
   */
  private int maxPercentErrors = -1;

  /**
   * minimum number of rows to process before using maxPercentErrors in calculation
   */
  private long minRowsForMaxErrorPercent = -1L;

  /**
   * set this flag to true to allow empty field names and types to output
   */
  private boolean allowEmptyFieldNamesAndTypes = false;

  /**
   * Keeps track of the number of rows read for input deadlock verification.
   */
  protected long deadLockCounter;

  /**
   * The repository used by the step to load and reference Kettle objects with at runtime
   */
  protected Repository repository;

  /**
   * The metastore that the step uses to load external elements from
   */
  protected IMetaStore metaStore;

  protected Map<String, Object> extensionDataMap;

  /**
   * rowHandler handles getting/putting rows and putting errors.
   * Default implementation defers to corresponding methods in this class.
   */
  private RowHandler rowHandler;

  /**
   * This is the base step that forms the basis for all steps. You can derive from this class to implement your own
   * steps.
   *
   * @param stepMeta          The StepMeta object to run.
   * @param stepDataInterface the data object to store temporary data, database connections, caches, result sets,
   *                          hashtables etc.
   * @param copyNr            The copy number for this step.
   * @param transMeta         The TransMeta of which the step stepMeta is part.
   * @param trans             The (running) transformation to obtain information shared among the steps.
   */
  public BaseStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    this.stepMeta = stepMeta;
    this.stepDataInterface = stepDataInterface;
    this.stepcopy = copyNr;
    this.transMeta = transMeta;
    this.trans = trans;
    this.stepname = stepMeta.getName();
    this.socketRepository = trans.getSocketRepository();

    // Set the name of the thread
    if ( stepMeta.getName() == null ) {
      throw new RuntimeException( "A step in transformation [" + transMeta.toString()
        + "] doesn't have a name. A step should always have a name to identify it by." );
    }

    log = KettleLogStore.getLogChannelInterfaceFactory().create( this, trans );

    first = true;
    clusteredPartitioningFirst = true;

    running = new AtomicBoolean( false );
    stopped = new AtomicBoolean( false );
    safeStopped = new AtomicBoolean( false );
    paused = new AtomicBoolean( false );

    init = false;

    synchronized ( statusCountersLock ) {
      linesRead = 0L; // new AtomicLong(0L); // Keep some statistics!
      linesWritten = 0L; // new AtomicLong(0L);
      linesUpdated = 0L; // new AtomicLong(0L);
      linesSkipped = 0L; // new AtomicLong(0L);
      linesRejected = 0L; // new AtomicLong(0L);
      linesInput = 0L; // new AtomicLong(0L);
      linesOutput = 0L; // new AtomicLong(0L);
    }

    inputRowSets = null;
    outputRowSets = null;
    nextSteps = null;

    terminator = stepMeta.hasTerminator();
    if ( terminator ) {
      terminator_rows = new ArrayList<Object[]>();
    } else {
      terminator_rows = null;
    }

    // debug="-";

    start_time = null;
    stop_time = null;

    distributed = stepMeta.isDistributes();
    rowDistribution = stepMeta.getRowDistribution();

    if ( distributed ) {
      if ( rowDistribution != null ) {
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.CustomRowDistributionActivated",
            rowDistributionCode ) );
        }
      } else {
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.DistributionActivated" ) );
        }
      }
    } else {
      if ( log.isDetailed() ) {
        logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.DistributionDeactivated" ) );
      }
    }

    rowListeners = new CopyOnWriteArrayList<RowListener>();
    resultFiles = new HashMap<String, ResultFile>();
    resultFilesLock = new ReentrantReadWriteLock();

    repartitioning = StepPartitioningMeta.PARTITIONING_METHOD_NONE;
    partitionTargets = new Hashtable<String, BlockingRowSet>();

    serverSockets = new ArrayList<ServerSocket>();

    extensionDataMap = new HashMap<String, Object>();

    // tuning parameters
    // putTimeOut = 10; //s
    // getTimeOut = 500; //s
    // timeUnit = TimeUnit.MILLISECONDS;
    // the smaller singleWaitTime, the faster the program runs but it costs CPU
    // singleWaitTime = 1; //ms
    // maxPutWaitCount = putTimeOut*1000/singleWaitTime;
    // maxGetWaitCount = getTimeOut*1000/singleWaitTime;
    // worker = Executors.newFixedThreadPool(10);

    checkTransRunning = false;

    blockPointer = 0;

    stepListeners = Collections.synchronizedList( new ArrayList<StepListener>() );

    dispatch();

    upperBufferBoundary = (int) ( transMeta.getSizeRowset() * 0.99 );
    lowerBufferBoundary = (int) ( transMeta.getSizeRowset() * 0.01 );
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#init(org.pentaho.di.trans.step.StepMetaInterface,
   * org.pentaho.di.trans.step.StepDataInterface)
   */
  @Override
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    sdi.setStatus( StepExecutionStatus.STATUS_INIT );

    String slaveNr = transMeta.getVariable( Const.INTERNAL_VARIABLE_SLAVE_SERVER_NUMBER );
    String clusterSize = transMeta.getVariable( Const.INTERNAL_VARIABLE_CLUSTER_SIZE );
    boolean master = "Y".equalsIgnoreCase( transMeta.getVariable(
      Const.INTERNAL_VARIABLE_CLUSTER_MASTER ) );

    if ( !Utils.isEmpty( slaveNr ) && !Utils.isEmpty( clusterSize ) && !master ) {
      this.slaveNr = Integer.parseInt( slaveNr );
      this.clusterSize = Integer.parseInt( clusterSize );

      if ( log.isDetailed() ) {
        logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.ReleasedServerSocketOnPort", slaveNr, clusterSize ) );
      }
    } else {
      this.slaveNr = 0;
      this.clusterSize = 0;
    }

    // Also set the internal variable for the partition
    //
    SlaveStepCopyPartitionDistribution partitionDistribution = transMeta.getSlaveStepCopyPartitionDistribution();

    if ( stepMeta.isPartitioned() ) {
      // See if we are partitioning remotely
      //
      if ( partitionDistribution != null && !partitionDistribution.getDistribution().isEmpty() ) {
        String slaveServerName = getVariable( Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME );
        int stepCopyNr = stepcopy;

        // Look up the partition nr...
        // Set the partition ID (string) as well as the partition nr [0..size[
        //
        PartitionSchema partitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();

        int partitionNr = partitionDistribution.getPartition( slaveServerName, partitionSchema.getName(), stepCopyNr );
        if ( partitionNr >= 0 ) {
          String partitionNrString = new DecimalFormat( "000" ).format( partitionNr );
          setVariable( Const.INTERNAL_VARIABLE_STEP_PARTITION_NR, partitionNrString );

          if ( partitionDistribution.getOriginalPartitionSchemas() != null ) {
            // What is the partition schema name?
            //
            String partitionSchemaName = stepMeta.getStepPartitioningMeta().getPartitionSchema().getName();

            // Search the original partition schema in the distribution...
            //
            for ( PartitionSchema originalPartitionSchema : partitionDistribution.getOriginalPartitionSchemas() ) {
              String slavePartitionSchemaName =
                TransSplitter.createSlavePartitionSchemaName( originalPartitionSchema.getName() );
              if ( slavePartitionSchemaName.equals( partitionSchemaName ) ) {
                PartitionSchema schema = (PartitionSchema) originalPartitionSchema.clone();

                // This is the one...
                //
                if ( schema.isDynamicallyDefined() ) {
                  schema.expandPartitionsDynamically( this.clusterSize, this );
                }

                String partID = schema.getPartitionIDs().get( partitionNr );
                setVariable( Const.INTERNAL_VARIABLE_STEP_PARTITION_ID, partID );
                break;
              }
            }
          }
        }
      } else {
        // This is a locally partitioned step...
        //
        int partitionNr = stepcopy;
        String partitionNrString = new DecimalFormat( "000" ).format( partitionNr );
        setVariable( Const.INTERNAL_VARIABLE_STEP_PARTITION_NR, partitionNrString );
        final List<String> partitionIDList = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();

        if ( partitionIDList.size() > 0 ) {
          String partitionID = partitionIDList.get( partitionNr );
          setVariable( Const.INTERNAL_VARIABLE_STEP_PARTITION_ID, partitionID );
        } else {
          logError( BaseMessages.getString( PKG, "BaseStep.Log.UnableToRetrievePartitionId",
            stepMeta.getStepPartitioningMeta().getPartitionSchema().getName() ) );
          return false;
        }
      }
    } else if ( !Utils.isEmpty( partitionID ) ) {
      setVariable( Const.INTERNAL_VARIABLE_STEP_PARTITION_ID, partitionID );
    }
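
    // Usage note (illustrative): the internal variables set above can be referenced by the step's
    // own configuration at runtime, e.g. ${Internal.Step.Partition.ID} in an output filename so
    // that every partition copy writes to its own file. The exact names are defined by the
    // Const.INTERNAL_VARIABLE_STEP_PARTITION_* constants used here.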
    // Set a unique step number across all slave servers
    //
    // slaveNr * nrCopies + copyNr
    //
    uniqueStepNrAcrossSlaves = this.slaveNr * getStepMeta().getCopies() + stepcopy;
    uniqueStepCountAcrossSlaves =
      this.clusterSize <= 1 ? getStepMeta().getCopies() : this.clusterSize * getStepMeta().getCopies();
    if ( uniqueStepCountAcrossSlaves == 0 ) {
      uniqueStepCountAcrossSlaves = 1;
    }

    setVariable( Const.INTERNAL_VARIABLE_STEP_UNIQUE_NUMBER, Integer.toString( uniqueStepNrAcrossSlaves ) );
    setVariable( Const.INTERNAL_VARIABLE_STEP_UNIQUE_COUNT, Integer.toString( uniqueStepCountAcrossSlaves ) );
    setVariable( Const.INTERNAL_VARIABLE_STEP_COPYNR, Integer.toString( stepcopy ) );

    // BACKLOG-18004
    allowEmptyFieldNamesAndTypes = Boolean.parseBoolean( System.getProperties().getProperty(
      Const.KETTLE_ALLOW_EMPTY_FIELD_NAMES_AND_TYPES, "false" ) );

    // Now that these things have been done, we also need to start a number of server sockets.
    // One for each of the remote output steps that we're going to write to.
    //
    try {
      // If this is on the master, separate logic applies.
      //
      // boolean isMaster = "Y".equalsIgnoreCase(getVariable(Const.INTERNAL_VARIABLE_CLUSTER_MASTER));

      remoteOutputSteps = new ArrayList<RemoteStep>();
      for ( int i = 0; i < stepMeta.getRemoteOutputSteps().size(); i++ ) {
        RemoteStep remoteStep = stepMeta.getRemoteOutputSteps().get( i );

        // If the step runs in multiple copies, we only want to open every socket once.
        //
        if ( getCopy() == remoteStep.getSourceStepCopyNr() ) {
          // Open a server socket to allow the remote output step to connect.
          //
          RemoteStep copy = (RemoteStep) remoteStep.clone();
          try {
            if ( log.isDetailed() ) {
              logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.SelectedRemoteOutputStepToServer",
                copy, copy.getTargetStep(), copy.getTargetStepCopyNr(), copy.getPort() ) );
            }
            copy.openServerSocket( this );
            if ( log.isDetailed() ) {
              logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.OpenedServerSocketConnectionTo", copy ) );
            }
          } catch ( Exception e ) {
            logError( "Unable to open server socket during step initialisation: " + copy.toString(), e );
            throw e;
          }
          remoteOutputSteps.add( copy );
        }
      }
    } catch ( Exception e ) {
      for ( RemoteStep remoteStep : remoteOutputSteps ) {
        if ( remoteStep.getServerSocket() != null ) {
          try {
            ServerSocket serverSocket = remoteStep.getServerSocket();
            getTrans().getSocketRepository().releaseSocket( serverSocket.getLocalPort() );
          } catch ( IOException e1 ) {
            logError( "Unable to close server socket after error during step initialisation", e );
          }
        }
      }
      return false;
    }

    // For the remote input steps to read from, we do the same: make a list and initialize what we can...
    //
    try {
      remoteInputSteps = new ArrayList<RemoteStep>();

      if ( ( stepMeta.isPartitioned() && getClusterSize() > 1 ) || stepMeta.getCopies() > 1 ) {
        // If the step is partitioned or has multiple copies and clustered, we only want to take one remote input
        // step per copy. This is where we make that selection...
        //
        for ( int i = 0; i < stepMeta.getRemoteInputSteps().size(); i++ ) {
          RemoteStep remoteStep = stepMeta.getRemoteInputSteps().get( i );
          if ( remoteStep.getTargetStepCopyNr() == stepcopy ) {
            RemoteStep copy = (RemoteStep) remoteStep.clone();
            remoteInputSteps.add( copy );
          }
        }
      } else {
        for ( RemoteStep remoteStep : stepMeta.getRemoteInputSteps() ) {
          RemoteStep copy = (RemoteStep) remoteStep.clone();
          remoteInputSteps.add( copy );
        }
      }
    } catch ( Exception e ) {
      logError( "Unable to initialize remote input steps during step initialisation", e );
      return false;
    }

    // Getting and setting the error handling values:
    // first, get the step meta
    StepErrorMeta stepErrorMeta = stepMeta.getStepErrorMeta();
    if ( stepErrorMeta != null ) {

      // Do an environment substitution for stepErrorMeta.getMaxErrors(), stepErrorMeta.getMinPercentRows()
      // and stepErrorMeta.getMaxPercentErrors().
      // Catch NumberFormatException since the user can enter anything in the dialog - the value
      // they enter must be a number or a variable set to a number.
      // We will use a boolean to indicate failure so that we can log all errors - not just the first one caught.
      boolean envSubFailed = false;
      try {
        maxErrors = ( !Utils.isEmpty( stepErrorMeta.getMaxErrors() )
          ? Long.valueOf( trans.environmentSubstitute( stepErrorMeta.getMaxErrors() ) ) : -1L );
      } catch ( NumberFormatException nfe ) {
        log.logError( BaseMessages.getString( PKG, "BaseStep.Log.NumberFormatException",
          BaseMessages.getString( PKG, "BaseStep.Property.MaxErrors.Name" ), this.stepname,
          ( stepErrorMeta.getMaxErrors() != null ? stepErrorMeta.getMaxErrors() : "" ) ) );
        envSubFailed = true;
      }

      try {
        minRowsForMaxErrorPercent = ( !Utils.isEmpty( stepErrorMeta.getMinPercentRows() )
          ? Long.valueOf( trans.environmentSubstitute( stepErrorMeta.getMinPercentRows() ) ) : -1L );
      } catch ( NumberFormatException nfe ) {
        log.logError( BaseMessages.getString( PKG, "BaseStep.Log.NumberFormatException",
          BaseMessages.getString( PKG, "BaseStep.Property.MinRowsForErrorsPercentCalc.Name" ), this.stepname,
          ( stepErrorMeta.getMinPercentRows() != null ? stepErrorMeta.getMinPercentRows() : "" ) ) );
        envSubFailed = true;
      }

      try {
        maxPercentErrors = ( !Utils.isEmpty( stepErrorMeta.getMaxPercentErrors() )
          ? Integer.valueOf( trans.environmentSubstitute( stepErrorMeta.getMaxPercentErrors() ) ) : -1 );
      } catch ( NumberFormatException nfe ) {
        log.logError( BaseMessages.getString( PKG, "BaseStep.Log.NumberFormatException",
          BaseMessages.getString( PKG, "BaseStep.Property.MaxPercentErrors.Name" ), this.stepname,
          ( stepErrorMeta.getMaxPercentErrors() != null
            ? stepErrorMeta.getMaxPercentErrors() : "" ) ) );
        envSubFailed = true;
      }

      // If the environment substitution failed, abort initialization.
      if ( envSubFailed ) {
        return false;
      }
    }

    return true;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#dispose(org.pentaho.di.trans.step.StepMetaInterface,
   * org.pentaho.di.trans.step.StepDataInterface)
   */
  @Override
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    sdi.setStatus( StepExecutionStatus.STATUS_DISPOSED );
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#cleanup()
   */
  @Override
  public void cleanup() {
    for ( ServerSocket serverSocket : serverSockets ) {
      try {
        socketRepository.releaseSocket( serverSocket.getLocalPort() );
        logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.ReleasedServerSocketOnPort",
          serverSocket.getLocalPort() ) );
      } catch ( IOException e ) {
        logError( "Cleanup: Unable to release server socket (" + serverSocket.getLocalPort() + ")", e );
      }
    }

    List<RemoteStep> remoteInputSteps = getRemoteInputSteps();
    if ( remoteInputSteps != null ) {
      cleanupRemoteSteps( remoteInputSteps );
    }

    List<RemoteStep> remoteOutputSteps = getRemoteOutputSteps();
    if ( remoteOutputSteps != null ) {
      cleanupRemoteSteps( remoteOutputSteps );
    }
  }

  static void cleanupRemoteSteps( List<RemoteStep> remoteSteps ) {
    for ( RemoteStep remoteStep : remoteSteps ) {
      remoteStep.cleanup();
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#getProcessed()
   */
  @Override
  public long getProcessed() {
    if ( getLinesRead() > getLinesWritten() ) {
      return getLinesRead();
    } else {
      return getLinesWritten();
    }
  }

  /**
   * Sets the copy.
   *
   * @param cop the new copy
   */
  public void setCopy( int cop ) {
    stepcopy = cop;
  }

  /**
   * @return The step's copy number (default 0)
   */
  @Override
  public int getCopy() {
    return stepcopy;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#getErrors()
   */
  @Override
  public long getErrors() {
    return errors;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#setErrors(long)
   */
  @Override
  public void setErrors( long e ) {
    errors = e;
  }

  /**
   * @return Returns the number of lines read from previous steps
   */
  @Override
  public long getLinesRead() {
    synchronized ( statusCountersLock ) {
      return linesRead;
    }
  }

  /**
   * Increments the number of lines read from previous steps by one
   *
   * @return Returns the new value
   */
  public long incrementLinesRead() {
    synchronized ( statusCountersLock ) {
      return ++linesRead;
    }
  }

  /**
   * Decrements the number of lines read from previous steps by one
   *
   * @return Returns the new value
   */
  public long decrementLinesRead() {
    synchronized ( statusCountersLock ) {
      return --linesRead;
    }
  }

  /**
   * @param newLinesReadValue the new number of lines read from previous steps
   */
  public void setLinesRead( long newLinesReadValue ) {
    synchronized ( statusCountersLock ) {
      linesRead = newLinesReadValue;
    }
  }

  /**
   * @return Returns the number of lines read from an input source: database, file, socket, etc.
   */
  @Override
  public long getLinesInput() {
    synchronized ( statusCountersLock ) {
      return linesInput;
    }
  }

  /**
   * Increments the number of lines read from an input source: database, file, socket, etc.
   *
   * @return the new incremented value
   */
  public long incrementLinesInput() {
    synchronized ( statusCountersLock ) {
      return ++linesInput;
    }
  }
  /**
   * @param newLinesInputValue the new number of lines read from an input source: database, file, socket, etc.
   */
  public void setLinesInput( long newLinesInputValue ) {
    synchronized ( statusCountersLock ) {
      linesInput = newLinesInputValue;
    }
  }

  /**
   * @return Returns the number of lines written to an output target: database, file, socket, etc.
   */
  @Override
  public long getLinesOutput() {
    synchronized ( statusCountersLock ) {
      return linesOutput;
    }
  }

  /**
   * Increments the number of lines written to an output target: database, file, socket, etc.
   *
   * @return the new incremented value
   */
  public long incrementLinesOutput() {
    synchronized ( statusCountersLock ) {
      return ++linesOutput;
    }
  }

  /**
   * @param newLinesOutputValue the new number of lines written to an output target: database, file, socket, etc.
   */
  public void setLinesOutput( long newLinesOutputValue ) {
    synchronized ( statusCountersLock ) {
      linesOutput = newLinesOutputValue;
    }
  }

  /**
   * @return Returns the linesWritten.
   */
  @Override
  public long getLinesWritten() {
    synchronized ( statusCountersLock ) {
      return linesWritten;
    }
  }

  /**
   * Increments the number of lines written to next steps by one
   *
   * @return Returns the new value
   */
  public long incrementLinesWritten() {
    synchronized ( statusCountersLock ) {
      return ++linesWritten;
    }
  }

  /**
   * Decrements the number of lines written to next steps by one
   *
   * @return Returns the new value
   */
  public long decrementLinesWritten() {
    synchronized ( statusCountersLock ) {
      return --linesWritten;
    }
  }

  /**
   * @param newLinesWrittenValue the new number of lines written to next steps
   */
  public void setLinesWritten( long newLinesWrittenValue ) {
    synchronized ( statusCountersLock ) {
      linesWritten = newLinesWrittenValue;
    }
  }

  /**
   * @return Returns the number of lines updated in an output target: database, file, socket, etc.
   */
  @Override
  public long getLinesUpdated() {
    synchronized ( statusCountersLock ) {
      return linesUpdated;
    }
  }

  /**
   * Increments the number of lines updated in an output target: database, file, socket, etc.
   *
   * @return the new incremented value
   */
  public long incrementLinesUpdated() {
    synchronized ( statusCountersLock ) {
      return ++linesUpdated;
    }
  }
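
  /*
   * Usage note (illustrative): getRow() and putRow() maintain linesRead/linesWritten themselves.
   * Steps that talk to external systems are expected to update the remaining counters, e.g. a
   * database-writing step would typically call:
   *
   *   incrementLinesOutput();   // a row was written to the database
   *   incrementLinesUpdated();  // a row was updated instead of inserted
   *
   * All counters are guarded by statusCountersLock, so the step thread can update them while the
   * monitoring views read them.
   */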
  /**
   * @param newLinesUpdatedValue the new number of lines updated in an output target: database, file, socket, etc.
   */
  public void setLinesUpdated( long newLinesUpdatedValue ) {
    synchronized ( statusCountersLock ) {
      linesUpdated = newLinesUpdatedValue;
    }
  }

  /**
   * @return the number of lines rejected to an error handling step
   */
  @Override
  public long getLinesRejected() {
    synchronized ( statusCountersLock ) {
      return linesRejected;
    }
  }

  /**
   * Increments the number of lines rejected to an error handling step
   *
   * @return the new incremented value
   */
  public long incrementLinesRejected() {
    synchronized ( statusCountersLock ) {
      return ++linesRejected;
    }
  }

  /**
   * @param newLinesRejectedValue the new number of lines rejected to an error handling step
   */
  @Override
  public void setLinesRejected( long newLinesRejectedValue ) {
    synchronized ( statusCountersLock ) {
      linesRejected = newLinesRejectedValue;
    }
  }

  /**
   * @return the number of lines skipped
   */
  public long getLinesSkipped() {
    synchronized ( statusCountersLock ) {
      return linesSkipped;
    }
  }

  /**
   * Increments the number of lines skipped
   *
   * @return the new incremented value
   */
  public long incrementLinesSkipped() {
    synchronized ( statusCountersLock ) {
      return ++linesSkipped;
    }
  }

  /**
   * @param newLinesSkippedValue the new number of lines skipped
   */
  public void setLinesSkipped( long newLinesSkippedValue ) {
    synchronized ( statusCountersLock ) {
      linesSkipped = newLinesSkippedValue;
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#getStepname()
   */
  @Override
  public String getStepname() {
    return stepname;
  }

  /**
   * Sets the stepname.
   *
   * @param stepname the new stepname
   */
  public void setStepname( String stepname ) {
    this.stepname = stepname;
  }

  /**
   * Gets the dispatcher.
   *
   * @return the dispatcher
   */
  public Trans getDispatcher() {
    return trans;
  }

  /**
   * Gets the status description.
   *
   * @return the status description
   */
  public String getStatusDescription() {
    return getStatus().getDescription();
  }

  /**
   * @return Returns the stepMetaInterface.
   */
  public StepMetaInterface getStepMetaInterface() {
    return stepMetaInterface;
  }

  /**
   * @param stepMetaInterface The stepMetaInterface to set.
   */
  public void setStepMetaInterface( StepMetaInterface stepMetaInterface ) {
    this.stepMetaInterface = stepMetaInterface;
  }

  /**
   * @return Returns the stepDataInterface.
   */
  public StepDataInterface getStepDataInterface() {
    return stepDataInterface;
  }

  /**
   * @param stepDataInterface The stepDataInterface to set.
   */
  public void setStepDataInterface( StepDataInterface stepDataInterface ) {
    this.stepDataInterface = stepDataInterface;
  }

  /**
   * @return Returns the stepMeta.
   */
  @Override
  public StepMeta getStepMeta() {
    return stepMeta;
  }

  /**
   * @param stepMeta The stepMeta to set.
   */
  public void setStepMeta( StepMeta stepMeta ) {
    this.stepMeta = stepMeta;
  }

  /**
   * @return Returns the transMeta.
   */
  public TransMeta getTransMeta() {
    return transMeta;
  }

  /**
   * @param transMeta The transMeta to set.
   */
  public void setTransMeta( TransMeta transMeta ) {
    this.transMeta = transMeta;
  }

  /**
   * @return Returns the trans.
   */
  @Override
  public Trans getTrans() {
    return trans;
  }
  /**
   * putRow is used to copy a row to the alternate rowset(s). This should get priority over everything else!
   * (synchronized) If distribute is true, a row is copied to only one of the output rowsets; otherwise a copy is sent
   * to each rowset.
   *
   * @param row The row to put to the destination rowset(s).
   * @throws KettleStepException
   */
  @Override
  public void putRow( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
    if ( rowMeta != null ) {
      if ( !allowEmptyFieldNamesAndTypes ) {
        // check row meta for empty field name (BACKLOG-18004)
        for ( ValueMetaInterface vmi : rowMeta.getValueMetaList() ) {
          if ( StringUtils.isBlank( vmi.getName() ) ) {
            throw new KettleStepException( "Please set a field name for all field(s) that have 'null'." );
          }
          if ( vmi.getType() <= 0 ) {
            throw new KettleStepException( "Please set a value for the missing field(s) type." );
          }
        }
      }
    }
    getRowHandler().putRow( rowMeta, row );
  }

  private void handlePutRow( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
    // Are we pausing the step? If so, stall forever...
    //
    while ( paused.get() && !stopped.get() ) {
      try {
        Thread.sleep( 1 );
      } catch ( InterruptedException e ) {
        throw new KettleStepException( e );
      }
    }

    // Right after the pause loop we have to check if this thread is stopped or not.
    //
    if ( stopped.get() && !safeStopped.get() ) {
      if ( log.isDebug() ) {
        logDebug( BaseMessages.getString( PKG, "BaseStep.Log.StopPuttingARow" ) );
      }
      stopAll();
      return;
    }

    // Have all threads started?
    // Are we running yet? If not, wait a bit until all threads have been started.
    //
    if ( this.checkTransRunning == false ) {
      while ( !trans.isRunning() && !stopped.get() ) {
        try {
          Thread.sleep( 1 );
        } catch ( InterruptedException e ) {
          // Ignore
        }
      }
      this.checkTransRunning = true;
    }

    // call all row listeners...
    //
    for ( RowListener listener : rowListeners ) {
      listener.rowWrittenEvent( rowMeta, row );
    }

    // Keep adding to terminator_rows buffer...
    //
    if ( terminator && terminator_rows != null ) {
      try {
        terminator_rows.add( rowMeta.cloneRow( row ) );
      } catch ( KettleValueException e ) {
        throw new KettleStepException( "Unable to clone row while adding rows to the terminator rows.", e );
      }
    }

    outputRowSetsLock.readLock().lock();
    try {
      if ( outputRowSets.isEmpty() ) {
        // No more output rowsets!
        // Still update the nr of lines written.
        //
        incrementLinesWritten();

        return; // we're done here!
      }

      // Repartitioning happens when the current step is not partitioned, but the next one is.
      // That means we need to look up the partitioning information in the next step..
      // If there are multiple steps, we need to look at the first (they should be all the same)
      //
      switch ( repartitioning ) {
        case StepPartitioningMeta.PARTITIONING_METHOD_NONE:
          noPartitioning( rowMeta, row );
          break;

        case StepPartitioningMeta.PARTITIONING_METHOD_SPECIAL:
          specialPartitioning( rowMeta, row );
          break;

        case StepPartitioningMeta.PARTITIONING_METHOD_MIRROR:
          mirrorPartitioning( rowMeta, row );
          break;

        default:
          throw new KettleStepException( "Internal error: invalid repartitioning type: " + repartitioning );
      }
    } finally {
      outputRowSetsLock.readLock().unlock();
    }
  }

  /**
   * Copy always to all target steps/copies
   */
  private void mirrorPartitioning( RowMetaInterface rowMeta, Object[] row ) {
    for ( RowSet rowSet : outputRowSets ) {
      putRowToRowSet( rowSet, rowMeta, row );
    }
  }

  private void specialPartitioning( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
    if ( nextStepPartitioningMeta == null ) {
      // Look up the partitioning of the next step.
      // This is the case for non-clustered partitioning...
      //
      List<StepMeta> nextSteps = transMeta.findNextSteps( stepMeta );
      if ( nextSteps.size() > 0 ) {
        nextStepPartitioningMeta = nextSteps.get( 0 ).getStepPartitioningMeta();
      }

      // TODO: throw exception if we're not partitioning yet.
      // For now it throws a NullPointerException.
    }

    int partitionNr;
    try {
      partitionNr = nextStepPartitioningMeta.getPartition( rowMeta, row );
    } catch ( KettleException e ) {
      throw new KettleStepException(
        "Unable to convert a value to integer while calculating the partition number", e );
    }

    RowSet selectedRowSet = null;

    if ( clusteredPartitioningFirst ) {
      clusteredPartitioningFirst = false;

      // We are only running remotely if both the distribution is there AND the distribution actually contains
      // something.
      //
      clusteredPartitioning = transMeta.getSlaveStepCopyPartitionDistribution() != null
        && !transMeta.getSlaveStepCopyPartitionDistribution().getDistribution().isEmpty();
    }

    // OK, we have a SlaveStepCopyPartitionDistribution in the transformation...
    // We want to pre-calculate what rowset we're sending data to for which partition...
    // It is only valid in clustering / partitioning situations.
    // When doing a local partitioning, it is much simpler.
    //
    if ( clusteredPartitioning ) {

      // This next block is only performed once for speed...
      //
      if ( partitionNrRowSetList == null ) {
        partitionNrRowSetList = new RowSet[ outputRowSets.size() ];

        // The distribution is calculated during transformation split.
        // The slave-step-copy distribution is passed onto the slave transformation.
        //
        SlaveStepCopyPartitionDistribution distribution = transMeta.getSlaveStepCopyPartitionDistribution();

        String nextPartitionSchemaName =
          TransSplitter.createPartitionSchemaNameFromTarget( nextStepPartitioningMeta.getPartitionSchema().getName() );

        for ( RowSet outputRowSet : outputRowSets ) {
          try {
            // Look at the pre-determined distribution, decided at "transformation split" time.
            //
            int partNr = distribution.getPartition( outputRowSet.getRemoteSlaveServerName(),
              nextPartitionSchemaName, outputRowSet.getDestinationStepCopy() );

            if ( partNr < 0 ) {
              throw new KettleStepException( "Unable to find partition using rowset data, slave="
                + outputRowSet.getRemoteSlaveServerName() + ", partition schema="
                + nextStepPartitioningMeta.getPartitionSchema().getName() + ", copy="
                + outputRowSet.getDestinationStepCopy() );
            }
            partitionNrRowSetList[ partNr ] = outputRowSet;
          } catch ( NullPointerException e ) {
            throw ( e );
          }
        }
      }

      // OK, now get the target partition based on the partition nr...
      // This should be very fast
      //
      if ( partitionNr < partitionNrRowSetList.length ) {
        selectedRowSet = partitionNrRowSetList[ partitionNr ];
      } else {
        String rowsets = "";
        for ( RowSet rowSet : partitionNrRowSetList ) {
          rowsets += "[" + rowSet.toString() + "] ";
        }
        throw new KettleStepException( "Internal error: the referenced partition nr '" + partitionNr
          + "' is higher than the maximum of '" + ( partitionNrRowSetList.length - 1 )
          + "'. The available row sets are: {" + rowsets + "}" );
      }

      if ( selectedRowSet == null ) {
        logBasic( BaseMessages.getString( PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr ) );
      } else {
        // Wait
        putRowToRowSet( selectedRowSet, rowMeta, row );
        incrementLinesWritten();

        if ( log.isRowLevel() ) {
          try {
            logRowlevel( "Partitioned #" + partitionNr + " to " + selectedRowSet + ", row=" + rowMeta.getString( row ) );
          } catch ( KettleValueException e ) {
            throw new KettleStepException( e );
          }
        }
      }
    } else {
      // Local partitioning...
      // Put the row forward to the next step according to the partition rule.
      //
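      // Note (added for clarity): outputRowSets is laid out per target step, partition by
      // partition, so for the i-th next step the rowset feeding partition 'partitionNr' sits at
      // index partitionNr + i * partCount, with partCount computed below.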
      // Count of partitioned rows at one step
      int partCount = ( (BasePartitioner) nextStepPartitioningMeta.getPartitioner() ).getNrPartitions();

      for ( int i = 0; i < nextSteps.length; i++ ) {
        selectedRowSet = outputRowSets.get( partitionNr + i * partCount );

        if ( selectedRowSet == null ) {
          logBasic( BaseMessages.getString( PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr ) );
        } else {
          // Wait
          putRowToRowSet( selectedRowSet, rowMeta, row );
          incrementLinesWritten();

          if ( log.isRowLevel() ) {
            try {
              logRowlevel( BaseMessages.getString( PKG, "BaseStep.PartitionedToRow", partitionNr,
                selectedRowSet, rowMeta.getString( row ) ) );
            } catch ( KettleValueException e ) {
              throw new KettleStepException( e );
            }
          }
        }
      }
    }
  }

  private void noPartitioning( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
    if ( distributed ) {
      if ( rowDistribution != null ) {
        // Plugin defined row distribution!
        //
        rowDistribution.distributeRow( rowMeta, row, this );
        incrementLinesWritten();
      } else {
        // ROUND ROBIN DISTRIBUTION:
        // --------------------------
        // Copy the row to the "next" output rowset.
        // We keep the next one in out_handling.
        //
        RowSet rs = outputRowSets.get( currentOutputRowSetNr );

        // To reduce stress on the locking system we are NOT going to allow
        // the buffer to grow to its full capacity.
        //
        if ( isUsingThreadPriorityManagment() && !rs.isDone() && rs.size() >= upperBufferBoundary && !isStopped() ) {
          try {
            Thread.sleep( 0, 1 );
          } catch ( InterruptedException e ) {
            // Ignore sleep interruption exception
          }
        }

        // Loop until we find room in the target rowset
        //
        putRowToRowSet( rs, rowMeta, row );
        incrementLinesWritten();

        // Now determine the next output rowset!
        // Only if we have more than one output...
        //
        if ( outputRowSets.size() > 1 ) {
          currentOutputRowSetNr++;
          if ( currentOutputRowSetNr >= outputRowSets.size() ) {
            currentOutputRowSetNr = 0;
          }
        }
      }
    } else {

      // Copy the row to all output rowsets
      //

      // Copy to the row in the other output rowsets...
      for ( int i = 1; i < outputRowSets.size(); i++ ) { // start at 1
        RowSet rs = outputRowSets.get( i );

        // To reduce stress on the locking system we are NOT going to allow
        // the buffer to grow to its full capacity.
        //
        if ( isUsingThreadPriorityManagment() && !rs.isDone() && rs.size() >= upperBufferBoundary && !isStopped() ) {
          try {
            Thread.sleep( 0, 1 );
          } catch ( InterruptedException e ) {
            // Ignore sleep interruption exception
          }
        }

        try {
          // Loop until we find room in the target rowset
          //
          putRowToRowSet( rs, rowMeta, rowMeta.cloneRow( row ) );
          incrementLinesWritten();
        } catch ( KettleValueException e ) {
          throw new KettleStepException( "Unable to clone row while copying rows to multiple target steps", e );
        }
      }

      // set row in first output rowset
      //
      RowSet rs = outputRowSets.get( 0 );
      putRowToRowSet( rs, rowMeta, row );
      incrementLinesWritten();
    }
  }

  private void putRowToRowSet( RowSet rs, RowMetaInterface rowMeta, Object[] row ) {
    RowMetaInterface toBeSent;
    RowMetaInterface metaFromRs = rs.getRowMeta();
    if ( metaFromRs == null ) {
      // RowSet is not initialised so far
      toBeSent = rowMeta.clone();
    } else {
      // use the existing one
      toBeSent = metaFromRs;
    }

    while ( !rs.putRow( toBeSent, row ) ) {
      if ( isStopped() && !safeStopped.get() ) {
        return;
      }
    }
  }
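
  /*
   * Example (illustrative): steps that route rows to specific named targets (say, the "true" and
   * "false" branches of a filter) bypass the distribution logic above and write to one concrete
   * rowset. The target step name below is hypothetical:
   *
   *   RowSet trueStream = findOutputRowSet( "store true rows here" );
   *   putRowTo( getInputRowMeta(), row, trueStream );
   */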
  /**
   * putRowTo is used to put a row in a certain specific RowSet.
   *
   * @param rowMeta The row meta-data to put to the destination RowSet.
   * @param row     the data to put in the RowSet
   * @param rowSet  the RowSet to put the row into.
   * @throws KettleStepException In case something unexpected goes wrong
   */
  public void putRowTo( RowMetaInterface rowMeta, Object[] row, RowSet rowSet ) throws KettleStepException {
    getRowHandler().putRowTo( rowMeta, row, rowSet );
  }

  public void handlePutRowTo( RowMetaInterface rowMeta, Object[] row, RowSet rowSet ) throws KettleStepException {

    // Are we pausing the step? If so, stall forever...
    //
    while ( paused.get() && !stopped.get() ) {
      try {
        Thread.sleep( 1 );
      } catch ( InterruptedException e ) {
        throw new KettleStepException( e );
      }
    }

    // call all row listeners...
    //
    for ( RowListener listener : rowListeners ) {
      listener.rowWrittenEvent( rowMeta, row );
    }

    // Keep adding to terminator_rows buffer...
    if ( terminator && terminator_rows != null ) {
      try {
        terminator_rows.add( rowMeta.cloneRow( row ) );
      } catch ( KettleValueException e ) {
        throw new KettleStepException( "Unable to clone row while adding rows to the terminator buffer", e );
      }
    }

    if ( stopped.get() ) {
      if ( log.isDebug() ) {
        logDebug( BaseMessages.getString( PKG, "BaseStep.Log.StopPuttingARow" ) );
      }
      stopAll();
      return;
    }

    // Don't distribute or anything, only go to this rowset!
    //
    while ( !rowSet.putRow( rowMeta, row ) ) {
      if ( isStopped() ) {
        break;
      }
    }
    incrementLinesWritten();
  }

  /**
   * Put error.
   *
   * @param rowMeta           the row meta
   * @param row               the row
   * @param nrErrors          the nr errors
   * @param errorDescriptions the error descriptions
   * @param fieldNames        the field names
   * @param errorCodes        the error codes
   * @throws KettleStepException the kettle step exception
   */
  public void putError( RowMetaInterface rowMeta, Object[] row, long nrErrors, String errorDescriptions,
    String fieldNames, String errorCodes ) throws KettleStepException {
    getRowHandler().putError( rowMeta, row, nrErrors, errorDescriptions, fieldNames, errorCodes );
  }

  private void handlePutError( RowMetaInterface rowMeta, Object[] row, long nrErrors, String errorDescriptions,
    String fieldNames, String errorCodes ) throws KettleStepException {
    if ( trans.isSafeModeEnabled() ) {
      if ( rowMeta.size() > row.length ) {
        throw new KettleStepException( BaseMessages.getString( PKG,
          "BaseStep.Exception.MetadataDoesntMatchDataRowSize", Integer.toString( rowMeta.size() ),
          Integer.toString( row != null ? row.length : 0 ) ) );
      }
    }

    StepErrorMeta stepErrorMeta = stepMeta.getStepErrorMeta();

    if ( errorRowMeta == null ) {
      errorRowMeta = rowMeta.clone();

      RowMetaInterface add = stepErrorMeta.getErrorRowMeta( nrErrors, errorDescriptions, fieldNames, errorCodes );
      errorRowMeta.addRowMeta( add );
    }

    Object[] errorRowData = RowDataUtil.allocateRowData( errorRowMeta.size() );
    if ( row != null ) {
      System.arraycopy( row, 0, errorRowData, 0, rowMeta.size() );
    }

    // Also add the error fields...
    stepErrorMeta.addErrorRowData( errorRowData, rowMeta.size(), nrErrors, errorDescriptions, fieldNames, errorCodes );

    // call all row listeners...
    for ( RowListener listener : rowListeners ) {
      listener.errorRowWrittenEvent( rowMeta, row );
    }

    if ( errorRowSet != null ) {
      while ( !errorRowSet.putRow( errorRowMeta, errorRowData ) ) {
        if ( isStopped() ) {
          break;
        }
      }
      incrementLinesRejected();
    }

    verifyRejectionRates();
  }
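
  /*
   * Example (illustrative): a step with error handling configured diverts a bad row instead of
   * failing the whole transformation. The description, field name and error code below are
   * hypothetical:
   *
   *   if ( conversionFailed ) {
   *     putError( getInputRowMeta(), row, 1L, "Could not parse the price field", "price", "PARSE001" );
   *     return true; // continue with the next row
   *   }
   */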
  /**
   * Verify rejection rates.
   */
  private void verifyRejectionRates() {
    StepErrorMeta stepErrorMeta = stepMeta.getStepErrorMeta();
    if ( stepErrorMeta == null ) {
      return; // nothing to verify.
    }

    // Was this one error too much?
    if ( maxErrors > 0 && getLinesRejected() > maxErrors ) {
      logError( BaseMessages.getString( PKG, "BaseStep.Log.TooManyRejectedRows",
        Long.toString( maxErrors ), Long.toString( getLinesRejected() ) ) );
      setErrors( 1L );
      stopAll();
    }

    if ( maxPercentErrors > 0 && getLinesRejected() > 0
      && ( minRowsForMaxErrorPercent <= 0 || getLinesRead() >= minRowsForMaxErrorPercent ) ) {
      int pct = (int) Math.ceil( 100 * (double) getLinesRejected() / getLinesRead() );
      // additional conversion for PDI-10210
      if ( pct > maxPercentErrors ) {
        logError( BaseMessages.getString( PKG, "BaseStep.Log.MaxPercentageRejectedReached",
          Integer.toString( pct ), Long.toString( getLinesRejected() ), Long.toString( getLinesRead() ) ) );
        setErrors( 1L );
        stopAll();
      }
    }
  }

  /**
   * Current input stream.
   *
   * @return the row set
   */
  @VisibleForTesting
  RowSet currentInputStream() {
    inputRowSetsLock.readLock().lock();
    try {
      return inputRowSets.get( currentInputRowSetNr );
    } finally {
      inputRowSetsLock.readLock().unlock();
    }
  }

  /**
   * Find the next not-finished input-stream... in_handling says which one...
   */
  private void nextInputStream() {
    blockPointer = 0;

    int streams = inputRowSets.size();

    // No more streams left: exit!
    if ( streams == 0 ) {
      return;
    }

    // Just the one rowSet (common case)
    if ( streams == 1 ) {
      currentInputRowSetNr = 0;
    }

    // If we have some left: take the next!
    currentInputRowSetNr++;
    if ( currentInputRowSetNr >= streams ) {
      currentInputRowSetNr = 0;
    }
  }

  /**
   * Wait until the transformation is completely running and all threads have been started.
   */
  protected void waitUntilTransformationIsStarted() {
    // Have all threads started?
    // Are we running yet? If not, wait a bit until all threads have been started.
    //
    if ( this.checkTransRunning == false ) {
      while ( !trans.isRunning() && !stopped.get() ) {
        try {
          Thread.sleep( 1 );
        } catch ( InterruptedException e ) {
          // Ignore sleep interruption exception
        }
      }
      this.checkTransRunning = true;
    }
  }

  /**
   * In case of getRow, we receive data from previous steps through the input rowset. In case we split the stream, we
   * have to copy the data to the alternate splits: rowsets 1 through n.
   */
  @Override
  public Object[] getRow() throws KettleException {
    return getRowHandler().getRow();
  }

  private Object[] handleGetRow() throws KettleException {

    // Are we pausing the step? If so, stall forever...
    //
    while ( paused.get() && !stopped.get() ) {
      try {
        Thread.sleep( 100 );
      } catch ( InterruptedException e ) {
        throw new KettleStepException( e );
      }
    }

    if ( stopped.get() ) {
      if ( log.isDebug() ) {
        logDebug( BaseMessages.getString( PKG, "BaseStep.Log.StopLookingForMoreRows" ) );
      }
      stopAll();
      return null;
    }

    // Small startup check
    //
    waitUntilTransformationIsStarted();

    // See if we need to open sockets to remote input steps...
    //
    openRemoteInputStepSocketsOnce();

    RowSet inputRowSet = null;
    Object[] row = null;

    inputRowSetsLock.readLock().lock();
    try {
      // If everything is finished, we can stop immediately!
      //
      if ( inputRowSets.isEmpty() ) {
        return null;
      }

      // Do we need to switch to the next input stream?
      if ( blockPointer >= NR_OF_ROWS_IN_BLOCK ) {

        // Take a peek at the next input stream.
        // If there is no data, process another NR_OF_ROWS_IN_BLOCK on the next input stream.
        //
        for ( int r = 0; r < inputRowSets.size() && row == null; r++ ) {
          nextInputStream();
          inputRowSet = currentInputStream();
          row = inputRowSet.getRowImmediate();
        }
        if ( row != null ) {
          incrementLinesRead();
        }
      } else {
        // What's the current input stream?
        inputRowSet = currentInputStream();
      }

      // To reduce stress on the locking system we are going to allow
      // the buffer to grow beyond "a few" entries.
      // We'll only do that if the previous step has not ended...
      //
      if ( isUsingThreadPriorityManagment() && !inputRowSet.isDone() && inputRowSet.size() <= lowerBufferBoundary
        && !isStopped() ) {
        try {
          Thread.sleep( 0, 1 );
        } catch ( InterruptedException e ) {
          // Ignore sleep interruption exception
        }
      }

      // See if this step is receiving partitioned data...
      // In that case it might be the case that one input row set is receiving all data and
      // the other rowsets nothing. (repartitioning on the same key would do that)
      //
      // We never guaranteed that the input rows would be read one by one alternatively.
      // So in THIS particular case it is safe to just read 100 rows from one rowset, then switch to another etc.
      // We can use timeouts to switch from one to another...
      //
      while ( row == null && !isStopped() ) {
        // Get a row from the input in row set ...
        // Timeout immediately if nothing is there to read.
        // We will then switch to the next row set to read from...
        //
        row = inputRowSet.getRowWait( 1, TimeUnit.MILLISECONDS );
        if ( row != null ) {
          incrementLinesRead();
          blockPointer++;
        } else {
          // Try once more...
          // If the row is still empty and the row set is done, we remove the row set from
          // the input stream and move on to the next one...
          //
          if ( inputRowSet.isDone() ) {
            row = inputRowSet.getRowWait( 1, TimeUnit.MILLISECONDS );
            if ( row == null ) {

              // Must release the read lock before acquisition of the write lock to prevent deadlocks.
              inputRowSetsLock.readLock().unlock();

              // Another thread might acquire the write lock before we do,
              // and invalidate the data we have just read.
              //
              // This is actually fine, as long as we only want to remove the current rowSet - ArrayList ignores
              // non-existing elements when removing.
              inputRowSetsLock.writeLock().lock();
              try {
                inputRowSets.remove( inputRowSet );
                if ( inputRowSets.isEmpty() ) {
                  return null; // We're completely done.
                }
              } finally {
                inputRowSetsLock.readLock().lock(); // downgrade to read lock
                inputRowSetsLock.writeLock().unlock();
              }
            } else {
              incrementLinesRead();
            }
          }
          nextInputStream();
          inputRowSet = currentInputStream();
        }
      }

      // This rowSet is perhaps no longer giving back rows?
      //
      while ( row == null && !stopped.get() ) {
        // Try the next input row set(s) until we find a row set that still has rows...
        // The getRowFrom() method removes row sets from the input row sets list.
        //
        if ( inputRowSets.isEmpty() ) {
          return null; // We're done.
        }

        nextInputStream();
        inputRowSet = currentInputStream();
        row = getRowFrom( inputRowSet );
      }
    } finally {
      inputRowSetsLock.readLock().unlock();
    }

    // Also set the meta data on the first occurrence,
    // or if prevSteps.length > 1: inputRowMeta can be changed in that case.
    if ( inputRowMeta == null || prevSteps.length > 1 ) {
      inputRowMeta = inputRowSet.getRowMeta();
    }

    if ( row != null ) {
      // OK, before we return the row, let's see if we need to check on mixing row compositions...
      //
      if ( trans.isSafeModeEnabled() ) {
        transMeta.checkRowMixingStatically( stepMeta, null );
      }

      for ( RowListener listener : rowListeners ) {
        listener.rowReadEvent( inputRowMeta, row );
      }
    }

    // Check the rejection rates etc. as well.
    verifyRejectionRates();

    return row;
  }
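
  /*
   * Usage note (illustrative): the rowListeners invoked above are the hook for observing a step's
   * row traffic from outside, e.g. for previews or tests. A listener is attached with code along
   * these lines (RowAdapter is the no-op base implementation of RowListener in this package):
   *
   *   step.addRowListener( new RowAdapter() {
   *     public void rowReadEvent( RowMetaInterface rowMeta, Object[] row ) {
   *       // inspect every row this step reads
   *     }
   *   } );
   */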
  /**
   * RowHandler controls how getRow/putRow are handled.
   * The default RowHandler will simply call {@link #handleGetRow()} and
   * {@link #handlePutRow(RowMetaInterface, Object[])}.
   */
  public void setRowHandler( RowHandler rowHandler ) {
    Preconditions.checkNotNull( rowHandler );
    this.rowHandler = rowHandler;
  }

  public RowHandler getRowHandler() {
    if ( rowHandler == null ) {
      rowHandler = new DefaultRowHandler();
    }
    return this.rowHandler;
  }
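
  /*
   * Note (illustrative): swapping the RowHandler changes where this step sources and emits rows
   * without touching the step implementation itself; alternative execution engines can use this
   * hook to re-route row traffic. A custom handler is installed with:
   *
   *   step.setRowHandler( myRowHandler ); // myRowHandler implements RowHandler
   *
   * where myRowHandler supplies its own getRow()/putRow()/putError() behaviour instead of the
   * DefaultRowHandler used above.
   */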
  /**
   * Opens socket connections to the remote input steps of this step.
   * <p>
   * This method should be used by steps that do not call getRow() first, where it is executed automatically.
   * <p>
   * This method should be called before any data is read from previous steps.
   * <p>
   * This action is executed only once.
   *
   * @throws KettleStepException
   */
  protected void openRemoteInputStepSocketsOnce() throws KettleStepException {
    if ( !remoteInputSteps.isEmpty() ) {
      if ( !remoteInputStepsInitialized ) {
        // Loop over the remote steps and open client sockets to them.
        // Just be careful in case we're dealing with a partitioned clustered step.
        // A partitioned clustered step has only one. (see dispatch())
        //
        inputRowSetsLock.writeLock().lock();
        try {
          for ( RemoteStep remoteStep : remoteInputSteps ) {
            try {
              BlockingRowSet rowSet = remoteStep.openReaderSocket( this );
              inputRowSets.add( rowSet );
            } catch ( Exception e ) {
              throw new KettleStepException( "Error opening reader socket to remote step '" + remoteStep + "'", e );
            }
          }
        } finally {
          inputRowSetsLock.writeLock().unlock();
        }

        remoteInputStepsInitialized = true;
      }
    }
  }
  /**
   * Opens socket connections to the remote output steps of this step.
   * <p>
   * This method is called in method initBeforeStart() because it needs to connect to the server sockets (remote steps)
   * as soon as possible to avoid time-out situations.
   * <p>
   * This action is executed only once.
   *
   * @throws KettleStepException
   */
  protected void openRemoteOutputStepSocketsOnce() throws KettleStepException {
    if ( !remoteOutputSteps.isEmpty() ) {
      if ( !remoteOutputStepsInitialized ) {

        outputRowSetsLock.writeLock().lock();
        try {
          // Set the current slave target name on all the current output steps (local)
          //
          for ( RowSet rowSet : outputRowSets ) {
            rowSet.setRemoteSlaveServerName( getVariable( Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME ) );
            if ( getVariable( Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME ) == null ) {
              throw new KettleStepException( "Variable '"
                + Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME + "' is not defined." );
            }
          }

          // Start threads: one per remote step to funnel the data through...
          //
          for ( RemoteStep remoteStep : remoteOutputSteps ) {
            try {
              if ( remoteStep.getTargetSlaveServerName() == null ) {
                throw new KettleStepException(
                  "The target slave server name is not defined for remote output step: " + remoteStep );
              }
              BlockingRowSet rowSet = remoteStep.openWriterSocket();
              if ( log.isDetailed() ) {
                logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.OpenedWriterSocketToRemoteStep", remoteStep ) );
              }
              outputRowSets.add( rowSet );
            } catch ( IOException e ) {
              throw new KettleStepException( "Error opening writer socket to remote step '" + remoteStep + "'", e );
            }
          }
        } finally {
          outputRowSetsLock.writeLock().unlock();
        }

        remoteOutputStepsInitialized = true;
      }
    }
  }

  /**
   * Safe mode checking.
   *
   * @param row the row
   * @throws KettleRowException the kettle row exception
   */
  protected void safeModeChecking( RowMetaInterface row ) throws KettleRowException {
    if ( row == null ) {
      return;
    }

    if ( inputReferenceRow == null ) {
      inputReferenceRow = row.clone(); // copy it!

      // Check for double field names.
      //
      String[] fieldnames = row.getFieldNames();
      Arrays.sort( fieldnames );
      for ( int i = 0; i < fieldnames.length - 1; i++ ) {
        if ( fieldnames[ i ].equals( fieldnames[ i + 1 ] ) ) {
          throw new KettleRowException( BaseMessages.getString( PKG,
            "BaseStep.SafeMode.Exception.DoubleFieldnames", fieldnames[ i ] ) );
        }
      }
    } else {
      safeModeChecking( inputReferenceRow, row );
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#identifyErrorOutput()
   */
  @Override
  public void identifyErrorOutput() {
    if ( stepMeta.isDoingErrorHandling() ) {
      StepErrorMeta stepErrorMeta = stepMeta.getStepErrorMeta();
      outputRowSetsLock.writeLock().lock();
      try {
        for ( int rowsetNr = 0; rowsetNr < outputRowSets.size(); rowsetNr++ ) {
          RowSet outputRowSet = outputRowSets.get( rowsetNr );
          if ( outputRowSet.getDestinationStepName().equalsIgnoreCase( stepErrorMeta.getTargetStep().getName() ) ) {
            // This is the rowset to move!
            //
            errorRowSet = outputRowSet;
            outputRowSets.remove( rowsetNr );
            return;
          }
        }
      } finally {
        outputRowSetsLock.writeLock().unlock();
      }
    }
  }
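
  /*
   * Example (illustrative): the safe-mode checks around here catch streams with inconsistent
   * layouts. With safe mode enabled on the transformation, a step that receives
   * ( customerId : Integer, name : String ) from one preceding step and
   * ( name : String, customerId : Integer ) from another fails with a "mixing layout"
   * KettleRowException instead of silently producing corrupt rows.
   */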
for ( int i = 0; i < referenceRowMeta.size(); i++ ) { ValueMetaInterface referenceValue = referenceRowMeta.getValueMeta( i ); ValueMetaInterface compareValue = rowMeta.getValueMeta( i ); if ( !referenceValue.getName().equalsIgnoreCase( compareValue.getName() ) ) { throw new KettleRowException( BaseMessages.getString( PKG, "BaseStep.SafeMode.Exception.MixingLayout", "" + ( i + 1 ), referenceValue.getName() + " " + referenceValue.toStringMeta(), compareValue.getName() + " " + compareValue.toStringMeta() ) ); } if ( referenceValue.getType() != compareValue.getType() ) { throw new KettleRowException( BaseMessages.getString( PKG, "BaseStep.SafeMode.Exception.MixingTypes", "" + ( i + 1 ), referenceValue.getName() + " " + referenceValue.toStringMeta(), compareValue.getName() + " " + compareValue.toStringMeta() ) ); } if ( referenceValue.getStorageType() != compareValue.getStorageType() ) { throw new KettleRowException( BaseMessages.getString( PKG, "BaseStep.SafeMode.Exception.MixingStorageTypes", "" + ( i + 1 ), referenceValue.getName() + " " + referenceValue.toStringMeta(), compareValue.getName() + " " + compareValue.toStringMeta() ) ); } } } } /** * Gets the row from. * * @param rowSet the row set * @return the row from * @throws KettleStepException the kettle step exception */ public Object[] getRowFrom( RowSet rowSet ) throws KettleStepException { return getRowHandler().getRowFrom( rowSet ); } public Object[] handleGetRowFrom( RowSet rowSet ) throws KettleStepException { // Are we pausing the step? If so, stall forever... // while ( paused.get() && !stopped.get() ) { try { Thread.sleep( 10 ); } catch ( InterruptedException e ) { throw new KettleStepException( e ); } } // Have all threads started? // Are we running yet? If not, wait a bit until all threads have been // started. if ( this.checkTransRunning == false ) { while ( !trans.isRunning() && !stopped.get() ) { try { Thread.sleep( 1 ); } catch ( InterruptedException e ) { // Ignore sleep interruption exception } } this.checkTransRunning = true; } Object[] rowData = null; // To reduce stress on the locking system we are going to allow // The buffer to grow beyond "a few" entries. // We'll only do that if the previous step has not ended... // if ( isUsingThreadPriorityManagment() && !rowSet.isDone() && rowSet.size() <= lowerBufferBoundary && !isStopped() ) { try { Thread.sleep( 0, 1 ); } catch ( InterruptedException e ) { // Ignore sleep interruption exception } } // Grab a row... If nothing received after a timeout, try again. // rowData = rowSet.getRow(); while ( rowData == null && !rowSet.isDone() && !stopped.get() ) { rowData = rowSet.getRow(); // Verify deadlocks! // /* * if (rowData==null) { if (getInputRowSets().size()>1 && getLinesRead()==deadLockCounter) { * verifyInputDeadLock(); } deadLockCounter=getLinesRead(); } */ } // Still nothing: no more rows to be had? // if ( rowData == null && rowSet.isDone() ) { // Try one more time to get a row to make sure we don't get a // race-condition between the get and the isDone() // rowData = rowSet.getRow(); } if ( stopped.get() ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "BaseStep.Log.StopLookingForMoreRows" ) ); } stopAll(); return null; } if ( rowData == null && rowSet.isDone() ) { // Try one more time... // rowData = rowSet.getRow(); if ( rowData == null ) { // Must release the read lock before acquisition of the write lock to prevent deadlocks. // // But #handleGetRowFrom() can be called either from outside or from handleGetRow(). 
        // So the current thread might hold the read lock (possibly reentrantly) and might not.
        // We therefore must release it conditionally.
        int holdCount = inputRowSetsLock.getReadHoldCount();
        for ( int i = 0; i < holdCount; i++ ) {
          inputRowSetsLock.readLock().unlock();
        }
        // Just like in the handleGetRow() method, another thread might acquire the write lock before we do.
        // Here this is also fine, since we only want to remove the given rowSet - ArrayList ignores non-existing
        // elements when removing.
        inputRowSetsLock.writeLock().lock();
        try {
          inputRowSets.remove( rowSet );

          // Downgrade to read lock by restoring the previous state before releasing the write lock
          for ( int i = 0; i < holdCount; i++ ) {
            inputRowSetsLock.readLock().lock();
          }

          return null;
        } finally {
          inputRowSetsLock.writeLock().unlock();
        }
      }
    }

    incrementLinesRead();

    // call all rowlisteners...
    //
    for ( RowListener listener : rowListeners ) {
      listener.rowReadEvent( rowSet.getRowMeta(), rowData );
    }

    return rowData;
  }

  /**
   * Checks for a reading-step deadlock. The check works as follows:
   * <ul>
   * <li>A step sees that it can't get a new row from its input.</li>
   * <li>It verifies that there is more than one input row set and that at least one is full and at least one is
   * empty.</li>
   * <li>It then finds a step in the transformation (situated before the reader step) which has at least one full
   * and one empty output row set.</li>
   * <li>If this situation presents itself, and if it happens twice with the same rows-read count (meaning: a
   * stalled reading step), we throw an exception.</li>
   * </ul>
   *
   * @throws KettleStepException
   */
  protected void verifyInputDeadLock() throws KettleStepException {
    RowSet inputFull = null;
    RowSet inputEmpty = null;
    for ( RowSet rowSet : getInputRowSets() ) {
      if ( rowSet.size() == transMeta.getSizeRowset() ) {
        inputFull = rowSet;
      } else if ( rowSet.size() == 0 ) {
        inputEmpty = rowSet;
      }
    }
    if ( inputFull != null && inputEmpty != null ) {
      // Find a step where
      // - the input rowsets are full
      // - one output rowset is full
      // - one output rowset is empty
      for ( StepMetaDataCombi combi : trans.getSteps() ) {
        int inputSize = 0;
        List<RowSet> combiInputRowSets = combi.step.getInputRowSets();
        int totalSize = combiInputRowSets.size() * transMeta.getSizeRowset();
        for ( RowSet rowSet : combiInputRowSets ) {
          inputSize += rowSet.size();
        }
        // All full probably means a stalled step.
        List<RowSet> combiOutputRowSets = combi.step.getOutputRowSets();
        if ( inputSize > 0 && inputSize == totalSize && combiOutputRowSets.size() > 1 ) {
          RowSet outputFull = null;
          RowSet outputEmpty = null;
          for ( RowSet rowSet : combiOutputRowSets ) {
            if ( rowSet.size() == transMeta.getSizeRowset() ) {
              outputFull = rowSet;
            } else if ( rowSet.size() == 0 ) {
              outputEmpty = rowSet;
            }
          }
          if ( outputFull != null && outputEmpty != null ) {
            // Verify that this step is situated before the current one
            //
            if ( transMeta.findPrevious( stepMeta, combi.stepMeta ) ) {
              throw new KettleStepException( "A deadlock was detected between steps '" + combi.stepname + "' and '"
                + stepname
                + "'. The steps are both waiting for each other because a series of row set buffers filled up." );
            }
          }
        }
      }
    }
  }

  /**
   * Find input row set.
   *
   * @param sourceStep the source step
   * @return the row set
   * @throws KettleStepException the kettle step exception
   */
  public RowSet findInputRowSet( String sourceStep ) throws KettleStepException {
    // Check to see that "sourceStep" only runs in a single copy.
    // Otherwise you'll see problems during execution.
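    // (Caller sketch, with a hypothetical upstream step name -- an info stream is
    //  typically read separately from the main getRow() loop:
    //    RowSet infoSet = findInputRowSet( "Read lookup data" );
    //    Object[] infoRow = getRowFrom( infoSet );
    //  )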
// StepMeta sourceStepMeta = transMeta.findStep( sourceStep ); if ( sourceStepMeta == null ) { throw new KettleStepException( BaseMessages.getString( PKG, "BaseStep.Exception.SourceStepToReadFromDoesntExist", sourceStep ) ); } if ( sourceStepMeta.getCopies() > 1 ) { throw new KettleStepException( BaseMessages.getString( PKG, "BaseStep.Exception.SourceStepToReadFromCantRunInMultipleCopies", sourceStep, Integer .toString( sourceStepMeta.getCopies() ) ) ); } return findInputRowSet( sourceStep, 0, getStepname(), getCopy() ); } /** * Find input row set. * * @param from the from * @param fromcopy the fromcopy * @param to the to * @param tocopy the tocopy * @return the row set */ public RowSet findInputRowSet( String from, int fromcopy, String to, int tocopy ) { inputRowSetsLock.readLock().lock(); try { for ( RowSet rs : inputRowSets ) { if ( rs.getOriginStepName().equalsIgnoreCase( from ) && rs.getDestinationStepName().equalsIgnoreCase( to ) && rs.getOriginStepCopy() == fromcopy && rs.getDestinationStepCopy() == tocopy ) { return rs; } } } finally { inputRowSetsLock.readLock().unlock(); } // See if the rowset is part of the output of a mapping source step... // // Lookup step "From" // StepMeta mappingStep = transMeta.findStep( from ); // See if it's a mapping // if ( mappingStep != null && mappingStep.isMapping() ) { // In this case we can cast the step thread to a Mapping... // List baseSteps = trans.findBaseSteps( from ); if ( baseSteps.size() == 1 ) { Mapping mapping = (Mapping) baseSteps.get( 0 ); // Find the appropriate rowset in the mapping... // The rowset in question has been passed over to a Mapping Input step inside the Mapping transformation. // MappingOutput[] outputs = mapping.getMappingTrans().findMappingOutput(); for ( MappingOutput output : outputs ) { for ( RowSet rs : output.getOutputRowSets() ) { // The destination is what counts here... // if ( rs.getDestinationStepName().equalsIgnoreCase( to ) ) { return rs; } } } } } return null; } /** * Find output row set. * * @param targetStep the target step * @return the row set * @throws KettleStepException the kettle step exception */ public RowSet findOutputRowSet( String targetStep ) throws KettleStepException { // Check to see that "targetStep" only runs in a single copy // Otherwise you'll see problems during execution. // StepMeta targetStepMeta = transMeta.findStep( targetStep ); if ( targetStepMeta == null ) { throw new KettleStepException( BaseMessages.getString( PKG, "BaseStep.Exception.TargetStepToWriteToDoesntExist", targetStep ) ); } if ( targetStepMeta.getCopies() > 1 ) { throw new KettleStepException( BaseMessages.getString( PKG, "BaseStep.Exception.TargetStepToWriteToCantRunInMultipleCopies", targetStep, Integer .toString( targetStepMeta.getCopies() ) ) ); } return findOutputRowSet( getStepname(), getCopy(), targetStep, 0 ); } /** * Find an output rowset in a running transformation. It will also look at the "to" step to see if this is a mapping. * If it is, it will find the appropriate rowset in that transformation. * * @param from * @param fromcopy * @param to * @param tocopy * @return The rowset or null if none is found. 
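 * <p>
 * Illustrative use from inside a step, with a hypothetical target step name:
 * <pre>
 *   RowSet rejects = findOutputRowSet( getStepname(), getCopy(), "Write rejects", 0 );
 *   putRowTo( rowMeta, row, rejects );
 * </pre>
 * (putRowTo is the usual companion call for writing to one specific output row set.)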
   */
  public RowSet findOutputRowSet( String from, int fromcopy, String to, int tocopy ) {
    outputRowSetsLock.readLock().lock();
    try {
      for ( RowSet rs : outputRowSets ) {
        if ( rs.getOriginStepName().equalsIgnoreCase( from ) && rs.getDestinationStepName().equalsIgnoreCase( to )
          && rs.getOriginStepCopy() == fromcopy && rs.getDestinationStepCopy() == tocopy ) {
          return rs;
        }
      }
    } finally {
      outputRowSetsLock.readLock().unlock();
    }

    // See if the rowset is part of the input of a mapping target step...
    //
    // Lookup step "To"
    //
    StepMeta mappingStep = transMeta.findStep( to );

    // See if it's a mapping
    //
    if ( mappingStep != null && mappingStep.isMapping() ) {
      // In this case we can cast the step thread to a Mapping...
      //
      List<StepInterface> baseSteps = trans.findBaseSteps( to );
      if ( baseSteps.size() == 1 ) {
        Mapping mapping = (Mapping) baseSteps.get( 0 );

        // Find the appropriate rowset in the mapping...
        // The rowset in question has been passed over to a Mapping Input step inside the Mapping transformation.
        //
        MappingInput[] inputs = mapping.getMappingTrans().findMappingInput();
        for ( MappingInput input : inputs ) {
          for ( RowSet rs : input.getInputRowSets() ) {
            // The source step is what counts in this case...
            //
            if ( rs.getOriginStepName().equalsIgnoreCase( from ) ) {
              return rs;
            }
          }
        }
      }
    }

    // Still nothing found!
    //
    return null;
  }

  //
  // We have to tell the next step we're finished with
  // writing to output rowset(s)!
  //
  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#setOutputDone()
   */
  @Override
  public void setOutputDone() {
    outputRowSetsLock.readLock().lock();
    try {
      if ( log.isDebug() ) {
        logDebug( BaseMessages.getString( PKG, "BaseStep.Log.OutputDone", String.valueOf( outputRowSets.size() ) ) );
      }
      for ( RowSet rs : outputRowSets ) {
        rs.setDone();
      }
      if ( errorRowSet != null ) {
        errorRowSet.setDone();
      }
    } finally {
      outputRowSetsLock.readLock().unlock();
    }
  }

  /**
   * This method finds the surrounding steps and rowsets for this base step. The step keeps its own list of rowsets
   * (etc.) to prevent it from having to search every time.
   * <p>
   * Note that all input and output rowsets are already created by the transformation itself; here we only look at
   * and choose the rowsets that will be used by this particular step.
   * <p>
   * We collect all input and output rowsets so the step is able to read its input data and write to its output.
   * <p>
   * Steps can run in multiple copies, or in a partitioned fashion, so we have to handle the one-to-one, one-to-many
   * and the other distribution cases properly.
   */
  public void dispatch() {
    if ( transMeta == null ) { // for preview reasons, no dispatching is done!
      return;
    }

    StepMeta stepMeta = transMeta.findStep( stepname );

    if ( log.isDetailed() ) {
      logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.StartingBuffersAllocation" ) );
    }

    // How many next steps are there? 0, 1 or more??
    // How many steps do we send output to?
    List<StepMeta> previousSteps = transMeta.findPreviousSteps( stepMeta, true );
    List<StepMeta> succeedingSteps = transMeta.findNextSteps( stepMeta );

    int nrInput = previousSteps.size();
    int nrOutput = succeedingSteps.size();

    inputRowSetsLock.writeLock().lock();
    outputRowSetsLock.writeLock().lock();
    try {
      inputRowSets = new ArrayList<>();
      outputRowSets = new ArrayList<>();
      errorRowSet = null;
      prevSteps = new StepMeta[ nrInput ];
      nextSteps = new StepMeta[ nrOutput ];

      currentInputRowSetNr = 0; // we start with input[0];

      if ( log.isDetailed() ) {
        logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.StepInfo", String.valueOf( nrInput ), String
          .valueOf( nrOutput ) ) );
      }

      // populate input rowsets.
      for ( int i = 0; i < previousSteps.size(); i++ ) {
        prevSteps[ i ] = previousSteps.get( i );
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.GotPreviousStep", stepname, String.valueOf( i ),
            prevSteps[ i ].getName() ) );
        }

        // Looking at the previous step, you can have either 1 rowset to look at or more than one.
        int prevCopies = prevSteps[ i ].getCopies();
        int nextCopies = stepMeta.getCopies();
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.InputRowInfo", String.valueOf( prevCopies ),
            String.valueOf( nextCopies ) ) );
        }

        int nrCopies;
        int dispatchType;
        boolean repartitioning;
        if ( prevSteps[ i ].isPartitioned() ) {
          repartitioning = !prevSteps[ i ].getStepPartitioningMeta().equals( stepMeta.getStepPartitioningMeta() );
        } else {
          repartitioning = stepMeta.isPartitioned();
        }

        if ( prevCopies == 1 && nextCopies == 1 ) { // normal hop
          dispatchType = Trans.TYPE_DISP_1_1;
          nrCopies = 1;
        } else if ( prevCopies == 1 && nextCopies > 1 ) { // one to many hop
          dispatchType = Trans.TYPE_DISP_1_N;
          nrCopies = 1;
        } else if ( prevCopies > 1 && nextCopies == 1 ) { // from many to one hop
          dispatchType = Trans.TYPE_DISP_N_1;
          nrCopies = prevCopies;
        } else if ( prevCopies == nextCopies && !repartitioning ) { // this may be many-to-many or swim-lanes hop
          dispatchType = Trans.TYPE_DISP_N_N;
          nrCopies = 1;
        } else { // > 1!
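          // (Illustrative: with prevCopies = 2, nextCopies = 3 and repartitioning in
          //  play, every previous copy is wired to every copy of this step, so this
          //  copy reads prevCopies row sets -- see the TYPE_DISP_N_M lookup below.)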
dispatchType = Trans.TYPE_DISP_N_M; nrCopies = prevCopies; } for ( int c = 0; c < nrCopies; c++ ) { RowSet rowSet = null; switch ( dispatchType ) { case Trans.TYPE_DISP_1_1: rowSet = trans.findRowSet( prevSteps[ i ].getName(), 0, stepname, 0 ); break; case Trans.TYPE_DISP_1_N: rowSet = trans.findRowSet( prevSteps[ i ].getName(), 0, stepname, getCopy() ); break; case Trans.TYPE_DISP_N_1: rowSet = trans.findRowSet( prevSteps[ i ].getName(), c, stepname, 0 ); break; case Trans.TYPE_DISP_N_N: rowSet = trans.findRowSet( prevSteps[ i ].getName(), getCopy(), stepname, getCopy() ); break; case Trans.TYPE_DISP_N_M: rowSet = trans.findRowSet( prevSteps[ i ].getName(), c, stepname, getCopy() ); break; default: break; } if ( rowSet != null ) { inputRowSets.add( rowSet ); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.FoundInputRowset", rowSet.getName() ) ); } } else { if ( !prevSteps[ i ].isMapping() && !stepMeta.isMapping() ) { logError( BaseMessages.getString( PKG, "BaseStep.Log.UnableToFindInputRowset" ) ); setErrors( 1 ); stopAll(); return; } } } } // And now the output part! for ( int i = 0; i < nrOutput; i++ ) { nextSteps[ i ] = succeedingSteps.get( i ); int prevCopies = stepMeta.getCopies(); int nextCopies = nextSteps[ i ].getCopies(); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.OutputRowInfo", String.valueOf( prevCopies ), String.valueOf( nextCopies ) ) ); } int nrCopies; int dispatchType; boolean repartitioning; if ( stepMeta.isPartitioned() ) { repartitioning = !stepMeta.getStepPartitioningMeta() .equals( nextSteps[ i ].getStepPartitioningMeta() ); } else { repartitioning = nextSteps[ i ].isPartitioned(); } if ( prevCopies == 1 && nextCopies == 1 ) { dispatchType = Trans.TYPE_DISP_1_1; nrCopies = 1; } else if ( prevCopies == 1 && nextCopies > 1 ) { dispatchType = Trans.TYPE_DISP_1_N; nrCopies = nextCopies; } else if ( prevCopies > 1 && nextCopies == 1 ) { dispatchType = Trans.TYPE_DISP_N_1; nrCopies = 1; } else if ( prevCopies == nextCopies && !repartitioning ) { dispatchType = Trans.TYPE_DISP_N_N; nrCopies = 1; } else { // > 1! 
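          // (Mirror of the input case above: on the output side TYPE_DISP_N_M makes
          //  this copy write nextCopies row sets, one per copy of the next step.)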
dispatchType = Trans.TYPE_DISP_N_M; nrCopies = nextCopies; } for ( int c = 0; c < nrCopies; c++ ) { RowSet rowSet = null; switch ( dispatchType ) { case Trans.TYPE_DISP_1_1: rowSet = trans.findRowSet( stepname, 0, nextSteps[ i ].getName(), 0 ); break; case Trans.TYPE_DISP_1_N: rowSet = trans.findRowSet( stepname, 0, nextSteps[ i ].getName(), c ); break; case Trans.TYPE_DISP_N_1: rowSet = trans.findRowSet( stepname, getCopy(), nextSteps[ i ].getName(), 0 ); break; case Trans.TYPE_DISP_N_N: rowSet = trans.findRowSet( stepname, getCopy(), nextSteps[ i ].getName(), getCopy() ); break; case Trans.TYPE_DISP_N_M: rowSet = trans.findRowSet( stepname, getCopy(), nextSteps[ i ].getName(), c ); break; default: break; } if ( rowSet != null ) { outputRowSets.add( rowSet ); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.FoundOutputRowset", rowSet.getName() ) ); } } else { if ( !stepMeta.isMapping() && !nextSteps[ i ].isMapping() ) { logError( BaseMessages.getString( PKG, "BaseStep.Log.UnableToFindOutputRowset" ) ); setErrors( 1 ); stopAll(); return; } } } } } finally { inputRowSetsLock.writeLock().unlock(); outputRowSetsLock.writeLock().unlock(); } if ( stepMeta.getTargetStepPartitioningMeta() != null ) { nextStepPartitioningMeta = stepMeta.getTargetStepPartitioningMeta(); } if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.FinishedDispatching" ) ); } } /** * Checks if is basic. * * @return true, if is basic */ public boolean isBasic() { return log.isBasic(); } /** * Checks if is detailed. * * @return true, if is detailed */ public boolean isDetailed() { return log.isDetailed(); } /** * Checks if is debug. * * @return true, if is debug */ public boolean isDebug() { return log.isDebug(); } /** * Checks if is row level. * * @return true, if is row level */ public boolean isRowLevel() { return log.isRowLevel(); } /** * Log minimal. * * @param message the message */ public void logMinimal( String message ) { log.logMinimal( message ); } /** * Log minimal. * * @param message the message * @param arguments the arguments */ public void logMinimal( String message, Object... arguments ) { log.logMinimal( message, arguments ); } /** * Log basic. * * @param message the message */ public void logBasic( String message ) { log.logBasic( message ); } /** * Log basic. * * @param message the message * @param arguments the arguments */ public void logBasic( String message, Object... arguments ) { log.logBasic( message, arguments ); } /** * Log detailed. * * @param message the message */ public void logDetailed( String message ) { log.logDetailed( message ); } /** * Log detailed. * * @param message the message * @param arguments the arguments */ public void logDetailed( String message, Object... arguments ) { log.logDetailed( message, arguments ); } /** * Log debug. * * @param message the message */ public void logDebug( String message ) { log.logDebug( message ); } /** * Log debug. * * @param message the message * @param arguments the arguments */ public void logDebug( String message, Object... arguments ) { log.logDebug( message, arguments ); } /** * Log rowlevel. * * @param message the message */ public void logRowlevel( String message ) { log.logRowlevel( message ); } /** * Log rowlevel. * * @param message the message * @param arguments the arguments */ public void logRowlevel( String message, Object... arguments ) { log.logRowlevel( message, arguments ); } /** * Log error. 
* * @param message the message */ public void logError( String message ) { log.logError( message ); } /** * Log error. * * @param message the message * @param e the e */ public void logError( String message, Throwable e ) { log.logError( message, e ); } /** * Log error. * * @param message the message * @param arguments the arguments */ public void logError( String message, Object... arguments ) { log.logError( message, arguments ); } /** * Gets the next class nr. * * @return the next class nr */ public int getNextClassNr() { int ret = trans.class_nr; trans.class_nr++; return ret; } /** * Output is done. * * @return true, if successful */ public boolean outputIsDone() { int nrstopped = 0; outputRowSetsLock.readLock().lock(); try { for ( RowSet rs : outputRowSets ) { if ( rs.isDone() ) { nrstopped++; } } return nrstopped >= outputRowSets.size(); } finally { outputRowSetsLock.readLock().unlock(); } } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#stopAll() */ @Override public void stopAll() { stopped.set( true ); trans.stopAll(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#isStopped() */ @Override public boolean isStopped() { return stopped.get(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#isRunning() */ @Override public boolean isRunning() { return running.get(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#isPaused() */ @Override public boolean isPaused() { return paused.get(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#setStopped(boolean) */ @Override public void setStopped( boolean stopped ) { this.stopped.set( stopped ); } @Override public void setSafeStopped( boolean stopped ) { this.safeStopped.set( stopped ); } @Override public boolean isSafeStopped() { return safeStopped.get(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#setRunning(boolean) */ @Override public void setRunning( boolean running ) { this.running.set( running ); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#pauseRunning() */ @Override public void pauseRunning() { setPaused( true ); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#resumeRunning() */ @Override public void resumeRunning() { setPaused( false ); } /** * Sets the paused. * * @param paused the new paused */ public void setPaused( boolean paused ) { this.paused.set( paused ); } /** * Sets the paused. * * @param paused the new paused */ public void setPaused( AtomicBoolean paused ) { this.paused = paused; } /** * Checks if is initialising. * * @return true, if is initialising */ public boolean isInitialising() { return init; } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#markStart() */ @Override public void markStart() { Calendar cal = Calendar.getInstance(); start_time = cal.getTime(); setInternalVariables(); } /** * Sets the internal variables. */ public void setInternalVariables() { setVariable( Const.INTERNAL_VARIABLE_STEP_NAME, stepname ); setVariable( Const.INTERNAL_VARIABLE_STEP_COPYNR, Integer.toString( getCopy() ) ); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#markStop() */ @Override public void markStop() { Calendar cal = Calendar.getInstance(); stop_time = cal.getTime(); // Here we are completely done with the transformation. // Call all the attached listeners and notify the outside world that the step has finished. 
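    // (Listener sketch, assuming StepAdapter, the no-op convenience base class for
    //  StepListener; the step name is hypothetical:
    //    trans.findRunThread( "My step" ).addStepListener( new StepAdapter() {
    //      @Override public void stepFinished( Trans t, StepMeta sm, StepInterface si ) {
    //        // react to completion here
    //      }
    //    } );
    //  stepFinished() is fired from the notification loop below.)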
// synchronized ( stepListeners ) { for ( StepListener stepListener : stepListeners ) { stepListener.stepFinished( trans, stepMeta, this ); } } // We're finally completely done with this step. // setRunning( false ); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#getRuntime() */ @Override public long getRuntime() { long lapsed; if ( start_time != null && stop_time == null ) { Calendar cal = Calendar.getInstance(); long now = cal.getTimeInMillis(); long st = start_time.getTime(); lapsed = now - st; } else if ( start_time != null && stop_time != null ) { lapsed = stop_time.getTime() - start_time.getTime(); } else { lapsed = 0; } return lapsed; } /** * Builds the log. * * @param sname the sname * @param copynr the copynr * @param lines_read the lines_read * @param lines_written the lines_written * @param lines_updated the lines_updated * @param lines_skipped the lines_skipped * @param errors the errors * @param start_date the start_date * @param end_date the end_date * @return the row meta and data */ public RowMetaAndData buildLog( String sname, int copynr, long lines_read, long lines_written, long lines_updated, long lines_skipped, long errors, Date start_date, Date end_date ) { RowMetaInterface r = new RowMeta(); Object[] data = new Object[ 9 ]; int nr = 0; r.addValueMeta( new ValueMetaString( BaseMessages.getString( PKG, "BaseStep.ColumnName.Stepname" ) ) ); data[ nr ] = sname; nr++; r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.Copy" ) ) ); data[ nr ] = (double) copynr; nr++; r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesReaded" ) ) ); data[ nr ] = (double) lines_read; nr++; r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesWritten" ) ) ); data[ nr ] = (double) lines_written; nr++; r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesUpdated" ) ) ); data[ nr ] = (double) lines_updated; nr++; r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesSkipped" ) ) ); data[ nr ] = (double) lines_skipped; nr++; r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.Errors" ) ) ); data[ nr ] = (double) errors; nr++; r.addValueMeta( new ValueMetaDate( "start_date" ) ); data[ nr ] = start_date; nr++; r.addValueMeta( new ValueMetaDate( "end_date" ) ); data[ nr ] = end_date; nr++; return new RowMetaAndData( r, data ); } /** * Gets the log fields. 
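 * <p>
 * The layout mirrors the row produced by buildLog(): step name, copy number, lines read, written, updated and
 * skipped, errors, and the start and end dates.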
* * @param comm the comm * @return the log fields */ public static final RowMetaInterface getLogFields( String comm ) { RowMetaInterface r = new RowMeta(); ValueMetaInterface sname = new ValueMetaString( BaseMessages.getString( PKG, "BaseStep.ColumnName.Stepname" ) ); sname.setLength( 256 ); r.addValueMeta( sname ); r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.Copy" ) ) ); r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesReaded" ) ) ); r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesWritten" ) ) ); r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesUpdated" ) ) ); r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.LinesSkipped" ) ) ); r.addValueMeta( new ValueMetaNumber( BaseMessages.getString( PKG, "BaseStep.ColumnName.Errors" ) ) ); r.addValueMeta( new ValueMetaDate( BaseMessages.getString( PKG, "BaseStep.ColumnName.StartDate" ) ) ); r.addValueMeta( new ValueMetaDate( BaseMessages.getString( PKG, "BaseStep.ColumnName.EndDate" ) ) ); for ( int i = 0; i < r.size(); i++ ) { r.getValueMeta( i ).setOrigin( comm ); } return r; } /* * (non-Javadoc) * * @see java.lang.Object#toString() */ @Override public String toString() { StringBuilder string = new StringBuilder( 50 ); // If the step runs in a mapping (and as such has a "parent transformation", we are going to print the name of the // transformation during logging // // if ( !Utils.isEmpty( getTrans().getMappingStepName() ) ) { string.append( '[' ).append( trans.toString() ).append( ']' ).append( '.' ); // Name of the mapping transformation } if ( !Utils.isEmpty( partitionID ) ) { string.append( stepname ).append( '.' ).append( partitionID ); } else if ( clusterSize > 1 ) { string .append( stepname ).append( '.' ).append( slaveNr ).append( '.' ).append( Integer.toString( getCopy() ) ); } else { string.append( stepname ).append( '.' ).append( Integer.toString( getCopy() ) ); } return string.toString(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#rowsetOutputSize() */ @Override public int rowsetOutputSize() { int size = 0; outputRowSetsLock.readLock().lock(); try { for ( RowSet outputRowSet : outputRowSets ) { size += outputRowSet.size(); } } finally { outputRowSetsLock.readLock().unlock(); } return size; } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#rowsetInputSize() */ @Override public int rowsetInputSize() { int size = 0; inputRowSetsLock.readLock().lock(); try { for ( RowSet inputRowSet : inputRowSets ) { size += inputRowSet.size(); } } finally { inputRowSetsLock.readLock().unlock(); } return size; } /** * Perform actions to stop a running step. This can be stopping running SQL queries (cancel), etc. Default it doesn't * do anything. * * @param stepDataInterface The interface to the step data containing the connections, resultsets, open files, etc. * @throws KettleException in case something goes wrong */ @Override public void stopRunning( StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface ) throws KettleException { } /** * Stops running operations This method is deprecated, please use the method specifying the metadata and data * interfaces. * * @deprecated use {@link #stopRunning(StepMetaInterface, StepDataInterface)} */ @Deprecated public void stopRunning() { } /** * Log summary. 
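 * <p>
 * Writes one summary line for this step copy, of the form (illustrative): Finished processing (I=0, O=0, R=1250,
 * W=1250, U=0, E=0).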
 */
  public void logSummary() {
    synchronized ( statusCountersLock ) {
      long li = getLinesInput();
      long lo = getLinesOutput();
      long lr = getLinesRead();
      long lw = getLinesWritten();
      long lu = getLinesUpdated();
      long lj = getLinesRejected();
      if ( li > 0 || lo > 0 || lr > 0 || lw > 0 || lu > 0 || lj > 0 || errors > 0 ) {
        logBasic( BaseMessages.getString( PKG, "BaseStep.Log.SummaryInfo", String.valueOf( li ), String
          .valueOf( lo ), String.valueOf( lr ), String.valueOf( lw ), String.valueOf( lu ), String
          .valueOf( errors + lj ) ) );
      } else {
        logDetailed( BaseMessages.getString( PKG, "BaseStep.Log.SummaryInfo", String.valueOf( li ), String
          .valueOf( lo ), String.valueOf( lr ), String.valueOf( lw ), String.valueOf( lu ), String
          .valueOf( errors + lj ) ) );
      }
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#getStepID()
   */
  @Override
  public String getStepID() {
    if ( stepMeta != null ) {
      return stepMeta.getStepID();
    }
    return null;
  }

  /**
   * @return Returns the inputRowSets.
   */
  @Override
  public List<RowSet> getInputRowSets() {
    inputRowSetsLock.readLock().lock();
    try {
      return new ArrayList<>( inputRowSets );
    } finally {
      inputRowSetsLock.readLock().unlock();
    }
  }

  @Override
  public void addRowSetToInputRowSets( RowSet rowSet ) {
    inputRowSetsLock.writeLock().lock();
    try {
      inputRowSets.add( rowSet );
    } finally {
      inputRowSetsLock.writeLock().unlock();
    }
  }

  protected RowSet getFirstInputRowSet() {
    inputRowSetsLock.readLock().lock();
    try {
      return inputRowSets.get( 0 );
    } finally {
      inputRowSetsLock.readLock().unlock();
    }
  }

  protected void clearInputRowSets() {
    inputRowSetsLock.writeLock().lock();
    try {
      inputRowSets.clear();
    } finally {
      inputRowSetsLock.writeLock().unlock();
    }
  }

  protected void swapFirstInputRowSetIfExists( String stepName ) {
    inputRowSetsLock.writeLock().lock();
    try {
      for ( int i = 0; i < inputRowSets.size(); i++ ) {
        BlockingRowSet rs = (BlockingRowSet) inputRowSets.get( i );
        if ( rs.getOriginStepName().equalsIgnoreCase( stepName ) ) {
          // swap this one and position 0... that means, the main stream is always stream 0 --> easy!
          //
          BlockingRowSet zero = (BlockingRowSet) inputRowSets.get( 0 );
          inputRowSets.set( 0, rs );
          inputRowSets.set( i, zero );
        }
      }
    } finally {
      inputRowSetsLock.writeLock().unlock();
    }
  }

  /**
   * @param inputRowSets The inputRowSets to set.
   */
  public void setInputRowSets( List<RowSet> inputRowSets ) {
    inputRowSetsLock.writeLock().lock();
    try {
      this.inputRowSets = inputRowSets;
    } finally {
      inputRowSetsLock.writeLock().unlock();
    }
  }

  /**
   * @return Returns the outputRowSets.
   */
  @Override
  public List<RowSet> getOutputRowSets() {
    outputRowSetsLock.readLock().lock();
    try {
      return new ArrayList<>( outputRowSets );
    } finally {
      outputRowSetsLock.readLock().unlock();
    }
  }

  @Override
  public void addRowSetToOutputRowSets( RowSet rowSet ) {
    outputRowSetsLock.writeLock().lock();
    try {
      outputRowSets.add( rowSet );
    } finally {
      outputRowSetsLock.writeLock().unlock();
    }
  }

  protected void clearOutputRowSets() {
    outputRowSetsLock.writeLock().lock();
    try {
      outputRowSets.clear();
    } finally {
      outputRowSetsLock.writeLock().unlock();
    }
  }

  /**
   * @param outputRowSets The outputRowSets to set.
   */
  public void setOutputRowSets( List<RowSet> outputRowSets ) {
    outputRowSetsLock.writeLock().lock();
    try {
      this.outputRowSets = outputRowSets;
    } finally {
      outputRowSetsLock.writeLock().unlock();
    }
  }

  /**
   * @return Returns the distributed.
   */
  public boolean isDistributed() {
    return distributed;
  }

  /**
   * @param distributed The distributed to set.
*/ public void setDistributed( boolean distributed ) { this.distributed = distributed; } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#addRowListener(org.pentaho.di.trans.step.RowListener) */ @Override public void addRowListener( RowListener rowListener ) { rowListeners.add( rowListener ); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#removeRowListener(org.pentaho.di.trans.step.RowListener) */ @Override public void removeRowListener( RowListener rowListener ) { rowListeners.remove( rowListener ); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#getRowListeners() */ @Override public List getRowListeners() { return Collections.unmodifiableList( rowListeners ); } /** * Adds the result file. * * @param resultFile the result file */ public void addResultFile( ResultFile resultFile ) { ReentrantReadWriteLock.WriteLock lock = resultFilesLock.writeLock(); lock.lock(); try { resultFiles.put( resultFile.getFile().toString(), resultFile ); } finally { lock.unlock(); } } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#getResultFiles() */ @Override public Map getResultFiles() { ReentrantReadWriteLock.ReadLock lock = resultFilesLock.readLock(); lock.lock(); try { return new HashMap( this.resultFiles ); } finally { lock.unlock(); } } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#getStatus() */ @Override public StepExecutionStatus getStatus() { // Is this thread alive or not? // if ( isRunning() ) { if ( isStopped() ) { return StepExecutionStatus.STATUS_HALTING; } else { if ( isPaused() ) { return StepExecutionStatus.STATUS_PAUSED; } else { return StepExecutionStatus.STATUS_RUNNING; } } } else { // Step is not running... What are we doing? // // An init thread is running... // if ( trans.isInitializing() ) { if ( isInitialising() ) { return StepExecutionStatus.STATUS_INIT; } else { // Done initializing, but other threads are still busy. // So this step is idle // return StepExecutionStatus.STATUS_IDLE; } } else { // It's not running, it's not initializing, so what is it doing? 
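    // (Status resolution, summarized: while the step thread is alive the status is
    //  HALTING, PAUSED or RUNNING; during transformation initialization it is INIT
    //  or IDLE; afterwards it is STOPPED, or whatever the StepDataInterface
    //  recorded -- with DISPOSED reported as FINISHED, and EMPTY when no data
    //  interface can be found.)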
// if ( isStopped() ) { return StepExecutionStatus.STATUS_STOPPED; } else { // To be sure (race conditions and all), get the rest in StepDataInterface object: // StepDataInterface sdi = trans.getStepDataInterface( stepname, stepcopy ); if ( sdi != null ) { if ( sdi.getStatus() == StepExecutionStatus.STATUS_DISPOSED ) { return StepExecutionStatus.STATUS_FINISHED; } else { return sdi.getStatus(); } } return StepExecutionStatus.STATUS_EMPTY; } } } } /** * @return the partitionID */ @Override public String getPartitionID() { return partitionID; } /** * @param partitionID the partitionID to set */ @Override public void setPartitionID( String partitionID ) { this.partitionID = partitionID; } /** * @return the partitionTargets */ public Map getPartitionTargets() { return partitionTargets; } /** * @param partitionTargets the partitionTargets to set */ public void setPartitionTargets( Map partitionTargets ) { this.partitionTargets = partitionTargets; } /** * @return the repartitioning type */ public int getRepartitioning() { return repartitioning; } /** * @param repartitioning the repartitioning type to set */ @Override public void setRepartitioning( int repartitioning ) { this.repartitioning = repartitioning; } /** * @return the partitioned */ @Override public boolean isPartitioned() { return partitioned; } /** * @param partitioned the partitioned to set */ @Override public void setPartitioned( boolean partitioned ) { this.partitioned = partitioned; } /** * Check feedback. * * @param lines the lines * @return true, if successful */ protected boolean checkFeedback( long lines ) { return getTransMeta().isFeedbackShown() && ( lines > 0 ) && ( getTransMeta().getFeedbackSize() > 0 ) && ( lines % getTransMeta().getFeedbackSize() ) == 0; } /** * @return the rowMeta */ public RowMetaInterface getInputRowMeta() { return inputRowMeta; } /** * @param rowMeta the rowMeta to set */ public void setInputRowMeta( RowMetaInterface rowMeta ) { this.inputRowMeta = rowMeta; } /** * @return the errorRowMeta */ public RowMetaInterface getErrorRowMeta() { return errorRowMeta; } /** * @param errorRowMeta the errorRowMeta to set */ public void setErrorRowMeta( RowMetaInterface errorRowMeta ) { this.errorRowMeta = errorRowMeta; } /** * @return the previewRowMeta */ public RowMetaInterface getPreviewRowMeta() { return previewRowMeta; } /** * @param previewRowMeta the previewRowMeta to set */ public void setPreviewRowMeta( RowMetaInterface previewRowMeta ) { this.previewRowMeta = previewRowMeta; } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#copyVariablesFrom(org.pentaho.di.core.variables.VariableSpace) */ @Override public void copyVariablesFrom( VariableSpace space ) { variables.copyVariablesFrom( space ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#environmentSubstitute(java.lang.String) */ @Override public String environmentSubstitute( String aString ) { return variables.environmentSubstitute( aString ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#environmentSubstitute(java.lang.String[]) */ @Override public String[] environmentSubstitute( String[] aString ) { return variables.environmentSubstitute( aString ); } @Override public String fieldSubstitute( String aString, RowMetaInterface rowMeta, Object[] rowData ) throws KettleValueException { return variables.fieldSubstitute( aString, rowMeta, rowData ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#getParentVariableSpace() */ @Override public VariableSpace 
getParentVariableSpace() { return variables.getParentVariableSpace(); } /* * (non-Javadoc) * * @see * org.pentaho.di.core.variables.VariableSpace#setParentVariableSpace(org.pentaho.di.core.variables.VariableSpace) */ @Override public void setParentVariableSpace( VariableSpace parent ) { variables.setParentVariableSpace( parent ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#getVariable(java.lang.String, java.lang.String) */ @Override public String getVariable( String variableName, String defaultValue ) { return variables.getVariable( variableName, defaultValue ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#getVariable(java.lang.String) */ @Override public String getVariable( String variableName ) { return variables.getVariable( variableName ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#getBooleanValueOfVariable(java.lang.String, boolean) */ @Override public boolean getBooleanValueOfVariable( String variableName, boolean defaultValue ) { if ( !Utils.isEmpty( variableName ) ) { String value = environmentSubstitute( variableName ); if ( !Utils.isEmpty( value ) ) { return ValueMetaString.convertStringToBoolean( value ); } } return defaultValue; } /* * (non-Javadoc) * * @see * org.pentaho.di.core.variables.VariableSpace#initializeVariablesFrom(org.pentaho.di.core.variables.VariableSpace) */ @Override public void initializeVariablesFrom( VariableSpace parent ) { variables.initializeVariablesFrom( parent ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#listVariables() */ @Override public String[] listVariables() { return variables.listVariables(); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#setVariable(java.lang.String, java.lang.String) */ @Override public void setVariable( String variableName, String variableValue ) { variables.setVariable( variableName, variableValue ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#shareVariablesWith(org.pentaho.di.core.variables.VariableSpace) */ @Override public void shareVariablesWith( VariableSpace space ) { variables = space; } /* * (non-Javadoc) * * @see org.pentaho.di.core.variables.VariableSpace#injectVariables(java.util.Map) */ @Override public void injectVariables( Map prop ) { variables.injectVariables( prop ); } /** * Returns the step ID via the getStepID() method call. Support for CheckResultSourceInterface. * * @return getStepID() */ public String getTypeId() { return this.getStepID(); } /** * Returns the unique slave number in the cluster. * * @return the unique slave number in the cluster */ public int getSlaveNr() { return slaveNr; } /** * Returns the cluster size. * * @return the cluster size */ public int getClusterSize() { return clusterSize; } /** * Returns a unique step number across all slave servers: slaveNr * nrCopies + copyNr. * * @return a unique step number across all slave servers: slaveNr * nrCopies + copyNr */ public int getUniqueStepNrAcrossSlaves() { return uniqueStepNrAcrossSlaves; } /** * Returns the number of unique steps across all slave servers. * * @return the number of unique steps across all slave servers */ public int getUniqueStepCountAcrossSlaves() { return uniqueStepCountAcrossSlaves; } /** * Returns the serverSockets. * * @return the serverSockets */ public List getServerSockets() { return serverSockets; } /** * @param serverSockets the serverSockets to set * @return serverSockets the serverSockets to set. 
 */
  public void setServerSockets( List<ServerSocket> serverSockets ) {
    this.serverSockets = serverSockets;
  }

  /**
   * Set to true to actively manage priorities of step threads.
   *
   * @param usingThreadPriorityManagment set to true to actively manage priorities of step threads
   */
  @Override
  public void setUsingThreadPriorityManagment( boolean usingThreadPriorityManagment ) {
    this.usingThreadPriorityManagment = usingThreadPriorityManagment;
  }

  /**
   * Returns true if we are actively managing priorities of step threads.
   *
   * @return true if we are actively managing priorities of step threads
   */
  @Override
  public boolean isUsingThreadPriorityManagment() {
    return usingThreadPriorityManagment;
  }

  /**
   * This method is executed by Trans right before the threads start and right after initialization.
   * <p>
   * More to the point: here we open remote output step sockets.
   *
   * @throws KettleStepException In case there is an error
   */
  @Override
  public void initBeforeStart() throws KettleStepException {
    openRemoteOutputStepSocketsOnce();
  }

  /**
   * Returns the step listeners.
   *
   * @return the stepListeners
   */
  public List<StepListener> getStepListeners() {
    return stepListeners;
  }

  /**
   * Sets the step listeners.
   *
   * @param stepListeners the stepListeners to set
   */
  public void setStepListeners( List<StepListener> stepListeners ) {
    this.stepListeners = Collections.synchronizedList( stepListeners );
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#processRow(org.pentaho.di.trans.step.StepMetaInterface,
   * org.pentaho.di.trans.step.StepDataInterface)
   */
  @Override
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    return false;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#canProcessOneRow()
   */
  @Override
  public boolean canProcessOneRow() {
    inputRowSetsLock.readLock().lock();
    try {
      switch ( inputRowSets.size() ) {
        case 0:
          return false;
        case 1:
          RowSet set = inputRowSets.get( 0 );
          if ( set.isDone() ) {
            return false;
          }
          return set.size() > 0;
        default:
          boolean allDone = true;
          for ( RowSet rowSet : inputRowSets ) {
            if ( !rowSet.isDone() ) {
              allDone = false;
            }
            if ( rowSet.size() > 0 ) {
              return true;
            }
          }
          return !allDone;
      }
    } finally {
      inputRowSetsLock.readLock().unlock();
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#addStepListener(org.pentaho.di.trans.step.StepListener)
   */
  @Override
  public void addStepListener( StepListener stepListener ) {
    stepListeners.add( stepListener );
  }

  /*
   * (non-Javadoc)
   *
   * @see org.pentaho.di.trans.step.StepInterface#isMapping()
   */
  @Override
  public boolean isMapping() {
    return stepMeta.isMapping();
  }

  /**
   * Returns the socket repository.
   *
   * @return the socketRepository
   */
  public SocketRepository getSocketRepository() {
    return socketRepository;
  }

  /**
   * Sets the socket repository.
* * @param socketRepository the socketRepository to set */ public void setSocketRepository( SocketRepository socketRepository ) { this.socketRepository = socketRepository; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getObjectName() */ @Override public String getObjectName() { return getStepname(); } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#getLogChannel() */ @Override public LogChannelInterface getLogChannel() { return log; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getFilename() */ @Override public String getFilename() { return null; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getLogChannelId() */ @Override public String getLogChannelId() { return log.getLogChannelId(); } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getObjectId() */ @Override public ObjectId getObjectId() { if ( stepMeta == null ) { return null; } return stepMeta.getObjectId(); } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getObjectRevision() */ @Override public ObjectRevision getObjectRevision() { return null; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getObjectType() */ @Override public LoggingObjectType getObjectType() { return LoggingObjectType.STEP; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getParent() */ @Override public LoggingObjectInterface getParent() { return trans; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getRepositoryDirectory() */ @Override public RepositoryDirectory getRepositoryDirectory() { return null; } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getObjectCopy() */ @Override public String getObjectCopy() { return Integer.toString( stepcopy ); } /* * (non-Javadoc) * * @see org.pentaho.di.core.logging.LoggingObjectInterface#getLogLevel() */ @Override public LogLevel getLogLevel() { return log != null ? log.getLogLevel() : null; } /** * Sets the log level. * * @param logLevel the new log level */ public void setLogLevel( LogLevel logLevel ) { log.setLogLevel( logLevel ); } /** * Close quietly. * * @param cl the object that can be closed. */ public static void closeQuietly( Closeable cl ) { if ( cl != null ) { try { cl.close(); } catch ( IOException ignored ) { // Ignore IOException on close } } } /** * Returns the container object ID. * * @return the containerObjectId */ @Override public String getContainerObjectId() { return containerObjectId; } /** * Sets the container object ID. * * @param containerObjectId the containerObjectId to set */ public void setCarteObjectId( String containerObjectId ) { this.containerObjectId = containerObjectId; } /* * (non-Javadoc) * * @see org.pentaho.di.trans.step.StepInterface#batchComplete() */ @Override public void batchComplete() throws KettleException { } /** * Gets the remote input steps. * * @return the remote input steps */ public List getRemoteInputSteps() { return remoteInputSteps; } /** * Gets the remote output steps. 
   *
   * @return the remote output steps
   */
  public List<RemoteStep> getRemoteOutputSteps() {
    return remoteOutputSteps;
  }

  /**
   * Returns the registration date.
   *
   * @return the registration date
   */
  @Override
  public Date getRegistrationDate() {
    return null;
  }

  @Override
  public boolean isGatheringMetrics() {
    return log != null && log.isGatheringMetrics();
  }

  @Override
  public void setGatheringMetrics( boolean gatheringMetrics ) {
    if ( log != null ) {
      log.setGatheringMetrics( gatheringMetrics );
    }
  }

  @Override
  public boolean isForcingSeparateLogging() {
    return log != null && log.isForcingSeparateLogging();
  }

  @Override
  public void setForcingSeparateLogging( boolean forcingSeparateLogging ) {
    if ( log != null ) {
      log.setForcingSeparateLogging( forcingSeparateLogging );
    }
  }

  @Override
  public Repository getRepository() {
    return repository;
  }

  @Override
  public void setRepository( Repository repository ) {
    this.repository = repository;
  }

  @Override
  public IMetaStore getMetaStore() {
    return metaStore;
  }

  @Override
  public void setMetaStore( IMetaStore metaStore ) {
    this.metaStore = metaStore;
  }

  @Override
  public int getCurrentOutputRowSetNr() {
    return currentOutputRowSetNr;
  }

  @Override
  public void setCurrentOutputRowSetNr( int index ) {
    currentOutputRowSetNr = index;
  }

  @Override
  public int getCurrentInputRowSetNr() {
    return currentInputRowSetNr;
  }

  @Override
  public void setCurrentInputRowSetNr( int index ) {
    currentInputRowSetNr = index;
  }

  @Override
  public Map<String, Object> getExtensionDataMap() {
    return extensionDataMap;
  }

  private class DefaultRowHandler implements RowHandler {
    @Override
    public Object[] getRow() throws KettleException {
      return handleGetRow();
    }

    @Override
    public void putRow( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
      handlePutRow( rowMeta, row );
    }

    @Override
    public void putError( RowMetaInterface rowMeta, Object[] row, long nrErrors, String errorDescriptions,
                          String fieldNames, String errorCodes ) throws KettleStepException {
      handlePutError( rowMeta, row, nrErrors, errorDescriptions, fieldNames, errorCodes );
    }

    @Override
    public Object[] getRowFrom( RowSet rowSet ) throws KettleStepException {
      return handleGetRowFrom( rowSet );
    }

    @Override
    public void putRowTo( RowMetaInterface rowMeta, Object[] row, RowSet rowSet ) throws KettleStepException {
      handlePutRowTo( rowMeta, row, rowSet );
    }
  }
}
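/*
 * Usage sketch (illustrative only, not part of this file): the canonical way to
 * build on BaseStep is to subclass it and override processRow(), pulling rows
 * with getRow() and pushing them with putRow(). The class and field names below
 * are hypothetical; "first" is the flag BaseStep provides for one-time setup.
 *
 *   public class UpperCaseStep extends BaseStep implements StepInterface {
 *
 *     public UpperCaseStep( StepMeta stepMeta, StepDataInterface stepDataInterface,
 *         int copyNr, TransMeta transMeta, Trans trans ) {
 *       super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
 *     }
 *
 *     @Override
 *     public boolean processRow( StepMetaInterface smi, StepDataInterface sdi )
 *       throws KettleException {
 *       Object[] row = getRow(); // blocks until a row arrives or the input is done
 *       if ( row == null ) {     // no more input: signal downstream and stop
 *         setOutputDone();
 *         return false;
 *       }
 *       if ( first ) {
 *         first = false;
 *         // resolve field indexes from getInputRowMeta() here
 *       }
 *       String value = getInputRowMeta().getString( row, 0 );
 *       row[ 0 ] = value == null ? null : value.toUpperCase();
 *       putRow( getInputRowMeta(), row ); // hand the row to all output row sets
 *       if ( checkFeedback( getLinesRead() ) && isBasic() ) {
 *         logBasic( "Linenr " + getLinesRead() ); // periodic progress feedback
 *       }
 *       return true; // call me again for the next row
 *     }
 *   }
 */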