org.apache.batchee.container.impl.controller.PartitionedStepController

/*
 * Copyright 2012 International Business Machines Corp.
 * 
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership. Licensed under the Apache License, 
 * Version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.batchee.container.impl.controller;

import org.apache.batchee.container.exception.BatchContainerRuntimeException;
import org.apache.batchee.container.impl.StepContextImpl;
import org.apache.batchee.container.impl.controller.chunk.ExceptionConfig;
import org.apache.batchee.container.impl.jobinstance.RuntimeJobExecution;
import org.apache.batchee.container.jsl.CloneUtility;
import org.apache.batchee.container.proxy.InjectionReferences;
import org.apache.batchee.container.proxy.ProxyFactory;
import org.apache.batchee.container.services.ServicesManager;
import org.apache.batchee.container.util.BatchPartitionPlan;
import org.apache.batchee.container.util.BatchPartitionWorkUnit;
import org.apache.batchee.container.util.BatchWorkUnit;
import org.apache.batchee.container.util.PartitionDataWrapper;
import org.apache.batchee.container.util.PartitionDataWrapper.PartitionEventType;
import org.apache.batchee.container.util.PartitionsBuilderConfig;
import org.apache.batchee.jaxb.Analyzer;
import org.apache.batchee.jaxb.JSLJob;
import org.apache.batchee.jaxb.JSLProperties;
import org.apache.batchee.jaxb.Property;
import org.apache.batchee.jaxb.Step;
import org.apache.batchee.spi.BatchArtifactFactory;

import javax.batch.api.listener.StepListener;
import javax.batch.api.partition.PartitionAnalyzer;
import javax.batch.api.partition.PartitionMapper;
import javax.batch.api.partition.PartitionPlan;
import javax.batch.api.partition.PartitionReducer;
import javax.batch.api.partition.PartitionReducer.PartitionStatus;
import javax.batch.operations.JobExecutionAlreadyCompleteException;
import javax.batch.operations.JobExecutionNotMostRecentException;
import javax.batch.operations.JobRestartException;
import javax.batch.operations.JobStartException;
import javax.batch.runtime.BatchStatus;
import javax.batch.runtime.Metric;
import javax.batch.runtime.StepExecution;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class PartitionedStepController extends BaseStepController {
    private static final int DEFAULT_PARTITION_INSTANCES = 1;
    private static final int DEFAULT_THREADS = 0; //0 means default to number of instances

    private PartitionPlan plan = null;

    private int partitions = DEFAULT_PARTITION_INSTANCES;
    private int threads = DEFAULT_THREADS;

    private Properties[] partitionProperties = null;

    private volatile List<BatchPartitionWorkUnit> parallelBatchWorkUnits;

    private PartitionReducer partitionReducerProxy = null;

    private enum ExecutionType {
      /**
       * First execution of this step for the job instance (among all job executions)
       */
      START,
      /**
       * Step previously executed but did not complete successfully, override=false so continue from previous partitions' checkpoints, etc.
       */
      RESTART_NORMAL,
      /**
       * Step previously executed but did not complete successfully, override=true so start with an entire set of new partitions, checkpoints, etc.
       */
      RESTART_OVERRIDE,
      /**
       * Step previously completed, but we are re-executing with an entire set of new partitions, checkpoints, etc.
       */
      RESTART_AFTER_COMPLETION
    }
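
    // Illustrative note (not part of the original source): with a PartitionMapper whose
    // plan returns getPartitionsOverride() == false, restarting a failed step resolves to
    // RESTART_NORMAL and reuses the persisted partition count and checkpoints; returning
    // true resolves to RESTART_OVERRIDE and discards them.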

    private ExecutionType executionType = null;

    // On invocation this will be re-primed to reflect already-completed partitions from a previous execution.
    int numPreviouslyCompleted = 0;

    private PartitionAnalyzer analyzerProxy = null;

    final List<JSLJob> subJobs = new ArrayList<JSLJob>();
    protected List<StepListener> stepListeners = null;

    List<BatchPartitionWorkUnit> completedWork = new ArrayList<BatchPartitionWorkUnit>();

    BlockingQueue<BatchPartitionWorkUnit> completedWorkQueue = null;

    private final BatchArtifactFactory factory;

    protected PartitionedStepController(final RuntimeJobExecution jobExecutionImpl, final Step step, final StepContextImpl stepContext,
                                        final long rootJobExecutionId, final ServicesManager servicesManager) {
        super(jobExecutionImpl, step, stepContext, rootJobExecutionId, servicesManager);
        factory = servicesManager.service(BatchArtifactFactory.class);
    }

    @Override
    public void stop() {

        updateBatchStatus(BatchStatus.STOPPING);

        // It's possible we may try to stop a partitioned step before any
        // sub steps have been started.
        synchronized (subJobs) {

            if (parallelBatchWorkUnits != null) {
                for (BatchWorkUnit subJob : parallelBatchWorkUnits) {
                    try {

                        // only try to stop the sub-jobs if they are running
                        if (subJob.getJobExecutionImpl().getBatchStatus() == BatchStatus.STARTING ||
                            subJob.getJobExecutionImpl().getBatchStatus() == BatchStatus.STARTED) {

                            kernelService.stopJob(subJob.getJobExecutionImpl().getExecutionId());
                        }
                    } catch (Exception e) {
                        // TODO - Is this what we want to do?
                        // Blow up if it happens, to force the issue.
                        throw new IllegalStateException(e);
                    }
                }
            }
        }
    }

    private PartitionPlan generatePartitionPlan() {
        // Determine the number of partitions


        PartitionPlan plan = null;
        Integer previousNumPartitions = null;
        final org.apache.batchee.jaxb.PartitionMapper partitionMapper = step.getPartition().getMapper();

        //from persisted plan from previous run
        if (stepStatus.getNumPartitions() != null) {
            previousNumPartitions = stepStatus.getNumPartitions();
        }

        if (partitionMapper != null) { //from partition mapper

            final List<Property> propertyList = partitionMapper.getProperties() == null ? null
                : partitionMapper.getProperties().getPropertyList();

            // Set all the contexts associated with this controller.
            // Some of them may be null
            final InjectionReferences injectionRef = new InjectionReferences(jobExecutionImpl.getJobContext(), stepContext, propertyList);
            final PartitionMapper partitionMapperProxy =
                    ProxyFactory.createPartitionMapperProxy(factory, partitionMapper.getRef(), injectionRef, jobExecutionImpl);


            PartitionPlan mapperPlan = null;
            try {
                mapperPlan = partitionMapperProxy.mapPartitions();
            } catch (Exception e) {
                ExceptionConfig.wrapBatchException(e);
            }

            //Set up the new partition plan
            plan = new BatchPartitionPlan();
            plan.setPartitionsOverride(mapperPlan.getPartitionsOverride());

            //When true is specified, the partition count from the current run
            //is used and all results from past partitions are discarded.
            if (mapperPlan.getPartitionsOverride() || previousNumPartitions == null) {
                plan.setPartitions(mapperPlan.getPartitions());
            } else {
                plan.setPartitions(previousNumPartitions);
            }

            if (mapperPlan.getThreads() == 0) {
                plan.setThreads(plan.getPartitions());
            } else {
                plan.setThreads(mapperPlan.getThreads());
            }

            plan.setPartitionProperties(mapperPlan.getPartitionProperties());
        } else if (step.getPartition().getPlan() != null) { //from static partition element in jsl


            final String partitionsAttr = step.getPartition().getPlan().getPartitions();
            String threadsAttr;

            int numPartitions = Integer.MIN_VALUE;
            int numThreads;
            Properties[] partitionProps = null;

            if (partitionsAttr != null) {
                try {
                    numPartitions = Integer.parseInt(partitionsAttr);
                } catch (final NumberFormatException e) {
                    throw new IllegalArgumentException("Could not parse partition instances value in stepId: " + step.getId()
                        + ", with instances=" + partitionsAttr, e);
                }
                // Validate before allocating: a negative value would otherwise throw
                // NegativeArraySizeException instead of the intended IllegalArgumentException.
                if (numPartitions < 1) {
                    throw new IllegalArgumentException("Partition instances value must be 1 or greater in stepId: " + step.getId()
                        + ", with instances=" + partitionsAttr);
                }
                partitionProps = new Properties[numPartitions];
            }

            threadsAttr = step.getPartition().getPlan().getThreads();
            if (threadsAttr != null) {
                try {
                    numThreads = Integer.parseInt(threadsAttr);
                    if (numThreads == 0) {
                        numThreads = numPartitions;
                    }
                } catch (final NumberFormatException e) {
                    throw new IllegalArgumentException("Could not parse partition threads value in stepId: " + step.getId()
                        + ", with threads=" + threadsAttr, e);
                }
                if (numThreads < 0) {
                    throw new IllegalArgumentException("Threads value must be 0 or greater in stepId: " + step.getId()
                        + ", with threads=" + threadsAttr);

                }
            } else { //default to number of partitions if threads isn't set
                numThreads = numPartitions;
            }


            if (step.getPartition().getPlan().getProperties() != null) {

                final List<JSLProperties> jslProperties = step.getPartition().getPlan().getProperties();
                for (JSLProperties props : jslProperties) {
                    int targetPartition = Integer.parseInt(props.getPartition());

                    try {
                        partitionProps[targetPartition] = CloneUtility.jslPropertiesToJavaProperties(props);
                    } catch (ArrayIndexOutOfBoundsException e) {
                        throw new BatchContainerRuntimeException("There are only " + numPartitions + " partition instances, but there are "
                            + jslProperties.size()
                            + " partition properties lists defined. Remember that partition indexing is 0 based like Java arrays.", e);
                    }
                }
            }
            plan = new BatchPartitionPlan();
            plan.setPartitions(numPartitions);
            plan.setThreads(numThreads);
            plan.setPartitionProperties(partitionProps);
            plan.setPartitionsOverride(false); //FIXME what is the default for a static plan??
        }


        // Set the other instance variables for convenience.
        this.partitions = plan.getPartitions();
        this.threads = plan.getThreads();
        this.partitionProperties = plan.getPartitionProperties();

        return plan;
    }
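
    /*
     * Illustrative examples (not part of the original source) of the two inputs
     * generatePartitionPlan() consumes. First, a hypothetical static plan in JSL,
     * which the second branch above parses (element and attribute names per the
     * JSR-352 schema; the step id and property values are made up):
     *
     *   <step id="step1">
     *     <partition>
     *       <plan partitions="2" threads="2">
     *         <properties partition="0">
     *           <property name="firstRow" value="0"/>
     *         </properties>
     *         <properties partition="1">
     *           <property name="firstRow" value="500"/>
     *         </properties>
     *       </plan>
     *     </partition>
     *   </step>
     *
     * Second, a minimal PartitionMapper sketch producing an equivalent plan
     * programmatically (PartitionPlanImpl is the javax.batch.api.partition helper;
     * the class name and property values are hypothetical):
     *
     *   public class ExampleMapper implements javax.batch.api.partition.PartitionMapper {
     *       @Override
     *       public PartitionPlan mapPartitions() throws Exception {
     *           final PartitionPlan plan = new javax.batch.api.partition.PartitionPlanImpl();
     *           plan.setPartitions(2);
     *           plan.setThreads(2); // 0 would mean "one thread per partition"
     *           final Properties p0 = new Properties();
     *           p0.setProperty("firstRow", "0");
     *           final Properties p1 = new Properties();
     *           p1.setProperty("firstRow", "500");
     *           plan.setPartitionProperties(new Properties[] { p0, p1 });
     *           plan.setPartitionsOverride(false); // on restart, reuse prior count/checkpoints
     *           return plan;
     *       }
     *   }
     */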

    private void calculateExecutionType() {
        // We want to ignore override on the initial execution
        if (isRestartExecution()) {
            if (restartAfterCompletion) {
                executionType = ExecutionType.RESTART_AFTER_COMPLETION;
            } else if (plan.getPartitionsOverride()) {
                executionType = ExecutionType.RESTART_OVERRIDE;
            } else {
                executionType = ExecutionType.RESTART_NORMAL;
            }
        } else {
            executionType = ExecutionType.START;
        }
    }

    private void validateNumberOfPartitions() {

        int currentPlanSize = plan.getPartitions();

        if (executionType == ExecutionType.RESTART_NORMAL) {
            int previousPlanSize = stepStatus.getNumPartitions();
            if (previousPlanSize > 0 && previousPlanSize != currentPlanSize) {
                String msg = "On a normal restart, the current plan specified " + currentPlanSize + " partitions, but the previous " +
                        "execution's plan specified " + previousPlanSize + " partitions.  Failing job.";
                throw new IllegalStateException(msg);
            }
        }

        //persist the partition plan so on restart we have the same plan to reuse
        stepStatus.setNumPartitions(currentPlanSize);
    }

    @Override
    protected void invokeCoreStep() throws JobRestartException, JobStartException, JobExecutionAlreadyCompleteException, JobExecutionNotMostRecentException {

        this.plan = this.generatePartitionPlan();

        //persist the partition plan so on restart we have the same plan to reuse
        stepStatus.setNumPartitions(plan.getPartitions());
        calculateExecutionType();

        validateNumberOfPartitions();

        /* When true is specified, the partition count from the current run
         * is used and all results from past partitions are discarded. Any
         * resource cleanup or back out of work done in the previous run is the
         * responsibility of the application. The PartitionReducer artifact's
         * rollbackPartitionedStep method is invoked during restart before any
         * partitions begin processing to provide a cleanup hook.
         */
        if (executionType == ExecutionType.RESTART_OVERRIDE) {
            if (this.partitionReducerProxy != null) {
                try {
                    this.partitionReducerProxy.rollbackPartitionedStep();
                } catch (Exception e) {
                    ExceptionConfig.wrapBatchException(e);
                }
            }
        }

        //Set up a blocking queue to pick up collector data from a partitioned thread
        if (this.analyzerProxy != null) {
            this.analyzerStatusQueue = new LinkedBlockingQueue<PartitionDataWrapper>();
        }
        this.completedWorkQueue = new LinkedBlockingQueue<BatchPartitionWorkUnit>();

        // Build all sub jobs from partitioned step
        buildSubJobBatchWorkUnits();

        // kick off the threads
        executeAndWaitForCompletion();

        // Deal with the results.
        checkCompletedWork();
    }

    private void buildSubJobBatchWorkUnits() throws JobRestartException, JobStartException, JobExecutionAlreadyCompleteException, JobExecutionNotMostRecentException {
        synchronized (subJobs) {
            //check if we've already issued a stop
            if (jobExecutionImpl.getJobContext().getBatchStatus().equals(BatchStatus.STOPPING)) {
                return;
            }

            for (int instance = 0; instance < partitions; instance++) {
                subJobs.add(PartitionedStepBuilder.buildPartitionSubJob(jobExecutionImpl.getInstanceId(), jobExecutionImpl.getJobContext(), step, instance));
            }

            PartitionsBuilderConfig config =
                    new PartitionsBuilderConfig(subJobs, partitionProperties, analyzerStatusQueue, completedWorkQueue, jobExecutionImpl.getExecutionId());
            // Then build all the subjobs but do not start them yet
            if (executionType == ExecutionType.RESTART_NORMAL) {
                parallelBatchWorkUnits = kernelService.buildOnRestartParallelPartitions(config, jobExecutionImpl.getJobContext(), stepContext);
            } else {
                // This case includes RESTART_OVERRIDE and RESTART_AFTER_COMPLETION.
                //
                // So we're just going to create new "subjob" job instances in the DB in these cases,
                // and we'll have to make sure we're dealing with the correct ones, say in a subsequent "normal" restart
                // (of the current execution which is itself a restart)
                parallelBatchWorkUnits = kernelService.buildNewParallelPartitions(config, jobExecutionImpl.getJobContext(), stepContext);
            }

            // NOTE:  At this point I might not have as many work units as I had partitions, since some may have already completed.
        }
    }

    private void executeAndWaitForCompletion() throws JobRestartException {

        if (jobExecutionImpl.getJobContext().getBatchStatus().equals(BatchStatus.STOPPING)) {
            return;
        }

        int numTotalForThisExecution = parallelBatchWorkUnits.size();
        this.numPreviouslyCompleted = partitions - numTotalForThisExecution;
        int numCurrentCompleted = 0;
        int numCurrentSubmitted = 0;

        // All partitions have already completed on a previous execution.
        if (numTotalForThisExecution == 0) {
          return;
        }

        //Start up to the max number we are allowed from the threads attribute
        for (int i = 0; i < this.threads && i < numTotalForThisExecution; i++, numCurrentSubmitted++) {
            final BatchWorkUnit workUnit = parallelBatchWorkUnits.get(i);
            if (stepStatus.getStartCount() > 1 && !plan.getPartitionsOverride()) {
                kernelService.restartGeneratedJob(workUnit);
            } else {
                kernelService.startGeneratedJob(workUnit);
            }
        }

        while (true) {
            try {
                if (analyzerProxy != null) {
                    PartitionDataWrapper dataWrapper = analyzerStatusQueue.take();
                    if (PartitionEventType.ANALYZE_COLLECTOR_DATA.equals(dataWrapper.getEventType())) {
                        try {
                            analyzerProxy.analyzeCollectorData(dataWrapper.getCollectorData());
                        } catch (Exception e) {
                            ExceptionConfig.wrapBatchException(e);
                        }
                        continue; // without being ready to submit another
                    } else if (PartitionEventType.ANALYZE_STATUS.equals(dataWrapper.getEventType())) {
                        try {
                            analyzerProxy.analyzeStatus(dataWrapper.getBatchstatus(), dataWrapper.getExitStatus());
                        } catch (Exception e) {
                            ExceptionConfig.wrapBatchException(e);
                        }
                        completedWork.add(completedWorkQueue.take());  // Shouldn't be a long wait.
                    } else {
                        throw new IllegalStateException("Invalid partition state");
                    }
                } else {
                    // block until at least one thread has finished to
                    // submit more batch work. hold on to the finished work to look at later
                    completedWork.add(completedWorkQueue.take());
                }
            } catch (final InterruptedException e) {
                throw new BatchContainerRuntimeException(e);
            }

            numCurrentCompleted++;
            if (numCurrentCompleted < numTotalForThisExecution) {
                if (numCurrentSubmitted < numTotalForThisExecution) {
                    // Mirror the restart/start decision used for the initial submissions above;
                    // the branches here were inverted relative to that loop.
                    if (stepStatus.getStartCount() > 1 && !plan.getPartitionsOverride()) {
                        kernelService.restartGeneratedJob(parallelBatchWorkUnits.get(numCurrentSubmitted++));
                    } else {
                        kernelService.startGeneratedJob(parallelBatchWorkUnits.get(numCurrentSubmitted++));
                    }
                }
            } else {
                break;
            }
        }
    }
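
    /*
     * Illustrative sketch (not part of the original source): a minimal
     * PartitionAnalyzer driven by the loop above. Each ANALYZE_COLLECTOR_DATA
     * event dispatches to analyzeCollectorData(), and each ANALYZE_STATUS event
     * (one per finished partition) dispatches to analyzeStatus(), both on this
     * main thread. The payload type is whatever the job's collector sends; a
     * Long count is assumed here, and the class name is hypothetical:
     *
     *   public class ExampleAnalyzer implements javax.batch.api.partition.PartitionAnalyzer {
     *       private long totalProcessed = 0;
     *
     *       @Override
     *       public void analyzeCollectorData(final java.io.Serializable data) throws Exception {
     *           totalProcessed += (Long) data; // assumption: the collector emits Long counts
     *       }
     *
     *       @Override
     *       public void analyzeStatus(final BatchStatus batchStatus, final String exitStatus) throws Exception {
     *           // called once per partition as it completes
     *       }
     *   }
     */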

    private void checkCompletedWork() {
        /*
         * Check the batch status of each subJob after it's done to see if we need to issue a rollback.
         * Start rollback if any have stopped or failed.
         */
        boolean rollback = false;

        for (final BatchWorkUnit subJob : completedWork) {
            final List<StepExecution> steps = persistenceManagerService.getStepExecutionsForJobExecution(subJob.getJobExecutionImpl().getExecutionId());
            if (steps.size() == 1) {
                for (final Metric metric : steps.iterator().next().getMetrics()) {
                    stepContext.getMetric(metric.getType()).incValueBy(metric.getValue());
                }
            }/* else {
                // TODO: possible?
            }*/

            final BatchStatus batchStatus = subJob.getJobExecutionImpl().getJobContext().getBatchStatus();
            if (batchStatus.equals(BatchStatus.FAILED)) {
                rollback = true;

                //Keep track of the failing status and throw an exception to propagate after the rest of the partitions are complete
                stepContext.setBatchStatus(BatchStatus.FAILED);
            }
        }

        //If rollback is false we never issued a rollback so we can issue a logicalTXSynchronizationBeforeCompletion
        //NOTE: this will get issued even if a subjob fails or stops when no logicalTXSynchronizationRollback method is provided
        //We are assuming that not providing a rollback was intentional
        if (rollback) {
            if (this.partitionReducerProxy != null) {
                try {
                    this.partitionReducerProxy.rollbackPartitionedStep();
                } catch (Exception e) {
                    ExceptionConfig.wrapBatchException(e);
                }
            }
            throw new BatchContainerRuntimeException("One or more partitions failed");
        } else {
            if (this.partitionReducerProxy != null) {
                try {
                    this.partitionReducerProxy.beforePartitionedStepCompletion();
                } catch (Exception e) {
                    ExceptionConfig.wrapBatchException(e);
                }
            }
        }
    }

    @Override
    protected void setupStepArtifacts() {
        InjectionReferences injectionRef = new InjectionReferences(jobExecutionImpl.getJobContext(), stepContext, null);
        this.stepListeners = jobExecutionImpl.getListenerFactory().getListeners(StepListener.class, step, injectionRef, jobExecutionImpl);

        final Analyzer analyzer = step.getPartition().getAnalyzer();
        if (analyzer != null) {
            final List<Property> propList = analyzer.getProperties() == null ? null : analyzer.getProperties().getPropertyList();
            injectionRef = new InjectionReferences(jobExecutionImpl.getJobContext(), stepContext, propList);
            analyzerProxy = ProxyFactory.createPartitionAnalyzerProxy(factory, analyzer.getRef(), injectionRef, jobExecutionImpl);
        }

        final org.apache.batchee.jaxb.PartitionReducer partitionReducer = step.getPartition().getReducer();
        if (partitionReducer != null) {
            final List<Property> propList = partitionReducer.getProperties() == null ? null : partitionReducer.getProperties().getPropertyList();
            injectionRef = new InjectionReferences(jobExecutionImpl.getJobContext(), stepContext, propList);
            partitionReducerProxy = ProxyFactory.createPartitionReducerProxy(factory, partitionReducer.getRef(), injectionRef, jobExecutionImpl);
        }

    }

    @Override
    protected void invokePreStepArtifacts() {

        if (stepListeners != null) {
            for (StepListener listenerProxy : stepListeners) {
                // Call beforeStep on all the step listeners
                try {
                    listenerProxy.beforeStep();
                } catch (Exception e) {
                    ExceptionConfig.wrapBatchException(e);
                }
            }
        }

        // Invoke the reducer before all parallel steps start (must occur
        // before mapper as well)
        if (this.partitionReducerProxy != null) {
            try {
                this.partitionReducerProxy.beginPartitionedStep();
            } catch (Exception e) {
                ExceptionConfig.wrapBatchException(e);
            }
        }

    }

    @Override
    protected void invokePostStepArtifacts() {
        // Invoke the reducer after all parallel steps are done
        if (this.partitionReducerProxy != null) {

            try {
                if ((BatchStatus.COMPLETED).equals(stepContext.getBatchStatus())) {
                    this.partitionReducerProxy.afterPartitionedStepCompletion(PartitionStatus.COMMIT);
                } else {
                    this.partitionReducerProxy.afterPartitionedStepCompletion(PartitionStatus.ROLLBACK);
                }
            } catch (Exception e) {
                ExceptionConfig.wrapBatchException(e);
            }
        }

        // Called in spec'd order, e.g. Sec. 11.7
        if (stepListeners != null) {
            for (StepListener listenerProxy : stepListeners) {
                // Call afterStep on all the step listeners
                try {
                    listenerProxy.afterStep();
                } catch (Exception e) {
                    ExceptionConfig.wrapBatchException(e);
                }
            }
        }
    }
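
    /*
     * Illustrative note (not part of the original source): the PartitionReducer
     * lifecycle this controller drives, in order:
     *
     *   1. beginPartitionedStep()              - invokePreStepArtifacts(), before any partition starts
     *   2. rollbackPartitionedStep()           - invokeCoreStep() on RESTART_OVERRIDE, or
     *                                            checkCompletedWork() if any partition failed
     *   3. beforePartitionedStepCompletion()   - checkCompletedWork(), only when no rollback was issued
     *   4. afterPartitionedStepCompletion(...) - invokePostStepArtifacts() above, with COMMIT when the
     *                                            step COMPLETED, otherwise ROLLBACK
     */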

    @Override
    protected void sendStatusFromPartitionToAnalyzerIfPresent() {
        // Since we're already on the main thread, there will never
        // be anything to do on this thread.  It's only on the partitioned
        // threads that there is something to send back.
    }
}