All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.parallelism.main.ParallelWrapperMain Maven / Gradle / Ivy

/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  *  See the NOTICE file distributed with this work for additional
 *  *  information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

package org.deeplearning4j.parallelism.main;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.config.DL4JClassLoading;
import org.deeplearning4j.core.storage.StatsStorageRouter;
import org.deeplearning4j.core.storage.impl.RemoteUIStatsStorageRouter;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.parallelism.ParallelWrapper;
import org.deeplearning4j.core.util.ModelGuesser;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;

import java.io.File;

@Data
@Slf4j
public class ParallelWrapperMain {
    @Parameter(names = {"--modelPath"}, description = "Path to the model", arity = 1, required = true)
    private String modelPath = null;
    @Parameter(names = {"--workers"}, description = "Number of workers", arity = 1)
    private int workers = 2;
    @Parameter(names = {"--prefetchSize"}, description = "The number of datasets to prefetch", arity = 1)
    private int prefetchSize = 16;
    @Parameter(names = {"--averagingFrequency"}, description = "The frequency for averaging parameters", arity = 1)
    private int averagingFrequency = 1;
    @Parameter(names = {"--reportScore"}, description = "The subcommand to run", arity = 1)
    private boolean reportScore = false;
    @Parameter(names = {"--averageUpdaters"}, description = "Whether to average updaters", arity = 1)
    private boolean averageUpdaters = true;
    @Parameter(names = {"--legacyAveraging"}, description = "Whether to use legacy averaging", arity = 1)
    private boolean legacyAveraging = true;
    @Parameter(names = {"--dataSetIteratorFactoryClazz"},
                    description = "The fully qualified class name of the multi data set iterator class to use.",
                    arity = 1)
    private String dataSetIteratorFactoryClazz = null;
    @Parameter(names = {"--multiDataSetIteratorFactoryClazz"},
                    description = "The fully qualified class name of the multi data set iterator class to use.",
                    arity = 1)
    private String multiDataSetIteratorFactoryClazz = null;
    @Parameter(names = {"--modelOutputPath"},
                    description = "The fully qualified class name of the multi data set iterator class to use.",
                    arity = 1, required = true)
    private String modelOutputPath = null;
    @Parameter(names = {"--uiUrl"}, description = "The host:port of the ui to use (optional)", arity = 1)
    private String uiUrl = null;


    private RemoteUIStatsStorageRouter remoteUIRouter;
    private ParallelWrapper wrapper;

    public static void main(String[] args) throws Exception {
        new ParallelWrapperMain().runMain(args);
    }

    public void runMain(String... args) throws Exception {
        JCommander jcmdr = new JCommander(this);

        try {
            jcmdr.parse(args);
        } catch (ParameterException e) {
            System.err.println(e.getMessage());
            //User provides invalid input -> print the usage info
            jcmdr.usage();
            try {
                Thread.sleep(500);
            } catch (Exception e2) {
            }
            System.exit(1);
        }

        run();

    }


    public void run() throws Exception {

        Model model = ModelGuesser.loadModelGuess(modelPath);
        // ParallelWrapper will take care of load balancing between GPUs.
        wrapper = new ParallelWrapper.Builder(model)
                        // DataSets prefetching options. Set this value with respect to number of actual devices
                        .prefetchBuffer(prefetchSize)

                        // set number of workers equal or higher then number of available devices. x1-x2 are good values to start with
                        .workers(workers)

                        // rare averaging improves performance, but might reduce model accuracy
                        .averagingFrequency(averagingFrequency).averageUpdaters(averageUpdaters)

                        // if set to TRUE, on every averaging model score will be reported
                        .reportScoreAfterAveraging(reportScore)

                        // optional parameter, set to false ONLY if your system has support P2P memory access across PCIe (hint: AWS do not support P2P)
                        //.useLegacyAveraging(legacyAveraging)

                        .build();

        if (dataSetIteratorFactoryClazz != null) {
            DataSetIteratorProviderFactory dataSetIteratorProviderFactory = DL4JClassLoading
                    .createNewInstance(dataSetIteratorFactoryClazz);

            DataSetIterator dataSetIterator = dataSetIteratorProviderFactory.create();
            if (uiUrl != null) {
                // it's important that the UI can report results from parallel training
                // there's potential for StatsListener to fail if certain properties aren't set in the model
                StatsStorageRouter remoteUIRouter = new RemoteUIStatsStorageRouter("http://" + uiUrl);
                TrainingListener trainingListener = DL4JClassLoading.createNewInstance(
                        "org.deeplearning4j.ui.model.stats.StatsListener",
                        StatsStorageRouter.class,
                        new Class[] { StatsStorageRouter.class },
                        new Object[] { null });
                wrapper.setListeners(remoteUIRouter, trainingListener);

            }
            wrapper.fit(dataSetIterator);
            ModelSerializer.writeModel(model, new File(modelOutputPath), true);
        } else if (multiDataSetIteratorFactoryClazz != null) {
            MultiDataSetProviderFactory multiDataSetProviderFactory = DL4JClassLoading
                    .createNewInstance(multiDataSetIteratorFactoryClazz);

            MultiDataSetIterator iterator = multiDataSetProviderFactory.create();
            if (uiUrl != null) {
                // it's important that the UI can report results from parallel training
                // there's potential for StatsListener to fail if certain properties aren't set in the model
                remoteUIRouter = new RemoteUIStatsStorageRouter("http://" + uiUrl);
                TrainingListener trainingListener = DL4JClassLoading
                        .createNewInstance(
                                "org.deeplearning4j.ui.model.stats.StatsListener",
                                TrainingListener.class,
                                new Class[]{ StatsStorageRouter.class },
                                new Object[]{ null });
                wrapper.setListeners(remoteUIRouter, trainingListener);

            }
            wrapper.fit(iterator);
            ModelSerializer.writeModel(model, new File(modelOutputPath), true);
        } else {
            throw new IllegalStateException("Please provide a datasetiteraator or multi datasetiterator class");
        }
    }

    /**
     * Stop the ParallelWrapper main. Mainly used for testing purposes
     */
    public void stop(){
        if(remoteUIRouter != null){
            remoteUIRouter.shutdown();
        }

        if(wrapper != null){
            try {
                wrapper.close();
            } catch (Throwable t){
                log.warn("ParallelWrapperMain.close(): Exception encountered trying to close ParallelWrapper instance", t);
                throw new RuntimeException(t);
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy