All downloads are free. The search and download features use the official Maven repository.

org.apache.ignite.ml.trainers.DatasetTrainer Maven / Gradle / Ivy

Go to download

Apache Ignite® is a Distributed Database For High-Performance Computing With In-Memory Speed.

There is a newer version: 2.15.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.ml.trainers;

import java.util.Map;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.lang.IgniteBiPredicate;
import org.apache.ignite.ml.Model;
import org.apache.ignite.ml.dataset.DatasetBuilder;
import org.apache.ignite.ml.dataset.impl.cache.CacheBasedDatasetBuilder;
import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
import org.apache.ignite.ml.environment.LearningEnvironment;
import org.apache.ignite.ml.environment.logging.MLLogger;
import org.apache.ignite.ml.math.functions.IgniteBiFunction;
import org.apache.ignite.ml.math.primitives.vector.Vector;
import org.jetbrains.annotations.NotNull;

/**
 * Base class for trainers. A trainer is just a function which produces a model from the data.
 * <p>
 * Subclasses implement {@link #fit(DatasetBuilder, IgniteBiFunction, IgniteBiFunction)},
 * {@link #checkState(Model)} and
 * {@link #updateModel(Model, DatasetBuilder, IgniteBiFunction, IgniteBiFunction)}; every other {@code fit} /
 * {@code update} overload only wraps its arguments into a dataset builder and delegates to those methods.
 * <p>
 * NOTE(review): the generic declarations were missing from this copy of the file and have been reconstructed from
 * the Javadoc tags, the otherwise unused {@code Model} / {@code Vector} imports and the diamond-operator calls.
 * Confirm the bound on {@code M} and the extractor result types against the upstream sources.
 *
 * @param <M> Type of a produced model.
 * @param <L> Type of a label.
 */
public abstract class DatasetTrainer<M extends Model, L> {
    /** Message used both for logging and for {@link EmptyDatasetException}. */
    private static final String EMPTY_DATASET_MSG = "Cannot train model on empty dataset";

    /** Learning Environment. */
    protected LearningEnvironment environment = LearningEnvironment.DEFAULT;

    /**
     * Trains model based on the specified data.
     *
     * @param datasetBuilder Dataset builder.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Model.
     */
    public abstract <K, V> M fit(DatasetBuilder<K, V> datasetBuilder,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor);

    /**
     * Gets state of the model passed in arguments and compares it with the training parameters of this trainer.
     * If they fit, the trainer updates the model according to the new data and returns the new model.
     * Otherwise (including the case when {@code mdl} is {@code null}) a new model is trained.
     *
     * @param mdl Learned model, may be {@code null}.
     * @param datasetBuilder Dataset builder.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Updated model.
     */
    public <K, V> M update(M mdl, DatasetBuilder<K, V> datasetBuilder,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {

        if (mdl != null) {
            if (checkState(mdl))
                return updateModel(mdl, datasetBuilder, featureExtractor, lbExtractor);
            else {
                // The model is not compatible with this trainer: report it and fall through to a full retrain.
                environment.logger(getClass()).log(
                    MLLogger.VerboseLevel.HIGH,
                    "Model cannot be updated because of initial state of " +
                        "it doesn't corresponds to trainer parameters"
                );
            }
        }

        return fit(datasetBuilder, featureExtractor, lbExtractor);
    }

    /**
     * @param mdl Model.
     * @return true if current critical for training parameters correspond to parameters from last training.
     */
    protected abstract boolean checkState(M mdl);

    /**
     * Used on update phase when given dataset is empty.
     * If last trained model exists then the method logs a message and returns it.
     * Otherwise throws {@link EmptyDatasetException}.
     *
     * @param lastTrainedMdl Model.
     * @return {@code lastTrainedMdl} when it is not {@code null}.
     * @throws EmptyDatasetException If {@code lastTrainedMdl} is {@code null}.
     */
    @NotNull protected M getLastTrainedModelOrThrowEmptyDatasetException(M lastTrainedMdl) {
        if (lastTrainedMdl != null) {
            environment.logger(getClass()).log(MLLogger.VerboseLevel.HIGH, EMPTY_DATASET_MSG);
            return lastTrainedMdl;
        } else
            throw new EmptyDatasetException();
    }

    /**
     * Gets state of model in arguments, updates it according to new data and returns new model.
     *
     * @param mdl Learned model.
     * @param datasetBuilder Dataset builder.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Updated model.
     */
    protected abstract <K, V> M updateModel(M mdl, DatasetBuilder<K, V> datasetBuilder,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor);

    /**
     * Trains model based on the specified data.
     *
     * @param ignite Ignite instance.
     * @param cache Ignite cache.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Model.
     */
    public <K, V> M fit(Ignite ignite, IgniteCache<K, V> cache,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
        return fit(
            new CacheBasedDatasetBuilder<>(ignite, cache),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Gets state of model in arguments, updates it according to new data and returns new model.
     *
     * @param mdl Learned model.
     * @param ignite Ignite instance.
     * @param cache Ignite cache.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Updated model.
     */
    public <K, V> M update(M mdl, Ignite ignite, IgniteCache<K, V> cache,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
        return update(
            mdl, new CacheBasedDatasetBuilder<>(ignite, cache),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Trains model based on the specified data.
     *
     * @param ignite Ignite instance.
     * @param cache Ignite cache.
     * @param filter Filter for {@code upstream} data.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Model.
     */
    public <K, V> M fit(Ignite ignite, IgniteCache<K, V> cache, IgniteBiPredicate<K, V> filter,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
        return fit(
            new CacheBasedDatasetBuilder<>(ignite, cache, filter),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Gets state of model in arguments, updates it according to new data and returns new model.
     *
     * @param mdl Learned model.
     * @param ignite Ignite instance.
     * @param cache Ignite cache.
     * @param filter Filter for {@code upstream} data.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Updated model.
     */
    public <K, V> M update(M mdl, Ignite ignite, IgniteCache<K, V> cache, IgniteBiPredicate<K, V> filter,
        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
        return update(
            mdl, new CacheBasedDatasetBuilder<>(ignite, cache, filter),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Trains model based on the specified data.
     *
     * @param data Data.
     * @param parts Number of partitions.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Model.
     */
    public <K, V> M fit(Map<K, V> data, int parts, IgniteBiFunction<K, V, Vector> featureExtractor,
        IgniteBiFunction<K, V, L> lbExtractor) {
        return fit(
            new LocalDatasetBuilder<>(data, parts),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Gets state of model in arguments, updates it according to new data and returns new model.
     *
     * @param mdl Learned model.
     * @param data Data.
     * @param parts Number of partitions.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Updated model.
     */
    public <K, V> M update(M mdl, Map<K, V> data, int parts, IgniteBiFunction<K, V, Vector> featureExtractor,
        IgniteBiFunction<K, V, L> lbExtractor) {
        return update(
            mdl, new LocalDatasetBuilder<>(data, parts),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Trains model based on the specified data.
     *
     * @param data Data.
     * @param filter Filter for {@code upstream} data.
     * @param parts Number of partitions.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Model.
     */
    public <K, V> M fit(Map<K, V> data, IgniteBiPredicate<K, V> filter, int parts,
        IgniteBiFunction<K, V, Vector> featureExtractor,
        IgniteBiFunction<K, V, L> lbExtractor) {
        return fit(
            new LocalDatasetBuilder<>(data, filter, parts),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Gets state of model in arguments, updates it according to new data and returns new model.
     *
     * @param mdl Learned model.
     * @param data Data.
     * @param filter Filter for {@code upstream} data.
     * @param parts Number of partitions.
     * @param featureExtractor Feature extractor.
     * @param lbExtractor Label extractor.
     * @param <K> Type of a key in {@code upstream} data.
     * @param <V> Type of a value in {@code upstream} data.
     * @return Updated model.
     */
    public <K, V> M update(M mdl, Map<K, V> data, IgniteBiPredicate<K, V> filter, int parts,
        IgniteBiFunction<K, V, Vector> featureExtractor,
        IgniteBiFunction<K, V, L> lbExtractor) {
        return update(
            mdl, new LocalDatasetBuilder<>(data, filter, parts),
            featureExtractor,
            lbExtractor
        );
    }

    /**
     * Sets learning Environment.
     *
     * @param environment Environment.
     */
    public void setEnvironment(LearningEnvironment environment) {
        this.environment = environment;
    }

    /**
     * Exception thrown when a model has to be trained on an empty dataset and no previously trained model exists.
     */
    public static class EmptyDatasetException extends IllegalArgumentException {
        /** Serial version uid. */
        private static final long serialVersionUID = 6914650522523293521L;

        /**
         * Constructs an instance of EmptyDatasetException.
         */
        public EmptyDatasetException() {
            super(EMPTY_DATASET_MSG);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy