goal.tools.adapt.Learner
/**
 * The GOAL Runtime Environment. Copyright (C) 2015 Koen Hindriks.
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */
package goal.tools.adapt;

import java.util.List;

import goal.core.agent.Agent;
import goal.core.executors.modules.AdaptiveModuleExecutor;
import goal.core.executors.stack.ActionComboStackExecutor;
import goal.tools.errorhandling.exceptions.GOALRunFailedException;
import languageTools.program.agent.Module;
import mentalState.MentalState;
import mentalState.MentalStateWithEvents;
import mentalState.converter.GOALMentalStateConverter;

/**
 * To make decisions in adaptive sections of the GOAL program the interpreter
 * consults a Learner. A Learner can make decisions based on past decisions and
 * rewards received from these decisions.
 * 
 * Learner connects GOAL {@link Module}s to {@link LearnerAlgorithm}s.
 * 
 * To do this, the learner has to
 * <ul>
 * <li>map the current GOAL state (the current goals, beliefs, and focus) into
 * a state number (an Integer). This is supported by the
 * {@link GOALMentalStateConverter};</li>
 * <li>keep for each {@link Module} an instance of {@link LearnerAlgorithm}
 * and update it with every step that the {@link Agent} does. The actual
 * coupling from the agent happens in {@link AdaptiveModuleExecutor};</li>
 * <li>save the learned information to a file till the agent is run
 * again.</li>
 * </ul>
 *
 * <h1>Saving the learned information</h1>
 *
 * When the agent has died and
 * {@link Learner#terminate(MentalStateWithEvents, Double)} is called, the
 * learner updates a <code>.lrn</code> file, a <code>.adaptive.out</code> file
 * and a <code>.lrn.txt</code> file. The <code>.lrn</code> file holds the
 * saved learning from each run and gets updated on each run, so at any time,
 * if you kill the agent, the learning so far has been saved. The agent can
 * also start up with a specified <code>.lrn</code> file (i.e., with prior
 * learning). The <code>.lrn.txt</code> file is intended for users (students)
 * so that they can get some feedback on what was learned (when coding and
 * debugging); this file can be shared and compared between students. The
 * <code>.adaptive.out</code> file is the log of all states, actions and
 * rewards from memory.
 */
public interface Learner {
	/**
	 * Selects an action from the list of options. The Learner can make this
	 * choice based on the current module, mental state and prior experiences.
	 *
	 * @param module
	 *            the current {@link ModuleID}.
	 * @param ms
	 *            the current mental state.
	 * @param options
	 *            the enabled actions from which the learner can choose an
	 *            action executor. This list should contain at least 1 element.
	 * @return the selected action from the options (null is NOT allowed).
	 */
	public abstract ActionComboStackExecutor act(ModuleID module,
			MentalStateWithEvents ms, List<ActionComboStackExecutor> options);

	/**
	 * Updates the reward based on the last action taken in the previous state.
	 * Note that this makes the Learner stateful: you can not learn just
	 * arbitrary actions. This must be called after every
	 * {@link #act(ModuleID, MentalStateWithEvents, List)} call, even if the
	 * environment can not give a proper reward value.
	 *
	 * @param module
	 *            the current {@link ModuleID}.
	 * @param ms
	 *            the current mental state.
	 * @param reward
	 *            the reward for executing the last action selected by
	 *            {@link #act(ModuleID, MentalStateWithEvents, List)}.
	 */
	public abstract void update(ModuleID module, MentalStateWithEvents ms,
			double reward);

	/**
	 * Terminates all learning.
	 *
	 * @param ms
	 *            the final mental state.
	 * @param envReward
	 *            the final reward received from the environment, if any.
	 * @throws GOALRunFailedException
	 *             if saving the learned information fails.
	 */
	public abstract void terminate(MentalStateWithEvents ms, Double envReward)
			throws GOALRunFailedException;
}
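As an illustration of the contract above, here is a minimal sketch of an implementing class: it returns a uniformly random option from act and ignores all rewards, so it learns nothing and saves nothing. The class name RandomLearner is hypothetical, and the element type of the options list is assumed from the return type of act; a real implementation would keep a LearnerAlgorithm per ModuleID, feed it rewards in update, and write the .lrn file in terminate.

package goal.tools.adapt;

import java.util.List;
import java.util.Random;

import goal.core.executors.stack.ActionComboStackExecutor;
import goal.tools.errorhandling.exceptions.GOALRunFailedException;
import mentalState.MentalStateWithEvents;

/**
 * A minimal Learner that picks a uniformly random enabled option and ignores
 * all rewards, so it never actually learns. A sketch only; useful as a
 * baseline or as a starting point for a real implementation.
 */
public class RandomLearner implements Learner {
	private final Random random = new Random();

	@Override
	public ActionComboStackExecutor act(ModuleID module,
			MentalStateWithEvents ms, List<ActionComboStackExecutor> options) {
		// The contract guarantees at least one option and forbids returning
		// null, so picking a random element is always safe here.
		return options.get(this.random.nextInt(options.size()));
	}

	@Override
	public void update(ModuleID module, MentalStateWithEvents ms,
			double reward) {
		// A real learner would map ms to a state number and feed the reward
		// to the LearnerAlgorithm kept for this module; we ignore it.
	}

	@Override
	public void terminate(MentalStateWithEvents ms, Double envReward)
			throws GOALRunFailedException {
		// Nothing was learned, so there is nothing to save to a .lrn file.
	}
}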




