package aima.core.probability.mdp;
import java.util.Set;
import aima.core.agent.Action;
/**
* Artificial Intelligence A Modern Approach (3rd Edition): page 647.
*
*
* A sequential decision problem for a fully observable, stochastic environment
* with a Markovian transition model and additive rewards is called a Markov
* decision process, or MDP, and consists of a set of states (with an
* initial state s0); a set ACTIONS(s) of actions in each state; a
* transition model P(s' | s, a); and a reward function R(s).
*
* Note: Some definitions of MDPs allow the reward to depend on the
* action and outcome too, so the reward function is R(s, a, s'). This
* simplifies the description of some environments but does not change the
* problem in any fundamental way.
*
* @param <S>
*            the state type.
* @param <A>
*            the action type.
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*
*/
public interface MarkovDecisionProcess<S, A extends Action> {
/**
* Get the set of states associated with the Markov decision process.
*
* @return the set of states associated with the Markov decision process.
*/
Set<S> states();
/**
* Get the initial state s0 for this instance of a Markov
* decision process.
*
* @return the initial state s0.
*/
S getInitialState();
/**
* Get the set of actions for state s.
*
* @param s
* the state.
* @return the set of actions for state s.
*/
Set<A> actions(S s);
/**
* Return the probability of going from state s using action a to s' based
* on the underlying transition model P(s' | s, a).
*
* @param sDelta
* the state s' being transitioned to.
* @param s
*            the state s being transitioned from.
* @param a
* the action used to move from state s to s'.
* @return the probability of going from state s using action a to s'.
*/
double transitionProbability(S sDelta, S s, A a);
/**
* Get the reward associated with being in state s.
*
* @param s
*            the state whose reward is sought.
* @return the reward associated with being in state s.
*/
double reward(S s);
}
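
/*
 * A minimal sketch of one way this interface might be implemented, as a
 * hypothetical two-state MDP placed in its own file in the same package.
 * The states "A" and "B", the Move enum, and all probabilities and rewards
 * below are illustrative assumptions, not part of aima-core; only
 * MarkovDecisionProcess and aima.core.agent.Action come from the library.
 */
package aima.core.probability.mdp;

import java.util.LinkedHashSet;
import java.util.Set;

import aima.core.agent.Action;

/** Hypothetical action type; isNoOp() is required by aima.core.agent.Action. */
enum Move implements Action {
	STAY, GO;

	@Override
	public boolean isNoOp() {
		return this == STAY;
	}
}

class TwoStateMDP implements MarkovDecisionProcess<String, Move> {

	@Override
	public Set<String> states() {
		Set<String> states = new LinkedHashSet<>();
		states.add("A");
		states.add("B");
		return states;
	}

	@Override
	public String getInitialState() {
		return "A";
	}

	@Override
	public Set<Move> actions(String s) {
		// Both actions are available in every state in this toy example.
		Set<Move> actions = new LinkedHashSet<>();
		actions.add(Move.STAY);
		actions.add(Move.GO);
		return actions;
	}

	@Override
	public double transitionProbability(String sDelta, String s, Move a) {
		// P(s' | s, a): STAY is deterministic; GO reaches the other state
		// with probability 0.9 and slips back to s with probability 0.1.
		if (a == Move.STAY) {
			return s.equals(sDelta) ? 1.0 : 0.0;
		}
		return s.equals(sDelta) ? 0.1 : 0.9;
	}

	@Override
	public double reward(String s) {
		// R(s): the reward depends only on the state, per this interface;
		// the R(s, a, s') variant noted in the Javadoc is not used here.
		return s.equals("B") ? 1.0 : -0.04;
	}

	// Usage sketch: check that P(. | s, a) sums to 1 over all successor states.
	public static void main(String[] args) {
		TwoStateMDP mdp = new TwoStateMDP();
		for (String s : mdp.states()) {
			for (Move a : mdp.actions(s)) {
				double total = 0;
				for (String sDelta : mdp.states()) {
					total += mdp.transitionProbability(sDelta, s, a);
				}
				System.out.println("P(. | " + s + ", " + a + ") sums to " + total);
			}
		}
	}
}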