aima.core.probability.mdp.MarkovDecisionProcess Maven / Gradle / Ivy

package aima.core.probability.mdp;

import java.util.Set;

import aima.core.agent.Action;

/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 647.

 * 

 * 
 * A sequential decision problem for a fully observable, stochastic environment
 * with a Markovian transition model and additive rewards is called a Markov
 * decision process, or MDP, and consists of a set of states (with an
 * initial state s₀; a set ACTIONS(s) of actions in each state; a
 * transition model P(s' | s, a); and a reward function R(s).

 * 

 * Note: Some definitions of MDPs allow the reward to depend on the
 * action and outcome too, so the reward function is R(s, a, s'). This
 * simplifies the description of some environments but does not change the
 * problem in any fundamental way.
 * 
 * @param 
 *            the state type.
 * @param 
 *            the action type.
 * 
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 * 
 */
public interface MarkovDecisionProcess {

	/**
	 * Get the set of states associated with the Markov decision process.
	 * 
	 * @return the set of states associated with the Markov decision process.
	 */
	Set states();

	/**
	 * Get the initial state s₀ for this instance of a Markov
	 * decision process.
	 * 
	 * @return the initial state s₀.
	 */
	S getInitialState();

	/**
	 * Get the set of actions for state s.
	 * 
	 * @param s
	 *            the state.
	 * @return the set of actions for state s.
	 */
	Set actions(S s);

	/**
	 * Return the probability of going from state s using action a to s' based
	 * on the underlying transition model P(s' | s, a).
	 * 
	 * @param sDelta
	 *            the state s' being transitioned to.
	 * @param s
	 *            the state s being transitions from.
	 * @param a
	 *            the action used to move from state s to s'.
	 * @return the probability of going from state s using action a to s'.
	 */
	double transitionProbability(S sDelta, S s, A a);

	/**
	 * Get the reward associated with being in state s.
	 * 
	 * @param s
	 *            the state whose award is sought.
	 * @return the reward associated with being in state s.
	 */
	double reward(S s);
}