aima.core.probability.example.MDPFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aima-core Show documentation
Show all versions of aima-core Show documentation
AIMA-Java Core Algorithms from the book Artificial Intelligence: A Modern Approach, 3rd Ed.
The newest version!
package aima.core.probability.example;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.probability.mdp.ActionsFunction;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.RewardFunction;
import aima.core.probability.mdp.TransitionProbabilityFunction;
import aima.core.probability.mdp.impl.MDP;
/**
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*/
public class MDPFactory {
/**
* Constructs an MDP that can be used to generate the utility values
* detailed in Fig 17.3.
*
* @param cw
* the cell world from figure 17.1.
* @return an MDP that can be used to generate the utility values detailed
* in Fig 17.3.
*/
public static MarkovDecisionProcess, CellWorldAction> createMDPForFigure17_3(
final CellWorld cw) {
return new MDP, CellWorldAction>(cw.getCells(),
cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
createTransitionProbabilityFunctionForFigure17_1(cw),
createRewardFunctionForFigure17_1());
}
/**
* Returns the allowed actions from a specified cell within the cell world
* described in Fig 17.1.
*
* @param cw
* the cell world from figure 17.1.
* @return the set of actions allowed at a particular cell. This set will be
* empty if at a terminal state.
*/
public static ActionsFunction, CellWorldAction> createActionsFunctionForFigure17_1(
final CellWorld cw) {
final Set> terminals = new HashSet>();
terminals.add(cw.getCellAt(4, 3));
terminals.add(cw.getCellAt(4, 2));
ActionsFunction, CellWorldAction> af = new ActionsFunction, CellWorldAction>() {
@Override
public Set actions(Cell s) {
// All actions can be performed in each cell
// (except terminal states)
if (terminals.contains(s)) {
return Collections.emptySet();
}
return CellWorldAction.actions();
}
};
return af;
}
/**
* Figure 17.1 (b) Illustration of the transition model of the environment:
* the 'intended' outcome occurs with probability 0.8, but with probability
* 0.2 the agent moves at right angles to the intended direction. A
* collision with a wall results in no movement.
*
* @param cw
* the cell world from figure 17.1.
* @return the transition probability function as described in figure 17.1.
*/
public static TransitionProbabilityFunction, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(
final CellWorld cw) {
TransitionProbabilityFunction, CellWorldAction> tf = new TransitionProbabilityFunction, CellWorldAction>() {
private double[] distribution = new double[] { 0.8, 0.1, 0.1 };
@Override
public double probability(Cell sDelta, Cell s,
CellWorldAction a) {
double prob = 0;
List> outcomes = possibleOutcomes(s, a);
for (int i = 0; i < outcomes.size(); i++) {
if (sDelta.equals(outcomes.get(i))) {
// Note: You have to sum the matches to
// sDelta as the different actions
// could have the same effect (i.e.
// staying in place due to there being
// no adjacent cells), which increases
// the probability of the transition for
// that state.
prob += distribution[i];
}
}
return prob;
}
private List> possibleOutcomes(Cell c,
CellWorldAction a) {
// There can be three possible outcomes for the planned action
List> outcomes = new ArrayList>();
outcomes.add(cw.result(c, a));
outcomes.add(cw.result(c, a.getFirstRightAngledAction()));
outcomes.add(cw.result(c, a.getSecondRightAngledAction()));
return outcomes;
}
};
return tf;
}
/**
*
* @return the reward function which takes the content of the cell as being
* the reward value.
*/
public static RewardFunction> createRewardFunctionForFigure17_1() {
RewardFunction> rf = new RewardFunction>() {
@Override
public double reward(Cell s) {
return s.getContent();
}
};
return rf;
}
}
| | | | | | | | | | | | | | | | |