
aima.core.probability.mdp.search.ValueIteration Maven / Gradle / Ivy

AIMA-Java Core Algorithms from the book Artificial Intelligence: A Modern Approach, 3rd Edition.

package aima.core.probability.mdp.search;

import java.util.Map;
import java.util.Set;

import aima.core.agent.Action;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.util.Util;

/**
 * Artificial Intelligence A Modern Approach (3rd Edition): page 653.
 * 
 * <pre>
 * function VALUE-ITERATION(mdp, ε) returns a utility function
 *   inputs: mdp, an MDP with states S, actions A(s), transition model P(s' | s, a),
 *             rewards R(s), discount γ
 *           ε the maximum error allowed in the utility of any state
 *   local variables: U, U', vectors of utilities for states in S, initially zero
 *                    δ the maximum change in the utility of any state in an iteration
 *                    
 *   repeat
 *       U <- U'; δ <- 0
 *       for each state s in S do
 *           U'[s] <- R(s) + γ max_{a ∈ A(s)} Σ_{s'} P(s' | s, a) U[s']
 *           if |U'[s] - U[s]| > δ then δ <- |U'[s] - U[s]|
 *   until δ < ε(1 - γ)/γ
 *   return U
 * </pre>
 * 
 * Figure 17.4 The value iteration algorithm for calculating utilities of
 * states. The termination condition is from Equation (17.8):
 * 
 * <pre>
 * if ||U_{i+1} - U_i|| < ε(1 - γ)/γ then ||U_{i+1} - U|| < ε
 * </pre>
 *
 * @param <S>
 *            the state type.
 * @param <A>
 *            the action type.
 *
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public class ValueIteration<S, A extends Action> {
    // discount γ to be used.
    private double gamma = 0;

    /**
     * Constructor.
     *
     * @param gamma
     *            the discount γ to be used.
     */
    public ValueIteration(double gamma) {
        if (gamma > 1.0 || gamma <= 0.0) {
            throw new IllegalArgumentException("Gamma must be > 0 and <= 1.0");
        }
        this.gamma = gamma;
    }

    // function VALUE-ITERATION(mdp, ε) returns a utility function
    /**
     * The value iteration algorithm for calculating the utility of states.
     *
     * @param mdp
     *            an MDP with states S, actions A(s), transition model
     *            P(s' | s, a), rewards R(s)
     * @param epsilon
     *            the maximum error allowed in the utility of any state
     * @return a vector of utilities for states in S
     */
    public Map<S, Double> valueIteration(MarkovDecisionProcess<S, A> mdp, double epsilon) {
        // local variables: U, U', vectors of utilities for states in S,
        // initially zero
        Map<S, Double> U = Util.create(mdp.states(), new Double(0));
        Map<S, Double> Udelta = Util.create(mdp.states(), new Double(0));
        // δ, the maximum change in the utility of any state in an iteration
        double delta = 0;
        // Note: calculate the termination threshold ε(1 - γ)/γ just once.
        double minDelta = epsilon * (1 - gamma) / gamma;

        // repeat
        do {
            // U <- U'; δ <- 0
            U.putAll(Udelta);
            delta = 0;
            // for each state s in S do
            for (S s : mdp.states()) {
                Set<A> actions = mdp.actions(s);
                // Handle terminal states (i.e. no actions): their expected
                // future utility contribution is 0.
                double aMax = 0;
                if (actions.size() > 0) {
                    aMax = Double.NEGATIVE_INFINITY;
                }
                // max_{a ∈ A(s)} Σ_{s'} P(s' | s, a) U[s']
                for (A a : actions) {
                    double aSum = 0;
                    for (S sDelta : mdp.states()) {
                        aSum += mdp.transitionProbability(sDelta, s, a) * U.get(sDelta);
                    }
                    if (aSum > aMax) {
                        aMax = aSum;
                    }
                }
                // U'[s] <- R(s) + γ max_{a ∈ A(s)} Σ_{s'} P(s' | s, a) U[s']
                Udelta.put(s, mdp.reward(s) + gamma * aMax);
                // if |U'[s] - U[s]| > δ then δ <- |U'[s] - U[s]|
                double aDiff = Math.abs(Udelta.get(s) - U.get(s));
                if (aDiff > delta) {
                    delta = aDiff;
                }
            }
            // until δ < ε(1 - γ)/γ
        } while (delta > minDelta);
        // return U
        return U;
    }
}
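
For reference, a minimal usage sketch follows. It is not part of the listed source; the CellWorldFactory and MDPFactory calls are assumptions based on the example classes bundled with aima-core (the 4x3 cell world of Figures 17.1 and 17.3), and any other MarkovDecisionProcess<S, A> implementation can be substituted. With γ = 0.9 and ε = 0.001, the termination threshold ε(1 - γ)/γ works out to roughly 1.1e-4.

import java.util.Map;

import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.cellworld.CellWorldFactory;
import aima.core.probability.example.MDPFactory;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.search.ValueIteration;

public class ValueIterationDemo {
    public static void main(String[] args) {
        // The 4x3 cell world and its MDP from AIMA Figures 17.1 / 17.3
        // (factory and method names assumed from aima-core's example classes).
        CellWorld<Double> cw = CellWorldFactory.createCellWorldForFig17_1();
        MarkovDecisionProcess<Cell<Double>, CellWorldAction> mdp =
                MDPFactory.createMDPForFigure17_3(cw);

        // Discount γ = 0.9 and maximum allowed utility error ε = 0.001:
        // the loop stops once δ < ε(1 - γ)/γ = 0.001 * 0.1 / 0.9 ≈ 1.1e-4.
        ValueIteration<Cell<Double>, CellWorldAction> vi = new ValueIteration<>(0.9);
        Map<Cell<Double>, Double> U = vi.valueIteration(mdp, 0.001);

        U.forEach((s, u) -> System.out.printf("U(%s) = %.4f%n", s, u));
    }
}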



