main.java.burlap.domain.singleagent.gridworld.GridWorldRewardFunction Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of burlap Show documentation

The Brown-UMBC Reinforcement Learning and Planning (BURLAP) Java code library is for the use and development of single or multi-agent planning and learning algorithms and domains to accompany them. The library uses a highly flexible state/observation representation where you define states with your own Java classes, enabling support for domains that are discrete, continuous, relational, or anything else. Planning and learning algorithms range from classic forward search planning to value-function-based stochastic planning and learning algorithms.

The newest version!

package burlap.domain.singleagent.gridworld;

import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.model.RewardFunction;


/**
 * This class is used for defining reward functions in grid worlds that are a function of the cell of the world to which
 * the agent transitions. That is, a double matrix (called rewardMatrix) the size of the grid world is stored. If an agent transitions
 * to cell x,y, then it will receive the double value stored in rewardMatrix[x][y]. The rewards returned for transitioning to an agent position
 * may be set with the {@link #setReward(int, int, double)} method.
 * 
 * This reward function is useful for simple grid worlds without any location objects or worlds for which the rewards are independent
 * of location objects. An alternative to this class is to define worlds with location objects and use the atLocation propositional function
 * and location types to define rewards.
 * @author James MacGlashan
 *
 */
public class GridWorldRewardFunction implements RewardFunction {

	/** Reward table indexed as rewardMatrix[x][y], where x,y is the cell the agent transitions to. */
	protected double [][] rewardMatrix;

	/** Number of cells along the x dimension; first dimension of {@link #rewardMatrix}. */
	protected int width;

	/** Number of cells along the y dimension; second dimension of {@link #rewardMatrix}. */
	protected int height;
	
	
	/**
	 * Initializes the reward function for a grid world of size width and height and initializes the reward values everywhere to initializingReward.
	 * The reward returned from specific agent positions may be changed with the {@link #setReward(int, int, double)} method.
	 * @param width the width of the grid world
	 * @param height the height of the grid world
	 * @param initializingReward the reward to which all agent position transitions are initialized to return.
	 */
	public GridWorldRewardFunction(int width, int height, double initializingReward){
		this.initialize(width, height, initializingReward);
	}
	
	
	/**
	 * Initializes the reward function for a grid world of size width and height and initializes the reward values everywhere to 0.
	 * The reward returned from specific agent positions may be changed with the {@link #setReward(int, int, double)} method.
	 * @param width the width of the grid world
	 * @param height the height of the grid world
	 */
	public GridWorldRewardFunction(int width, int height){
		this(width, height, 0.);
	}

	
	
	/**
	 * Initializes the reward matrix, replacing any previously stored matrix, and records the world dimensions.
	 * @param width the width of the grid world
	 * @param height the height of the grid world
	 * @param initializingReward the reward to which all agent position transitions are initialized to return.
	 */
	protected void initialize(int width, int height, double initializingReward){
		this.rewardMatrix = new double[width][height];
		this.width = width;
		this.height = height;
		for(int i = 0; i < this.width; i++){
			for(int j = 0; j < this.height; j++){
				this.rewardMatrix[i][j] = initializingReward;
			}
		}
	}
	
	/**
	 * Returns the reward matrix this reward function uses. Changes to the returned matrix *will* change this reward function,
	 * because the internal array itself (not a copy) is returned.
	 * rewardMatrix[x][y] specifies the reward the agent will receive for transitioning to position x,y.
	 * @return the reward matrix this reward function uses
	 */
	public double [][] getRewardMatrix(){
		return this.rewardMatrix;
	}
	
	/**
	 * Sets the reward the agent will receive for transitioning to position x, y
	 * @param x the x position
	 * @param y the y position
	 * @param r the reward the agent will receive for transitioning to position x, y
	 * @throws ArrayIndexOutOfBoundsException if x, y lies outside the reward matrix
	 */
	public void setReward(int x, int y, double r){
		this.rewardMatrix[x][y] = r;
	}
	
	/**
	 * Returns the reward this reward function will return when the agent transitions to position x, y.
	 * @param x the x position
	 * @param y the y position
	 * @return the reward this reward function will return when the agent transitions to position x, y.
	 * @throws ArrayIndexOutOfBoundsException if x, y lies outside the reward matrix
	 */
	public double getRewardForTransitionsTo(int x, int y){
		return this.rewardMatrix[x][y];
	}
	
	/**
	 * Returns the reward stored for the cell occupied by the agent in the resulting state sprime.
	 * The previous state and the action are not consulted; the reward depends only on where the agent ends up.
	 * @param s the state in which the action was taken (unused)
	 * @param a the action taken (unused)
	 * @param sprime the state the agent transitioned to; must be a {@link GridWorldState}
	 * @return the value of rewardMatrix[x][y] for the agent's position x, y in sprime
	 * @throws RuntimeException if the agent's position in sprime is outside the width x height bounds of this reward function
	 */
	@Override
	public double reward(State s, Action a, State sprime) {

		// The reward is defined on the cell the agent transitions TO, so the position
		// must be read from the resulting state sprime rather than the source state s.
		GridWorldState resultingState = (GridWorldState)sprime;
		int x = resultingState.agent.x;
		int y = resultingState.agent.y;
		
		if(x >= this.width || x < 0 || y >= this.height || y < 0){
			throw new RuntimeException("GridWorld reward matrix is only defined for a " + this.width + "x" + 
					this.height +" world, but the agent transitioned to position (" + x + "," + y + "), which is outside the bounds.");
		}
		
		return this.rewardMatrix[x][y];
	}

}