main.java.burlap.mdp.singleagent.common.GoalBasedRF Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of burlap Show documentation

The Brown-UMBC Reinforcement Learning and Planning (BURLAP) Java code library supports the use and development of single- or multi-agent planning and learning algorithms, along with domains to accompany them. The library uses a highly flexible state/observation representation in which you define states with your own Java classes, enabling support for domains that are discrete, continuous, relational, or anything else. Planning and learning algorithms range from classic forward-search planning to value-function-based stochastic planning and learning algorithms.

The newest version!

package burlap.mdp.singleagent.common;

import burlap.mdp.auxiliary.stateconditiontest.StateConditionTest;
import burlap.mdp.auxiliary.stateconditiontest.TFGoalCondition;
import burlap.mdp.core.action.Action;
import burlap.mdp.core.TerminalFunction;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.model.RewardFunction;


/**
 * A {@link RewardFunction} built around a goal condition. The goal condition is supplied either directly as a
 * {@link burlap.mdp.auxiliary.stateconditiontest.StateConditionTest} or as a {@link burlap.mdp.core.TerminalFunction}
 * that is wrapped in a {@link TFGoalCondition}.
 * A transition whose resulting state satisfies the goal condition yields the goal reward; every other
 * transition yields the default reward.
 * @author James MacGlashan
 *
 */
public class GoalBasedRF implements RewardFunction {

	/** The test applied to the resulting state of a transition to decide whether it is a goal state. */
	protected StateConditionTest gc;

	/** The reward returned for transitions into goal states; 1.0 unless specified otherwise. */
	protected double goalReward = 1.0;

	/** The reward returned for all other transitions; 0 unless specified otherwise. */
	protected double defaultReward = 0.;


	/**
	 * Initializes with a goal reward of 1 and a default (non-goal) reward of 0.
	 * @param gc {@link burlap.mdp.auxiliary.stateconditiontest.StateConditionTest} object that specifies goal states.
	 */
	public GoalBasedRF(StateConditionTest gc) {
		this(gc, 1.0);
	}


	/**
	 * Initializes with the given goal reward and a default (non-goal) reward of 0.
	 * @param gc {@link burlap.mdp.auxiliary.stateconditiontest.StateConditionTest} object that specifies goal states.
	 * @param goalReward the reward returned for transitions to goal states.
	 */
	public GoalBasedRF(StateConditionTest gc, double goalReward) {
		this(gc, goalReward, 0.);
	}


	/**
	 * Initializes with the given goal reward and the given default reward. All other constructors
	 * ultimately delegate to this one.
	 * @param gc {@link burlap.mdp.auxiliary.stateconditiontest.StateConditionTest} object that specifies goal states.
	 * @param goalReward the reward returned for transitions to goal states.
	 * @param defaultReward the default reward returned for all non-goal state transitions.
	 */
	public GoalBasedRF(StateConditionTest gc, double goalReward, double defaultReward) {
		this.gc = gc;
		this.goalReward = goalReward;
		this.defaultReward = defaultReward;
	}


	/**
	 * Initializes with goal states indicated by the terminal function, a goal reward of 1 and a default (non-goal) reward of 0.
	 * @param tf {@link TerminalFunction} object that specifies goal states.
	 */
	public GoalBasedRF(TerminalFunction tf) {
		this(tf, 1.0);
	}


	/**
	 * Initializes with goal states indicated by the terminal function, the given goal reward and a default (non-goal) reward of 0.
	 * @param tf {@link TerminalFunction} object that specifies goal states.
	 * @param goalReward the reward returned for transitions to goal states.
	 */
	public GoalBasedRF(TerminalFunction tf, double goalReward) {
		this(tf, goalReward, 0.);
	}


	/**
	 * Initializes with goal states indicated by the terminal function, the given goal reward and the given default reward.
	 * The terminal function is wrapped in a {@link TFGoalCondition}, which becomes this object's goal condition.
	 * @param tf {@link TerminalFunction} object that specifies goal states.
	 * @param goalReward the reward returned for transitions to goal states.
	 * @param defaultReward the default reward returned for all non-goal state transitions.
	 */
	public GoalBasedRF(TerminalFunction tf, double goalReward, double defaultReward) {
		this(new TFGoalCondition(tf), goalReward, defaultReward);
	}


	/**
	 * Returns the goal condition for this reward function.
	 * @return the goal condition for this reward function.
	 */
	public StateConditionTest getGoalCondition(){
		return this.gc;
	}


	/**
	 * Returns the reward given for transitions to goal states.
	 * @return the goal reward.
	 */
	public double getGoalReward() {
		return this.goalReward;
	}


	/**
	 * Sets the reward given for transitions to goal states.
	 * @param goalReward the new goal reward.
	 */
	public void setGoalReward(double goalReward) {
		this.goalReward = goalReward;
	}


	/**
	 * Returns the reward given for transitions to non-goal states.
	 * @return the default reward.
	 */
	public double getDefaultReward() {
		return this.defaultReward;
	}


	/**
	 * Sets the reward given for transitions to non-goal states.
	 * @param defaultReward the new default reward.
	 */
	public void setDefaultReward(double defaultReward) {
		this.defaultReward = defaultReward;
	}


	/**
	 * Returns the goal reward when the resulting state satisfies the goal condition, and the default reward otherwise.
	 * Only the resulting state is examined; the source state and the action do not affect the result.
	 * @param s the state in which the action was taken (not consulted).
	 * @param a the action that was taken (not consulted).
	 * @param sprime the state resulting from the transition.
	 * @return the goal reward if {@code sprime} satisfies the goal condition; the default reward otherwise.
	 */
	@Override
	public double reward(State s, Action a, State sprime) {
		return this.gc.satisfies(sprime) ? this.goalReward : this.defaultReward;
	}

}