
aima.core.learning.reinforcement.example.CellWorldEnvironment

AIMA-Java Core Algorithms from the book Artificial Intelligence: A Modern Approach, 3rd Edition.

package aima.core.learning.reinforcement.example;

import java.util.LinkedHashSet;
import java.util.Set;

import aima.core.agent.Action;
import aima.core.agent.Agent;
import aima.core.agent.Percept;
import aima.core.agent.impl.AbstractEnvironment;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.probability.mdp.TransitionProbabilityFunction;
import aima.core.util.Randomizer;

/**
 * Implementation of the Cell World Environment, supporting the execution of
 * trials for reinforcement learning agents.
 * 
 * @author Ciaran O'Reilly
 * 
 */
public class CellWorldEnvironment extends AbstractEnvironment {
	private Cell<Double> startingCell = null;
	private Set<Cell<Double>> allStates = new LinkedHashSet<Cell<Double>>();
	private TransitionProbabilityFunction<Cell<Double>, CellWorldAction> tpf;
	private Randomizer r = null;
	private CellWorldEnvironmentState currentState = new CellWorldEnvironmentState();

	/**
	 * Constructor.
	 * 
	 * @param startingCell
	 *            the cell that agent(s) are to start from at the beginning of
	 *            each trial within the environment.
	 * @param allStates
	 *            all the possible states in this environment.
	 * @param tpf
	 *            the transition probability function that simulates how the
	 *            environment is meant to behave in response to an agent action.
	 * @param r
	 *            a Randomizer used to sample which successor state actually
	 *            results from an action, based on that action's transition
	 *            probabilities.
	 */
	public CellWorldEnvironment(Cell<Double> startingCell,
			Set<Cell<Double>> allStates,
			TransitionProbabilityFunction<Cell<Double>, CellWorldAction> tpf,
			Randomizer r) {
		this.startingCell = startingCell;
		this.allStates.addAll(allStates);
		this.tpf = tpf;
		this.r = r;
	}

	/**
	 * Execute N trials.
	 * 
	 * @param n
	 *            the number of trials to execute.
	 */
	public void executeTrials(int n) {
		for (int i = 0; i < n; i++) {
			executeTrial();
		}
	}

	/**
	 * Execute a single trial.
	 */
	public void executeTrial() {
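		// Reset the environment state, revive each agent at the starting
		// cell, and step until the environment reports the trial is done.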
		currentState.reset();
		for (Agent a : agents) {
			a.setAlive(true);
			currentState.setAgentLocation(a, startingCell);
		}
		stepUntilDone();
	}

	@Override
	public void executeAction(Agent agent, Action action) {
		if (!action.isNoOp()) {
			Cell<Double> s = currentState.getAgentLocation(agent);
			// Sample the successor state s' ~ P(s' | s, a) by inverse
			// transform sampling: accumulate each state's transition
			// probability until the running total exceeds a uniform draw.
			double probabilityChoice = r.nextDouble();
			double total = 0;
			boolean set = false;
			for (Cell<Double> sDelta : allStates) {
				total += tpf.probability(sDelta, s, (CellWorldAction) action);
				if (total > 1.0) {
					throw new IllegalStateException("Bad probability calculation.");
				}
				if (total > probabilityChoice) {
					currentState.setAgentLocation(agent, sDelta);
					set = true;
					break;
				}
			}
			if (!set) {
				throw new IllegalStateException("Failed to simulate the action="+action+" correctly from s="+s);
			}
		}
	}

	@Override
	public Percept getPerceptSeenBy(Agent anAgent) {
		return currentState.getPerceptFor(anAgent);
	}
}
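For orientation, a minimal usage sketch in the spirit of the aima-java test harness follows. It assumes the factory helpers that ship with aima-core (CellWorldFactory, MDPFactory and the JavaRandomizer implementation of Randomizer; exact names may vary between versions), and someLearningAgent is a hypothetical placeholder for a reinforcement learning agent such as a QLearningAgent:

// Usage sketch (assumptions: aima-core's CellWorldFactory and MDPFactory
// helpers plus the JavaRandomizer implementation of Randomizer;
// "someLearningAgent" is a hypothetical placeholder agent).
CellWorld<Double> cw = CellWorldFactory.createCellWorldForFig17_1();

CellWorldEnvironment cwe = new CellWorldEnvironment(
		cw.getCellAt(1, 1),  // agents start each trial in cell (1,1)
		cw.getCells(),       // all states of the 4x3 cell world
		MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
		new JavaRandomizer());

cwe.addAgent(someLearningAgent);
cwe.executeTrials(2000); // execute 2000 learning trials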

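The loop in executeAction() above is inverse transform sampling over the discrete distribution P(s' | s, a): draw u uniformly from [0, 1) and select the first state whose cumulative transition probability exceeds u. The standalone sketch below (illustrative only, not part of aima-core) shows the same technique in isolation:

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;

public class InverseTransformSamplingSketch {
	/**
	 * Return a key sampled with probability equal to its mapped mass
	 * (the masses are assumed to sum to 1).
	 */
	public static <S> S sample(Map<S, Double> distribution, Random rnd) {
		double choice = rnd.nextDouble();
		double total = 0;
		for (Map.Entry<S, Double> e : distribution.entrySet()) {
			total += e.getValue();
			if (total > choice) {
				return e.getKey(); // first key whose cumulative mass exceeds the draw
			}
		}
		throw new IllegalStateException("Probabilities sum to less than 1.");
	}

	public static void main(String[] args) {
		// In the spirit of the 4x3 world: the intended move succeeds with
		// probability 0.8 and slips to each perpendicular direction with 0.1.
		Map<String, Double> p = new LinkedHashMap<>();
		p.put("intended", 0.8);
		p.put("slipLeft", 0.1);
		p.put("slipRight", 0.1);
		System.out.println(sample(p, new Random()));
	}
}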