All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.chen0040.rl.models.UtilityModel Maven / Gradle / Ivy

package com.github.chen0040.rl.models;

import com.github.chen0040.rl.utils.Vec;
import lombok.Getter;
import lombok.Setter;

import java.io.Serializable;


/**
 * @author xschen
 * 9/27/2015 0027.
 * Utility value of a state $U(s)$ is the expected long term reward of state $s$ given the sequence of reward and the optimal policy
 * Utility value $U(s)$ at state $s$ can be obtained by the Bellman equation
 * Bellman Equtation states that $U(s) = R(s) + \gamma * max_a \sum_{s'} T(s,a,s')U(s')$
 * where s' is the possible transitioned state given that action $a$ is applied at state $s$
 * where $T(s,a,s')$ is the transition probability of $s \rightarrow s'$ given that action $a$ is applied at state $s$
 * where $\sum_{s'} T(s,a,s')U(s')$ is the expected long term reward given that action $a$ is applied at state $s$
 * where $max_a \sum_{s'} T(s,a,s')U(s')$ is the maximum expected long term reward given that the chosen optimal action $a$ is applied at state $s$
 */
@Getter
@Setter
public class UtilityModel implements Serializable {
    private Vec U;
    private int stateCount;
    private int actionCount;

    public void setU(Vec U){
        this.U = U;
    }

    public Vec getU() {
        return U;
    }

    public double getU(int stateId){
        return U.get(stateId);
    }

    public int getStateCount() {
        return stateCount;
    }

    public int getActionCount() {
        return actionCount;
    }

    public UtilityModel(int stateCount, int actionCount, double initialU){
        this.stateCount = stateCount;
        this.actionCount = actionCount;
        U = new Vec(stateCount);
        U.setAll(initialU);
    }

    public UtilityModel(int stateCount, int actionCount){
        this(stateCount, actionCount, 0.1);
    }

    public UtilityModel(){

    }

    public void copy(UtilityModel rhs){
        U = rhs.U==null ? null : rhs.U.makeCopy();
        actionCount = rhs.actionCount;
        stateCount = rhs.stateCount;
    }

    public UtilityModel makeCopy(){
        UtilityModel clone = new UtilityModel();
        clone.copy(this);
        return clone;
    }

    @Override
    public boolean equals(Object rhs){
        if(rhs != null && rhs instanceof  UtilityModel){
            UtilityModel rhs2 = (UtilityModel)rhs;
            if(actionCount != rhs2.actionCount || stateCount != rhs2.stateCount) return false;

            if((U==null && rhs2.U!=null) && (U!=null && rhs2.U ==null)) return false;
            return !(U != null && !U.equals(rhs2.U));

        }
        return false;
    }

    public void reset(double initialU){
        U.setAll(initialU);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy