All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.barrybecker4.game.twoplayer.common.search.strategy.UCT_notes.txt Maven / Gradle / Ivy

There is a newer version: 1.6
Show newest version
/* Java translation of the UCT pseudocode on the main page. See http://senseis.xmp.net/?UCT
 * Differences from that version:
 *  - best = child with the max number of visits (instead of the max win rate)
 *  - UCTK is applied outside of sqrt(...) in the UCT formula
 *  - the random result is non-global (a local variable)
 */
 /**
  * One node of the UCT search tree. Each node stores the move it represents
  * together with the statistics accumulated through {@link #update(int)}.
  * Children are kept as a singly linked list: {@code child} is the first child
  * and each child points to the next one through {@code sibling}.
  * A parent link is optional and is not stored.
  */
 class Node {
    /** Sum of all values passed to {@link #update(int)}. */
    public int wins = 0;
    /** Number of times {@link #update(int)} has been called. */
    public int visits = 0;
    /** Position of the move. */
    public int x, y;
    /** First child of this node, or null while the node has no children. */
    public Node child;
    /** Next node in the parent's child list, or null for the last one. */
    public Node sibling;

    /**
     * Creates a node for the move at the given position, with no statistics
     * and no children.
     *
     * @param x x position of the move
     * @param y y position of the move
     */
    public Node(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Records the outcome of one simulation that passed through this node.
     *
     * @param val value added to {@code wins}; {@code visits} grows by one
     */
    public void update(int val) {
        visits++;
        wins += val;
    }

    /**
     * @return {@code wins / visits} as a double, or 0 when the node has not
     *         been visited yet (callers are not expected to ask in that case)
     */
    public double getWinRate() {
        if (visits <= 0) {
            return 0; // should not happen
        }
        return (double) wins / visits;
    }
 }

 class Board {
    public Node root=null;

    // child with highest number of visits is used (not: best winrate)
    /**
     * Returns the child of {@code root} with the largest visit count.
     * The win rate is deliberately not used for this decision.
     *
     * @param root node whose children are inspected
     * @return the most visited child; on equal counts the one that comes first
     *         in the child list; null when {@code root} has no children
     */
    public Node getBestChild(Node root) {
        Node mostVisited = null;
        int maxVisits = -1;
        // Walk the child list: first child, then each following sibling.
        for (Node candidate = root.child; candidate != null; candidate = candidate.sibling) {
            if (candidate.visits <= maxVisits) {
                continue; // only a strictly larger count replaces the current best
            }
            mostVisited = candidate;
            maxVisits = candidate.visits;
        }
        return mostVisited;
    }

    public static final double UCTK = 0.44; // 0.44 ~= sqrt(1/5)
    // Larger values give uniform search
    // Smaller values give very selective search

    /**
     * Selects the child of {@code node} with the highest UCT value.
     * <p>
     * A visited child scores {@code winRate + UCTK * sqrt(ln(parentVisits) / childVisits)}.
     * An unvisited child gets a large random score (10000..11000), so every
     * unexplored move is tried, in random order, before any visited one.
     *
     * @param node parent whose children are scored
     * @return the child with the maximum score, or null only when
     *         {@code node} has no children
     */
    public Node UCTSelect(Node node) {
        Node res = null;
        // Start below every possible score. Starting at 0 would reject a visited
        // child whose score is exactly 0 (win rate 0 and ln(1) == 0), leaving
        // the result null even though children exist.
        double bestUct = Double.NEGATIVE_INFINITY;
        // Loop-invariant. The visit count is clamped to 1 so that an unvisited
        // parent yields ln(1) == 0 instead of -Infinity (and a NaN score).
        double logParentVisits = Math.log(Math.max(1, node.visits));
        for (Node next = node.child; next != null; next = next.sibling) { // for all children
            double uctvalue;
            if (next.visits > 0) {
                double winrate = next.getWinRate();
                double uct = UCTK * Math.sqrt(logParentVisits / next.visits);
                uctvalue = winrate + uct;
            } else {
                // Always play a random unexplored move first
                uctvalue = 10000 + 1000 * Math.random();
            }
            if (uctvalue > bestUct) { // get max uctvalue of all children
                bestUct = uctvalue;
                res = next;
            }
        }
        return res;
    }

    // return 0=lose 1=win for current player to move
    /**
     * Runs one simulation through the tree starting at {@code n} and updates
     * the statistics of every node on the path.
     * <p>
     * A leaf that has been visited fewer than 10 times is evaluated directly
     * with {@code playRandomGame()}. Otherwise its children are created if
     * needed, one is picked with {@code UCTSelect}, the move is played and the
     * simulation recurses into that child.
     *
     * @param n node to simulate from
     * @return 0=lose 1=win for current player to move
     * @throws IllegalStateException if no child can be selected for {@code n}
     *         (for example when {@code createChildren} produced none)
     */
    int playSimulation(Node n) {
        int randomresult;
        if (n.child == null && n.visits < 10) {
            // 10 simulations until children are expanded (saves memory)
            randomresult = playRandomGame();
        } else {
            if (n.child == null) {
                createChildren(n);
            }
            Node next = UCTSelect(n); // select a move
            if (next == null) {
                // Fail with a clear message instead of a NullPointerException below.
                throw new IllegalStateException(
                    "No move could be selected for node (" + n.x + "," + n.y + ")");
            }
            makeMove(next.x, next.y);
            // The recursive result is from the other side's view, so flip it.
            int res = playSimulation(next);
            randomresult = 1 - res;
        }
        n.update(1 - randomresult); // update node (Node-wins are associated with moves in the Nodes)
        return randomresult;
    }

    // generate a move, using the uct algorithm
    Move UCTSearch(int numsim) {
        root=new Node(-1,-1); //init uct tree
        createChildren(root);
        Board clone=new Board();
        for (int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy