com.barrybecker4.game.twoplayer.common.search.strategy.UCT_notes.txt Maven / Gradle / Ivy
/* Same as on the main page, but translated to Java. See http://senseis.xmp.net/?UCT
* best = child with the max number of visits (instead of max win rate)
* UCTK is outside of sqrt(...) in the UCT formula
* the random result is non-global
*/
/**
 * One node of the UCT search tree.
 *
 * <p>Each node records the board coordinates of the move it stands for plus
 * the statistics gathered for that move. Children are kept as a singly linked
 * list: {@link #child} is the first child and {@link #sibling} is the next
 * node on the same level. A parent reference is optional and not stored here.
 */
class Node {
    /** Sum of all values passed to {@link #update(int)}. */
    public int wins = 0;
    /** Number of times {@link #update(int)} has been called. */
    public int visits = 0;
    /** Position of the move. */
    public int x, y;
    /** First child of this node, or {@code null} if there is none. */
    public Node child;
    /** Next node on the same level, or {@code null} if this is the last one. */
    public Node sibling;

    /**
     * Creates a node for the move at the given position.
     *
     * @param x x coordinate of the move
     * @param y y coordinate of the move
     */
    public Node(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Records the outcome of one more simulation through this node.
     *
     * @param val amount added to {@link #wins}; the visit count always grows by one
     */
    public void update(int val) {
        visits++;
        wins += val;
    }

    /**
     * Returns the fraction of wins per visit.
     *
     * @return {@code wins / visits} as a double, or 0 when the node has not been visited
     */
    public double getWinRate() {
        if (visits > 0) {
            return (double) wins / visits;
        }
        // Should not happen: callers are expected to ask only for visited nodes.
        return 0;
    }
}
class Board {
public Node root=null;
// child with highest number of visits is used (not: best winrate)
/**
 * Picks the child that has been visited most often.
 *
 * <p>Selection is by visit count, not by win rate. When several children
 * share the highest count, the one that comes first in the sibling chain wins
 * because the comparison is strict.
 *
 * @param root node whose children are examined
 * @return the most visited child, or {@code null} if {@code root} has no children
 */
public Node getBestChild(Node root) {
    Node mostVisited = null;
    int maxVisits = -1; // below any real count, so the first child always qualifies
    for (Node candidate = root.child; candidate != null; candidate = candidate.sibling) {
        if (candidate.visits > maxVisits) {
            maxVisits = candidate.visits;
            mostVisited = candidate;
        }
    }
    return mostVisited;
}
// Constant that multiplies the sqrt(...) term of the UCT value computed in UCTSelect.
public static final double UCTK = 0.44; // 0.44 is approximately sqrt(1/5)
// Larger values give a more uniform search.
// Smaller values give a very selective search.
/**
 * Selects the child of {@code node} with the highest UCT value.
 *
 * <p>A visited child scores {@code winRate + UCTK * sqrt(ln(node.visits) / child.visits)}.
 * An unvisited child gets a large random score (at least 10000), so every
 * unexplored move is tried, in random order, before any explored one is revisited.
 *
 * <p>The running maximum starts at negative infinity rather than 0. A visited
 * child can legitimately score exactly 0 (win rate 0 while {@code node.visits}
 * is 1, since {@code ln(1) == 0}); starting at 0 would skip such a child and
 * return {@code null} even though children exist.
 *
 * @param node parent whose children are scored
 * @return the child with the highest score, or {@code null} if {@code node} has
 *         no children (or no child produced a comparable score, e.g. NaN when
 *         {@code node.visits} is 0 while a child has visits)
 */
public Node UCTSelect(Node node) {
    Node best = null;
    double bestUct = Double.NEGATIVE_INFINITY;
    for (Node next = node.child; next != null; next = next.sibling) {
        double uctValue;
        if (next.visits > 0) {
            double winRate = next.getWinRate();
            double exploration = UCTK * Math.sqrt(Math.log(node.visits) / next.visits);
            uctValue = winRate + exploration;
        } else {
            // Always play a random unexplored move first.
            uctValue = 10000 + 1000 * Math.random();
        }
        if (uctValue > bestUct) { // keep the maximum over all children
            bestUct = uctValue;
            best = next;
        }
    }
    return best;
}
// return 0=lose 1=win for current player to move
/**
 * Runs one simulation from node {@code n} and updates the statistics on the way back.
 *
 * <p>While {@code n} has no children and fewer than 10 visits, the result comes
 * directly from {@code playRandomGame()}; children are only created once that
 * threshold is reached, which saves memory. Otherwise a child is chosen with
 * {@link #UCTSelect(Node)}, its move is made, and the simulation recurses into
 * it; the child's result is inverted because it is seen from the other side.
 *
 * <p>NOTE(review): {@code playRandomGame}, {@code createChildren} and
 * {@code makeMove} are defined outside this block; their exact contracts
 * should be confirmed there.
 *
 * @param n node to simulate from
 * @return 0 = lose, 1 = win for the current player to move
 * @throws IllegalStateException if no child can be selected for an expanded node
 */
int playSimulation(Node n) {
    final int expandThreshold = 10; // simulations before children are expanded
    int randomresult;
    if (n.child == null && n.visits < expandThreshold) {
        randomresult = playRandomGame();
    } else {
        if (n.child == null) {
            createChildren(n);
        }
        Node next = UCTSelect(n); // select a move
        if (next == null) {
            // Previously an empty "ERROR" branch that fell through to a NullPointerException.
            throw new IllegalStateException(
                    "UCTSelect found no child to play for node (" + n.x + "," + n.y + ")");
        }
        makeMove(next.x, next.y);
        int res = playSimulation(next);
        randomresult = 1 - res;
    }
    // Node wins are associated with the move stored in the node, hence the inversion.
    n.update(1 - randomresult);
    return randomresult;
}
// generate a move, using the uct algorithm
Move UCTSearch(int numsim) {
root=new Node(-1,-1); //init uct tree
createChildren(root);
Board clone=new Board();
for (int i=0; i