I’m working on creating an AI that can play a variant of tic-tac-toe with 9 separate 3x3 grids. The game works like regular tic-tac-toe but your move position determines which grid your opponent must play in next. When any grid gets three in a row, that player wins the entire game.
I’ve been trying to use Monte Carlo Tree Search but my AI makes really bad moves. Even when it has two pieces in a row and can win immediately, it chooses different moves instead. Here’s my current implementation:
from math import log, sqrt
import random
import numpy as np
from copy import deepcopy
class MultiGridGame:
def __init__(self):
self.grids = np.zeros((10, 10), dtype="int8")
self.active_grid = 1
self.last_player = 2
def copy_state(self):
new_game = MultiGridGame()
new_game.last_player = self.last_player
new_game.active_grid = self.active_grid
new_game.grids = deepcopy(self.grids)
return new_game
def make_move(self, position):
self.last_player = 3 - self.last_player
if position >= 1 and position <= 9 and self.grids[self.active_grid][position] == 0:
self.grids[self.active_grid][position] = self.last_player
self.active_grid = position
def available_moves(self):
return [pos for pos in range(1, 10) if self.grids[self.active_grid][pos] == 0]
def check_winner(self, player):
for grid in self.grids:
win_patterns = [(1,2,3),(4,5,6),(7,8,9),(1,4,7),(2,5,8),(3,6,9),(1,5,9),(3,5,7)]
for (a,b,c) in win_patterns:
if grid[a] == grid[b] == grid[c] == player:
return 1.0
return 0.5 if not self.available_moves() else None
class SearchNode:
def __init__(self, move_made=None, parent_node=None, game_state=None):
self.move_made = move_made
self.parent_node = parent_node
self.child_nodes = []
self.win_count = 0
self.visit_count = 0
self.untried_moves = game_state.available_moves()
self.last_player = game_state.last_player
def select_best_child(self):
best = sorted(self.child_nodes,
key=lambda node: node.win_count/node.visit_count +
0.3 * sqrt(2*log(self.visit_count)/node.visit_count))[-1]
return best
def expand_tree(self, move, state):
new_node = SearchNode(move_made=move, parent_node=self, game_state=state)
self.untried_moves.remove(move)
self.child_nodes.append(new_node)
return new_node
def update_stats(self, result):
self.visit_count += 1
self.win_count += result
def run_mcts(initial_state, max_iterations):
root = SearchNode(game_state=initial_state)
for iteration in range(max_iterations):
current_node = root
current_state = initial_state.copy_state()
while not current_node.untried_moves and current_node.child_nodes:
current_node = current_node.select_best_child()
current_state.make_move(current_node.move_made)
if current_node.untried_moves:
random_move = random.choice(current_node.untried_moves)
current_state.make_move(random_move)
current_node = current_node.expand_tree(random_move, current_state)
while current_state.available_moves():
current_state.make_move(random.choice(current_state.available_moves()))
while current_node:
result = current_state.check_winner(current_node.last_player)
current_node.update_stats(result if result is not None else 0.5)
current_node = current_node.parent_node
return sorted(root.child_nodes, key=lambda node: node.visit_count)[-1].move_made
def play_game():
game = MultiGridGame()
while game.available_moves():
best_move = run_mcts(game, 1000)
print(f"AI chooses position: {best_move}")
game.make_move(best_move)
if __name__ == "__main__":
play_game()
The problem is my AI doesn’t recognize obvious winning moves or blocking moves. Should I add specific logic to check for immediate wins and blocks, or is there a way to improve the MCTS algorithm itself to make better decisions? Any suggestions on what might be wrong with my current approach?