-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathTrain.py
89 lines (69 loc) · 2.34 KB
/
Train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pygame
from CheckerGame import CheckerBoard, Piece
from CheckerAI import CheckerAI
from Transition import BoardTransition
from constants import HEIGHT, WIDTH, _P1PIECE, _P2PIECE, Q_TABLE_FILE, DEBUG_HEIGHT
# Upper bound on the number of training games (episodes) played in one run.
TRAIN_EPOCH: int = 1_000

# Period of the pygame turn timer, in milliseconds.
TURN_TIME: int = 100
def _playEpisode(window, ai, bt, turnEvent):
    """Play a single training game and report how it ended.

    A fresh ``CheckerBoard`` is created and drawn every frame. Each time the
    ``turnEvent`` timer fires, the AI picks a move, the move is applied to the
    board, and the game is checked for a draw (repeated move) or a win.

    Args:
        window: pygame display surface the board, pieces and debug text are
            drawn on.
        ai: ``CheckerAI`` instance used to pick moves, evaluate the board and
            apply the end-of-game Q reward.
        bt: ``BoardTransition`` instance used to enumerate follow-up boards.
        turnEvent: pygame event type id of the turn timer.

    Returns:
        A ``(reward, quitRequested)`` tuple. ``reward`` is whatever
        ``ai.applyQReward`` returned for this game (0 if the game was aborted
        before a result); ``quitRequested`` is True when the window was closed.
    """
    # Arm the turn timer for this game.
    pygame.time.set_timer(turnEvent, TURN_TIME)

    CB = CheckerBoard()
    CB.initializeBoard()

    repeatedMoves = {}      # move returned by the AI -> times it was chosen
    currentBoardEval = 0
    reward = 0
    quitRequested = False
    gameActive = True

    while gameActive:
        CB.drawBoard(window)

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                quitRequested = True
                gameActive = False
                # Ignore anything still queued: the user asked to stop.
                break
            if event.type != turnEvent:
                continue

            # The second argument is fixed at 2 (presumably the search depth;
            # confirm against CheckerAI). An adaptive alternative that was
            # left disabled in this script:
            # depth = int(180/(CB.player1NumPieces + CB.player2NumPieces + 36))
            nextBestMove = ai.nextBestMove(CB, 2)
            ai.linkVisitedBoard(nextBestMove)

            # Game draw logic: a move that has already been chosen three
            # times ends the game as a draw (reward argument 0).
            timesSeen = repeatedMoves.get(nextBestMove, 0)
            if timesSeen >= 3:
                print("Boards repeated resulted in a draw!")
                reward += ai.applyQReward(0)
                gameActive = False
                # Stop here so the drawn game cannot also be scored as a win
                # and no further queued turns are played on a finished game.
                break
            repeatedMoves[nextBestMove] = timesSeen + 1

            # NOTE(review): bare comparison whose result is discarded;
            # presumably CheckerBoard overloads `<` to load the chosen move
            # into CB - confirm against CheckerGame.
            CB < nextBestMove
            currentBoardEval = ai.evaluateBoard(CB)
            nextBoardStates = bt.getAllBoards(CB)
            wonPlayer = CB.gameEnd(len(nextBoardStates))

            if wonPlayer == _P1PIECE:
                print("player 1 won!")
                reward += ai.applyQReward(wonPlayer)
                gameActive = False
                break
            if wonPlayer == _P2PIECE:
                print("player 2 won!")
                reward += ai.applyQReward(wonPlayer)
                gameActive = False
                break

            # Game continues: restart the timer so the next turn is scheduled
            # TURN_TIME after this one finished.
            pygame.time.set_timer(turnEvent, TURN_TIME)

        # To monitor performance, create a pygame.time.Clock() before the
        # loop and call clock.tick() / print(clock.get_fps()) here.
        CB.drawPieces(window)
        CB.debug(window, currentBoardEval, "Current Board Evaluation")
        pygame.display.update()

    return reward, quitRequested


def main():
    """Run up to ``TRAIN_EPOCH`` training games and print the average reward.

    Training stops early when the pygame window is closed. pygame is always
    shut down on exit, including when an exception escapes the training loop.
    """
    pygame.init()
    try:
        window = pygame.display.set_mode((WIDTH, HEIGHT + DEBUG_HEIGHT))
        ai = CheckerAI(Q_TABLE_FILE)
        bt = BoardTransition()
        turnEvent = pygame.USEREVENT + 1

        trainActive = True
        currentEpoch = 0
        totalRewardApplied = 0

        while currentEpoch < TRAIN_EPOCH and trainActive:
            print("Epoch: ", currentEpoch)
            episodeReward, quitRequested = _playEpisode(window, ai, bt, turnEvent)
            totalRewardApplied += episodeReward
            trainActive = not quitRequested
            currentEpoch += 1

        if currentEpoch:
            print("Average Reward per Episode over", currentEpoch, " episodes is: ", totalRewardApplied / currentEpoch)
        else:
            # Only reachable when TRAIN_EPOCH <= 0; avoids a ZeroDivisionError.
            print("No episodes were played; no average reward to report.")
    finally:
        pygame.quit()


if __name__ == "__main__":
    main()