First implementation of new steps

supersimple33 · Apr 16, 2023 · 6ef7bed · 6ef7bed
1 parent b2a966a
commit 6ef7bed
Show file tree

Hide file tree

Showing 4 changed files with 143 additions and 50 deletions.
diff --git a/battleship_envs/envs/battleship1_env.py b/battleship_envs/envs/battleship1_env.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from shared import Space, setupShips
+from shared import Space, setup_ships
 
 #game code
 class Battleship1(gym.Env):
@@ -77,7 +77,7 @@ def step(self, target):
         targetSpace = self.state[y][x]
         self.reward = False
         # hit = False
-        if self.done == True:
+        if self.done:
             # print("Game Over")
             return self.hidState, self.reward, self.done, self.expectedShots #check return
         else:
@@ -140,7 +140,7 @@ def step(self, target):
     def reset(self, seed=None):
         self.seed = self.new_seed(seed)
 
-        self.state = setupShips(self.np_random)
+        self.state = setup_ships(self.np_random)
         self.hidState.fill(Space.Empty)
         self.expectedShots = np.copy(np.reshape(self.state, (100)))
 
@@ -174,14 +174,20 @@ def new_seed(self, seed=None):
 
 import timeit
+# Ad-hoc benchmark. These statements sit at module level, so they run whenever this file is executed.
 env = Battleship1()
+# Fire once at every target index 0..99 before any timing starts.
+for i in range(100):
+    env.step(i)
+
 avg_list = []
 for i in range(0,20):
     print(i)
-    L = timeit.timeit('env.reset()', globals=globals(), number = 10000)
-    # L = timeit.timeit(setup='env.reset()', stmt='for i in range(0,99): env.step(i); env.reset()', globals=globals(), number = 100) #2.73
-    avg_list.append(L)
+    # L = [timeit.timeit('env.reset()', globals=globals(), number = 10000)]
+    # Each of the 5000 repeats runs the setup once (reset, i=0) and then times 100 consecutive step calls;
+    # sum(L) below is therefore the total timed duration of one trial.
+    L = timeit.repeat(setup='env.reset(); i=0', stmt='env.step(i); i += 1', globals=globals(), number = 100, repeat = 5000) #2.73
+    avg_list.append(sum(L))
 print("mean: ", sum(avg_list)/len(avg_list), "std_dev: ", np.std(avg_list))
 print(avg_list)
 
 # mean:  2.6814786437500002 std_dev:  0.028973936597862637
-# mean:  2.6619473125999997 std_dev:  0.019689577780807894
+# mean:  2.6619473125999997 std_dev:  0.019689577780807894
+
+# mean:  1.15642401880003 std_dev:  0.02141613605211344
+# mean:  1.171778880150033 std_dev:  0.0151238295477036
diff --git a/battleship_envs/envs/battleship3_env.py b/battleship_envs/envs/battleship3_env.py
@@ -6,7 +6,12 @@
 import gymnasium as gym
 from gymnasium import spaces
 
-from shared import Space, setupShips
+from shared import Space, setup_ships, hidden_spaces, hit_spaces, sunk_spaces
+
+# Module-level lookup tables (constants, hence the upper-case names).
+
+# Hidden ship cell -> value written to the board when that cell is hit.
+HIT_SWAPS = {
+    Space.HiddenFive: Space.HitPFive,
+    Space.HiddenFour: Space.HitPFour,
+    Space.HiddenCruiser: Space.HitPCruiser,
+    Space.HiddenSub: Space.HitPSub,
+    Space.HiddenTwo: Space.HitPTwo,
+}
+
+# Hidden ship cell -> value written to the board when that ship is sunk.
+SUNK_SWAPS = {
+    Space.HiddenFive: Space.SunkFive,
+    Space.HiddenFour: Space.SunkFour,
+    Space.HiddenCruiser: Space.SunkCruiser,
+    Space.HiddenSub: Space.SunkSub,
+    Space.HiddenTwo: Space.SunkTwo,
+}
+
+# Index of each ship inside the five-element dead_ships vector.
+HIT_ORDERING = [
+    Space.HiddenTwo,
+    Space.HiddenSub,
+    Space.HiddenCruiser,
+    Space.HiddenFour,
+    Space.HiddenFive,
+]
 
 class Battleship3(gym.Env):
     """My third implementation of the battleship game."""
@@ -19,35 +24,85 @@ def __init__(self, render_mode = None): # do we want to set seeds here?
         # Space number to hit
         self.action_space = spaces.Discrete(100)
         # One hot encodes of misses, hits, and then binaries of sunk ships
-        self.observation_space = spaces.Tuple((spaces.MultiBinary([10, 10, 6]), spaces.MultiBinary([5])))
+        self.observation_space = spaces.Tuple((spaces.MultiBinary([10, 10, 2]), spaces.MultiBinary([5])))
 
         self.state = None
-        self.hidState = None
+        self.hid_state = None
+        self.reward_range = (float("-inf"), 17) # i think these numbers are right, may need tweaking
 
     def reset(self, seed = None): # 42, runs ~ 10% faster than the original, ~
+        """Reset the game to an initial state and return a blank observation.
+
+        Args:
+            seed: Optional seed, forwarded unchanged to ``super().reset``.
+
+        Returns:
+            ``((hid_state, dead_ships), info)`` where ``hid_state`` is a
+            ``(2, 10, 10)`` array filled with ``False``, ``dead_ships`` is a
+            length-5 boolean array of zeros, and ``info`` is an empty dict.
+        """
         super().reset(seed=seed)
 
-        self.state = setupShips(self.np_random)
-        self.hidState = np.full(shape=(2,10,10),fill_value=Space.Empty)
+        # Full board, including the ship positions the player cannot see.
+        self.state = setup_ships(self.np_random)
+        # Player-visible planes: step() sets plane 0 on a miss and plane 1 on a hit.
+        # NOTE(review): observation_space declares MultiBinary([10, 10, 2]) while this array is (2, 10, 10) - confirm the intended layout.
+        self.hid_state = np.full(shape=(2,10,10), fill_value=False)
+        # One flag per ship, indexed by HIT_ORDERING; step() sets a flag when that ship is sunk.
+        self.dead_ships = np.zeros(5, dtype=np.bool_)
 
         self.counter = 0
         self.done = False
-        self.reward = 0
 
-        return self.hidState,
+        return (self.hid_state, self.dead_ships), {}
+
+    def step(self, target):
+        """Fire at ``target`` and advance the game by one shot.
+
+        Args:
+            target: Cell index in ``[0, 100)``; decoded as row ``target // 10``
+                and column ``target % 10``.
+
+        Returns:
+            ``((hid_state, dead_ships), reward, done, info)``. The reward is
+            ``-1`` for a miss, ``1`` for a hit, ``-10`` for a cell that was
+            already fired upon, and ``0`` once the game is over.
+
+        Raises:
+            ValueError: If the targeted cell holds an unrecognised value.
+        """
+        # NOTE(review): Gymnasium's Env.step returns a 5-tuple
+        # (obs, reward, terminated, truncated, info); the 4-tuple is kept here
+        # so existing callers keep working - confirm before changing.
+        assert target in self.action_space
+
+        if self.done:
+            return (self.hid_state, self.dead_ships), 0, self.done, {}
+
+        self.counter += 1
+        # TODO: decide whether the game should end after 100 moves.
+        reward = 0
+
+        x = target % 10
+        y = target // 10
+        cell = self.state[y][x]
+
+        if cell == Space.Empty:
+            # Fresh miss: record it on the board and on the miss plane.
+            self.state[y][x] = Space.Miss
+            self.hid_state[0][y][x] = True
+            reward = -1
+        elif cell == Space.Miss:
+            # Repeated shot at a known miss: penalise like any other repeat
+            # instead of falling through to the "Invalid state" error.
+            reward = -10
+        elif cell in hidden_spaces:
+            # Fresh hit on a ship segment.
+            self.state[y][x] = HIT_SWAPS[cell]
+            self.hid_state[1][y][x] = True
+            reward = 1
+
+            # The ship is sunk once none of its hidden segments remain.
+            if cell not in self.state:
+                self.dead_ships[HIT_ORDERING.index(cell)] = True
+                # All of its segments now hold the hit marker, so that is the
+                # value to replace; no cell still equals the hidden marker.
+                self.state[self.state == HIT_SWAPS[cell]] = SUNK_SWAPS[cell]
+                if self.dead_ships.all():
+                    self.done = True
+        elif cell in hit_spaces or cell in sunk_spaces:
+            # Repeated shot at a ship segment that was already hit or sunk.
+            reward = -10
+        else:
+            raise ValueError("Invalid state")
+
+        return (self.hid_state, self.dead_ships), reward, self.done, {}
+
 
 import timeit
+# Ad-hoc benchmark. These statements sit at module level, so they run whenever this file is executed.
 env = Battleship3()
 avg_list = []
 for i in range(0,20):
     print(i)
-    L = timeit.timeit('env.reset()', globals=globals(), number = 10000)
-    # L = timeit.timeit(setup='env.reset()', stmt='for i in range(0,99): env.step(i); env.reset()', globals=globals(), number = 100) #2.73
-    avg_list.append(L)
+    # L = [timeit.timeit('env.reset()', globals=globals(), number = 10000)]
+    # Each of the 5000 repeats runs the setup once (reset, i=0) and then times 100 consecutive step calls;
+    # sum(L) below is therefore the total timed duration of one trial.
+    L = timeit.repeat(setup='env.reset(); i=0', stmt='env.step(i); i += 1', globals=globals(), number = 100, repeat = 5000) #2.73
+    avg_list.append(sum(L))
 print("mean: ", sum(avg_list)/len(avg_list), "std_dev: ", np.std(avg_list))
 print(avg_list)
 
 # Trial set 1
 # mean:  2.407954839600001 std_dev:  0.03430318664613518
 # mean:  2.502668951999999 std_dev:  0.08318714081173365
-# mean:  2.3938089833000005 std_dev:  0.015541127257026607
+# mean:  2.3938089833000005 std_dev:  0.015541127257026607
+
+# mean:  2.4946897120000466 std_dev:  0.004272984706538216
diff --git a/battleship_envs/envs/shared.py b/battleship_envs/envs/shared.py
@@ -1,31 +1,63 @@
+from enum import Flag, unique, IntEnum, auto
 import numpy as np
-from enum import Enum, unique, IntEnum
 
 # indList = ("|-|","!M!","(2)","(S)","(C)","(4)","(5)",3"x2x","xSx","xCx","x4x","x5x","|2|","|S|","|C|","|4|","|5|","HiddenCruiser")
 @unique
-class Space(Enum): #int enum for better performance?
+
+
+class Space(Flag): # is this the best performance? Do we still need the numpy floats? intflag? bonus safety checks?
     """The labels for each of the spaces in the battleship game."""
-    Empty = np.float32(0.0),"|-|" #_
+    Empty = 0
 
-    Miss = np.float32(-1.0),"!M!" #m
-    
-    HitPTwo = np.float32(1.0),"(2)"
-    HitPSub = np.float32(1.0),"(S)"
-    HitPCruiser = np.float32(1.0),"(C)"
-    HitPFour = np.float32(1.0),"(4)"
-    HitPFive = np.float32(1.0),"(5)"
+    Miss = auto()
+
+    HitPTwo = auto()
+    HitPSub = auto()
+    HitPCruiser = auto()
+    HitPFour = auto()
+    HitPFive = auto()
 
-    SunkTwo = np.float32(0.2),"x2x" # ship values, should every ship get its own channel
-    SunkSub = np.float32(0.4),"xSx"
-    SunkCruiser = np.float32(0.6),"xCx"
-    SunkFour = np.float32(0.8),"x4x"
-    SunkFive = np.float32(1.0),"x5x"
+    SunkTwo = auto()
+    SunkSub = auto()
+    SunkCruiser = auto()
+    SunkFour = auto()
+    SunkFive = auto()
+
+    HiddenTwo = auto()
+    HiddenSub = auto()
+    HiddenCruiser = auto()
+    HiddenFour = auto()
+    HiddenFive = auto()
+
+    def description(self):
+        """Return the three-character display label for this space."""
+        return _SPACE_INFO[self][0]
+
+    def old_value(self):
+        """Return the ``np.float32`` value this space carried before the switch to ``Flag``."""
+        return _SPACE_INFO[self][1]
+
+
+# Display label and legacy float value for every Space member.
+# This table lives outside the class body on purpose: ``_ignore_`` is a name
+# reserved by Enum (it is consumed and removed while the class is created), and
+# inside the class body the member names are still plain ints, so a dict built
+# there could never be looked up with a member.
+_SPACE_INFO = {
+    Space.Empty: ("|-|", np.float32(0.0)),
+    Space.Miss: ("!M!", np.float32(-1.0)),
+    Space.HitPTwo: ("(2)", np.float32(1.0)),
+    Space.HitPSub: ("(S)", np.float32(1.0)),
+    Space.HitPCruiser: ("(C)", np.float32(1.0)),
+    Space.HitPFour: ("(4)", np.float32(1.0)),
+    Space.HitPFive: ("(5)", np.float32(1.0)),
+    Space.SunkTwo: ("x2x", np.float32(0.2)),
+    Space.SunkSub: ("xSx", np.float32(0.4)),
+    Space.SunkCruiser: ("xCx", np.float32(0.6)),
+    Space.SunkFour: ("x4x", np.float32(0.8)),
+    Space.SunkFive: ("x5x", np.float32(1.0)),
+    Space.HiddenTwo: ("|2|", np.float32(1.0)),
+    Space.HiddenSub: ("|S|", np.float32(1.0)),
+    Space.HiddenCruiser: ("|C|", np.float32(1.0)),
+    Space.HiddenFour: ("|4|", np.float32(1.0)),
+    Space.HiddenFive: ("|5|", np.float32(1.0)),
+}
 
-    HiddenTwo = np.float32(1.0),"|2|" # Need to update these values likely 1
-    HiddenSub = np.float32(1.0),"|S|"
-    HiddenCruiser = np.float32(1.0),"|C|"
-    HiddenFour = np.float32(1.0),"|4|"
-    HiddenFive = np.float32(1.0),"|5|"
+# Combined flags, one per cell category, so a single ``cell in <group>`` test
+# covers all five ships.
+hit_spaces = (
+    Space.HitPTwo
+    | Space.HitPSub
+    | Space.HitPCruiser
+    | Space.HitPFour
+    | Space.HitPFive
+)
+sunk_spaces = (
+    Space.SunkTwo
+    | Space.SunkSub
+    | Space.SunkCruiser
+    | Space.SunkFour
+    | Space.SunkFive
+)
+hidden_spaces = (
+    Space.HiddenTwo
+    | Space.HiddenSub
+    | Space.HiddenCruiser
+    | Space.HiddenFour
+    | Space.HiddenFive
+)
 
 @unique
 class Direction(IntEnum):
@@ -69,7 +101,7 @@ def addShip(state, ship: Space, ship_len: int, x: int, y: int, d: Direction) ->
 emptyStateRef = np.full(shape=(10,10),fill_value=Space.Empty)
 hidSpaceRef = [Space.HiddenFive, Space.HiddenFour, Space.HiddenCruiser, Space.HiddenSub, Space.HiddenTwo]
 shipSpaceLength = [5, 4, 3, 3, 2]
-def setupShips(np_random: np.random.Generator): # need to make this very fast
+def setup_ships(np_random: np.random.Generator): # need to make this very fast
     """Create a new state with ships placed randomly"""
     i = 0
     state = np.copy(emptyStateRef)

diff --git a/trainer.py b/trainer.py
@@ -69,16 +69,16 @@ def makeMove(obs,f):
 
 # Converts regular spaces to what would be seen in a game 
 def singleShipSight(e, match):
-	if '2' in match.value[1]:
-		return 1 if '2' in e.value[1] else 0
-	if 'S' in match.value[1]:
-		return 1 if 'S' in e.value[1] else 0
-	if 'C' in match.value[1]:
-		return 1 if 'C' in e.value[1] else 0
-	if '4' in match.value[1]:
-		return 1 if '4' in e.value[1] else 0
-	if '5' in match.value[1]:
-		return 1 if '5' in e.value[1] else 0
+	"""Return 1 if ``e`` belongs to the same ship as ``match``, else 0.
+
+	Ships are identified by the character ('2', 'S', 'C', '4' or '5') found in
+	each space's display label; 0 is returned when ``match`` names no ship.
+	"""
+	# description is a method on Space, so it has to be called to get the label.
+	match_label = match.description()
+	for ship_char in ('2', 'S', 'C', '4', '5'):
+		if ship_char in match_label:
+			return 1 if ship_char in e.description() else 0
 	return 0
 
 # Recursion Variables and Stats
@@ -87,7 +87,7 @@ def singleShipSight(e, match):
 iterations = 0
 observations = []
 expecteds = []
-vfunc = np.vectorize(lambda e: e.value[0])
+# Element-wise conversion of Space members to their legacy float values.
+# The accessor on Space is the method old_value(); there is no old_values attribute.
+vfunc = np.vectorize(lambda e: e.old_value())
 vfuncSingleShip = np.vectorize(singleShipSight)
 possMoves = list(range(100))