Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
First implementation of new steps
Browse files Browse the repository at this point in the history
  • Loading branch information
supersimple33 committed Apr 16, 2023
1 parent b2a966a commit 6ef7bed
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 50 deletions.
20 changes: 13 additions & 7 deletions battleship_envs/envs/battleship1_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import numpy as np

from shared import Space, setupShips
from shared import Space, setup_ships

#game code
class Battleship1(gym.Env):
Expand Down Expand Up @@ -77,7 +77,7 @@ def step(self, target):
targetSpace = self.state[y][x]
self.reward = False
# hit = False
if self.done == True:
if self.done:
# print("Game Over")
return self.hidState, self.reward, self.done, self.expectedShots #check return
else:
Expand Down Expand Up @@ -140,7 +140,7 @@ def step(self, target):
def reset(self, seed=None):
self.seed = self.new_seed(seed)

self.state = setupShips(self.np_random)
self.state = setup_ships(self.np_random)
self.hidState.fill(Space.Empty)
self.expectedShots = np.copy(np.reshape(self.state, (100)))

Expand Down Expand Up @@ -174,14 +174,20 @@ def new_seed(self, seed=None):

import timeit
env = Battleship1()
for i in range(100):
env.step(i)

avg_list = []
for i in range(0,20):
print(i)
L = timeit.timeit('env.reset()', globals=globals(), number = 10000)
# L = timeit.timeit(setup='env.reset()', stmt='for i in range(0,99): env.step(i); env.reset()', globals=globals(), number = 100) #2.73
avg_list.append(L)
# L = [timeit.timeit('env.reset()', globals=globals(), number = 10000)]
L = timeit.repeat(setup='env.reset(); i=0', stmt='env.step(i); i += 1', globals=globals(), number = 100, repeat = 5000) #2.73
avg_list.append(sum(L))
print("mean: ", sum(avg_list)/len(avg_list), "std_dev: ", np.std(avg_list))
print(avg_list)

# mean: 2.6814786437500002 std_dev: 0.028973936597862637
# mean: 2.6619473125999997 std_dev: 0.019689577780807894
# mean: 2.6619473125999997 std_dev: 0.019689577780807894

# mean: 1.15642401880003 std_dev: 0.02141613605211344
# mean: 1.171778880150033 std_dev: 0.0151238295477036
77 changes: 66 additions & 11 deletions battleship_envs/envs/battleship3_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
import gymnasium as gym
from gymnasium import spaces

from shared import Space, setupShips
from shared import Space, setup_ships, hidden_spaces, hit_spaces, sunk_spaces

# CAPS
HIT_SWAPS = {Space.HiddenFive: Space.HitPFive, Space.HiddenFour: Space.HitPFour, Space.HiddenCruiser: Space.HitPCruiser, Space.HiddenSub: Space.HitPSub, Space.HiddenTwo: Space.HitPTwo}
SUNK_SWAPS = {Space.HiddenFive: Space.SunkFive, Space.HiddenFour: Space.SunkFour, Space.HiddenCruiser: Space.SunkCruiser, Space.HiddenSub: Space.SunkSub, Space.HiddenTwo: Space.SunkTwo}
HIT_ORDERING = [Space.HiddenTwo, Space.HiddenSub, Space.HiddenCruiser, Space.HiddenFour, Space.HiddenFive]

class Battleship3(gym.Env):
"""My third implementation of the battleship game."""
Expand All @@ -19,35 +24,85 @@ def __init__(self, render_mode = None): # do we want to set seeds here?
# Space number to hit
self.action_space = spaces.Discrete(100)
# One hot encodes of misses, hits, and then binaries of sunk ships
self.observation_space = spaces.Tuple((spaces.MultiBinary([10, 10, 6]), spaces.MultiBinary([5])))
self.observation_space = spaces.Tuple((spaces.MultiBinary([10, 10, 2]), spaces.MultiBinary([5])))

self.state = None
self.hidState = None
self.hid_state = None
self.reward_range = (float("-inf"), 17) # i think these numbers are right, may need tweaking

def reset(self, seed = None): # 42, runs ~ 10% faster than the original, ~
"""Reset the game to an initial state and return a blank observation."""
super().reset(seed=seed)

self.state = setupShips(self.np_random)
self.hidState = np.full(shape=(2,10,10),fill_value=Space.Empty)
self.state = setup_ships(self.np_random)
self.hid_state = np.full(shape=(2,10,10), fill_value=False)
self.dead_ships = np.zeros(5, dtype=np.bool_)

self.counter = 0
self.done = False
self.reward = 0

return self.hidState,
return (self.hid_state, self.dead_ships), {}

def step(self, target):
    """Take a step in the game by shooting at the specified target.

    Args:
        target: Flat cell index that must be contained in
            ``self.action_space``. The row is ``target // 10`` and the
            column is ``target % 10``.

    Returns:
        A 4-tuple ``((hid_state, dead_ships), reward, done, info)``.
        ``reward`` is -1 for a new miss, +1 for a new hit, -10 for firing
        at a cell that was already shot, and 0 when the game is already
        over. ``info`` is always an empty dict.

    Raises:
        ValueError: If the targeted cell holds a value that is none of
            the known empty / miss / hidden / hit / sunk states.
    """
    # NOTE(review): Gymnasium's Env.step is documented as returning a
    # 5-tuple (obs, reward, terminated, truncated, info); the 4-tuple is
    # kept here so existing callers keep working -- confirm before changing.
    assert target in self.action_space, f"invalid target: {target!r}"

    if self.done:
        # Game already finished: report the final observation, no reward.
        return (self.hid_state, self.dead_ships), 0, self.done, {}

    self.counter += 1
    row, col = divmod(target, 10)
    cell = self.state[row][col]

    if cell == Space.Empty:
        # Fresh miss: plane 0 of the observation records misses.
        self.state[row][col] = Space.Miss
        self.hid_state[0][row][col] = True
        reward = -1
    elif cell in hidden_spaces:
        # Fresh hit: plane 1 of the observation records hits.
        hit_marker = HIT_SWAPS[cell]
        self.state[row][col] = hit_marker
        self.hid_state[1][row][col] = True
        reward = 1

        # The ship is sunk once no hidden segment of it remains.
        if not (self.state == cell).any():
            self.dead_ships[HIT_ORDERING.index(cell)] = True
            # Every segment now carries the hit marker (no hidden ones are
            # left), so that is the value to promote to the sunk marker.
            self.state[self.state == hit_marker] = SUNK_SWAPS[cell]
            if self.dead_ships.all():
                self.done = True
    elif cell == Space.Miss or cell in hit_spaces or cell in sunk_spaces:
        # Repeat shot at a cell that was already resolved (miss, hit or sunk).
        reward = -10
    else:
        raise ValueError("Invalid state")

    return (self.hid_state, self.dead_ships), reward, self.done, {}


import timeit
env = Battleship3()
avg_list = []
for i in range(0,20):
print(i)
L = timeit.timeit('env.reset()', globals=globals(), number = 10000)
# L = timeit.timeit(setup='env.reset()', stmt='for i in range(0,99): env.step(i); env.reset()', globals=globals(), number = 100) #2.73
avg_list.append(L)
# L = [timeit.timeit('env.reset()', globals=globals(), number = 10000)]
L = timeit.repeat(setup='env.reset(); i=0', stmt='env.step(i); i += 1', globals=globals(), number = 100, repeat = 5000) #2.73
avg_list.append(sum(L))
print("mean: ", sum(avg_list)/len(avg_list), "std_dev: ", np.std(avg_list))
print(avg_list)

# Trial set 1
# mean: 2.407954839600001 std_dev: 0.03430318664613518
# mean: 2.502668951999999 std_dev: 0.08318714081173365
# mean: 2.3938089833000005 std_dev: 0.015541127257026607
# mean: 2.3938089833000005 std_dev: 0.015541127257026607

# mean: 2.4946897120000466 std_dev: 0.004272984706538216
74 changes: 53 additions & 21 deletions battleship_envs/envs/shared.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,63 @@
from enum import Flag, unique, IntEnum, auto
import numpy as np
from enum import Enum, unique, IntEnum

# indList = ("|-|","!M!","(2)","(S)","(C)","(4)","(5)",3"x2x","xSx","xCx","x4x","x5x","|2|","|S|","|C|","|4|","|5|","HiddenCruiser")
@unique
class Space(Enum): #int enum for better performance?


class Space(Flag):  # is this the best performance? intflag? bonus safety checks?
    """The labels for each of the spaces in the battleship game.

    Members are bit flags so that groups of states can be combined with
    ``|`` and tested with ``in``. The three-character board label and the
    float value each space used before the Flag rewrite are available
    through :meth:`description` and :meth:`old_value`.
    """
    Empty = 0

    Miss = auto()

    HitPTwo = auto()
    HitPSub = auto()
    HitPCruiser = auto()
    HitPFour = auto()
    HitPFive = auto()

    SunkTwo = auto()
    SunkSub = auto()
    SunkCruiser = auto()
    SunkFour = auto()
    SunkFive = auto()

    HiddenTwo = auto()
    HiddenSub = auto()
    HiddenCruiser = auto()
    HiddenFour = auto()
    HiddenFive = auto()

    def description(self):
        """Return the three-character board label for this space."""
        return _SPACE_DETAILS[self][0]

    def old_value(self):
        """Return the ``np.float32`` value this space had before the Flag rewrite."""
        return _SPACE_DETAILS[self][1]


# (label, legacy float value) per member. Kept outside the class body because
# ``_ignore_`` is reserved by the enum machinery: whatever is bound to it is
# converted to a list of names and removed from the finished class, so it
# cannot be used as a lookup table from instance methods.
_SPACE_DETAILS = {
    Space.Empty: ("|-|", np.float32(0.0)),
    Space.Miss: ("!M!", np.float32(-1.0)),
    Space.HitPTwo: ("(2)", np.float32(1.0)),
    Space.HitPSub: ("(S)", np.float32(1.0)),
    Space.HitPCruiser: ("(C)", np.float32(1.0)),
    Space.HitPFour: ("(4)", np.float32(1.0)),
    Space.HitPFive: ("(5)", np.float32(1.0)),
    Space.SunkTwo: ("x2x", np.float32(0.2)),
    Space.SunkSub: ("xSx", np.float32(0.4)),
    Space.SunkCruiser: ("xCx", np.float32(0.6)),
    Space.SunkFour: ("x4x", np.float32(0.8)),
    Space.SunkFive: ("x5x", np.float32(1.0)),
    Space.HiddenTwo: ("|2|", np.float32(1.0)),
    Space.HiddenSub: ("|S|", np.float32(1.0)),
    Space.HiddenCruiser: ("|C|", np.float32(1.0)),
    Space.HiddenFour: ("|4|", np.float32(1.0)),
    Space.HiddenFive: ("|5|", np.float32(1.0)),
}
hit_spaces = Space.HitPTwo | Space.HitPSub | Space.HitPCruiser | Space.HitPFour | Space.HitPFive
sunk_spaces = Space.SunkTwo | Space.SunkSub | Space.SunkCruiser | Space.SunkFour | Space.SunkFive
hidden_spaces = Space.HiddenTwo | Space.HiddenSub | Space.HiddenCruiser | Space.HiddenFour | Space.HiddenFive

@unique
class Direction(IntEnum):
Expand Down Expand Up @@ -69,7 +101,7 @@ def addShip(state, ship: Space, ship_len: int, x: int, y: int, d: Direction) ->
emptyStateRef = np.full(shape=(10,10),fill_value=Space.Empty)
hidSpaceRef = [Space.HiddenFive, Space.HiddenFour, Space.HiddenCruiser, Space.HiddenSub, Space.HiddenTwo]
shipSpaceLength = [5, 4, 3, 3, 2]
def setupShips(np_random: np.random.Generator): # need to make this very fast
def setup_ships(np_random: np.random.Generator): # need to make this very fast
"""Create a new state with ships placed randomly"""
i = 0
state = np.copy(emptyStateRef)
Expand Down
22 changes: 11 additions & 11 deletions trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,16 @@ def makeMove(obs,f):

# Converts regular spaces to what would be seen in a game
def singleShipSight(e, match):
    """Return 1 if space ``e`` belongs to the same ship as ``match``, else 0.

    Ships are identified by the marker character ('2', 'S', 'C', '4' or
    '5') embedded in each space's ``description()`` label. The first marker
    found in ``match``'s label decides which ship is being looked for; if
    ``match`` carries no ship marker the result is 0.
    """
    # description() is a method on Space, so it has to be called; testing
    # membership against the bound method itself raises TypeError.
    wanted_label = match.description()
    cell_label = e.description()
    for marker in ('2', 'S', 'C', '4', '5'):
        if marker in wanted_label:
            return 1 if marker in cell_label else 0
    return 0

# Recursion Variables and Stats
Expand All @@ -87,7 +87,7 @@ def singleShipSight(e, match):
iterations = 0
observations = []
expecteds = []
vfunc = np.vectorize(lambda e: e.value[0])
# Maps every Space in an array to its legacy float value; old_value is a
# method on Space, so it must be called (there is no `old_values` attribute).
vfunc = np.vectorize(lambda e: e.old_value())
vfuncSingleShip = np.vectorize(singleShipSight)
possMoves = list(range(100))

Expand Down

0 comments on commit 6ef7bed

Please sign in to comment.