citbrains · yasuohayashibara · Oct 10, 2023 · Oct 11, 2023 · Oct 14, 2023
diff --git a/controllers/marl/marl.py b/controllers/marl/marl.py
@@ -0,0 +1,7 @@
+from marllib import marl
+
+env = marl.make_env(environment_name="soccer", map_name="soccer")
+mappo = marl.algos.mappo(hyperparam_source="test")
+model = marl.build_model(env, mappo, {"core_arch": "mlp", "encode_layer": "128-256"})
+mappo.fit(env, model, stop={'episode_reward_mean': 2000, 'timesteps_total': 10000000}, local_mode=True, num_gpus=1,
+          num_workers=1, share_policy='all', checkpoint_freq=50)
diff --git a/controllers/marl/soccer/player.py b/controllers/marl/soccer/player.py
@@ -0,0 +1,76 @@
+import numpy as np
+from controller import Supervisor
+
+class Player():
+    def __init__(self, name = None, supervisor = None):
+        super().__init__()
+        self.name = name
+        self.supervisor = supervisor
+        self.player = None
+        self.emitter = None
+        self.waiting_time = 0
+
+    def reset(self, pos = [0.0, 0.0, 0.0]):
+        children = self.supervisor.getRoot().getField('children')
+        if self.player != None:
+            self.player.remove()
+        if "blue" in self.name:
+            ch = int(self.name[-1])
+            children.importMFNodeFromString(-1, f'DEF {self.name} GankenKun_simple {{translation {pos[0]} {pos[1]} 0.450 rotation 0 0 1 {pos[2]} jerseyTexture "textures/GankenKun_{self.name}.png" jerseyColor 0, 0, 1 channel {ch} controller "GankenKun_soccer"}}')
+        else:
+            ch = int(self.name[-1])+3
+            children.importMFNodeFromString(-1, f'DEF {self.name} GankenKun_simple {{translation {pos[0]} {pos[1]} 0.450 rotation 0 0 1 {pos[2]} jerseyTexture "textures/GankenKun_{self.name}.png" jerseyColor 1, 0, 0 channel {ch} controller "GankenKun_soccer"}}')
+        self.pos = pos
+        self.emitter = self.supervisor.getDevice(f'{self.name}_emitter')
+        self.player = self.supervisor.getFromDef(f'{self.name}')
+        self.player_pos = self.player.getField('translation')
+        self.player_rot = self.player.getField('rotation')
+
+        self.alive = True
+        self.score = 0
+        self.action = 0
+        self.is_fall = False
+        self.is_replace = False
+        self.waiting_time = 1
+
+    def move(self, pos = [0.0, 0.0, 0.0]):
+        self.player.resetPhysics()
+        self.player_pos.setSFVec3f([pos[0], pos[1], 0.450])
+        self.player_rot.setSFRotation([0, 0, 1, pos[2]])
+
+    def send(self, message):
+        if self.waiting_time > 0:
+            self.waiting_time -= 1
+            return
+        if "kick" in message.decode('utf-8'):
+            self.waiting_time = 4
+        if self.emitter != None:
+            self.emitter.send(message)
+
+    def update(self):
+        #self.action = action
+        x, y, _ = self.player_pos.getSFVec3f()
+        yaw, pitch, roll = self.rotation_to_euler(self.player_rot.getSFRotation())
+        self.pos = [x, y, yaw]
+        if abs(pitch) > 1.0 or abs(roll) > 1.0:
+            self.is_fall = True
+        else:
+            self.is_fall = False
+
+    def is_done(self):
+        return not self.alive
+
+    def rotation_to_euler(self, rotation):
+        x, y, z, angle = rotation
+        c = np.cos(angle)
+        s = np.sin(angle)
+        t = 1 - c
+        R = np.array([
+            [t*x*x + c, t*x*y - z*s, t*x*z + y*s],
+            [t*x*y + z*s, t*y*y + c, t*y*z - x*s],
+            [t*x*z - y*s, t*y*z + x*s, t*z*z + c]
+        ])
+        yaw = np.arctan2(R[1, 0], R[0, 0])
+        pitch = np.arctan2(-R[2, 0], np.sqrt(R[2, 1]**2 + R[2, 2]**2))
+        roll = np.arctan2(R[2, 1], R[2, 2])
+        return yaw, pitch, roll
diff --git a/controllers/marl/soccer/soccer.py b/controllers/marl/soccer/soccer.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import math
+import copy
+import random
+
+from controller import Supervisor
+
+from gymnasium.spaces import Box, Discrete, Sequence
+from gymnasium.utils import EzPickle, seeding
+
+from pettingzoo import AECEnv
+from pettingzoo.utils import wrappers
+from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.conversions import parallel_wrapper_fn
+
+from soccer.player import Player
+
+# Public names of this module: the wrapped AEC factory `env`, the
+# `parallel_env` constructor derived from it, and the unwrapped `raw_env` class.
+__all__ = ["env", "parallel_env", "raw_env"]
+
+def env(**kwargs):
+    env = raw_env(**kwargs)
+    env = wrappers.AssertOutOfBoundsWrapper(env)
+    env = wrappers.OrderEnforcingWrapper(env)
+    return env
+
+# Parallel-API constructor derived from the AEC factory `env` through
+# PettingZoo's `parallel_wrapper_fn` conversion helper.
+parallel_env = parallel_wrapper_fn(env)
+
+def normalize_angle_rad(angle):
+    while angle > math.pi:
+        angle -= 2.0 * math.pi
+    while angle <= -math.pi:
+        angle += 2.0 * math.pi
+    return angle
+
+class raw_env(AECEnv, EzPickle):
+    metadata = {
+        "render_modes": ["human", "rgb_array"],
+        "name": "soccer_v0",
+        "is_parallelizable": True,
+    }
+
+    supervisor = None
+
+    def __init__(self, max_cycles=300, render_mode=None):
+        EzPickle.__init__(self, max_cycles=max_cycles, render_mode=render_mode)
+        if self.supervisor == None:
+            self.supervisor = Supervisor()
+        self.time_step = int(self.supervisor.getBasicTimeStep())
+
+        self.frames = 0
+        self.render_mode = render_mode
+        self._seed()
+        self.max_cycles = max_cycles
+        self.out_agent = []
+        self.agent_name_mapping = {}
+        self.agent_dict = {}
+        self.kill_list = []
+        self.agent_list = []
+        self.agents = ["blue1", "blue2", "blue3", "red1", "red2", "red3"]
+        #self.agents = ["blue1"]
+        self.dead_agents = []
+        for i in range(len(self.agents)):
+            self.agent_name_mapping[self.agents[i]] = i
+            self.agent_list.append(Player(self.agents[i], self.supervisor))
+        obs_space = Box(low=-5, high=5, shape = ([15]), dtype=np.float16)
+        #obs_space = Box(low=-5, high=5, shape = ([5]), dtype=np.float16)
+        self.observation_spaces = dict(zip(self.agents, [obs_space for _ in enumerate(self.agents)]))
+        self.action_spaces = dict(zip(self.agents, [Discrete(8) for _ in enumerate(self.agents)]))
+        self.actions = ["walk,1,0,0", "walk,-1,0,0", "walk,0,1,0", "walk,0,-1,0", "walk,0,0,1", "walk,0,0,-1", "motion,left_kick", "motion,right_kick"]
+        self.state_space = Box(low=-5, high=5, shape = ([21]), dtype=np.float16)
+
+        self.possible_agents = copy.deepcopy(self.agents)
+        self._agent_selector = agent_selector(self.agents)
+
+        self.reinit()
+
+    def __del__(self):
+        print("DELETE")
+
+    def observation_space(self, agent):
+        return self.observation_spaces[agent]
+
+    def action_space(self, agent):
+        return self.action_spaces[agent]
+
+    def _seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+
+    def observe(self, agent):
+        i = self.agent_name_mapping[agent]
+        state = self.state()
+        ball_x, ball_y = [state[0], state[1]]
+        bx, by, bthe = state[i*3+3], state[i*3+4],state[i*3+5]
+        s, c = math.sin(bthe), math.cos(bthe)
+        blx, bly = ball_x - bx, ball_y - by
+        x, y = blx * c + bly * s, - blx * s + bly * c
+        obs = [x, y]
+        obs += [bx, by, bthe]
+        no_agent = len(self.possible_agents)
+        base_index = list(range(no_agent))
+        if agent.startswith("red"):
+            index = base_index[int(no_agent/2):] + base_index[:int(no_agent/2)]
+        else:
+            index = base_index
+        index.remove(i)
+        for j in index:
+            rx, ry = state[j*3+3], state[j*3+4]
+            lx, ly = rx - bx, ry - by
+            x, y = lx * c + ly * s, - lx * s + ly * c
+            obs += [x, y]
+        if agent.startswith("red"):
+            obs[2] = -obs[2]
+            obs[3] = -obs[3]
+            obs[4] = normalize_angle_rad(obs[4]+math.pi)
+        return np.array(obs)
+
+    def state(self):
+        ball_x, ball_y, _ = self.ball_pos.getSFVec3f()
+        for agent in self.agent_list:
+            agent.update()
+        player = []
+        for i in range(len(self.agent_list)):
+            player.append(self.agent_list[i].pos)
+        state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2], player[1][0], player[1][1], player[1][2], player[2][0], player[2][1], player[2][2], player[3][0], player[3][1], player[3][2], player[4][0], player[4][1], player[4][2], player[5][0], player[5][1], player[5][2]]
+        #state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2]]
+        return np.array(state)
+
+    def step(self, action):
+        if self.terminations[self.agent_selection] or self.truncations[self.agent_selection]:
+            self._was_dead_step(action)
+            return
+        self._cumulative_rewards[self.agent_selection] = 0
+        agent = self.agent_list[self.agent_name_mapping[self.agent_selection]]
+        agent.score = 0
+
+        print("frames: "+str(self.frames))
+
+        i = self.agent_name_mapping[self.agent_selection]
+        if self.agent_list[i].is_fall:
+            while True:
+                if self.agents[i].startswith("blue"):
+                    x, y = random.uniform(-4.0, -0.5), random.uniform(-2.5, 2.5)
+                elif self.agents[i].startswith("red"):
+                    x, y = random.uniform(4.0, 0.5), random.uniform(-2.5, 2.5)
+                near_robot = False
+                for j in range(i):
+                    robot_x, robot_y, _ = self.agent_list[j].pos
+                    length = math.sqrt((x-robot_x)**2+(y-robot_y)**2)
+                    if length < 1:
+                        near_robot = True
+                        break
+                if near_robot == False:
+                    break
+            self.init_pos[i][0], self.init_pos[i][1] = x, y
+            self.agent_list[i].move(self.init_pos[i])
+            self.agent_list[i].is_replace = True
+        else:
+            message = self.actions[action].encode('utf-8')
+            agent.send(message)
+
+        if self._agent_selector.is_last():
+            self.frames += 1
+            self._clear_rewards()
+            for i in range(40):
+                self.supervisor.step(self.time_step)
+                ball_x, ball_y, _ = self.ball_pos.getSFVec3f()
+                ball_vel_x, ball_vel_y = self.ball.getVelocity()[:2]
+                for agent in self.agents:
+                    x, y, the = self.agent_list[self.agent_name_mapping[agent]].pos
+                    length = math.sqrt((x-ball_x)**2+(y-ball_y)**2)
+                    self.rewards[agent] += 0.2/length/40
+                    if length < 0.3:
+                        if agent.startswith("blue"):
+                            ball_dx, ball_dy = 4.5 - ball_x, 0 - ball_y
+                            ball_len = math.sqrt(ball_dx**2+ball_dy**2)
+                            ball_dx, ball_dy = ball_dx / ball_len, ball_dy / ball_len
+                            reward = ball_vel_x * ball_dx + ball_vel_y * ball_dy
+                            self.rewards[agent] += max(reward, 0) * 10
+                        elif agent.startswith("red"):
+                            ball_dx, ball_dy = 4.5 - ( -ball_x), 0 - (-ball_y)
+                            ball_len = math.sqrt(ball_dx**2+ball_dy**2)
+                            ball_dx, ball_dy = ball_dx / ball_len, ball_dy / ball_len
+                            reward = (-ball_vel_x) * ball_dx + (-ball_vel_y) * ball_dy
+                            self.rewards[agent] += max(reward, 0) * 10
+            for agent in self.agents:
+                self.rewards[agent] += -0.01
+                if self.rewards[agent] > 0.1:
+                    print("reward: "+str(agent)+" "+str(self.rewards[agent]))
+                if self.agent_list[self.agent_name_mapping[agent]].is_replace:
+                    self.rewards[agent] += -1
+                    self.agent_list[self.agent_name_mapping[agent]].is_replace = False
+                    print("reward(fall): "+str(agent)+" "+str(self.rewards[agent]))
+            for agent in self.agents:
+                self.total_rewards[agent] += self.rewards[agent]
+
+        terminate = False
+        truncate = self.frames >= self.max_cycles
+        self.terminations = {a: terminate for a in self.agents}
+        self.truncations = {a: truncate for a in self.agents}
+        if truncate:
+            for agent in self.agents:
+                self.infos[agent]["episode"] = {"r": self.total_rewards[agent], "l": self.max_cycles}
+
+        if self._agent_selector.is_last():
+            _live_agents = self.agents[:]
+            for k in self.kill_list:
+                _live_agents.remove(k)
+                self.terminations[k] = True
+                self.dead_agents.append(k)
+            self.kill_list = []
+            self._agent_selector.reinit(_live_agents)
+
+        if len(self._agent_selector.agent_order):
+            self.agent_selection = self._agent_selector.next()
+
+        self._accumulate_rewards()
+        self._deads_step_first()
+
+    def render():
+        pass
+
+    def reinit(self):
+        self.score = 0
+        self.run = True
+        children = self.supervisor.getRoot().getField('children')
+
+        try:
+            self.ball
+        except:
+            pass
+        else:
+            self.ball.remove()
+
+        y = random.uniform(-2.5, 2.5)
+        children.importMFNodeFromString(-1, f'DEF BALL RobocupSoccerBall {{ translation 0 {y} 0.1 size 1 }}')
+        self.ball = self.supervisor.getFromDef('BALL')
+        self.ball_pos = self.ball.getField('translation')
+        self.init_pos = [[-0.3, 0, 0], [-2, -1, 0], [-2, 1, 0], [1, 0, 3.14], [2, -1, 3.14], [2, 1, 3.14]]
+        for i in range(len(self.agent_list)):
+            while True:
+                if self.agents[i].startswith("blue"):
+                    x, y = random.uniform(-4.0, -0.5), random.uniform(-2.5, 2.5)
+                elif self.agents[i].startswith("red"):
+                    x, y = random.uniform(4.0, 0.5), random.uniform(-2.5, 2.5)
+                near_robot = False
+                for j in range(i):
+                    length = math.sqrt((x-self.init_pos[j][0])**2+(y-self.init_pos[j][1])**2)
+                    if length < 1:
+                        near_robot = True
+                        break
+                if near_robot == False:
+                    break
+            self.init_pos[i][0], self.init_pos[i][1] = x, y
+            self.agent_list[i].reset(self.init_pos[i])
+        self.frames = 0
+
+    def reset(self, seed = None, options = None):
+        if seed is not None:
+            self._seed(seed=seed)
+        self.agents = copy.deepcopy(self.possible_agents)
+        self._agent_selector.reinit(self.agents)
+        self.agent_selection = self._agent_selector.next()
+        self.rewards = dict(zip(self.agents, [0 for _ in self.agents]))
+        self.total_rewards = dict(zip(self.agents, [0 for _ in self.agents]))
+        self._cumulative_rewards = {a: 0 for a in self.agents}
+        self.terminations = dict(zip(self.agents, [False for _ in self.agents]))
+        self.truncations = dict(zip(self.agents, [False for _ in self.agents]))
+        self.infos = dict(zip(self.agents, [{} for _ in self.agents]))
+        self.reinit()
diff --git a/controllers/marl/soccer_v0.py b/controllers/marl/soccer_v0.py
@@ -0,0 +1,7 @@
+from soccer.soccer import (
+    env,
+    parallel_env,
+    raw_env,
+)
+
+# Re-export the environment constructors imported above from soccer.soccer.
+__all__ = ["env", "parallel_env", "raw_env"]