Skip to content

Commit

Permalink
Merge pull request #217 from upb-lea/gymnasium-last-changes
Browse files Browse the repository at this point in the history
Gymnasium last changes
  • Loading branch information
wkirgsn authored Aug 15, 2023
2 parents 059e0d3 + a0ceb0e commit a97e68e
Show file tree
Hide file tree
Showing 16 changed files with 139 additions and 27 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,4 @@ examples/logs/
/my_examples/
.vscode/settings.json
.vscode/launch.json

plots/
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

##[Unreleased]
## [Unreleased]
## Added
- Support for Python 3.10
- Changed API to be compatible with Gymnasium
- Output a Motor Dashboard graph as PNG or PDF file (with LaTeX font)
- Automated integration test (run a motor simulation with controller)

## Changed
- Dropped support for Python 3.7 (due to gymnasium not supporting it)

## Fixed

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

## Overview
The gym-electric-motor (GEM) package is a Python toolbox for the simulation and control of various electric motors.
It is built upon [OpenAI Gym Environments](https://gym.openai.com/), and, therefore, can be used for both, classical control simulation and [reinforcement learning](https://github.com/upb-lea/reinforcement_learning_course_materials) experiments. It allows you to construct a typical drive train with the usual building blocks, i.e., supply voltages, converters, electric motors and load models, and obtain not only a closed-loop simulation of this physical structure, but also a rich interface for plugging in any decision making algorithm, from linear feedback control to [Deep Deterministic Policy Gradient](https://spinningup.openai.com/en/latest/algorithms/ddpg.html) agents.
It is built upon [Farama Gymnasium Environments](https://gymnasium.farama.org/), and, therefore, can be used for both, classical control simulation and [reinforcement learning](https://github.com/upb-lea/reinforcement_learning_course_materials) experiments. It allows you to construct a typical drive train with the usual building blocks, i.e., supply voltages, converters, electric motors and load models, and obtain not only a closed-loop simulation of this physical structure, but also a rich interface for plugging in any decision making algorithm, from linear feedback control to [Deep Deterministic Policy Gradient](https://spinningup.openai.com/en/latest/algorithms/ddpg.html) agents.

## Getting Started
An easy way to get started with GEM is by playing around with the following interactive notebooks in Google Colaboratory. Most important features of GEM as well as application demonstrations are showcased, and give a kickstart for engineers in industry and academia.
Expand All @@ -41,7 +41,7 @@ if __name__ == '__main__':
(states, references), rewards, done, _ =\
env.step(env.action_space.sample()) # pick random control actions
if done:
(states, references) = env.reset()
(states, references), _ = env.reset()
env.close()
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
visualization = MotorDashboard(additional_plots=external_ref_plots)
controller = Controller.make(env, external_ref_plots=external_ref_plots)

state, reference = env.reset(seed = None)
(state, reference), _ = env.reset(seed = None)
# simulate the environment
for i in range(10001):
action = controller.control(state, reference)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"""
controller = Controller.make(env)

state, reference = env.reset()
(state, reference), _ = env.reset()

# simulate the environment
for i in range(10001):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

controller = Controller.make(env, external_ref_plots=external_ref_plots, torque_control='analytical')

state, reference = env.reset()
(state, reference), _ = env.reset()

# simulate the environment
for i in range(10001):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@

controller = Controller.make(env, external_ref_plots=external_ref_plots, stages=stages)

state, reference = env.reset()
(state, reference), _ = env.reset()

# simulate the environment
for i in range(10001):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

controller = Controller.make(env, stages=stages, external_plot=external_ref_plots)

state, reference = env.reset()
(state, reference), _ = env.reset()

# simulate the environment
for i in range(10001):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@

controller = Controller.make(env, stages=stages, external_ref_plots=external_ref_plots, torque_control='analytical')

state, reference = env.reset()
(state, reference), _ = env.reset()

# simulate the environment
for i in range(10001):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
"""
controller = Controller.make(env, external_ref_plots=external_ref_plots)

state, reference = env.reset(seed=1337)
(state, reference), _ = env.reset(seed=1337)
# simulate the environment
for i in range(10001):
action = controller.control(state, reference)
Expand Down
12 changes: 6 additions & 6 deletions examples/environment_features/GEM_cookbook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"\n",
"The gym-electric-motor (GEM) package is a Python toolbox for the simulation and control of various electric motors.\n",
"\n",
"It is built upon [OpenAI Gym Environments](https://gym.openai.com/), and, therefore, can be used for both, classical control simulation and reinforcement learning experiments. It allows you to construct a typical drive train with the usual building blocks, i.e. supply voltages, converters, electric motors and load models, and obtain not only a closed-loop simulation of this physical structure, but also a rich interface for plugging in any decision making algorithm, from PI-controllers to [Deep Deterministic Policy Gradient](https://spinningup.openai.com/en/latest/algorithms/ddpg.html) agents.\n",
"It is built upon [Farama Gymnasium Environments](https://gymnasium.farama.org/), and, therefore, can be used for both, classical control simulation and reinforcement learning experiments. It allows you to construct a typical drive train with the usual building blocks, i.e. supply voltages, converters, electric motors and load models, and obtain not only a closed-loop simulation of this physical structure, but also a rich interface for plugging in any decision making algorithm, from PI-controllers to [Deep Deterministic Policy Gradient](https://spinningup.openai.com/en/latest/algorithms/ddpg.html) agents.\n",
"\n",
"### 1.1 Installation\n",
"Before you can start, you need to make sure that you have gym-electric-motor installed. You can install it easily using pip:\n",
Expand Down Expand Up @@ -114,20 +114,20 @@
"Moreover, the angular velocity is the mechanical one and not the electrical:\n",
"$p\\omega_{me} = p\\omega = \\omega_{el}$\n",
"\n",
"### 1.3 OpenAI Gym Interface\n",
"Like every gym environment, the basic user interface consists of four main functions.\n",
"### 1.3 Farama Gymnasium Interface\n",
"Like every gymnasium environment, the basic user interface consists of four main functions.\n",
"* `import gym_electric_motor as gem` \n",
" Import the package. \n",
"\n",
"* `env = gem.make(environment-id, **kwargs)` \n",
" Returns an instantiated motor environment. Call this function at the beginning.\n",
" The `gem.make()` method is equal to the `gym.make()`. By using `gem.make()`you can avoid importing gym additionally. \n",
" \n",
"* `initial_observation = env.reset()` \n",
"* `initial_observation, info = env.reset()` \n",
" Resets the motor. This includes a new initial state and new reference trajectories.\n",
" Call this function before a new episode starts. \n",
"\n",
"* `observation, reward, done, info = env.step(action)` \n",
"* `observation, reward, terminated, truncated, info = env.step(action)` \n",
" This function performs one action on the environment for one time step.\n",
" It simulates the motor and needs to be called in every time step.\n",
" First, the voltage applied on the motor due to the converter output is determined and then an ODE solver is used to compute the next state. \n",
Expand Down Expand Up @@ -231,7 +231,7 @@
" if terminated:\n",
" # Reset the environment\n",
" # This is required initally or after an episode end due to a constraint violation in the env.\n",
" state, references = env.reset()\n",
" (state, references), _ = env.reset()\n",
" # Visualization of environment: Red vertical lines indicate a constraint violation and therefore, a reset environment.\n",
" # Blue vertical lines indicate an additional reset by the user which is not due to a terminated episode.\n",
" \n",
Expand Down
21 changes: 13 additions & 8 deletions gym_electric_motor/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ def reset(self, seed = None,*_, **__):
Returns:
The initial observation consisting of the initial state and initial reference.
info(dict): Auxiliary information (optional)
"""

self._seed(seed)
Expand All @@ -281,7 +282,10 @@ def reset(self, seed = None,*_, **__):
reference, next_ref, _ = self.reference_generator.reset(state)
self._reward_function.reset(state, reference)
self._call_callbacks('on_reset_end', state, reference)
return state[self.state_filter], next_ref

observation = (state[self.state_filter], next_ref)
info = {}
return observation, info

def render(self, *_, **__):
"""
Expand All @@ -300,7 +304,7 @@ def step(self, action):
observation(Tuple(ndarray(float),ndarray(float)): Tuple of the new state and the next reference.
reward(float): Amount of reward received for the last step.
terminated(bool): Flag, indicating if a reset is required before new steps can be taken.
{}: An empty dictionary for consistency with the OpenAi Gym interface.
info(dict): Auxiliary information (optional)
"""

assert not self._terminated, 'A reset is required before the environment can perform further steps'
Expand All @@ -320,9 +324,10 @@ def step(self, action):
# Call render code
if self.render_mode == "figure":
self.render()

return (state[self.state_filter], ref_next), reward, self._terminated, self._truncated, {}


info = {}
return (state[self.state_filter], ref_next), reward, self._terminated, self._truncated, info

def _seed(self, seed=None):
sg = np.random.SeedSequence(seed)
components = [
Expand Down Expand Up @@ -398,7 +403,7 @@ class ReferenceGenerator:
"""The abstract base class for reference generators in gym electric motor environments.
reference_space:
Space of reference observations as defined in the OpenAI Gym Toolbox.
Space of reference observations as defined in the Farama Gymnasium Toolbox.
The reference generator is called twice per step.
Expand Down Expand Up @@ -632,15 +637,15 @@ def state_positions(self):
def action_space(self):
"""
Returns:
gymnasium.Space: An OpenAI Gym Space that describes the possible actions on the system.
gymnasium.Space: A Farama Gymnasium Space that describes the possible actions on the system.
"""
return self._action_space

@property
def state_space(self):
"""
Returns:
gymnasium.Space: An OpenAI Gym Space that describes the possible states of the system.
gymnasium.Space: A Farama Gymnasium Space that describes the possible states of the system.
"""
return self._state_space

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
setuptools.setup(
name='gym_electric_motor',
version='1.1.0',
description='An OpenAI gym environment for electric motor control.',
description='A Farama Gymnasium environment for electric motor control.',
packages=setuptools.find_packages(),
install_requires=requirements,
python_requires='>=3.6',
Expand Down
Binary file added tests/integration_tests/ref_data.npz
Binary file not shown.
103 changes: 103 additions & 0 deletions tests/integration_tests/test_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# The following lines are needed to be able to successfully execute the import in line 7
import sys
import os
path = os.getcwd()+'/examples/classic_controllers'
sys.path.append(path)
from classic_controllers import Controller
#import pytest
import gym_electric_motor as gem

from gym_electric_motor.reference_generators import SinusoidalReferenceGenerator


import numpy as np


def simulate_env(seed=None):
    """Run a closed-loop GEM simulation and dump the trajectory to disk.

    Builds a continuous-action, speed-controlled PermExDc motor environment
    with a constant-parameter sinusoidal reference generator, simulates it
    for 2001 steps under a classic controller, and saves the recorded
    states, references, rewards, termination and truncation flags to
    ``./tests/integration_tests/test_data.npz``.

    Args:
        seed (int | None): Seed forwarded to ``env.reset()`` so that runs
            are reproducible. ``None`` yields an unseeded episode.
    """
    motor_type = 'PermExDc'
    control_type = 'SC'
    action_type = 'Cont'
    version = 'v0'

    env_id = f'{action_type}-{control_type}-{motor_type}-{version}'

    # Constant-parameter sinusoidal reference, so the reference trajectory
    # is fully determined by the environment seed.
    ref_generator = SinusoidalReferenceGenerator(amplitude_range=(1, 1),
                                                 frequency_range=(5, 5),
                                                 offset_range=(0, 0),
                                                 episode_lengths=(10001, 10001))

    # initialize the gym-electric-motor environment
    env = gem.make(env_id, reference_generator=ref_generator)

    # The controller is tuned automatically from the environment's physical
    # parameters (see classic_controllers.Controller.make for the options:
    # external_ref_plots, stages, automated_gain, a).
    controller = Controller.make(env)

    # Pass the seed by keyword: the Gymnasium API declares `seed` as a
    # keyword-only parameter of `reset`, so a positional call would break
    # once the environment adopts the standard signature.
    (state, reference), _ = env.reset(seed=seed)

    test_states = []
    test_reference = []
    test_reward = []
    test_term = []
    test_trunc = []

    # simulate the environment
    for _ in range(2001):
        action = controller.control(state, reference)

        (state, reference), reward, terminated, truncated, _ = env.step(action)

        test_states.append(state)
        test_reference.append(reference)
        test_reward.append(reward)
        test_term.append(terminated)
        test_trunc.append(truncated)

        if terminated:
            env.reset()
            controller.reset()

    np.savez('./tests/integration_tests/test_data.npz',
             states=test_states,
             references=test_reference,
             rewards=test_reward,
             terminations=test_term,
             truncations=test_trunc)

def test_simulate_env():
    """Integration test for the motor simulation.

    A run seeded with 1337 must reproduce the committed reference
    trajectory exactly; a run with a different seed (the "anti test")
    must not, which guards against the comparison passing trivially.
    """
    simulate_env(1337)
    # Use context managers: np.load on an .npz returns an NpzFile whose
    # underlying file handle must be closed explicitly.
    with np.load('./tests/integration_tests/ref_data.npz') as ref_data, \
         np.load('./tests/integration_tests/test_data.npz') as test_data:
        for file in ref_data.files:
            assert np.allclose(ref_data[file], test_data[file], equal_nan=True)
        # Keep the arrays needed for the anti test (states, references,
        # rewards — the first three entries) before the archive closes.
        anti_ref = {file: ref_data[file] for file in ref_data.files[0:3]}

    os.remove('./tests/integration_tests/test_data.npz')

    # Anti test: a different seed must yield a different trajectory.
    simulate_env(1234)
    with np.load('./tests/integration_tests/test_data.npz') as test_data:
        # test only states, references and rewards
        for file, ref_array in anti_ref.items():
            assert not np.allclose(ref_array, test_data[file], equal_nan=True)

    os.remove('./tests/integration_tests/test_data.npz')


2 changes: 1 addition & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_reset(self, env):
for callback in cbs:
assert callback.reset_begin == 0
assert callback.reset_end == 0
state, ref = env.reset()
(state, ref), _ = env.reset()
# The corresponding callback functions should've been called
for callback in cbs:
assert callback.reset_begin == 1
Expand Down

0 comments on commit a97e68e

Please sign in to comment.