🚀 [RofuncRL] Update

Skylark0924 · Mar 9, 2024 · 1ca1f6f · 1ca1f6f
1 parent d6ad694
commit 1ca1f6f
Show file tree

Hide file tree

Showing 5 changed files with 1,749 additions and 0 deletions.
diff --git a/examples/learning_rl/IsaacGym_RofuncRL/example_HumanoidPhysHOI_RofuncRL.py b/examples/learning_rl/IsaacGym_RofuncRL/example_HumanoidPhysHOI_RofuncRL.py
@@ -0,0 +1,108 @@
+"""
+HumanoidPhysHOI (RofuncRL)
+===========================
+
+Humanoid backflip/walk/run/dance/hop, trained by RofuncRL
+"""
+
+import isaacgym
+import argparse
+
+from rofunc.config.utils import omegaconf_to_dict, get_config
+from rofunc.learning.RofuncRL.tasks import Tasks
+from rofunc.learning.RofuncRL.trainers import Trainers
+from rofunc.learning.pre_trained_models.download import model_zoo
+from rofunc.learning.utils.utils import set_seed
+
+
+def train(custom_args):
+    # Config task and trainer parameters for Isaac Gym environments
+    task, motion_file = custom_args.task.split('_')
+    args_overrides = ["task={}".format(task),
+                      "train={}{}RofuncRL".format(task, custom_args.agent.upper()),
+                      "device_id={}".format(custom_args.sim_device),
+                      "rl_device=cuda:{}".format(custom_args.rl_device),
+                      "headless={}".format(custom_args.headless),
+                      "num_envs={}".format(custom_args.num_envs)]
+    cfg = get_config('./learning/rl', 'config', args=args_overrides)
+    cfg.task.env.motion_file = f'amp_humanoid_{motion_file}.npy'
+    cfg_dict = omegaconf_to_dict(cfg.task)
+
+    set_seed(cfg.train.Trainer.seed)
+
+    # Instantiate the Isaac Gym environment
+    env = Tasks().task_map[task](cfg=cfg_dict,
+                                 rl_device=cfg.rl_device,
+                                 sim_device=f'cuda:{cfg.device_id}',
+                                 graphics_device_id=cfg.device_id,
+                                 headless=cfg.headless,
+                                 virtual_screen_capture=cfg.capture_video,  # TODO: check
+                                 force_render=cfg.force_render)
+
+    # Instantiate the RL trainer
+    trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg,
+                                                        env=env,
+                                                        device=cfg.rl_device,
+                                                        env_name=custom_args.task)
+    # Start training
+    trainer.train()
+
+
+def inference(custom_args):
+    # Config task and trainer parameters for Isaac Gym environments
+    task, motion_file = custom_args.task.split('_')
+    args_overrides = ["task={}".format(task),
+                      "train={}{}RofuncRL".format(task, custom_args.agent.upper()),
+                      "device_id={}".format(custom_args.sim_device),
+                      "rl_device=cuda:{}".format(custom_args.rl_device),
+                      "headless={}".format(False),
+                      "num_envs={}".format(16)]
+    cfg = get_config('./learning/rl', 'config', args=args_overrides)
+    cfg_dict = omegaconf_to_dict(cfg.task)
+
+    set_seed(cfg.train.Trainer.seed)
+
+    # Instantiate the Isaac Gym environment
+    infer_env = Tasks().task_map[task](cfg=cfg_dict,
+                                       rl_device=cfg.rl_device,
+                                       sim_device=f'cuda:{cfg.device_id}',
+                                       graphics_device_id=cfg.device_id,
+                                       headless=cfg.headless,
+                                       virtual_screen_capture=cfg.capture_video,  # TODO: check
+                                       force_render=cfg.force_render)
+
+    # Instantiate the RL trainer
+    trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg,
+                                                        env=infer_env,
+                                                        device=cfg.rl_device,
+                                                        env_name=custom_args.task,
+                                                        inference=True)
+
+    # load checkpoint
+    if custom_args.ckpt_path is None:
+        custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}.pth")
+    trainer.agent.load_ckpt(custom_args.ckpt_path)
+
+    # Start inference
+    trainer.inference()
+
+
+if __name__ == '__main__':
+    gpu_id = 0
+
+    parser = argparse.ArgumentParser()
+    # Available tasks: HumanoidAMP_backflip, HumanoidAMP_walk, HumanoidAMP_run, HumanoidAMP_dance, HumanoidAMP_hop
+    parser.add_argument("--task", type=str, default="HumanoidAMP_hop")
+    parser.add_argument("--agent", type=str, default="amp")  # Available agent: amp
+    parser.add_argument("--num_envs", type=int, default=4096)
+    parser.add_argument("--sim_device", type=int, default=0)
+    parser.add_argument("--rl_device", type=int, default=gpu_id)
+    parser.add_argument("--headless", type=str, default="True")
+    parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
+    parser.add_argument("--ckpt_path", type=str, default=None)
+    custom_args = parser.parse_args()
+
+    if not custom_args.inference:
+        train(custom_args)
+    else:
+        inference(custom_args)
diff --git a/rofunc/config/learning/rl/task/HumanoidPhysHOI.yaml b/rofunc/config/learning/rl/task/HumanoidPhysHOI.yaml
@@ -0,0 +1,68 @@
+# if given, will override the device setting in gym. 
+env: 
+  numEnvs: 2048
+  envSpacing: 5
+  episodeLength: 40
+  isFlagrun: False
+  enableDebugVis: False
+  playdataset: False
+  projtype: "None"
+  saveImages: False
+  initVel: False
+
+  pdControl: True
+  powerScale: 1.0
+  controlFrequencyInv: 2 # 30 Hz
+  stateInit: "Start" #Random
+  hybridInitProb: 0.5
+  dataFPS: 25
+  dataFramesScale: 1.2 # 25->30fps
+  ballSize: 1.
+
+  localRootObs: False
+  keyBodies: ["Head", "L_Knee", "R_Knee", "L_Elbow", "R_Elbow", "L_Ankle", "R_Ankle", "L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3","L_Thumb3","R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3","R_Thumb3"] # #["L_Hip", "L_Knee", "left_foot", "L_Toe", "R_Hip", "R_Knee", "right_foot", "R_Toe", "Torso", "Spine", "Chest", "Neck", "Head", "L_Thorax", "L_Shoulder", "L_Elbow", "left_hand", "R_Thorax", "R_Shoulder", "R_Elbow", "right_hand"] #["right_hand", "left_hand", "right_foot", "left_foot"]
+  contactBodies: ["L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3","L_Thumb3","R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3","R_Thumb3"] #["right_foot", "left_foot"]
+  terminationHeight: 0.15
+  enableEarlyTermination: True
+
+  asset:
+    assetRoot: "physhoi/data/assets"
+    assetFileName: "smplx/smplx_capsule.xml"
+
+  plane:
+    staticFriction: 1.0
+    dynamicFriction: 1.0
+    restitution: 1.6
+
+  rewardWeights:
+    p: 50.
+    r: 50.
+    pv: 0.
+    rv: 0.
+
+    op: 1.
+    or: 0.
+    opv: 0.
+    orv: 0.
+
+    ig: 20.
+
+    cg1: 5.
+    cg2: 5.
+
+sim:
+  substeps: 2
+  physx:
+    num_threads: 4
+    solver_type: 1  # 0: pgs, 1: tgs
+    num_position_iterations: 4
+    num_velocity_iterations: 0
+    contact_offset: 0.02
+    rest_offset: 0.0
+    bounce_threshold_velocity: 0.2
+    max_depenetration_velocity: 10.0
+    default_buffer_size_multiplier: 10.0
+
+  flex:
+    num_inner_iterations: 10
+    warm_start: 0.25
diff --git a/rofunc/config/learning/rl/task/HumanoidPhyshoi60hz.yaml b/rofunc/config/learning/rl/task/HumanoidPhyshoi60hz.yaml
@@ -0,0 +1,68 @@
+# if given, will override the device setting in gym. 
+env: 
+  numEnvs: 2048
+  envSpacing: 5
+  episodeLength: 40
+  isFlagrun: False
+  enableDebugVis: False
+  playdataset: False
+  projtype: "None"
+  saveImages: False
+  initVel: False
+
+  pdControl: True
+  powerScale: 1.0
+  controlFrequencyInv: 1 # 60 Hz
+  stateInit: "Start" #Random
+  hybridInitProb: 0.5
+  dataFPS: 25
+  dataFramesScale: 2.4 # 25->60fps
+  ballSize: 1.
+
+  localRootObs: False
+  keyBodies: ["Head", "L_Knee", "R_Knee", "L_Elbow", "R_Elbow", "L_Ankle", "R_Ankle", "L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3","L_Thumb3","R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3","R_Thumb3"] # #["L_Hip", "L_Knee", "left_foot", "L_Toe", "R_Hip", "R_Knee", "right_foot", "R_Toe", "Torso", "Spine", "Chest", "Neck", "Head", "L_Thorax", "L_Shoulder", "L_Elbow", "left_hand", "R_Thorax", "R_Shoulder", "R_Elbow", "right_hand"] #["right_hand", "left_hand", "right_foot", "left_foot"]
+  contactBodies: ["L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3","L_Thumb3","R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3","R_Thumb3"] #["right_foot", "left_foot"]
+  terminationHeight: 0.15
+  enableEarlyTermination: True
+
+  asset:
+    assetRoot: "physhoi/data/assets"
+    assetFileName: "smplx/smplx_capsule.xml"
+
+  plane:
+    staticFriction: 1.0
+    dynamicFriction: 1.0
+    restitution: 1.6
+
+  rewardWeights:
+    p: 50.
+    r: 50.
+    pv: 0.
+    rv: 0.
+
+    op: 1.
+    or: 0.
+    opv: 0.
+    orv: 0.
+
+    ig: 20.
+
+    cg1: 5.
+    cg2: 1.
+
+sim:
+  substeps: 2
+  physx:
+    num_threads: 4
+    solver_type: 1  # 0: pgs, 1: tgs
+    num_position_iterations: 4
+    num_velocity_iterations: 0
+    contact_offset: 0.02
+    rest_offset: 0.0
+    bounce_threshold_velocity: 0.2
+    max_depenetration_velocity: 10.0
+    default_buffer_size_multiplier: 10.0
+
+  flex:
+    num_inner_iterations: 10
+    warm_start: 0.25
diff --git a/rofunc/learning/RofuncRL/agents/mixline/physhoi_agent.py b/rofunc/learning/RofuncRL/agents/mixline/physhoi_agent.py
@@ -0,0 +1,32 @@
+# Copyright 2023, Junjia LIU, [email protected]
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gym
+import gymnasium
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from omegaconf import DictConfig
+from typing import Callable, Union, Tuple, Optional
+
+import rofunc as rf
+from rofunc.learning.RofuncRL.agents.base_agent import BaseAgent
+from rofunc.learning.RofuncRL.agents.mixline.amp_agent import AMPAgent
+from rofunc.learning.RofuncRL.models.base_models import BaseMLP
+from rofunc.learning.RofuncRL.utils.memory import Memory
+
+
+class PhysHOIAgent(AMPAgent):
+    def __init__(self):
+        super().__init__()