diff --git a/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py b/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py
index 8614b6eeb..92b976a5b 100644
--- a/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py
+++ b/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py
@@ -97,12 +97,13 @@ def inference(custom_args):
# BiShadowHandGraspAndPlace, BiShadowHandLiftUnderarm, BiShadowHandPen, BiShadowHandPointCloud,
# BiShadowHandPushBlock, BiShadowHandReOrientation, BiShadowHandScissors, BiShadowHandSwingCup,
# BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm
- parser.add_argument("--task", type=str, default="BiShadowHandSwitch")
+ # QbSoftHandGrasp, BiQbSoftHandGraspAndPlace
+ parser.add_argument("--task", type=str, default="BiQbSoftHandGraspAndPlace")
parser.add_argument("--agent", type=str, default="ppo") # Available agents: ppo, sac, td3, a2c
parser.add_argument("--num_envs", type=int, default=256)
parser.add_argument("--sim_device", type=int, default=0)
parser.add_argument("--rl_device", type=int, default=gpu_id)
- parser.add_argument("--headless", type=str, default="True")
+ parser.add_argument("--headless", type=str, default="False")
parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
parser.add_argument("--ckpt_path", type=str, default=None)
custom_args = parser.parse_args()
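With these defaults the new tasks are selected from the command line via --task QbSoftHandGrasp or --task BiQbSoftHandGraspAndPlace. Note that --headless is declared with type=str, so the default "False" is the literal string, not a boolean; a minimal, hypothetical conversion helper (not part of this patch, the launcher may already handle the string elsewhere) could look like:

```python
# Hypothetical helper, illustrating how the string-valued --headless flag
# could be turned into a boolean before being passed on to the simulator.
def str2bool(value: str) -> bool:
    return value.lower() in ("true", "1", "yes")

print(str2bool("False"))  # -> False
print(str2bool("True"))   # -> True
```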
diff --git a/rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml b/rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml
new file mode 100644
index 000000000..0b541fb4f
--- /dev/null
+++ b/rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml
@@ -0,0 +1,178 @@
+name: BiQbSoftHandGraspAndPlace
+
+physics_engine: ${..physics_engine}
+
+# if given, will override the device setting in gym.
+env:
+ env_name: "bi_qbsofthand_grasp_and_place"
+ numEnvs: ${resolve_default:4096,${...num_envs}}
+ envSpacing: 1.5
+ episodeLength: 500
+ enableDebugVis: False
+ cameraDebug: True
+ pointCloudDebug: True
+ aggregateMode: 1
+
+ stiffnessScale: 1.0
+ forceLimitScale: 1.0
+ useRelativeControl: False
+ dofSpeedScale: 20.0
+ actionsMovingAverage: 1.0
+ controlFrequencyInv: 1 # 60 Hz
+
+ startPositionNoise: 0.0
+ startRotationNoise: 0.0
+
+ resetPositionNoise: 0.0
+ resetRotationNoise: 0.0
+ resetDofPosRandomInterval: 0.0
+ resetDofVelRandomInterval: 0.0
+
+ distRewardScale: 20
+ transition_scale: 0.5
+ orientation_scale: 0.1
+ rotRewardScale: 1.0
+ rotEps: 0.1
+ actionPenaltyScale: -0.0002
+ reachGoalBonus: 250
+ fallDistance: 0.4
+ fallPenalty: 0.0
+
+ objectType: "pot" # can be block, egg, pen or pot
+ observationType: "full_state" # point_cloud or full_state
+ handAgentIndex: "[[0, 1, 2, 3, 4, 5]]"
+ asymmetric_observations: False
+ successTolerance: 0.1
+ printNumSuccesses: False
+ maxConsecutiveSuccesses: 0
+
+ asset:
+ assetFileNameBlock: "urdf/objects/cube_multicolor.urdf"
+ assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
+ assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"
+
+task:
+ randomize: False
+ randomization_params:
+ frequency: 600 # Define how many simulation steps between generating new randomizations
+ observations:
+ range: [0, .002] # range for the white noise
+ range_correlated: [0, .001 ] # range for correlated noise, refreshed with freq `frequency`
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps
+ schedule_steps: 40000
+ actions:
+ range: [0., .05]
+ range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency`
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
+ schedule_steps: 40000
+ sim_params:
+ gravity:
+ range: [0, 0.4]
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
+ schedule_steps: 40000
+ actor_params:
+ hand:
+ color: True
+ tendon_properties:
+ damping:
+ range: [0.3, 3.0]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ stiffness:
+ range: [0.75, 1.5]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ dof_properties:
+ damping:
+ range: [0.3, 3.0]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ stiffness:
+ range: [0.75, 1.5]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ lower:
+ range: [0, 0.01]
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ upper:
+ range: [0, 0.01]
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_body_properties:
+ mass:
+ range: [0.5, 1.5]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_shape_properties:
+ friction:
+ num_buckets: 250
+ range: [0.7, 1.3]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ object:
+ scale:
+ range: [0.95, 1.05]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_body_properties:
+ mass:
+ range: [0.5, 1.5]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_shape_properties:
+ friction:
+ num_buckets: 250
+ range: [0.7, 1.3]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+
+sim:
+ dt: 0.0166 # 1/60 s
+ substeps: 2
+ up_axis: "z"
+ use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
+ gravity: [ 0.0, 0.0, -9.81 ]
+ physx:
+ num_threads: 4
+ solver_type: 1 # 0: pgs, 1: tgs
+ num_position_iterations: 8
+ num_velocity_iterations: 0
+ contact_offset: 0.002
+ rest_offset: 0.0
+ bounce_threshold_velocity: 0.2
+ max_depenetration_velocity: 1000.0
+ default_buffer_size_multiplier: 5.0
+ flex:
+ num_outer_iterations: 5
+ num_inner_iterations: 20
+ warm_start: 0.8
+ relaxation: 0.75
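The interpolations in this config (resolve_default, eq, and the relative ${..}/${...} references) are OmegaConf resolvers provided by the training entry point rather than by the YAML itself. A minimal sketch of how such resolvers could be registered and how the file then resolves, assuming implementations along these lines (the actual resolver code lives in the library's config loading, not in this patch):

```python
# Minimal sketch with assumed resolver implementations; illustrative only.
from omegaconf import OmegaConf

OmegaConf.register_new_resolver(
    "resolve_default", lambda default, arg: default if arg in ("", None) else arg)
OmegaConf.register_new_resolver("eq", lambda a, b: str(a).lower() == str(b).lower())

task_cfg = OmegaConf.load("rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml")

# num_envs, pipeline and physics_engine normally come from the parent train config;
# explicit values are used here only to show how the relative references resolve.
root = OmegaConf.create({"num_envs": 256, "pipeline": "gpu", "physics_engine": "physx"})
root.task = task_cfg

print(root.task.env.numEnvs)           # 256 (falls back to 4096 when num_envs is empty)
print(root.task.sim.use_gpu_pipeline)  # True, since pipeline == "gpu"
```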
diff --git a/rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml b/rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml
new file mode 100644
index 000000000..a7046816e
--- /dev/null
+++ b/rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml
@@ -0,0 +1,179 @@
+name: QbSoftHandGrasp
+
+physics_engine: ${..physics_engine}
+
+# if given, will override the device setting in gym.
+env:
+ env_name: "qbsofthand_grasp"
+ numEnvs: ${resolve_default:4096,${...num_envs}}
+ envSpacing: 1.5
+ episodeLength: 500
+ enableDebugVis: False
+ cameraDebug: True
+ pointCloudDebug: True
+ aggregateMode: 1
+
+ stiffnessScale: 1.0
+ forceLimitScale: 1.0
+ useRelativeControl: False
+ dofSpeedScale: 20.0
+ actionsMovingAverage: 1.0
+ controlFrequencyInv: 1 # 60 Hz
+
+ startPositionNoise: 0.0
+ startRotationNoise: 0.0
+
+ resetPositionNoise: 0.0
+ resetRotationNoise: 0.0
+ resetDofPosRandomInterval: 0.0
+ resetDofVelRandomInterval: 0.0
+
+ distRewardScale: 20
+ transition_scale: 0.5
+ orientation_scale: 0.1
+ rotRewardScale: 1.0
+ rotEps: 0.1
+ actionPenaltyScale: -0.0002
+ reachGoalBonus: 250
+ fallDistance: 0.4
+ fallPenalty: 0.0
+
+ objectType: "pot" # can be block, egg, pen or pot
+ observationType: "full_state" # point_cloud or full_state
+ handAgentIndex: "[[0, 1, 2, 3, 4, 5]]"
+ asymmetric_observations: False
+ successTolerance: 0.1
+ printNumSuccesses: False
+ maxConsecutiveSuccesses: 0
+
+ asset:
+# assetFileName: "mjcf/open_ai_assets/hand/shadow_hand.xml"
+ assetFileNameBlock: "urdf/objects/cube_multicolor.urdf"
+ assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
+ assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"
+
+task:
+ randomize: False
+ randomization_params:
+ frequency: 600 # Define how many simulation steps between generating new randomizations
+ observations:
+ range: [0, .002] # range for the white noise
+ range_correlated: [0, .001 ] # range for correlated noise, refreshed with freq `frequency`
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps
+ schedule_steps: 40000
+ actions:
+ range: [0., .05]
+ range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency`
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
+ schedule_steps: 40000
+ sim_params:
+ gravity:
+ range: [0, 0.4]
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
+ schedule_steps: 40000
+ actor_params:
+ hand:
+ color: True
+ tendon_properties:
+ damping:
+ range: [0.3, 3.0]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ stiffness:
+ range: [0.75, 1.5]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ dof_properties:
+ damping:
+ range: [0.3, 3.0]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ stiffness:
+ range: [0.75, 1.5]
+ operation: "scaling"
+ distribution: "loguniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ lower:
+ range: [0, 0.01]
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ upper:
+ range: [0, 0.01]
+ operation: "additive"
+ distribution: "gaussian"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_body_properties:
+ mass:
+ range: [0.5, 1.5]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_shape_properties:
+ friction:
+ num_buckets: 250
+ range: [0.7, 1.3]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ object:
+ scale:
+ range: [0.95, 1.05]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_body_properties:
+ mass:
+ range: [0.5, 1.5]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+ rigid_shape_properties:
+ friction:
+ num_buckets: 250
+ range: [0.7, 1.3]
+ operation: "scaling"
+ distribution: "uniform"
+ schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
+ schedule_steps: 30000
+
+sim:
+ dt: 0.0166 # 1/60 s
+ substeps: 2
+ up_axis: "z"
+ use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
+ gravity: [ 0.0, 0.0, -9.81 ]
+ physx:
+ num_threads: 4
+ solver_type: 1 # 0: pgs, 1: tgs
+ num_position_iterations: 8
+ num_velocity_iterations: 0
+ contact_offset: 0.002
+ rest_offset: 0.0
+ bounce_threshold_velocity: 0.2
+ max_depenetration_velocity: 1000.0
+ default_buffer_size_multiplier: 5.0
+ flex:
+ num_outer_iterations: 5
+ num_inner_iterations: 20
+ warm_start: 0.8
+ relaxation: 0.75
diff --git a/rofunc/learning/RofuncRL/tasks/__init__.py b/rofunc/learning/RofuncRL/tasks/__init__.py
index 4e7c65657..6265478c2 100644
--- a/rofunc/learning/RofuncRL/tasks/__init__.py
+++ b/rofunc/learning/RofuncRL/tasks/__init__.py
@@ -40,6 +40,8 @@ def __init__(self, env_type="isaacgym"):
from .isaacgymenv.hands.shadow_hand_swing_cup import ShadowHandSwingCupTask
from .isaacgymenv.hands.shadow_hand_switch import ShadowHandSwitchTask
from .isaacgymenv.hands.shadow_hand_two_catch_underarm import ShadowHandTwoCatchUnderarmTask
+ from .isaacgymenv.hands.qbsofthand_grasp import QbSoftHandGraspTask
+ from .isaacgymenv.hands.bi_qbhand_grasp_and_place import BiQbSoftHandGraspAndPlaceTask
self.task_map = {
"Ant": AntTask,
@@ -80,6 +82,8 @@ def __init__(self, env_type="isaacgym"):
"BiShadowHandSwingCup": ShadowHandSwingCupTask,
"BiShadowHandSwitch": ShadowHandSwitchTask,
"BiShadowHandTwoCatchUnderarm": ShadowHandTwoCatchUnderarmTask,
+ "QbSoftHandGrasp": QbSoftHandGraspTask,
+ "BiQbSoftHandGraspAndPlace": BiQbSoftHandGraspAndPlaceTask,
}
elif env_type == "omniisaacgym":
# OmniIsaacGym tasks
diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py
new file mode 100644
index 000000000..cb7b2e193
--- /dev/null
+++ b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py
@@ -0,0 +1,1541 @@
+import os
+import random
+
+from PIL import Image as Im
+from isaacgym import gymapi
+from isaacgym import gymtorch
+
+from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
+from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
+from rofunc.utils.oslab import get_rofunc_path
+
+
+class BiQbSoftHandGraspAndPlaceTask(VecTask):
+ """
+ This class corresponds to the GraspAndPlace task. The environment consists of two qb SoftHands, an
+ object and a bucket; the goal is to pick up the object and place it into the bucket.
+ """
+
+ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture,
+ force_render, agent_index=[[[0, 1, 2, 3, 4, 5]], [[0, 1, 2, 3, 4, 5]]], is_multi_agent=False):
+ self.cfg = cfg
+ self.agent_index = agent_index
+
+ self.is_multi_agent = is_multi_agent
+
+ self.randomize = self.cfg["task"]["randomize"]
+ self.randomization_params = self.cfg["task"]["randomization_params"]
+ self.aggregate_mode = self.cfg["env"]["aggregateMode"]
+
+ self.dist_reward_scale = self.cfg["env"]["distRewardScale"]
+ self.rot_reward_scale = self.cfg["env"]["rotRewardScale"]
+ self.action_penalty_scale = self.cfg["env"]["actionPenaltyScale"]
+ self.success_tolerance = self.cfg["env"]["successTolerance"]
+ self.reach_goal_bonus = self.cfg["env"]["reachGoalBonus"]
+ self.fall_dist = self.cfg["env"]["fallDistance"]
+ self.fall_penalty = self.cfg["env"]["fallPenalty"]
+ self.rot_eps = self.cfg["env"]["rotEps"]
+
+ self.vel_obs_scale = 0.2 # scale factor of velocity based observations
+ self.force_torque_obs_scale = 10.0 # scale factor of force/torque based observations
+
+ self.reset_position_noise = self.cfg["env"]["resetPositionNoise"]
+ self.reset_rotation_noise = self.cfg["env"]["resetRotationNoise"]
+ self.reset_dof_pos_noise = self.cfg["env"]["resetDofPosRandomInterval"]
+ self.reset_dof_vel_noise = self.cfg["env"]["resetDofVelRandomInterval"]
+
+ self.shadow_hand_dof_speed_scale = self.cfg["env"]["dofSpeedScale"]
+ self.use_relative_control = self.cfg["env"]["useRelativeControl"]
+ self.act_moving_average = self.cfg["env"]["actionsMovingAverage"]
+
+ self.debug_viz = self.cfg["env"]["enableDebugVis"]
+
+ self.max_episode_length = self.cfg["env"]["episodeLength"]
+ self.reset_time = self.cfg["env"].get("resetTime", -1.0)
+ self.print_success_stat = self.cfg["env"]["printNumSuccesses"]
+ self.max_consecutive_successes = self.cfg["env"]["maxConsecutiveSuccesses"]
+ self.av_factor = self.cfg["env"].get("averFactor", 0.01)
+ print("Averaging factor: ", self.av_factor)
+
+ self.transition_scale = self.cfg["env"]["transition_scale"]
+ self.orientation_scale = self.cfg["env"]["orientation_scale"]
+
+ control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1)
+ if self.reset_time > 0.0:
+ self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.sim_params.dt)))
+ print("Reset time: ", self.reset_time)
+ print("New episode length: ", self.max_episode_length)
+
+ self.object_type = self.cfg["env"]["objectType"]
+ # assert self.object_type in ["block", "egg", "pen"]
+
+ self.ignore_z = (self.object_type == "pen")
+
+ self.asset_files_dict = {
+ "block": "urdf/objects/cube_multicolor.urdf",
+ "egg": "mjcf/open_ai_assets/hand/egg.xml",
+ "pen": "mjcf/open_ai_assets/hand/pen.xml",
+ # "pot": "mjcf/pot.xml",
+ "pot": "mjcf/bucket/100454/mobility.urdf"
+ }
+
+ if "asset" in self.cfg["env"]:
+ self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock",
+ self.asset_files_dict["block"])
+ self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg",
+ self.asset_files_dict["egg"])
+ self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen",
+ self.asset_files_dict["pen"])
+
+ # can be "openai", "full_no_vel", "full", "full_state"
+ self.obs_type = self.cfg["env"]["observationType"]
+
+ if not (self.obs_type in ["point_cloud", "full_state"]):
+ raise Exception(
+ "Unknown type of observations!\nobservationType should be one of: [point_cloud, full_state]")
+
+ print("Obs type:", self.obs_type)
+
+ self.num_point_cloud_feature_dim = 768
+ self.num_obs_dict = {
+ "point_cloud": 425 + self.num_point_cloud_feature_dim * 3,
+ "point_cloud_for_distill": 425 + self.num_point_cloud_feature_dim * 3,
+ "full_state": 425
+ }
+ self.num_hand_obs = 72 + 95 + 26 + 6
+ self.up_axis = 'z'
+
+ self.right_fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link",
+ "right_qbhand_middle_distal_link", "right_qbhand_ring_distal_link",
+ "right_qbhand_little_distal_link"]
+ self.left_fingertips = ["left_qbhand_thumb_distal_link", "left_qbhand_index_distal_link",
+ "left_qbhand_middle_distal_link", "left_qbhand_ring_distal_link",
+ "left_qbhand_little_distal_link"]
+
+ self.num_fingertips = len(self.right_fingertips) * 2
+
+ self.use_vel_obs = False
+ self.fingertip_obs = True
+ self.asymmetric_obs = self.cfg["env"]["asymmetric_observations"]
+
+ num_states = 0
+ if self.asymmetric_obs:
+ num_states = 211
+
+ self.cfg["env"]["numObservations"] = self.num_obs_dict[self.obs_type]
+ self.cfg["env"]["numStates"] = num_states
+ if self.is_multi_agent:
+ self.num_agents = 2
+ self.cfg["env"]["numActions"] = 15 + 6
+
+ else:
+ self.num_agents = 1
+ self.cfg["env"]["numActions"] = (15 + 6) * 2
+
+ super().__init__(cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, force_render)
+
+ if self.obs_type in ["point_cloud"]:
+ from PIL import Image as Im
+ # from pointnet2_ops import pointnet2_utils
+
+ self.camera_debug = self.cfg["env"].get("cameraDebug", False)
+ self.point_cloud_debug = self.cfg["env"].get("pointCloudDebug", False)
+
+ if self.viewer != None:
+ cam_pos = gymapi.Vec3(10.0, 5.0, 1.0)
+ cam_target = gymapi.Vec3(6.0, 5.0, 0.0)
+ self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
+
+ # get gym GPU state tensors
+ actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim)
+ dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
+ rigid_body_tensor = self.gym.acquire_rigid_body_state_tensor(self.sim)
+
+ sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)
+ self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6)
+
+ dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim)
+ self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs,
+ self.num_hand_dofs * 2 + self.num_object_dofs * 2)
+ self.dof_force_tensor = self.dof_force_tensor[:, :48]
+
+ self.gym.refresh_actor_root_state_tensor(self.sim)
+ self.gym.refresh_dof_state_tensor(self.sim)
+ self.gym.refresh_rigid_body_state_tensor(self.sim)
+
+ # create some wrapper tensors for different slices
+ self.shadow_hand_default_dof_pos = torch.zeros(self.num_hand_dofs, dtype=torch.float, device=self.device)
+ # self.shadow_hand_default_dof_pos = to_torch([0.0, 0.0, -0, -0, -0, -0, -0, -0,
+ # -0, -0, -0, -0, -0, -0, -0, -0,
+ # -0, -0, -0, -1.04, 1.2, 0., 0, -1.57], dtype=torch.float, device=self.device)
+
+ self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
+ self.right_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_hand_dofs]
+ self.right_hand_dof_pos = self.right_hand_dof_state[..., 0]
+ self.right_hand_dof_vel = self.right_hand_dof_state[..., 1]
+
+ self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:,
+ self.num_hand_dofs:self.num_hand_dofs * 2]
+ self.left_hand_dof_pos = self.left_hand_dof_state[..., 0]
+ self.left_hand_dof_vel = self.left_hand_dof_state[..., 1]
+
+ self.object_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:,
+ self.num_hand_dofs * 2:self.num_hand_dofs * 2 + self.num_object_dofs]
+ self.object_dof_pos = self.object_dof_state[..., 0]
+
+ self.rigid_body_states = gymtorch.wrap_tensor(rigid_body_tensor).view(self.num_envs, -1, 13)
+ self.num_bodies = self.rigid_body_states.shape[1]
+
+ self.root_state_tensor = gymtorch.wrap_tensor(actor_root_state_tensor).view(-1, 13)
+ self.hand_positions = self.root_state_tensor[:, 0:3]
+ self.hand_orientations = self.root_state_tensor[:, 3:7]
+ self.hand_linvels = self.root_state_tensor[:, 7:10]
+ self.hand_angvels = self.root_state_tensor[:, 10:13]
+ self.saved_root_tensor = self.root_state_tensor.clone()
+
+ self.num_dofs = self.gym.get_sim_dof_count(self.sim) // self.num_envs
+ self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device)
+ self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device)
+
+ self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs,
+ -1)
+ self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+ self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+ self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+
+ self.reset_goal_buf = self.reset_buf.clone()
+ self.successes = torch.zeros(self.num_envs, dtype=torch.float, device=self.device)
+ self.consecutive_successes = torch.zeros(1, dtype=torch.float, device=self.device)
+
+ self.av_factor = to_torch(self.av_factor, dtype=torch.float, device=self.device)
+ self.apply_forces = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float)
+ self.apply_torque = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float)
+
+ self.total_successes = 0
+ self.total_resets = 0
+
+ def create_sim(self):
+ """
+ Allocate the simulation and rendering devices, define the simulation type, and create the ground plane and the parallel environments.
+ """
+ self.dt = self.sim_params.dt
+ self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, self.up_axis)
+
+ self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
+ self._create_ground_plane()
+ self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
+
+ def _create_ground_plane(self):
+ """
+ Adds ground plane to simulation
+ """
+ plane_params = gymapi.PlaneParams()
+ plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
+ self.gym.add_ground(self.sim, plane_params)
+
+ def _create_envs(self, num_envs, spacing, num_per_row):
+ """
+ Create multiple parallel isaacgym environments
+
+ :param num_envs: The total number of environments
+ :param spacing: Specifies half the side length of the square area occupied by each environment
+ :param num_per_row: Specifies how many environments are placed in each row
+ :return:
+ """
+ lower = gymapi.Vec3(-spacing, -spacing, 0.0)
+ upper = gymapi.Vec3(spacing, spacing, spacing)
+
+ # get rofunc path from rofunc package metadata
+ rofunc_path = get_rofunc_path()
+ asset_root = os.path.join(rofunc_path, "simulator/assets")
+ right_hand_asset_file = "mjcf/qbhand_no_virtual_right.xml"
+ left_hand_asset_file = "mjcf/qbhand_no_virtual_left.xml"
+ table_texture_files = os.path.join(asset_root, "textures/texture_stone_stone_texture_0.jpg")
+ table_texture_handle = self.gym.create_texture_from_file(self.sim, table_texture_files)
+
+ if "asset" in self.cfg["env"]:
+ asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root)
+
+ object_asset_file = self.asset_files_dict[self.object_type]
+
+ # load the right/left hand assets and set up their dof properties
+ asset_options = gymapi.AssetOptions()
+ asset_options.flip_visual_attachments = False
+ asset_options.fix_base_link = False
+ asset_options.collapse_fixed_joints = True
+ asset_options.disable_gravity = True
+ asset_options.thickness = 0.001
+ asset_options.angular_damping = 100
+ asset_options.linear_damping = 100
+ if self.physics_engine == gymapi.SIM_PHYSX:
+ asset_options.use_physx_armature = True
+ asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
+ right_hand_asset = self.gym.load_asset(self.sim, asset_root, right_hand_asset_file, asset_options)
+ left_hand_asset = self.gym.load_asset(self.sim, asset_root, left_hand_asset_file, asset_options)
+
+ # print asset info
+ self.num_shadow_hand_bodies = self.gym.get_asset_rigid_body_count(right_hand_asset)
+ self.num_shadow_hand_shapes = self.gym.get_asset_rigid_shape_count(right_hand_asset)
+ self.num_hand_dofs = self.gym.get_asset_dof_count(right_hand_asset)
+ self.num_shadow_hand_actuators = self.gym.get_asset_actuator_count(right_hand_asset)
+ self.num_shadow_hand_tendons = self.gym.get_asset_tendon_count(right_hand_asset)
+ print("self.num_shadow_hand_bodies: ", self.num_shadow_hand_bodies)
+ print("self.num_shadow_hand_shapes: ", self.num_shadow_hand_shapes)
+ print("self.num_hand_dofs: ", self.num_hand_dofs)
+ print("self.num_shadow_hand_actuators: ", self.num_shadow_hand_actuators)
+ print("self.num_shadow_hand_tendons: ", self.num_shadow_hand_tendons)
+
+ actuated_dof_names = [self.gym.get_asset_actuator_joint_name(right_hand_asset, i) for i in
+ range(self.num_shadow_hand_actuators)]
+ self.actuated_dof_indices = [self.gym.find_asset_dof_index(right_hand_asset, name) for name in
+ actuated_dof_names]
+
+ # set shadow_hand dof properties
+ right_hand_dof_props = self.gym.get_asset_dof_properties(right_hand_asset)
+ left_hand_dof_props = self.gym.get_asset_dof_properties(left_hand_asset)
+
+ self.shadow_hand_dof_lower_limits = []
+ self.shadow_hand_dof_upper_limits = []
+ self.shadow_hand_dof_default_pos = []
+ self.hand_dof_default_vel = []
+ self.sensors = []
+
+ for i in range(self.num_hand_dofs):
+ self.shadow_hand_dof_lower_limits.append(right_hand_dof_props['lower'][i])
+ self.shadow_hand_dof_upper_limits.append(right_hand_dof_props['upper'][i])
+ self.shadow_hand_dof_default_pos.append(0.0)
+ self.hand_dof_default_vel.append(0.0)
+
+ self.actuated_dof_indices = to_torch(self.actuated_dof_indices, dtype=torch.long, device=self.device)
+ self.shadow_hand_dof_lower_limits = to_torch(self.shadow_hand_dof_lower_limits, device=self.device)
+ self.shadow_hand_dof_upper_limits = to_torch(self.shadow_hand_dof_upper_limits, device=self.device)
+ self.shadow_hand_dof_default_pos = to_torch(self.shadow_hand_dof_default_pos, device=self.device)
+ self.hand_dof_default_vel = to_torch(self.hand_dof_default_vel, device=self.device)
+ #
+
+ # load the object, block and goal assets and read the object dof properties
+ object_asset_options = gymapi.AssetOptions()
+ object_asset_options.density = 500
+ object_asset_options.fix_base_link = False
+ # object_asset_options.collapse_fixed_joints = True
+ # object_asset_options.disable_gravity = True
+ object_asset_options.use_mesh_materials = True
+ object_asset_options.mesh_normal_mode = gymapi.COMPUTE_PER_VERTEX
+ object_asset_options.override_com = True
+ object_asset_options.override_inertia = True
+ object_asset_options.vhacd_enabled = True
+ object_asset_options.vhacd_params = gymapi.VhacdParams()
+ object_asset_options.vhacd_params.resolution = 100000
+ object_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
+
+ object_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options)
+ block_asset_file = "urdf/objects/cube_multicolor1.urdf"
+ block_asset = self.gym.load_asset(self.sim, asset_root, block_asset_file, object_asset_options)
+
+ object_asset_options.disable_gravity = True
+ goal_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options)
+
+ self.num_object_bodies = self.gym.get_asset_rigid_body_count(object_asset)
+ self.num_object_shapes = self.gym.get_asset_rigid_shape_count(object_asset)
+
+ # set object dof properties
+ self.num_object_dofs = self.gym.get_asset_dof_count(object_asset)
+ object_dof_props = self.gym.get_asset_dof_properties(object_asset)
+
+ self.object_dof_lower_limits = []
+ self.object_dof_upper_limits = []
+
+ for i in range(self.num_object_dofs):
+ self.object_dof_lower_limits.append(object_dof_props['lower'][i])
+ self.object_dof_upper_limits.append(object_dof_props['upper'][i])
+
+ self.object_dof_lower_limits = to_torch(self.object_dof_lower_limits, device=self.device)
+ self.object_dof_upper_limits = to_torch(self.object_dof_upper_limits, device=self.device)
+ #
+
+ # create the table asset
+ table_dims = gymapi.Vec3(1.0, 1.0, 0.6)
+ asset_options = gymapi.AssetOptions()
+ asset_options.fix_base_link = True
+ asset_options.flip_visual_attachments = True
+ asset_options.collapse_fixed_joints = True
+ asset_options.disable_gravity = True
+ asset_options.thickness = 0.001
+
+ table_asset = self.gym.create_box(self.sim, table_dims.x, table_dims.y, table_dims.z, asset_options)
+ #
+
+ # define the start poses of the hands, object, block, goal and table
+ right_hand_start_pose = gymapi.Transform()
+ right_hand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8)
+ right_hand_start_pose.r = gymapi.Quat().from_euler_zyx(3.14159, 0, 1.57)
+
+ left_hand_start_pose = gymapi.Transform()
+ left_hand_start_pose.p = gymapi.Vec3(0.55, -0.2, 0.8)
+ left_hand_start_pose.r = gymapi.Quat().from_euler_zyx(3.14159, 0, 1.57)
+
+ object_start_pose = gymapi.Transform()
+ object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6)
+ object_start_pose.r = gymapi.Quat().from_euler_zyx(0, 0, 0)
+ pose_dx, pose_dy, pose_dz = -1.0, 0.0, -0.0
+
+ block_start_pose = gymapi.Transform()
+ block_start_pose.p = gymapi.Vec3(0.0, -0.2, 0.6)
+ block_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 1.57, 0)
+ # object_start_pose.p.x = right_hand_start_pose.p.x + pose_dx
+ # object_start_pose.p.y = right_hand_start_pose.p.y + pose_dy
+ # object_start_pose.p.z = right_hand_start_pose.p.z + pose_dz
+
+ if self.object_type == "pen":
+ object_start_pose.p.z = right_hand_start_pose.p.z + 0.02
+
+ self.goal_displacement = gymapi.Vec3(-0., 0.0, 10)
+ self.goal_displacement_tensor = to_torch(
+ [self.goal_displacement.x, self.goal_displacement.y, self.goal_displacement.z], device=self.device)
+ goal_start_pose = gymapi.Transform()
+ goal_start_pose.p = object_start_pose.p + self.goal_displacement
+
+ goal_start_pose.p.z -= 0.0
+
+ table_pose = gymapi.Transform()
+ table_pose.p = gymapi.Vec3(0.0, 0.0, 0.5 * table_dims.z)
+ table_pose.r = gymapi.Quat().from_euler_zyx(-0., 0, 0)
+ #
+
+ # compute aggregate size
+ max_agg_bodies = self.num_shadow_hand_bodies * 2 + 3 * self.num_object_bodies + 1
+ max_agg_shapes = self.num_shadow_hand_shapes * 2 + 3 * self.num_object_shapes + 1
+
+ self.hands = []
+ self.envs = []
+
+ self.object_init_state = []
+ self.hand_start_states = []
+
+ self.right_hand_indices = []
+ self.left_hand_indices = []
+ self.fingertip_indices = []
+ self.object_indices = []
+ self.goal_object_indices = []
+ self.table_indices = []
+ self.block_indices = []
+
+ self.fingertip_handles = [self.gym.find_asset_rigid_body_index(right_hand_asset, name) for name in
+ self.right_fingertips]
+ self.fingertip_left_handles = [self.gym.find_asset_rigid_body_index(left_hand_asset, name) for name
+ in self.left_fingertips]
+
+ # create fingertip force sensors, if needed
+ sensor_pose = gymapi.Transform()
+ for ft_handle in self.fingertip_handles:
+ self.gym.create_asset_force_sensor(right_hand_asset, ft_handle, sensor_pose)
+ for ft_a_handle in self.fingertip_left_handles:
+ self.gym.create_asset_force_sensor(left_hand_asset, ft_a_handle, sensor_pose)
+
+ if self.obs_type in ["point_cloud"]:
+ self.cameras = []
+ self.camera_tensors = []
+ self.camera_view_matrixs = []
+ self.camera_proj_matrixs = []
+
+ self.camera_props = gymapi.CameraProperties()
+ self.camera_props.width = 256
+ self.camera_props.height = 256
+ self.camera_props.enable_tensors = True
+
+ self.env_origin = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float)
+ self.pointCloudDownsampleNum = 768
+ self.camera_u = torch.arange(0, self.camera_props.width, device=self.device)
+ self.camera_v = torch.arange(0, self.camera_props.height, device=self.device)
+
+ self.camera_v2, self.camera_u2 = torch.meshgrid(self.camera_v, self.camera_u, indexing='ij')
+
+ if self.point_cloud_debug:
+ import open3d as o3d
+ from bidexhands.utils.o3dviewer import PointcloudVisualizer
+ self.pointCloudVisualizer = PointcloudVisualizer()
+ self.pointCloudVisualizerInitialized = False
+ self.o3d_pc = o3d.geometry.PointCloud()
+ else:
+ self.pointCloudVisualizer = None
+
+ for i in range(self.num_envs):
+ # create env instance
+ env_ptr = self.gym.create_env(
+ self.sim, lower, upper, num_per_row
+ )
+
+ if self.aggregate_mode >= 1:
+ self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True)
+
+ # add hand - collision filter = -1 to use asset collision filters set in mjcf loader
+ right_hand_actor = self.gym.create_actor(env_ptr, right_hand_asset, right_hand_start_pose, "right_hand", i,
+ -1, 0)
+ left_hand_actor = self.gym.create_actor(env_ptr, left_hand_asset,
+ left_hand_start_pose, "left_hand", i, -1, 0)
+
+ self.hand_start_states.append(
+ [right_hand_start_pose.p.x, right_hand_start_pose.p.y, right_hand_start_pose.p.z,
+ right_hand_start_pose.r.x, right_hand_start_pose.r.y, right_hand_start_pose.r.z,
+ right_hand_start_pose.r.w,
+ 0, 0, 0, 0, 0, 0])
+
+ self.gym.set_actor_dof_properties(env_ptr, right_hand_actor, right_hand_dof_props)
+ right_hand_idx = self.gym.get_actor_index(env_ptr, right_hand_actor, gymapi.DOMAIN_SIM)
+ self.right_hand_indices.append(right_hand_idx)
+ self.gym.set_actor_dof_properties(env_ptr, left_hand_actor, left_hand_dof_props)
+ left_right_hand_idx = self.gym.get_actor_index(env_ptr, left_hand_actor, gymapi.DOMAIN_SIM)
+ self.left_hand_indices.append(left_right_hand_idx)
+
+ # randomize the visual colors of the hand rigid-body groups per agent
+ num_bodies = self.gym.get_actor_rigid_body_count(env_ptr, right_hand_actor)
+ hand_rigid_body_index = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19, 20],
+ [21, 22, 23, 24, 25]]
+
+ for n in self.agent_index[0]:
+ colorx = random.uniform(0, 1)
+ colory = random.uniform(0, 1)
+ colorz = random.uniform(0, 1)
+ for m in n:
+ for o in hand_rigid_body_index[m]:
+ self.gym.set_rigid_body_color(env_ptr, right_hand_actor, o, gymapi.MESH_VISUAL,
+ gymapi.Vec3(colorx, colory, colorz))
+ for n in self.agent_index[1]:
+ colorx = random.uniform(0, 1)
+ colory = random.uniform(0, 1)
+ colorz = random.uniform(0, 1)
+ for m in n:
+ for o in hand_rigid_body_index[m]:
+ self.gym.set_rigid_body_color(env_ptr, left_hand_actor, o, gymapi.MESH_VISUAL,
+ gymapi.Vec3(colorx, colory, colorz))
+ # gym.set_rigid_body_texture(env, actor_handles[-1], n, gymapi.MESH_VISUAL,
+ # loaded_texture_handle_list[random.randint(0, len(loaded_texture_handle_list)-1)])
+ #
+
+ # create fingertip force-torque sensors
+ self.gym.enable_actor_dof_force_sensors(env_ptr, right_hand_actor)
+ self.gym.enable_actor_dof_force_sensors(env_ptr, left_hand_actor)
+
+ # add object
+ object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0)
+ self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z,
+ object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z,
+ object_start_pose.r.w,
+ 0, 0, 0, 0, 0, 0])
+ object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM)
+ self.object_indices.append(object_idx)
+ # self.gym.set_actor_scale(env_ptr, object_handle, 0.3)
+
+ block_handle = self.gym.create_actor(env_ptr, block_asset, block_start_pose, "block", i, 0, 0)
+ block_idx = self.gym.get_actor_index(env_ptr, block_handle, gymapi.DOMAIN_SIM)
+ self.block_indices.append(block_idx)
+
+ # add goal object
+ goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs,
+ 0, 0)
+ goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM)
+ self.goal_object_indices.append(goal_object_idx)
+ # self.gym.set_actor_scale(env_ptr, goal_handle, 0.3)
+
+ # add table
+ table_handle = self.gym.create_actor(env_ptr, table_asset, table_pose, "table", i, -1, 0)
+ self.gym.set_rigid_body_texture(env_ptr, table_handle, 0, gymapi.MESH_VISUAL, table_texture_handle)
+ table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM)
+ self.table_indices.append(table_idx)
+
+ if self.object_type != "block":
+ self.gym.set_rigid_body_color(
+ env_ptr, object_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98))
+ self.gym.set_rigid_body_color(
+ env_ptr, goal_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98))
+
+ if self.obs_type in ["point_cloud"]:
+ camera_handle = self.gym.create_camera_sensor(env_ptr, self.camera_props)
+ self.gym.set_camera_location(camera_handle, env_ptr, gymapi.Vec3(0.25, -0., 1.0),
+ gymapi.Vec3(-0.24, -0., 0))
+ camera_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, env_ptr, camera_handle,
+ gymapi.IMAGE_DEPTH)
+ torch_cam_tensor = gymtorch.wrap_tensor(camera_tensor)
+ cam_vinv = torch.inverse(
+ (torch.tensor(self.gym.get_camera_view_matrix(self.sim, env_ptr, camera_handle)))).to(self.device)
+ cam_proj = torch.tensor(self.gym.get_camera_proj_matrix(self.sim, env_ptr, camera_handle),
+ device=self.device)
+
+ origin = self.gym.get_env_origin(env_ptr)
+ self.env_origin[i][0] = origin.x
+ self.env_origin[i][1] = origin.y
+ self.env_origin[i][2] = origin.z
+ self.camera_tensors.append(torch_cam_tensor)
+ self.camera_view_matrixs.append(cam_vinv)
+ self.camera_proj_matrixs.append(cam_proj)
+ self.cameras.append(camera_handle)
+
+ if self.aggregate_mode > 0:
+ self.gym.end_aggregate(env_ptr)
+
+ self.envs.append(env_ptr)
+ self.hands.append(right_hand_actor)
+
+ self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view(
+ self.num_envs, 13)
+ self.goal_states = self.object_init_state.clone()
+ # self.goal_pose = self.goal_states[:, 0:7]
+ # self.goal_pos = self.goal_states[:, 0:3]
+ # self.goal_rot = self.goal_states[:, 3:7]
+ # self.goal_states[:, self.up_axis_idx] -= 0.04
+ self.goal_init_state = self.goal_states.clone()
+ self.hand_start_states = to_torch(self.hand_start_states, device=self.device).view(self.num_envs, 13)
+
+ self.fingertip_handles = to_torch(self.fingertip_handles, dtype=torch.long, device=self.device)
+ self.fingertip_left_handles = to_torch(self.fingertip_left_handles, dtype=torch.long, device=self.device)
+
+ self.right_hand_indices = to_torch(self.right_hand_indices, dtype=torch.long, device=self.device)
+ self.left_hand_indices = to_torch(self.left_hand_indices, dtype=torch.long, device=self.device)
+
+ self.object_indices = to_torch(self.object_indices, dtype=torch.long, device=self.device)
+ self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device)
+ self.table_indices = to_torch(self.table_indices, dtype=torch.long, device=self.device)
+ self.block_indices = to_torch(self.block_indices, dtype=torch.long, device=self.device)
+
+ def compute_reward(self, actions):
+ """
+ Compute the reward of all environments. The core function is compute_hand_reward(
+ self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes,
+ self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.block_right_handle_pos, self.block_left_handle_pos,
+ self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos,
+ self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos, self.left_hand_th_pos,
+ self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale,
+ self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty,
+ self.max_consecutive_successes, self.av_factor, (self.object_type == "pen")
+ )
+ , which is introduced in detail below
+
+ Args:
+ actions (tensor): Actions of the agents in all environments
+ """
+ self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[
+ :], self.consecutive_successes[
+ :] = compute_hand_reward(
+ self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes,
+ self.consecutive_successes,
+ self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot,
+ self.block_right_handle_pos, self.block_left_handle_pos,
+ self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos,
+ self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos,
+ self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos,
+ self.left_hand_th_pos,
+ self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale,
+ self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty,
+ self.max_consecutive_successes, self.av_factor, (self.object_type == "pen")
+ )
+
+ self.extras['successes'] = self.successes
+ self.extras['consecutive_successes'] = self.consecutive_successes
+
+ if self.print_success_stat:
+ self.total_resets = self.total_resets + self.reset_buf.sum()
+ direct_average_successes = self.total_successes + self.successes.sum()
+ self.total_successes = self.total_successes + (self.successes * self.reset_buf).sum()
+
+ # The direct average shows the overall result more quickly, but slightly undershoots long term
+ # policy performance.
+ print("Direct average consecutive successes = {:.1f}".format(
+ direct_average_successes / (self.total_resets + self.num_envs)))
+ if self.total_resets > 0:
+ print("Post-Reset average consecutive successes = {:.1f}".format(
+ self.total_successes / self.total_resets))
+
+ def compute_observations(self):
+ """
+ Compute the observations of all environments. The core function is self.compute_full_state(),
+ which is introduced in detail below
+
+ """
+ self.gym.refresh_dof_state_tensor(self.sim)
+ self.gym.refresh_actor_root_state_tensor(self.sim)
+ self.gym.refresh_rigid_body_state_tensor(self.sim)
+ self.gym.refresh_force_sensor_tensor(self.sim)
+ self.gym.refresh_dof_force_tensor(self.sim)
+
+ if self.obs_type in ["point_cloud"]:
+ self.gym.render_all_camera_sensors(self.sim)
+ self.gym.start_access_image_tensors(self.sim)
+
+ self.object_pose = self.root_state_tensor[self.object_indices, 0:7]
+ self.object_pos = self.root_state_tensor[self.object_indices, 0:3]
+ self.object_rot = self.root_state_tensor[self.object_indices, 3:7]
+ self.object_linvel = self.root_state_tensor[self.object_indices, 7:10]
+ self.object_angvel = self.root_state_tensor[self.object_indices, 10:13]
+
+ self.block_right_handle_pos = self.rigid_body_states[:, 16 * 2 + 1, 0:3] # rigid_body_states: (num_envs, num_bodies, 13)
+ self.block_right_handle_rot = self.rigid_body_states[:, 16 * 2 + 1, 3:7]
+ self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
+ to_torch([0, 1, 0],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.)
+ self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
+ to_torch([1, 0, 0],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
+ self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
+ to_torch([0, 0, 1],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
+
+ self.block_left_handle_pos = self.rigid_body_states[:, 16 * 2 + 2, 0:3]
+ self.block_left_handle_rot = self.rigid_body_states[:, 16 * 2 + 2, 3:7]
+ self.block_left_handle_pos = self.block_left_handle_pos + quat_apply(self.block_left_handle_rot,
+ to_torch([0, 1, 0],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
+ self.block_left_handle_pos = self.block_left_handle_pos + quat_apply(self.block_left_handle_rot,
+ to_torch([1, 0, 0],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
+ self.block_left_handle_pos = self.block_left_handle_pos + quat_apply(self.block_left_handle_rot,
+ to_torch([0, 0, 1],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
+
+ self.left_hand_pos = self.rigid_body_states[:, 3 + 16, 0:3]
+ self.left_hand_rot = self.rigid_body_states[:, 3 + 16, 3:7]
+ self.left_hand_pos = self.left_hand_pos + quat_apply(self.left_hand_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.08)
+ self.left_hand_pos = self.left_hand_pos + quat_apply(self.left_hand_rot,
+ to_torch([0, 1, 0], device=self.device).repeat(
+ self.num_envs, 1) * -0.02)
+
+ self.right_hand_pos = self.rigid_body_states[:, 3, 0:3]
+ self.right_hand_rot = self.rigid_body_states[:, 3, 3:7]
+ self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.08)
+ self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
+ to_torch([0, 1, 0], device=self.device).repeat(
+ self.num_envs, 1) * -0.02)
+
+ # right hand fingertip positions, offset 2 cm along each fingertip's local z-axis
+ self.right_hand_th_pos = self.rigid_body_states[:, 3, 0:3]
+ self.right_hand_th_rot = self.rigid_body_states[:, 3, 3:7]
+ self.right_hand_th_pos = self.right_hand_th_pos + quat_apply(self.right_hand_th_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_ff_pos = self.rigid_body_states[:, 6, 0:3]
+ self.right_hand_ff_rot = self.rigid_body_states[:, 6, 3:7]
+ self.right_hand_ff_pos = self.right_hand_ff_pos + quat_apply(self.right_hand_ff_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_mf_pos = self.rigid_body_states[:, 9, 0:3]
+ self.right_hand_mf_rot = self.rigid_body_states[:, 9, 3:7]
+ self.right_hand_mf_pos = self.right_hand_mf_pos + quat_apply(self.right_hand_mf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_rf_pos = self.rigid_body_states[:, 12, 0:3]
+ self.right_hand_rf_rot = self.rigid_body_states[:, 12, 3:7]
+ self.right_hand_rf_pos = self.right_hand_rf_pos + quat_apply(self.right_hand_rf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_lf_pos = self.rigid_body_states[:, 15, 0:3]
+ self.right_hand_lf_rot = self.rigid_body_states[:, 15, 3:7]
+ self.right_hand_lf_pos = self.right_hand_lf_pos + quat_apply(self.right_hand_lf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+
+ #
+
+ # left hand fingertip positions, offset 2 cm along each fingertip's local z-axis
+ self.left_hand_th_pos = self.rigid_body_states[:, 3 + 16, 0:3]
+ self.left_hand_th_rot = self.rigid_body_states[:, 3 + 16, 3:7]
+ self.left_hand_th_pos = self.left_hand_th_pos + quat_apply(self.left_hand_th_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.left_hand_ff_pos = self.rigid_body_states[:, 6 + 16, 0:3]
+ self.left_hand_ff_rot = self.rigid_body_states[:, 6 + 16, 3:7]
+ self.left_hand_ff_pos = self.left_hand_ff_pos + quat_apply(self.left_hand_ff_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.left_hand_mf_pos = self.rigid_body_states[:, 9 + 16, 0:3]
+ self.left_hand_mf_rot = self.rigid_body_states[:, 9 + 16, 3:7]
+ self.left_hand_mf_pos = self.left_hand_mf_pos + quat_apply(self.left_hand_mf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.left_hand_rf_pos = self.rigid_body_states[:, 12 + 16, 0:3]
+ self.left_hand_rf_rot = self.rigid_body_states[:, 12 + 16, 3:7]
+ self.left_hand_rf_pos = self.left_hand_rf_pos + quat_apply(self.left_hand_rf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.left_hand_lf_pos = self.rigid_body_states[:, 15 + 16, 0:3]
+ self.left_hand_lf_rot = self.rigid_body_states[:, 15 + 16, 3:7]
+ self.left_hand_lf_pos = self.left_hand_lf_pos + quat_apply(self.left_hand_lf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+
+ #
+
+ self.goal_pos = to_torch([-0.3, 0, 0.6], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+ self.goal_rot = self.goal_states[:, 3:7]
+
+ self.right_fingertip_state = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:13]
+ self.right_fingertip_pos = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:3]
+ self.left_fingertip_state = self.rigid_body_states[:, self.fingertip_left_handles][:, :, 0:13]
+ self.left_fingertip_pos = self.rigid_body_states[:, self.fingertip_left_handles][:, :, 0:3]
+
+ if self.obs_type == "full_state":
+ self.compute_full_state()
+ elif self.obs_type == "point_cloud":
+ self.compute_point_cloud_observation()
+
+ if self.asymmetric_obs:
+ self.compute_full_state(True)
+
+ def compute_full_state(self, asymm_obs=False):
+ """
+ Compute the observations of all environments. The observation is composed of three parts:
+ the state values of the left and right hands, and the information of the object and the target.
+ The state values of the left and right hands are the same for each task, including hand
+ joint and finger positions, velocities, and force information. The detailed 425-dimensional
+ observation space is shown below:
+
+ Index Description
+ 0 - 14 right shadow hand dof position
+ 15 - 29 right shadow hand dof velocity
+ 30 - 44 right shadow hand dof force
+ 45 - 109 right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
+ 110 - 139 right shadow hand fingertip force, torque (5 x 6)
+ 140 - 142 right shadow hand base position
+ 143 - 145 right shadow hand base rotation
+ 146 - 171 right shadow hand actions
+ 199 - 222 left shadow hand dof position
+ 223 - 246 left shadow hand dof velocity
+ 247 - 270 left shadow hand dof force
+ 271 - 335 left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
+ 336 - 365 left shadow hand fingertip force, torque (5 x 6)
+ 366 - 368 left shadow hand base position
+ 369 - 371 left shadow hand base rotation
+ 372 - 397 left shadow hand actions
+ 398 - 404 object pose
+ 405 - 407 object linear velocity
+ 408 - 410 object angle velocity
+ 411 - 413 block right handle position
+ 414 - 417 block right handle rotation
+ 418 - 420 block left handle position
+ 421 - 424 block left handle rotation
+ """
+ num_ft_states = 13 * int(self.num_fingertips / 2) # 65
+ num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30
+
+ self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.right_hand_dof_pos,
+ self.shadow_hand_dof_lower_limits,
+ self.shadow_hand_dof_upper_limits)
+ self.obs_buf[:, self.num_hand_dofs:2 * self.num_hand_dofs] = self.vel_obs_scale * self.right_hand_dof_vel
+ self.obs_buf[:, 2 * self.num_hand_dofs:3 * self.num_hand_dofs] \
+ = self.force_torque_obs_scale * self.dof_force_tensor[:, :24]
+
+ fingertip_obs_start = 72 # fixed offset; slots 0-71 are reserved for the hand dof pos/vel/force block
+ self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.right_fingertip_state.reshape(
+ self.num_envs, num_ft_states)
+ self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states +
+ num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
+ :,
+ :30]
+
+ right_hand_pose_start = fingertip_obs_start + 95
+ self.obs_buf[:, right_hand_pose_start:right_hand_pose_start + 3] = self.right_hand_pos
+ self.obs_buf[:, right_hand_pose_start + 3:right_hand_pose_start + 4] = \
+ get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[0].unsqueeze(-1)
+ self.obs_buf[:, right_hand_pose_start + 4:right_hand_pose_start + 5] = \
+ get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[1].unsqueeze(-1)
+ self.obs_buf[:, right_hand_pose_start + 5:right_hand_pose_start + 6] = \
+ get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[2].unsqueeze(-1)
+
+ right_action_obs_start = right_hand_pose_start + 6
+ self.obs_buf[:, right_action_obs_start:right_action_obs_start + 26] = self.actions[:, :26]
+
+ # left_hand
+ left_hand_start = right_action_obs_start + 26
+ self.obs_buf[:, left_hand_start:self.num_hand_dofs + left_hand_start] = unscale(
+ self.left_hand_dof_pos,
+ self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits)
+ self.obs_buf[:,
+ self.num_hand_dofs + left_hand_start:2 * self.num_hand_dofs + left_hand_start] = self.vel_obs_scale * self.left_hand_dof_vel
+ self.obs_buf[:,
+ 2 * self.num_hand_dofs + left_hand_start:3 * self.num_hand_dofs + left_hand_start] = self.force_torque_obs_scale * self.dof_force_tensor[
+ :,
+ 24:48]
+
+ left_fingertip_obs_start = left_hand_start + 72
+ self.obs_buf[:,
+ left_fingertip_obs_start:left_fingertip_obs_start + num_ft_states] = self.left_fingertip_state.reshape(
+ self.num_envs, num_ft_states)
+ self.obs_buf[:, left_fingertip_obs_start + num_ft_states:left_fingertip_obs_start + num_ft_states +
+ num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
+ :,
+ 30:]
+
+ left_hand_pose_start = left_fingertip_obs_start + 95
+ self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos
+ self.obs_buf[:, left_hand_pose_start + 3:left_hand_pose_start + 4] = \
+ get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[0].unsqueeze(-1)
+ self.obs_buf[:, left_hand_pose_start + 4:left_hand_pose_start + 5] = \
+ get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[1].unsqueeze(-1)
+ self.obs_buf[:, left_hand_pose_start + 5:left_hand_pose_start + 6] = \
+ get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[2].unsqueeze(-1)
+
+ left_right_action_obs_start = left_hand_pose_start + 6
+ self.obs_buf[:, left_right_action_obs_start:left_right_action_obs_start + 26] = self.actions[:, 26:]
+
+ obj_obs_start = left_right_action_obs_start + 26 # 398
+ self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose
+ self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
+ self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
+ self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
+ self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot
+ self.obs_buf[:, obj_obs_start + 20:obj_obs_start + 23] = self.block_left_handle_pos
+ self.obs_buf[:, obj_obs_start + 23:obj_obs_start + 27] = self.block_left_handle_rot
+ # goal_obs_start = obj_obs_start + 13 # 157 = 144 + 13
+ # self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose
+ # self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
+
+ def compute_point_cloud_observation(self, collect_demonstration=False):
+ """
+ Compute the observations of all environments. The observation is composed of three parts:
+ the state values of the left and right hands, and the information of the object and the target.
+ The state values of the left and right hands are the same for each task, including hand
+ joint and finger positions, velocities, and force information. The detailed 425-dimensional
+ observation space is shown below:
+
+ Index Description
+ 0 - 23 right shadow hand dof position
+ 24 - 47 right shadow hand dof velocity
+ 48 - 71 right shadow hand dof force
+ 72 - 136 right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
+ 137 - 166 right shadow hand fingertip force, torque (5 x 6)
+ 167 - 169 right shadow hand base position
+ 170 - 172 right shadow hand base rotation
+ 173 - 198 right shadow hand actions
+ 199 - 222 left shadow hand dof position
+ 223 - 246 left shadow hand dof velocity
+ 247 - 270 left shadow hand dof force
+ 271 - 335 left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
+ 336 - 365 left shadow hand fingertip force, torque (5 x 6)
+ 366 - 368 left shadow hand base position
+ 369 - 371 left shadow hand base rotation
+ 372 - 397 left shadow hand actions
+ 398 - 404 object pose
+ 405 - 407 object linear velocity
+ 408 - 410 object angle velocity
+ 411 - 413 block right handle position
+ 414 - 417 block right handle rotation
+ 418 - 420 block left handle position
+ 421 - 424 block left handle rotation
+ """
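+        # Indices 0-424 follow the layout documented above; the flattened point cloud
+        # (pointCloudDownsampleNum * 3 values) is appended from index 425 onwards.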
+ num_ft_states = 13 * int(self.num_fingertips / 2) # 65
+ num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30
+
+ self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.right_hand_dof_pos,
+ self.shadow_hand_dof_lower_limits,
+ self.shadow_hand_dof_upper_limits)
+ self.obs_buf[:,
+ self.num_hand_dofs:2 * self.num_hand_dofs] = self.vel_obs_scale * self.right_hand_dof_vel
+ self.obs_buf[:,
+ 2 * self.num_hand_dofs:3 * self.num_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
+ :, :24]
+
+        fingertip_obs_start = 72
+ self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.right_fingertip_state.reshape(
+ self.num_envs, num_ft_states)
+ self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states +
+ num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
+ :,
+ :30]
+
+ right_hand_pose_start = fingertip_obs_start + 95
+ self.obs_buf[:, right_hand_pose_start:right_hand_pose_start + 3] = self.right_hand_pos
+ self.obs_buf[:, right_hand_pose_start + 3:right_hand_pose_start + 4] = \
+ get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[0].unsqueeze(-1)
+ self.obs_buf[:, right_hand_pose_start + 4:right_hand_pose_start + 5] = \
+ get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[1].unsqueeze(-1)
+ self.obs_buf[:, right_hand_pose_start + 5:right_hand_pose_start + 6] = \
+ get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[2].unsqueeze(-1)
+
+ right_action_obs_start = right_hand_pose_start + 6
+ self.obs_buf[:, right_action_obs_start:right_action_obs_start + 26] = self.actions[:, :26]
+
+ # left_hand
+ left_hand_start = right_action_obs_start + 26
+ self.obs_buf[:, left_hand_start:self.num_hand_dofs + left_hand_start] = unscale(
+ self.left_hand_dof_pos,
+ self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits)
+ self.obs_buf[:,
+ self.num_hand_dofs + left_hand_start:2 * self.num_hand_dofs + left_hand_start] = self.vel_obs_scale * self.left_hand_dof_vel
+ self.obs_buf[:,
+ 2 * self.num_hand_dofs + left_hand_start:3 * self.num_hand_dofs + left_hand_start] = self.force_torque_obs_scale * self.dof_force_tensor[
+ :,
+ 24:48]
+
+ left_fingertip_obs_start = left_hand_start + 72
+ self.obs_buf[:,
+ left_fingertip_obs_start:left_fingertip_obs_start + num_ft_states] = self.left_fingertip_state.reshape(
+ self.num_envs, num_ft_states)
+ self.obs_buf[:, left_fingertip_obs_start + num_ft_states:left_fingertip_obs_start + num_ft_states +
+ num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
+ :,
+ 30:]
+
+ left_hand_pose_start = left_fingertip_obs_start + 95
+ self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos
+ self.obs_buf[:, left_hand_pose_start + 3:left_hand_pose_start + 4] = \
+ get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[0].unsqueeze(-1)
+ self.obs_buf[:, left_hand_pose_start + 4:left_hand_pose_start + 5] = \
+ get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[1].unsqueeze(-1)
+ self.obs_buf[:, left_hand_pose_start + 5:left_hand_pose_start + 6] = \
+ get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[2].unsqueeze(-1)
+
+ left_right_action_obs_start = left_hand_pose_start + 6
+ self.obs_buf[:, left_right_action_obs_start:left_right_action_obs_start + 26] = self.actions[:, 26:]
+
+        obj_obs_start = left_right_action_obs_start + 26  # 398
+ self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose
+ self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
+ self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
+ self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
+ self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot
+ self.obs_buf[:, obj_obs_start + 20:obj_obs_start + 23] = self.block_left_handle_pos
+ self.obs_buf[:, obj_obs_start + 23:obj_obs_start + 27] = self.block_left_handle_rot
+ # goal_obs_start = obj_obs_start + 13 # 157 = 144 + 13
+ # self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose
+ # self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
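+        # Collect a depth-camera point cloud for every environment, downsample it to a fixed size,
+        # shift it into the env-local frame, and append it to the observation buffer below.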
+ point_clouds = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device)
+
+ if self.camera_debug:
+ import matplotlib.pyplot as plt
+ self.camera_rgba_debug_fig = plt.figure("CAMERA_RGBD_DEBUG")
+ camera_rgba_image = self.camera_visulization(is_depth_image=False)
+ plt.imshow(camera_rgba_image)
+ plt.pause(1e-9)
+
+ for i in range(self.num_envs):
+ # Here is an example. In practice, it's better not to convert tensor from GPU to CPU
+ points = depth_image_to_point_cloud_GPU(self.camera_tensors[i], self.camera_view_matrixs[i],
+ self.camera_proj_matrixs[i], self.camera_u2, self.camera_v2,
+ self.camera_props.width, self.camera_props.height, 10, self.device)
+
+ if points.shape[0] > 0:
+ selected_points = self.sample_points(points, sample_num=self.pointCloudDownsampleNum,
+ sample_mathed='random')
+ else:
+                selected_points = torch.zeros((self.pointCloudDownsampleNum, 3), device=self.device)
+
+ point_clouds[i] = selected_points
+
+        if self.pointCloudVisualizer is not None:
+ import open3d as o3d
+ points = point_clouds[0, :, :3].cpu().numpy()
+ # colors = plt.get_cmap()(point_clouds[0, :, 3].cpu().numpy())
+ self.o3d_pc.points = o3d.utility.Vector3dVector(points)
+ # self.o3d_pc.colors = o3d.utility.Vector3dVector(colors[..., :3])
+
+            if not self.pointCloudVisualizerInitialized:
+ self.pointCloudVisualizer.add_geometry(self.o3d_pc)
+ self.pointCloudVisualizerInitialized = True
+ else:
+ self.pointCloudVisualizer.update(self.o3d_pc)
+
+ self.gym.end_access_image_tensors(self.sim)
+ point_clouds -= self.env_origin.view(self.num_envs, 1, 3)
+
+ point_clouds_start = obj_obs_start + 27
+ self.obs_buf[:, point_clouds_start:].copy_(point_clouds.view(self.num_envs, self.pointCloudDownsampleNum * 3))
+
+ def reset_target_pose(self, env_ids, apply_reset=False):
+ """
+ Reset and randomize the goal pose
+
+ Args:
+            env_ids (tensor): The indices of the environments whose goal poses need to be reset
+
+ apply_reset (bool): Whether to reset the goal directly here, usually used
+ when the same task wants to complete multiple goals
+
+ """
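+        # Note: the sampled rotation is currently unused (the assignment below is commented out),
+        # and the goal is moved 10 m above its initial state, which parks the goal marker far
+        # above the manipulation workspace.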
+ rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device)
+
+ new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids],
+ self.y_unit_tensor[env_ids])
+
+ self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3]
+ # self.goal_states[env_ids, 1] -= 0.25
+ self.goal_states[env_ids, 2] += 10.0
+
+ # self.goal_states[env_ids, 3:7] = new_rot
+ self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids,
+ 0:3] + self.goal_displacement_tensor
+ self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7]
+ self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like(
+ self.root_state_tensor[self.goal_object_indices[env_ids], 7:13])
+
+ if apply_reset:
+ goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32)
+ self.gym.set_actor_root_state_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.root_state_tensor),
+ gymtorch.unwrap_tensor(goal_object_indices), len(env_ids))
+ self.reset_goal_buf[env_ids] = 0
+
+ def reset_idx(self, env_ids, goal_env_ids):
+ """
+ Reset and randomize the environment
+
+ Args:
+            env_ids (tensor): The indices of the environments that need to be reset
+
+            goal_env_ids (tensor): The indices of the environments in which only the goal needs to be reset
+
+ """
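+        # Reset sequence: re-randomize domain parameters (optional), redraw the goal, restore the
+        # object root state, re-seed both hands' DOF positions and velocities with bounded noise,
+        # then push the updated DOF targets, DOF states and root states back to the simulator.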
+ # randomization can happen only at reset time, since it can reset actor positions on GPU
+ if self.randomize:
+ self.apply_randomizations(self.randomization_params)
+
+ # generate random values
+ rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), self.num_hand_dofs * 2 + 5), device=self.device)
+
+ # randomize start object poses
+ self.reset_target_pose(env_ids)
+
+ # reset object
+ self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone()
+
+ new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids],
+ self.y_unit_tensor[env_ids])
+ if self.object_type == "pen":
+ rand_angle_y = torch.tensor(0.3)
+ new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y,
+ self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids],
+ self.z_unit_tensor[env_ids])
+
+ # self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot
+ self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like(
+ self.root_state_tensor[self.object_indices[env_ids], 7:13])
+
+ object_indices = torch.unique(torch.cat([self.object_indices[env_ids],
+ self.goal_object_indices[env_ids],
+ self.goal_object_indices[goal_env_ids]]).to(torch.int32))
+ # self.gym.set_actor_root_state_tensor_indexed(self.sim,
+ # gymtorch.unwrap_tensor(self.root_state_tensor),
+ # gymtorch.unwrap_tensor(object_indices), len(object_indices))
+
+ # reset shadow hand
+ delta_max = self.shadow_hand_dof_upper_limits - self.shadow_hand_dof_default_pos
+ delta_min = self.shadow_hand_dof_lower_limits - self.shadow_hand_dof_default_pos
+ rand_delta = delta_min + (delta_max - delta_min) * rand_floats[:, 5:5 + self.num_hand_dofs]
+
+ pos = self.shadow_hand_default_dof_pos + self.reset_dof_pos_noise * rand_delta
+
+ self.right_hand_dof_pos[env_ids, :] = pos
+ self.left_hand_dof_pos[env_ids, :] = pos
+
+ self.right_hand_dof_vel[env_ids, :] = self.hand_dof_default_vel + \
+ self.reset_dof_vel_noise * rand_floats[:,
+ 5 + self.num_hand_dofs:5 + self.num_hand_dofs * 2]
+
+ self.left_hand_dof_vel[env_ids, :] = self.hand_dof_default_vel + \
+ self.reset_dof_vel_noise * rand_floats[:,
+ 5 + self.num_hand_dofs:5 + self.num_hand_dofs * 2]
+
+ self.prev_targets[env_ids, :self.num_hand_dofs] = pos
+ self.cur_targets[env_ids, :self.num_hand_dofs] = pos
+
+ self.prev_targets[env_ids, self.num_hand_dofs:self.num_hand_dofs * 2] = pos
+ self.cur_targets[env_ids, self.num_hand_dofs:self.num_hand_dofs * 2] = pos
+
+ right_hand_indices = self.right_hand_indices[env_ids].to(torch.int32)
+ left_hand_indices = self.left_hand_indices[env_ids].to(torch.int32)
+
+ all_hand_indices = torch.unique(torch.cat([right_hand_indices,
+ left_hand_indices]).to(torch.int32))
+
+ self.gym.set_dof_position_target_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.prev_targets),
+ gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
+
+ all_indices = torch.unique(torch.cat([all_hand_indices,
+ self.object_indices[env_ids],
+ self.table_indices[env_ids],
+ self.block_indices[env_ids]]).to(torch.int32))
+
+ self.hand_positions[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 0:3]
+ self.hand_orientations[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 3:7]
+
+ self.gym.set_dof_state_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.dof_state),
+ gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
+
+ self.gym.set_actor_root_state_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.root_state_tensor),
+ gymtorch.unwrap_tensor(all_indices), len(all_indices))
+ self.progress_buf[env_ids] = 0
+ self.reset_buf[env_ids] = 0
+ self.successes[env_ids] = 0
+
+ def pre_physics_step(self, actions):
+ """
+        The pre-processing of the physics step. Determine whether the environments need to be reset,
+        and calculate the next movement of the hands from the given actions. The 42-dimensional
+        action space is shown below:
+
+ Index Description
+ 0 - 2 right shadow hand base translation
+ 3 - 5 right shadow hand base rotation
+ 6 - 20 right shadow hand actuated joint
+ 21 - 23 left shadow hand base translation
+ 24 - 26 left shadow hand base rotation
+ 27 - 41 left shadow hand actuated joint
+
+ Args:
+            actions (tensor): Actions of the agents in all environments
+ """
+ env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
+ goal_env_ids = self.reset_goal_buf.nonzero(as_tuple=False).squeeze(-1)
+
+ # if only goals need reset, then call set API
+ if len(goal_env_ids) > 0 and len(env_ids) == 0:
+ self.reset_target_pose(goal_env_ids, apply_reset=True)
+ # if goals need reset in addition to other envs, call set API in reset()
+ elif len(goal_env_ids) > 0:
+ self.reset_target_pose(goal_env_ids)
+
+ if len(env_ids) > 0:
+ self.reset_idx(env_ids, goal_env_ids)
+
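+        # Action layout (per the docstring above): for the right hand, indices 0-2 / 3-5 command the
+        # base translation / rotation (applied further below as forces and torques on the hand base)
+        # and 6-20 are the joint targets; indices 21-41 repeat the same layout for the left hand.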
+ self.actions = actions.clone().to(self.device)
+ if self.use_relative_control:
+ targets = self.prev_targets[:,
+ self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions
+ self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets,
+ self.shadow_hand_dof_lower_limits[
+ self.actuated_dof_indices],
+ self.shadow_hand_dof_upper_limits[
+ self.actuated_dof_indices])
+ else:
+ self.cur_targets[:, self.actuated_dof_indices] = scale(self.actions[:, 6:21],
+ self.shadow_hand_dof_lower_limits[
+ self.actuated_dof_indices],
+ self.shadow_hand_dof_upper_limits[
+ self.actuated_dof_indices])
+ self.cur_targets[:, self.actuated_dof_indices] = self.act_moving_average * self.cur_targets[:,
+ self.actuated_dof_indices] + (
+ 1.0 - self.act_moving_average) * self.prev_targets[
+ :,
+ self.actuated_dof_indices]
+ self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(
+ self.cur_targets[:, self.actuated_dof_indices],
+ self.shadow_hand_dof_lower_limits[self.actuated_dof_indices],
+ self.shadow_hand_dof_upper_limits[self.actuated_dof_indices])
+
+ self.cur_targets[:, self.actuated_dof_indices + 15] = scale(self.actions[:, 27:42],
+ self.shadow_hand_dof_lower_limits[
+ self.actuated_dof_indices],
+ self.shadow_hand_dof_upper_limits[
+ self.actuated_dof_indices])
+            self.cur_targets[:, self.actuated_dof_indices + 15] = \
+                self.act_moving_average * self.cur_targets[:, self.actuated_dof_indices + 15] + \
+                (1.0 - self.act_moving_average) * self.prev_targets[:, self.actuated_dof_indices + 15]
+ self.cur_targets[:, self.actuated_dof_indices + 15] = tensor_clamp(
+ self.cur_targets[:, self.actuated_dof_indices + 15],
+ self.shadow_hand_dof_lower_limits[self.actuated_dof_indices],
+ self.shadow_hand_dof_upper_limits[self.actuated_dof_indices])
+ # self.cur_targets[:, 49] = scale(self.actions[:, 0],
+ # self.object_dof_lower_limits[1], self.object_dof_upper_limits[1])
+ # angle_offsets = self.actions[:, 26:32] * self.dt * self.orientation_scale
+
+ self.apply_forces[:, 1, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000
+ self.apply_forces[:, 1 + 21, :] = actions[:, 21:24] * self.dt * self.transition_scale * 100000
+ self.apply_torque[:, 1, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000
+ self.apply_torque[:, 1 + 21, :] = self.actions[:, 24:27] * self.dt * self.orientation_scale * 1000
+
+ self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.apply_forces),
+ gymtorch.unwrap_tensor(self.apply_torque), gymapi.ENV_SPACE)
+
+ self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices]
+ self.prev_targets[:, self.actuated_dof_indices + 15] = self.cur_targets[:, self.actuated_dof_indices + 15]
+
+ # self.prev_targets[:, 49] = self.cur_targets[:, 49]
+ # self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.cur_targets))
+ all_hand_indices = torch.unique(torch.cat([self.right_hand_indices,
+ self.left_hand_indices]).to(torch.int32))
+ self.gym.set_dof_position_target_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.prev_targets),
+ gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
+
+ def post_physics_step(self):
+ """
+        The post-processing of the physics step. Compute the observations and rewards, and visualize
+        auxiliary lines for debugging when needed
+
+ """
+ self.progress_buf += 1
+ self.randomize_buf += 1
+
+ self.compute_observations()
+ self.compute_reward(self.actions)
+
+ if self.viewer and self.debug_viz:
+ # draw axes on target object
+ self.gym.clear_lines(self.viewer)
+ self.gym.refresh_rigid_body_state_tensor(self.sim)
+
+ for i in range(self.num_envs):
+ self.add_debug_lines(self.envs[i], self.block_right_handle_pos[i], self.block_right_handle_rot[i])
+ self.add_debug_lines(self.envs[i], self.block_left_handle_pos[i], self.block_left_handle_rot[i])
+ # self.add_debug_lines(self.envs[i], self.goal_pos[i], self.block_left_handle_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_ff_pos[i], self.right_hand_ff_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_mf_pos[i], self.right_hand_mf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_rf_pos[i], self.right_hand_rf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_lf_pos[i], self.right_hand_lf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_th_pos[i], self.right_hand_th_rot[i])
+
+ # self.add_debug_lines(self.envs[i], self.left_hand_ff_pos[i], self.right_hand_ff_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_mf_pos[i], self.right_hand_mf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_rf_pos[i], self.right_hand_rf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_lf_pos[i], self.right_hand_lf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_th_pos[i], self.right_hand_th_rot[i])
+
+ def add_debug_lines(self, env, pos, rot):
+ posx = (pos + quat_apply(rot, to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy()
+ posy = (pos + quat_apply(rot, to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy()
+ posz = (pos + quat_apply(rot, to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy()
+
+ p0 = pos.cpu().numpy()
+ self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posx[0], posx[1], posx[2]], [0.85, 0.1, 0.1])
+ self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posy[0], posy[1], posy[2]], [0.1, 0.85, 0.1])
+ self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posz[0], posz[1], posz[2]], [0.1, 0.1, 0.85])
+
+ def rand_row(self, tensor, dim_needed):
+ row_total = tensor.shape[0]
+ return tensor[torch.randint(low=0, high=row_total, size=(dim_needed,)), :]
+
+ def sample_points(self, points, sample_num=1000, sample_mathed='furthest'):
+ eff_points = points[points[:, 2] > 0.04]
+ if eff_points.shape[0] < sample_num:
+ eff_points = points
+ if sample_mathed == 'random':
+ sampled_points = self.rand_row(eff_points, sample_num)
+ elif sample_mathed == 'furthest':
+ sampled_points_id = pointnet2_utils.furthest_point_sample(eff_points.reshape(1, *eff_points.shape),
+ sample_num)
+ sampled_points = eff_points.index_select(0, sampled_points_id[0].long())
+ return sampled_points
+
+ def camera_visulization(self, is_depth_image=False):
+ if is_depth_image:
+ camera_depth_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0],
+ gymapi.IMAGE_DEPTH)
+ torch_depth_tensor = gymtorch.wrap_tensor(camera_depth_tensor)
+ torch_depth_tensor = torch.clamp(torch_depth_tensor, -1, 1)
+ torch_depth_tensor = scale(torch_depth_tensor, to_torch([0], dtype=torch.float, device=self.device),
+ to_torch([256], dtype=torch.float, device=self.device))
+ camera_image = torch_depth_tensor.cpu().numpy()
+ camera_image = Im.fromarray(camera_image)
+
+ else:
+ camera_rgba_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0],
+ gymapi.IMAGE_COLOR)
+ torch_rgba_tensor = gymtorch.wrap_tensor(camera_rgba_tensor)
+ camera_image = torch_rgba_tensor.cpu().numpy()
+ camera_image = Im.fromarray(camera_image)
+
+ return camera_image
+
+
+#####################################################################
+###=========================jit functions=========================###
+#####################################################################
+
+@torch.jit.script
+def depth_image_to_point_cloud_GPU(camera_tensor, camera_view_matrix_inv, camera_proj_matrix, u, v, width: float,
+ height: float, depth_bar: float, device: torch.device):
+ # time1 = time.time()
+ depth_buffer = camera_tensor.to(device)
+
+ # Get the camera view matrix and invert it to transform points from camera to world space
+ vinv = camera_view_matrix_inv
+
+ # Get the camera projection matrix and get the necessary scaling
+ # coefficients for deprojection
+
+ proj = camera_proj_matrix
+ fu = 2 / proj[0, 0]
+ fv = 2 / proj[1, 1]
+
+ centerU = width / 2
+ centerV = height / 2
+
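+    # Deproject each pixel: offsets from the image centre, scaled by the depth value and the
+    # projection coefficients fu/fv, give camera-space X/Y; the homogeneous points are then
+    # filtered by depth_bar and mapped to world space with the inverted view matrix.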
+ Z = depth_buffer
+ X = -(u - centerU) / width * Z * fu
+ Y = (v - centerV) / height * Z * fv
+
+ Z = Z.view(-1)
+ valid = Z > -depth_bar
+ X = X.view(-1)
+ Y = Y.view(-1)
+
+ position = torch.vstack((X, Y, Z, torch.ones(len(X), device=device)))[:, valid]
+ position = position.permute(1, 0)
+ position = position @ vinv
+
+ points = position[:, 0:3]
+
+ return points
+
+
+@torch.jit.script
+def compute_hand_reward(
+ rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes,
+ max_episode_length: float, object_pos, object_rot, target_pos, target_rot, block_right_handle_pos,
+ block_left_handle_pos,
+ left_hand_pos, right_hand_pos, right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos,
+ right_hand_th_pos,
+ left_hand_ff_pos, left_hand_mf_pos, left_hand_rf_pos, left_hand_lf_pos, left_hand_th_pos,
+ dist_reward_scale: float, rot_reward_scale: float, rot_eps: float,
+ actions, action_penalty_scale: float,
+ success_tolerance: float, reach_goal_bonus: float, fall_dist: float,
+ fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool
+):
+ """
+    Compute the reward of all environments.
+
+ Args:
+ rew_buf (tensor): The reward buffer of all environments at this time
+
+ reset_buf (tensor): The reset buffer of all environments at this time
+
+ reset_goal_buf (tensor): The only-goal reset buffer of all environments at this time
+
+        progress_buf (tensor): The progress buffer of all environments at this time
+
+ successes (tensor): The successes buffer of all environments at this time
+
+ consecutive_successes (tensor): The consecutive successes buffer of all environments at this time
+
+ max_episode_length (float): The max episode length in this environment
+
+ object_pos (tensor): The position of the object
+
+ object_rot (tensor): The rotation of the object
+
+ target_pos (tensor): The position of the target
+
+        target_rot (tensor): The rotation of the target
+
+ block_right_handle_pos (tensor): The position of the right block handle
+
+ block_left_handle_pos (tensor): The position of the left block handle
+
+ left_hand_pos, right_hand_pos (tensor): The position of the bimanual hands
+
+ right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, right_hand_th_pos (tensor): The position of the five fingers
+ of the right hand
+
+ left_hand_ff_pos, left_hand_mf_pos, left_hand_rf_pos, left_hand_lf_pos, left_hand_th_pos (tensor): The position of the five fingers
+ of the left hand
+
+ dist_reward_scale (float): The scale of the distance reward
+
+ rot_reward_scale (float): The scale of the rotation reward
+
+        rot_eps (float): The epsilon used in the rotation-distance calculation
+
+ actions (tensor): The action buffer of all environments at this time
+
+ action_penalty_scale (float): The scale of the action penalty reward
+
+        success_tolerance (float): The tolerance used to determine success
+
+ reach_goal_bonus (float): The reward given when the object reaches the goal
+
+        fall_dist (float): The distance from the hand beyond which the object is judged to have fallen
+
+        fall_penalty (float): The penalty applied when the object falls
+
+ max_consecutive_successes (float): The maximum of the consecutive successes
+
+        av_factor (float): The averaging factor used to calculate the consecutive successes
+
+        ignore_z_rot (bool): Whether to ignore rotation about the z-axis, which is usually needed
+                             for some specific objects (e.g. pen)
+ """
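+    # In this variant the reward is the sum of two exponential finger-to-handle distance terms and a
+    # bonus that grows as the two block handles are brought together; several of the documented
+    # arguments (e.g. rot_reward_scale, reach_goal_bonus, fall_penalty) are not used below.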
+    # Distances from the block handles to the goal and from the hands and fingertips to the handles
+ left_goal_dist = torch.norm(target_pos - block_left_handle_pos, p=2, dim=-1)
+ right_goal_dist = torch.norm(target_pos - block_right_handle_pos, p=2, dim=-1)
+ # goal_dist = target_pos[:, 2] - object_pos[:, 2]
+
+ right_hand_dist = torch.norm(block_right_handle_pos - right_hand_pos, p=2, dim=-1)
+ left_hand_dist = torch.norm(block_left_handle_pos - left_hand_pos, p=2, dim=-1)
+
+ right_hand_finger_dist = (torch.norm(block_right_handle_pos - right_hand_ff_pos, p=2, dim=-1) + torch.norm(
+ block_right_handle_pos - right_hand_mf_pos, p=2, dim=-1)
+ + torch.norm(block_right_handle_pos - right_hand_rf_pos, p=2, dim=-1) + torch.norm(
+ block_right_handle_pos - right_hand_lf_pos, p=2, dim=-1)
+ + torch.norm(block_right_handle_pos - right_hand_th_pos, p=2, dim=-1))
+ left_hand_finger_dist = (torch.norm(block_left_handle_pos - left_hand_ff_pos, p=2, dim=-1) + torch.norm(
+ block_left_handle_pos - left_hand_mf_pos, p=2, dim=-1)
+ + torch.norm(block_left_handle_pos - left_hand_rf_pos, p=2, dim=-1) + torch.norm(
+ block_left_handle_pos - left_hand_lf_pos, p=2, dim=-1)
+ + torch.norm(block_left_handle_pos - left_hand_th_pos, p=2, dim=-1))
+ # Orientation alignment for the cube in hand and goal cube
+ # quat_diff = quat_mul(object_rot, quat_conjugate(target_rot))
+ # rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0))
+
+ right_hand_dist_rew = torch.exp(-10 * right_hand_finger_dist)
+ left_hand_dist_rew = torch.exp(-10 * left_hand_finger_dist)
+
+ # rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale
+
+ action_penalty = torch.sum(actions ** 2, dim=-1)
+
+ # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty
+ # reward = torch.exp(-0.05*(up_rew * dist_reward_scale)) + torch.exp(-0.05*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.05*(left_hand_dist_rew * dist_reward_scale))
+ # up_rew = torch.zeros_like(right_hand_dist_rew)
+ # up_rew = torch.where(right_hand_finger_dist < 0.6,
+ # torch.where(left_hand_finger_dist < 0.4,
+ up_rew = torch.zeros_like(right_hand_dist_rew)
+ up_rew = torch.exp(-10 * torch.norm(block_right_handle_pos - block_left_handle_pos, p=2, dim=-1)) * 2
+ # up_rew = torch.where(right_hand_finger_dist <= 0.3, torch.norm(bottle_cap_up - bottle_pos, p=2, dim=-1) * 30, up_rew)
+
+ # reward = torch.exp(-0.1*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.1*(left_hand_dist_rew * dist_reward_scale))
+ reward = right_hand_dist_rew + left_hand_dist_rew + up_rew
+
+ resets = torch.where(right_hand_dist_rew <= 0, torch.ones_like(reset_buf), reset_buf)
+ resets = torch.where(right_hand_finger_dist >= 1.5, torch.ones_like(resets), resets)
+ resets = torch.where(left_hand_finger_dist >= 1.5, torch.ones_like(resets), resets)
+
+ # Find out which envs hit the goal and update successes count
+ successes = torch.where(successes == 0,
+ torch.where(torch.norm(block_right_handle_pos - block_left_handle_pos, p=2, dim=-1) < 0.2,
+ torch.ones_like(successes), successes), successes)
+
+ resets = torch.where(progress_buf >= max_episode_length, torch.ones_like(resets), resets)
+
+ goal_resets = torch.zeros_like(resets)
+
+ num_resets = torch.sum(resets)
+ finished_cons_successes = torch.sum(successes * resets.float())
+
+ cons_successes = torch.where(resets > 0, successes * resets, consecutive_successes).mean()
+
+ return reward, resets, goal_resets, progress_buf, successes, cons_successes
+
+
+@torch.jit.script
+def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
+ return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor),
+ quat_from_angle_axis(rand1 * np.pi, y_unit_tensor))
+
+
+@torch.jit.script
+def randomize_rotation_pen(rand0, rand1, max_angle, x_unit_tensor, y_unit_tensor, z_unit_tensor):
+ rot = quat_mul(quat_from_angle_axis(0.5 * np.pi + rand0 * max_angle, x_unit_tensor),
+ quat_from_angle_axis(rand0 * np.pi, z_unit_tensor))
+ return rot
diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py
new file mode 100644
index 000000000..5d4ba45e7
--- /dev/null
+++ b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py
@@ -0,0 +1,1267 @@
+import os
+import random
+
+from PIL import Image as Im
+from isaacgym import gymapi
+from isaacgym import gymtorch
+
+from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
+from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
+from rofunc.utils.oslab import get_rofunc_path
+from rofunc.utils.logger.beauty_logger import beauty_print
+
+
+class QbSoftHandGraspTask(VecTask):
+ """
+    This class corresponds to the QbSoftHandGrasp task, in which a single QbSoftHand needs to
+    grasp an object placed on a table.
+ """
+
+ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture,
+ force_render, agent_index=[[[0, 1, 2, 3, 4, 5]], [[0, 1, 2, 3, 4, 5]]], is_multi_agent=False):
+ self.cfg = cfg
+ self.agent_index = agent_index
+
+ self.is_multi_agent = is_multi_agent
+
+ self.randomize = self.cfg["task"]["randomize"]
+ self.randomization_params = self.cfg["task"]["randomization_params"]
+ self.aggregate_mode = self.cfg["env"]["aggregateMode"]
+
+ self.dist_reward_scale = self.cfg["env"]["distRewardScale"]
+ self.rot_reward_scale = self.cfg["env"]["rotRewardScale"]
+ self.action_penalty_scale = self.cfg["env"]["actionPenaltyScale"]
+ self.success_tolerance = self.cfg["env"]["successTolerance"]
+ self.reach_goal_bonus = self.cfg["env"]["reachGoalBonus"]
+ self.fall_dist = self.cfg["env"]["fallDistance"]
+ self.fall_penalty = self.cfg["env"]["fallPenalty"]
+ self.rot_eps = self.cfg["env"]["rotEps"]
+
+ self.vel_obs_scale = 0.2 # scale factor of velocity based observations
+        self.force_torque_obs_scale = 10.0  # scale factor of force and torque based observations
+
+ self.reset_position_noise = self.cfg["env"]["resetPositionNoise"]
+ self.reset_rotation_noise = self.cfg["env"]["resetRotationNoise"]
+ self.reset_dof_pos_noise = self.cfg["env"]["resetDofPosRandomInterval"]
+ self.reset_dof_vel_noise = self.cfg["env"]["resetDofVelRandomInterval"]
+
+ self.qbsofthand_dof_speed_scale = self.cfg["env"]["dofSpeedScale"]
+ self.use_relative_control = self.cfg["env"]["useRelativeControl"]
+ self.act_moving_average = self.cfg["env"]["actionsMovingAverage"]
+
+ self.debug_viz = self.cfg["env"]["enableDebugVis"]
+
+ self.max_episode_length = self.cfg["env"]["episodeLength"]
+ self.reset_time = self.cfg["env"].get("resetTime", -1.0)
+ self.print_success_stat = self.cfg["env"]["printNumSuccesses"]
+ self.max_consecutive_successes = self.cfg["env"]["maxConsecutiveSuccesses"]
+ self.av_factor = self.cfg["env"].get("averFactor", 0.01)
+ print("Averaging factor: ", self.av_factor)
+
+ self.transition_scale = self.cfg["env"]["transition_scale"]
+ self.orientation_scale = self.cfg["env"]["orientation_scale"]
+
+ control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1)
+ if self.reset_time > 0.0:
+ self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.sim_params.dt)))
+ print("Reset time: ", self.reset_time)
+ print("New episode length: ", self.max_episode_length)
+
+ self.object_type = self.cfg["env"]["objectType"]
+ # assert self.object_type in ["block", "egg", "pen"]
+
+ self.ignore_z = (self.object_type == "pen")
+
+ self.asset_files_dict = {
+ "block": "urdf/objects/cube_multicolor.urdf",
+ "egg": "mjcf/open_ai_assets/hand/egg.xml",
+ "pen": "mjcf/open_ai_assets/hand/pen.xml",
+ # "pot": "mjcf/pot.xml",
+ "pot": "mjcf/bucket/100454/mobility.urdf"
+ }
+
+ if "asset" in self.cfg["env"]:
+ self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock",
+ self.asset_files_dict["block"])
+ self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg",
+ self.asset_files_dict["egg"])
+ self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen",
+ self.asset_files_dict["pen"])
+
+ # can be "openai", "full_no_vel", "full", "full_state"
+ self.obs_type = self.cfg["env"]["observationType"]
+
+ if not (self.obs_type in ["point_cloud", "full_state"]):
+ raise Exception(
+ "Unknown type of observations!\nobservationType should be one of: [point_cloud, full_state]")
+
+ print("Obs type:", self.obs_type)
+
+ self.num_point_cloud_feature_dim = 768
+ self.num_obs_dict = {
+ "point_cloud": 157 + self.num_point_cloud_feature_dim * 3,
+ "point_cloud_for_distill": 157 + self.num_point_cloud_feature_dim * 3,
+ "full_state": 157
+ }
+ self.num_hand_obs = 45 + 65 + 21 + 6 # TODO
+ self.up_axis = 'z'
+
+ self.fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link",
+ "right_qbhand_middle_distal_link", "right_qbhand_ring_distal_link",
+ "right_qbhand_little_distal_link"]
+
+ self.num_fingertips = len(self.fingertips)
+
+ self.use_vel_obs = False
+ self.fingertip_obs = True
+ self.asymmetric_obs = self.cfg["env"]["asymmetric_observations"]
+
+ num_states = 0
+
+ self.cfg["env"]["numObservations"] = self.num_obs_dict[self.obs_type]
+ self.cfg["env"]["numStates"] = num_states
+ self.cfg["env"]["numActions"] = 15 + 6 # hand dof + hand pos + hand rot
+
+ super().__init__(cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, force_render)
+
+ if self.obs_type in ["point_cloud"]:
+ from PIL import Image as Im
+ # from pointnet2_ops import pointnet2_utils
+
+ self.camera_debug = self.cfg["env"].get("cameraDebug", False)
+ self.point_cloud_debug = self.cfg["env"].get("pointCloudDebug", False)
+
+        if self.viewer is not None:
+ cam_pos = gymapi.Vec3(10.0, 5.0, 1.0)
+ cam_target = gymapi.Vec3(6.0, 5.0, 0.0)
+ self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
+
+ # get gym GPU state tensors
+ actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim)
+ dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
+ rigid_body_tensor = self.gym.acquire_rigid_body_state_tensor(self.sim)
+
+ # sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)
+ # self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6)
+
+ dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim)
+ self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs,
+ self.num_qbsofthand_dofs + self.num_object_dofs * 2)
+ self.dof_force_tensor = self.dof_force_tensor[:, :15]
+
+ self.gym.refresh_actor_root_state_tensor(self.sim)
+ self.gym.refresh_dof_state_tensor(self.sim)
+ self.gym.refresh_rigid_body_state_tensor(self.sim)
+
+ # create some wrapper tensors for different slices
+ self.qbsofthand_default_dof_pos = torch.zeros(self.num_qbsofthand_dofs, dtype=torch.float, device=self.device)
+ # self.qbsofthand_default_dof_pos = to_torch([0.0, 0.0, -0, -0, -0, -0, -0, -0,
+ # -0, -0, -0, -0, -0, -0, -0, -0,
+ # -0, -0, -0, -1.04, 1.2, 0., 0, -1.57], dtype=torch.float, device=self.device)
+
+ self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
+ self.qbsofthand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_qbsofthand_dofs]
+ self.qbsofthand_dof_pos = self.qbsofthand_dof_state[..., 0]
+ self.qbsofthand_dof_vel = self.qbsofthand_dof_state[..., 1]
+
+ self.rigid_body_states = gymtorch.wrap_tensor(rigid_body_tensor).view(self.num_envs, -1, 13) # (256, 22, 13)
+ self.num_bodies = self.rigid_body_states.shape[1] # 22
+
+ self.root_state_tensor = gymtorch.wrap_tensor(actor_root_state_tensor).view(-1, 13)
+ self.hand_positions = self.root_state_tensor[:, 0:3]
+ self.hand_orientations = self.root_state_tensor[:, 3:7]
+ self.hand_linvels = self.root_state_tensor[:, 7:10]
+ self.hand_angvels = self.root_state_tensor[:, 10:13]
+ self.saved_root_tensor = self.root_state_tensor.clone()
+
+ self.num_dofs = self.gym.get_sim_dof_count(self.sim) // self.num_envs
+ self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device)
+ self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device)
+
+ self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs,
+ -1)
+ self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+ self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+ self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+
+ self.reset_goal_buf = self.reset_buf.clone()
+ self.successes = torch.zeros(self.num_envs, dtype=torch.float, device=self.device)
+ self.consecutive_successes = torch.zeros(1, dtype=torch.float, device=self.device)
+
+ self.av_factor = to_torch(self.av_factor, dtype=torch.float, device=self.device)
+ self.apply_forces = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float)
+ self.apply_torque = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float)
+
+ self.total_successes = 0
+ self.total_resets = 0
+
+ def create_sim(self):
+ """
+ Allocates which device will simulate and which device will render the scene. Defines the simulation type to be used
+ """
+ self.dt = self.sim_params.dt
+ self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, self.up_axis)
+
+ self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
+ self._create_ground_plane()
+ self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
+
+ def _create_ground_plane(self):
+ """
+ Adds ground plane to simulation
+ """
+ plane_params = gymapi.PlaneParams()
+ plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
+ self.gym.add_ground(self.sim, plane_params)
+
+ def _create_envs(self, num_envs, spacing, num_per_row):
+ """
+ Create multiple parallel isaacgym environments
+
+ Args:
+            num_envs (int): The total number of environments
+
+ spacing (float): Specifies half the side length of the square area occupied by each environment
+
+            num_per_row (int): Specifies how many environments are placed in a row
+ """
+ lower = gymapi.Vec3(-spacing, -spacing, 0.0)
+ upper = gymapi.Vec3(spacing, spacing, spacing)
+
+ # get rofunc path from rofunc package metadata
+ rofunc_path = get_rofunc_path()
+ asset_root = os.path.join(rofunc_path, "simulator/assets")
+ qbsofthand_asset_file = "mjcf/qbhand_no_virtual_right.xml"
+ table_texture_files = os.path.join(asset_root, "textures/texture_stone_stone_texture_0.jpg")
+ table_texture_handle = self.gym.create_texture_from_file(self.sim, table_texture_files)
+
+ if "asset" in self.cfg["env"]:
+ asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root)
+ qbsofthand_asset_file = self.cfg["env"]["asset"].get("assetFileName", qbsofthand_asset_file)
+
+ object_asset_file = self.asset_files_dict[self.object_type]
+
+        # load qb soft hand asset
+ asset_options = gymapi.AssetOptions()
+ asset_options.flip_visual_attachments = False
+ asset_options.fix_base_link = False
+ asset_options.collapse_fixed_joints = True
+ asset_options.disable_gravity = True
+ asset_options.thickness = 0.001
+ asset_options.angular_damping = 100
+ asset_options.linear_damping = 100
+
+ if self.physics_engine == gymapi.SIM_PHYSX:
+ asset_options.use_physx_armature = True
+ asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
+
+ qbsofthand_asset = self.gym.load_asset(self.sim, asset_root, qbsofthand_asset_file, asset_options)
+
+ self.num_qbsofthand_bodies = self.gym.get_asset_rigid_body_count(qbsofthand_asset)
+ self.num_qbsofthand_shapes = self.gym.get_asset_rigid_shape_count(qbsofthand_asset)
+ self.num_qbsofthand_dofs = self.gym.get_asset_dof_count(qbsofthand_asset)
+ self.num_qbsofthand_actuators = self.gym.get_asset_actuator_count(qbsofthand_asset)
+ self.num_qbsofthand_tendons = self.gym.get_asset_tendon_count(qbsofthand_asset)
+ beauty_print(f"self.num_qbsofthand_bodies: {self.num_qbsofthand_bodies}")
+ beauty_print(f"self.num_qbsofthand_shapes: {self.num_qbsofthand_shapes}")
+ beauty_print(f"self.num_qbsofthand_dofs: {self.num_qbsofthand_dofs}")
+ beauty_print(f"self.num_qbsofthand_actuators: {self.num_qbsofthand_actuators}")
+ beauty_print(f"self.num_qbsofthand_tendons: {self.num_qbsofthand_tendons}")
+
+ actuated_dof_names = [self.gym.get_asset_actuator_joint_name(qbsofthand_asset, i) for i in
+ range(self.num_qbsofthand_actuators)]
+ self.actuated_dof_indices = [self.gym.find_asset_dof_index(qbsofthand_asset, name) for name in
+ actuated_dof_names]
+ beauty_print(f"self.actuated_dof_indices: {self.actuated_dof_indices}")
+
+ # set qbsofthand dof properties
+ qbsofthand_dof_props = self.gym.get_asset_dof_properties(qbsofthand_asset)
+
+ self.qbsofthand_dof_lower_limits = []
+ self.qbsofthand_dof_upper_limits = []
+ self.qbsofthand_dof_default_pos = []
+ self.qbsofthand_dof_default_vel = []
+
+ for i in range(self.num_qbsofthand_dofs):
+ self.qbsofthand_dof_lower_limits.append(qbsofthand_dof_props['lower'][i])
+ self.qbsofthand_dof_upper_limits.append(qbsofthand_dof_props['upper'][i])
+ self.qbsofthand_dof_default_pos.append(0.0)
+ self.qbsofthand_dof_default_vel.append(0.0)
+
+ self.actuated_dof_indices = to_torch(self.actuated_dof_indices, dtype=torch.long, device=self.device)
+ self.qbsofthand_dof_lower_limits = to_torch(self.qbsofthand_dof_lower_limits, device=self.device)
+ self.qbsofthand_dof_upper_limits = to_torch(self.qbsofthand_dof_upper_limits, device=self.device)
+ self.qbsofthand_dof_default_pos = to_torch(self.qbsofthand_dof_default_pos, device=self.device)
+ self.qbsofthand_dof_default_vel = to_torch(self.qbsofthand_dof_default_vel, device=self.device)
+
+ # load manipulated object and goal assets
+ object_asset_options = gymapi.AssetOptions()
+ object_asset_options.density = 500
+ object_asset_options.fix_base_link = False
+ # object_asset_options.collapse_fixed_joints = True
+ # object_asset_options.disable_gravity = True
+ object_asset_options.use_mesh_materials = True
+ object_asset_options.mesh_normal_mode = gymapi.COMPUTE_PER_VERTEX
+ object_asset_options.override_com = True
+ object_asset_options.override_inertia = True
+ object_asset_options.vhacd_enabled = True
+ object_asset_options.vhacd_params = gymapi.VhacdParams()
+ object_asset_options.vhacd_params.resolution = 100000
+ object_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
+
+ object_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options)
+ block_asset_file = "urdf/objects/cube_multicolor1.urdf"
+ block_asset = self.gym.load_asset(self.sim, asset_root, block_asset_file, object_asset_options)
+
+ object_asset_options.disable_gravity = True
+ goal_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options)
+
+ self.num_object_bodies = self.gym.get_asset_rigid_body_count(object_asset)
+ self.num_object_shapes = self.gym.get_asset_rigid_shape_count(object_asset)
+
+ # set object dof properties
+ self.num_object_dofs = self.gym.get_asset_dof_count(object_asset)
+ object_dof_props = self.gym.get_asset_dof_properties(object_asset)
+
+ self.object_dof_lower_limits = []
+ self.object_dof_upper_limits = []
+
+ for i in range(self.num_object_dofs):
+ self.object_dof_lower_limits.append(object_dof_props['lower'][i])
+ self.object_dof_upper_limits.append(object_dof_props['upper'][i])
+
+ self.object_dof_lower_limits = to_torch(self.object_dof_lower_limits, device=self.device)
+ self.object_dof_upper_limits = to_torch(self.object_dof_upper_limits, device=self.device)
+
+ # create table asset
+ table_dims = gymapi.Vec3(1.0, 1.0, 0.6)
+ asset_options = gymapi.AssetOptions()
+ asset_options.fix_base_link = True
+ asset_options.flip_visual_attachments = True
+ asset_options.collapse_fixed_joints = True
+ asset_options.disable_gravity = True
+ asset_options.thickness = 0.001
+
+ table_asset = self.gym.create_box(self.sim, table_dims.x, table_dims.y, table_dims.z, gymapi.AssetOptions())
+
+ qbsofthand_start_pose = gymapi.Transform()
+ qbsofthand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8)
+ qbsofthand_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 0, 1.57)
+
+ object_start_pose = gymapi.Transform()
+ object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6)
+ object_start_pose.r = gymapi.Quat().from_euler_zyx(0, 0, 0)
+
+ block_start_pose = gymapi.Transform()
+ block_start_pose.p = gymapi.Vec3(0.0, -0.2, 0.6)
+ block_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 1.57, 0)
+
+ if self.object_type == "pen":
+ object_start_pose.p.z = qbsofthand_start_pose.p.z + 0.02
+
+ self.goal_displacement = gymapi.Vec3(-0., 0.0, 10)
+ self.goal_displacement_tensor = to_torch(
+ [self.goal_displacement.x, self.goal_displacement.y, self.goal_displacement.z], device=self.device)
+ goal_start_pose = gymapi.Transform()
+ goal_start_pose.p = object_start_pose.p + self.goal_displacement
+
+ goal_start_pose.p.z -= 0.0
+
+ table_pose = gymapi.Transform()
+ table_pose.p = gymapi.Vec3(0.0, 0.0, 0.5 * table_dims.z)
+ table_pose.r = gymapi.Quat().from_euler_zyx(-0., 0, 0)
+
+ # compute aggregate size
+ max_agg_bodies = self.num_qbsofthand_bodies + 3 * self.num_object_bodies + 1
+ max_agg_shapes = self.num_qbsofthand_shapes + 3 * self.num_object_shapes + 1
+
+ self.qbsofthands = []
+ self.envs = []
+
+ self.object_init_state = []
+ self.hand_start_states = []
+
+ self.hand_indices = []
+ self.fingertip_indices = []
+ self.object_indices = []
+ self.goal_object_indices = []
+ self.table_indices = []
+ self.block_indices = []
+
+ self.fingertip_handles = [self.gym.find_asset_rigid_body_index(qbsofthand_asset, name) for name in
+ self.fingertips]
+
+ # # create fingertip force sensors, if needed
+ # sensor_pose = gymapi.Transform()
+ # for ft_handle in self.fingertip_handles:
+ # self.gym.create_asset_force_sensor(qbsofthand_asset, ft_handle, sensor_pose)
+
+ if self.obs_type in ["point_cloud"]:
+ self.cameras = []
+ self.camera_tensors = []
+ self.camera_view_matrixs = []
+ self.camera_proj_matrixs = []
+
+ self.camera_props = gymapi.CameraProperties()
+ self.camera_props.width = 256
+ self.camera_props.height = 256
+ self.camera_props.enable_tensors = True
+
+ self.env_origin = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float)
+ self.pointCloudDownsampleNum = 768
+ self.camera_u = torch.arange(0, self.camera_props.width, device=self.device)
+ self.camera_v = torch.arange(0, self.camera_props.height, device=self.device)
+
+ self.camera_v2, self.camera_u2 = torch.meshgrid(self.camera_v, self.camera_u, indexing='ij')
+
+ if self.point_cloud_debug:
+ import open3d as o3d
+ from bidexhands.utils.o3dviewer import PointcloudVisualizer
+ self.pointCloudVisualizer = PointcloudVisualizer()
+ self.pointCloudVisualizerInitialized = False
+ self.o3d_pc = o3d.geometry.PointCloud()
+ else:
+ self.pointCloudVisualizer = None
+
+ for i in range(self.num_envs):
+ # create env instance
+ env_ptr = self.gym.create_env(
+ self.sim, lower, upper, num_per_row
+ )
+
+ if self.aggregate_mode >= 1:
+ self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True)
+
+ # add hand - collision filter = -1 to use asset collision filters set in mjcf loader
+ qbsofthand_actor = self.gym.create_actor(env_ptr, qbsofthand_asset, qbsofthand_start_pose, "hand", i, 2)
+
+ self.hand_start_states.append(
+ [qbsofthand_start_pose.p.x, qbsofthand_start_pose.p.y, qbsofthand_start_pose.p.z,
+ qbsofthand_start_pose.r.x, qbsofthand_start_pose.r.y, qbsofthand_start_pose.r.z,
+ qbsofthand_start_pose.r.w,
+ 0, 0, 0, 0, 0, 0])
+
+ self.gym.set_actor_dof_properties(env_ptr, qbsofthand_actor, qbsofthand_dof_props)
+ hand_idx = self.gym.get_actor_index(env_ptr, qbsofthand_actor, gymapi.DOMAIN_SIM)
+ self.hand_indices.append(hand_idx)
+
+ # randomize colors and textures for rigid body
+ num_bodies = self.gym.get_actor_rigid_body_count(env_ptr, qbsofthand_actor)
+ hand_rigid_body_index = [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]
+
+ for n in self.agent_index[0]:
+ colorx = random.uniform(0, 1)
+ colory = random.uniform(0, 1)
+ colorz = random.uniform(0, 1)
+ for m in n:
+ for o in hand_rigid_body_index[m]:
+ self.gym.set_rigid_body_color(env_ptr, qbsofthand_actor, o, gymapi.MESH_VISUAL,
+ gymapi.Vec3(colorx, colory, colorz))
+
+ # # create fingertip force-torque sensors
+ # self.gym.enable_actor_dof_force_sensors(env_ptr, qbsofthand_actor)
+
+ # add object
+ object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0)
+ self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z,
+ object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z,
+ object_start_pose.r.w,
+ 0, 0, 0, 0, 0, 0])
+ object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM)
+ self.object_indices.append(object_idx)
+ # self.gym.set_actor_scale(env_ptr, object_handle, 0.3)
+
+ block_handle = self.gym.create_actor(env_ptr, block_asset, block_start_pose, "block", i, 0, 0)
+ block_idx = self.gym.get_actor_index(env_ptr, block_handle, gymapi.DOMAIN_SIM)
+ self.block_indices.append(block_idx)
+
+ # add goal object
+ goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs,
+ 0, 0)
+ goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM)
+ self.goal_object_indices.append(goal_object_idx)
+ # self.gym.set_actor_scale(env_ptr, goal_handle, 0.3)
+
+ # add table
+ table_handle = self.gym.create_actor(env_ptr, table_asset, table_pose, "table", i, -1, 0)
+ self.gym.set_rigid_body_texture(env_ptr, table_handle, 0, gymapi.MESH_VISUAL, table_texture_handle)
+ table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM)
+ self.table_indices.append(table_idx)
+
+ if self.object_type != "block":
+ self.gym.set_rigid_body_color(
+ env_ptr, object_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98))
+ self.gym.set_rigid_body_color(
+ env_ptr, goal_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98))
+
+ if self.obs_type in ["point_cloud"]:
+ camera_handle = self.gym.create_camera_sensor(env_ptr, self.camera_props)
+ self.gym.set_camera_location(camera_handle, env_ptr, gymapi.Vec3(0.25, -0., 1.0),
+ gymapi.Vec3(-0.24, -0., 0))
+ camera_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, env_ptr, camera_handle,
+ gymapi.IMAGE_DEPTH)
+ torch_cam_tensor = gymtorch.wrap_tensor(camera_tensor)
+ cam_vinv = torch.inverse(
+ (torch.tensor(self.gym.get_camera_view_matrix(self.sim, env_ptr, camera_handle)))).to(self.device)
+ cam_proj = torch.tensor(self.gym.get_camera_proj_matrix(self.sim, env_ptr, camera_handle),
+ device=self.device)
+
+ origin = self.gym.get_env_origin(env_ptr)
+ self.env_origin[i][0] = origin.x
+ self.env_origin[i][1] = origin.y
+ self.env_origin[i][2] = origin.z
+ self.camera_tensors.append(torch_cam_tensor)
+ self.camera_view_matrixs.append(cam_vinv)
+ self.camera_proj_matrixs.append(cam_proj)
+ self.cameras.append(camera_handle)
+
+ if self.aggregate_mode > 0:
+ self.gym.end_aggregate(env_ptr)
+
+ self.envs.append(env_ptr)
+ self.qbsofthands.append(qbsofthand_actor)
+
+ self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view(
+ self.num_envs, 13)
+ self.goal_states = self.object_init_state.clone()
+ self.goal_init_state = self.goal_states.clone()
+ self.hand_start_states = to_torch(self.hand_start_states, device=self.device).view(self.num_envs, 13)
+
+ self.fingertip_handles = to_torch(self.fingertip_handles, dtype=torch.long, device=self.device)
+
+ self.hand_indices = to_torch(self.hand_indices, dtype=torch.long, device=self.device)
+ self.object_indices = to_torch(self.object_indices, dtype=torch.long, device=self.device)
+ self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device)
+ self.table_indices = to_torch(self.table_indices, dtype=torch.long, device=self.device)
+ self.block_indices = to_torch(self.block_indices, dtype=torch.long, device=self.device)
+
+ def compute_reward(self, actions):
+ """
+        Compute the reward of all environments. The core function is compute_hand_reward(
+ self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes,
+ self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.block_right_handle_pos, self.block_left_handle_pos,
+ self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos,
+ self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos, self.left_hand_th_pos,
+ self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale,
+ self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty,
+ self.max_consecutive_successes, self.av_factor, (self.object_type == "pen")
+ )
+        , which is described in detail in its own docstring
+
+ Args:
+            actions (tensor): Actions of the agents in all environments
+ """
+ self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[
+ :], self.consecutive_successes[
+ :] = compute_hand_reward(
+ self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes,
+ self.consecutive_successes,
+ self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot,
+ self.block_right_handle_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos,
+ self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos,
+ self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale,
+ self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty,
+ self.max_consecutive_successes, self.av_factor, (self.object_type == "pen")
+ )
+
+ self.extras['successes'] = self.successes
+ self.extras['consecutive_successes'] = self.consecutive_successes
+
+ if self.print_success_stat:
+ self.total_resets = self.total_resets + self.reset_buf.sum()
+ direct_average_successes = self.total_successes + self.successes.sum()
+ self.total_successes = self.total_successes + (self.successes * self.reset_buf).sum()
+
+ # The direct average shows the overall result more quickly, but slightly undershoots long term
+ # policy performance.
+ print("Direct average consecutive successes = {:.1f}".format(
+ direct_average_successes / (self.total_resets + self.num_envs)))
+ if self.total_resets > 0:
+ print("Post-Reset average consecutive successes = {:.1f}".format(
+ self.total_successes / self.total_resets))
+
+ def compute_observations(self):
+ """
+        Compute the observations of all environments. The core function is self.compute_full_state(),
+        which is described in detail in its own docstring
+
+ """
+ self.gym.refresh_dof_state_tensor(self.sim)
+ self.gym.refresh_actor_root_state_tensor(self.sim)
+ self.gym.refresh_rigid_body_state_tensor(self.sim)
+ self.gym.refresh_force_sensor_tensor(self.sim)
+ self.gym.refresh_dof_force_tensor(self.sim)
+
+ if self.obs_type in ["point_cloud"]:
+ self.gym.render_all_camera_sensors(self.sim)
+ self.gym.start_access_image_tensors(self.sim)
+
+ self.object_pose = self.root_state_tensor[self.object_indices, 0:7]
+ self.object_pos = self.root_state_tensor[self.object_indices, 0:3]
+ self.object_rot = self.root_state_tensor[self.object_indices, 3:7]
+ self.object_linvel = self.root_state_tensor[self.object_indices, 7:10]
+ self.object_angvel = self.root_state_tensor[self.object_indices, 10:13]
+
+ self.block_right_handle_pos = self.rigid_body_states[:, 16 + 1, 0:3]
+ self.block_right_handle_rot = self.rigid_body_states[:, 16 + 1, 3:7]
+ self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
+ to_torch([0, 1, 0],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.)
+ self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
+ to_torch([1, 0, 0],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
+ self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
+ to_torch([0, 0, 1],
+ device=self.device).repeat(
+ self.num_envs, 1) * 0.0)
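+ # Note: the local offsets applied above are all scaled by 0, so block_right_handle_pos is effectively the raw position of rigid body 16 + 1.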
+
+
+ self.right_hand_pos = self.rigid_body_states[:, 0, 0:3]
+ self.right_hand_rot = self.rigid_body_states[:, 0, 3:7]
+ self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.08)
+ self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
+ to_torch([0, 1, 0], device=self.device).repeat(
+ self.num_envs, 1) * -0.02)
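+ # The palm reference point is shifted 0.08 m along the hand's local z-axis and -0.02 m along its local y-axis.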
+
+ # right hand finger
+ self.right_hand_th_pos = self.rigid_body_states[:, 3, 0:3]
+ self.right_hand_th_rot = self.rigid_body_states[:, 3, 3:7]
+ self.right_hand_th_pos = self.right_hand_th_pos + quat_apply(self.right_hand_th_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_ff_pos = self.rigid_body_states[:, 6, 0:3]
+ self.right_hand_ff_rot = self.rigid_body_states[:, 6, 3:7]
+ self.right_hand_ff_pos = self.right_hand_ff_pos + quat_apply(self.right_hand_ff_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_mf_pos = self.rigid_body_states[:, 9, 0:3]
+ self.right_hand_mf_rot = self.rigid_body_states[:, 9, 3:7]
+ self.right_hand_mf_pos = self.right_hand_mf_pos + quat_apply(self.right_hand_mf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_rf_pos = self.rigid_body_states[:, 12, 0:3]
+ self.right_hand_rf_rot = self.rigid_body_states[:, 12, 3:7]
+ self.right_hand_rf_pos = self.right_hand_rf_pos + quat_apply(self.right_hand_rf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
+ self.right_hand_lf_pos = self.rigid_body_states[:, 15, 0:3]
+ self.right_hand_lf_rot = self.rigid_body_states[:, 15, 3:7]
+ self.right_hand_lf_pos = self.right_hand_lf_pos + quat_apply(self.right_hand_lf_rot,
+ to_torch([0, 0, 1], device=self.device).repeat(
+ self.num_envs, 1) * 0.02)
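+ # Each fingertip reference point is pushed 0.02 m along the corresponding link's local z-axis.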
+
+ self.goal_pos = to_torch([-0.3, 0, 0.6], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
+ self.goal_rot = self.goal_states[:, 3:7]
+
+ self.fingertip_state = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:13]
+ self.fingertip_pos = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:3]
+
+ if self.obs_type == "full_state":
+ self.compute_full_state()
+ elif self.obs_type == "point_cloud":
+ self.compute_point_cloud_observation()
+
+ if self.asymmetric_obs:
+ self.compute_full_state(True)
+
+ def compute_full_state(self, asymm_obs=False):
+ """
+ Compute the observations of all environments. The observation is composed of three parts:
+ the hand state values, and the information of the object and the target. The hand state
+ includes joint and fingertip positions, velocities, and force information. The detailed
+ 157-dimensional observation space is shown below:
+
+ Index Description
+ 0 - 14 right QbSoftHand dof position
+ 15 - 29 right QbSoftHand dof velocity
+ 30 - 44 right QbSoftHand dof force
+ 45 - 109 right QbSoftHand fingertip pose, linear velocity, angular velocity (5 x 13)
+ 110 - 112 right QbSoftHand base position
+ 113 - 115 right QbSoftHand base rotation
+ 116 - 136 right QbSoftHand actions
+ 137 - 143 object pose
+ 144 - 146 object linear velocity
+ 147 - 149 object angular velocity
+ 150 - 152 block right handle position
+ 153 - 156 block right handle rotation
+ """
+ num_ft_states = 13 * int(self.num_fingertips) # 65
+
+ self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos,
+ self.qbsofthand_dof_lower_limits,
+ self.qbsofthand_dof_upper_limits)
+ self.obs_buf[:,
+ self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel
+ self.obs_buf[:,
+ 2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
+ :, :15]
+
+ fingertip_obs_start = 45
+ self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(
+ self.num_envs, num_ft_states)
+
+ hand_pose_start = fingertip_obs_start + 65
+ self.obs_buf[:, hand_pose_start:hand_pose_start + 3] = self.right_hand_pos
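+ # get_euler_xyz returns a (roll, pitch, yaw) tuple; each component fills one observation channel.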
+ self.obs_buf[:, hand_pose_start + 3:hand_pose_start + 4] = \
+ get_euler_xyz(self.hand_orientations[self.hand_indices, :])[0].unsqueeze(-1)
+ self.obs_buf[:, hand_pose_start + 4:hand_pose_start + 5] = \
+ get_euler_xyz(self.hand_orientations[self.hand_indices, :])[1].unsqueeze(-1)
+ self.obs_buf[:, hand_pose_start + 5:hand_pose_start + 6] = \
+ get_euler_xyz(self.hand_orientations[self.hand_indices, :])[2].unsqueeze(-1)
+
+ action_obs_start = hand_pose_start + 6
+ self.obs_buf[:, action_obs_start:action_obs_start + 21] = self.actions[:, :21]
+
+ obj_obs_start = action_obs_start + 21  # 137
+ self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose
+ self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
+ self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
+ self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
+ self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot
+
+ def compute_point_cloud_observation(self, collect_demonstration=False):
+ """
+ Compute the observations of all environments. The observation is composed of three parts:
+ the state values of the left and right hands, and the information of the object and the target.
+ The state values of the left and right hands are the same for each task, including hand
+ joint and finger positions, velocities, and force information. The detailed observation
+ space is shown below:
+
+ Index Description
+ 0 - 23 right shadow hand dof position
+ 24 - 47 right shadow hand dof velocity
+ 48 - 71 right shadow hand dof force
+ 72 - 136 right shadow hand fingertip pose, linear velocity, angular velocity (5 x 13)
+ 137 - 166 right shadow hand fingertip force, torque (5 x 6)
+ 167 - 169 right shadow hand base position
+ 170 - 172 right shadow hand base rotation
+ 173 - 198 right shadow hand actions
+ 199 - 222 left shadow hand dof position
+ 223 - 246 left shadow hand dof velocity
+ 247 - 270 left shadow hand dof force
+ 271 - 335 left shadow hand fingertip pose, linear velocity, angular velocity (5 x 13)
+ 336 - 365 left shadow hand fingertip force, torque (5 x 6)
+ 366 - 368 left shadow hand base position
+ 369 - 371 left shadow hand base rotation
+ 372 - 397 left shadow hand actions
+ 398 - 404 object pose
+ 405 - 407 object linear velocity
+ 408 - 410 object angular velocity
+ 411 - 413 block right handle position
+ 414 - 417 block right handle rotation
+ 418 - 420 block left handle position
+ 421 - 424 block left handle rotation
+ """
+ num_ft_states = 13 * int(self.num_fingertips / 2) # 65
+ num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30
+
+ self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos,
+ self.qbsofthand_dof_lower_limits,
+ self.qbsofthand_dof_upper_limits)
+ self.obs_buf[:,
+ self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel
+ self.obs_buf[:,
+ 2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
+ :, :24]
+
+ fingertip_obs_start = 72
+ self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(
+ self.num_envs, num_ft_states)
+ self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states +
+ num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
+ :,
+ :30]
+
+ hand_pose_start = fingertip_obs_start + 65
+ self.obs_buf[:, hand_pose_start:hand_pose_start + 3] = self.right_hand_pos
+ self.obs_buf[:, hand_pose_start + 3:hand_pose_start + 4] = \
+ get_euler_xyz(self.hand_orientations[self.hand_indices, :])[0].unsqueeze(-1)
+ self.obs_buf[:, hand_pose_start + 4:hand_pose_start + 5] = \
+ get_euler_xyz(self.hand_orientations[self.hand_indices, :])[1].unsqueeze(-1)
+ self.obs_buf[:, hand_pose_start + 5:hand_pose_start + 6] = \
+ get_euler_xyz(self.hand_orientations[self.hand_indices, :])[2].unsqueeze(-1)
+
+ action_obs_start = hand_pose_start + 6
+ self.obs_buf[:, action_obs_start:action_obs_start + 15] = self.actions[:, :15]
+
+ point_clouds = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device)
+
+ if self.camera_debug:
+ import matplotlib.pyplot as plt
+ self.camera_rgba_debug_fig = plt.figure("CAMERA_RGBD_DEBUG")
+ camera_rgba_image = self.camera_visulization(is_depth_image=False)
+ plt.imshow(camera_rgba_image)
+ plt.pause(1e-9)
+
+ for i in range(self.num_envs):
+ # This is just an example; in practice, it is better not to move tensors from the GPU to the CPU here.
+ points = depth_image_to_point_cloud_GPU(self.camera_tensors[i], self.camera_view_matrixs[i],
+ self.camera_proj_matrixs[i], self.camera_u2, self.camera_v2,
+ self.camera_props.width, self.camera_props.height, 10, self.device)
+
+ if points.shape[0] > 0:
+ selected_points = self.sample_points(points, sample_num=self.pointCloudDownsampleNum,
+ sample_mathed='random')
+ else:
+ selected_points = torch.zeros((self.pointCloudDownsampleNum, 3), device=self.device)  # per-env placeholder when no valid points are found
+
+ point_clouds[i] = selected_points
+
+ if self.pointCloudVisualizer is not None:
+ import open3d as o3d
+ points = point_clouds[0, :, :3].cpu().numpy()
+ # colors = plt.get_cmap()(point_clouds[0, :, 3].cpu().numpy())
+ self.o3d_pc.points = o3d.utility.Vector3dVector(points)
+ # self.o3d_pc.colors = o3d.utility.Vector3dVector(colors[..., :3])
+
+ if not self.pointCloudVisualizerInitialized:
+ self.pointCloudVisualizer.add_geometry(self.o3d_pc)
+ self.pointCloudVisualizerInitialized = True
+ else:
+ self.pointCloudVisualizer.update(self.o3d_pc)
+
+ self.gym.end_access_image_tensors(self.sim)
+ point_clouds -= self.env_origin.view(self.num_envs, 1, 3)
+
+ point_clouds_start = obj_obs_start + 27
+ self.obs_buf[:, point_clouds_start:].copy_(point_clouds.view(self.num_envs, self.pointCloudDownsampleNum * 3))
+
+ def reset_target_pose(self, env_ids, apply_reset=False):
+ """
+ Reset and randomize the goal pose
+
+ Args:
+ env_ids (tensor): The indices of the environments whose goal poses need to be reset
+
+ apply_reset (bool): Whether to apply the goal reset directly here, usually used
+ when the same task needs to complete multiple goals
+
+ """
+ rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device)
+
+ new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids],
+ self.y_unit_tensor[env_ids])
+
+ self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3]
+ # self.goal_states[env_ids, 1] -= 0.25
+ self.goal_states[env_ids, 2] += 10.0
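+ # The goal z-position is raised by 10 m, presumably to keep the unused goal marker out of the workspace.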
+
+ # self.goal_states[env_ids, 3:7] = new_rot
+ self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids,
+ 0:3] + self.goal_displacement_tensor
+ self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7]
+ self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like(
+ self.root_state_tensor[self.goal_object_indices[env_ids], 7:13])
+
+ if apply_reset:
+ goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32)
+ self.gym.set_actor_root_state_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.root_state_tensor),
+ gymtorch.unwrap_tensor(goal_object_indices), len(env_ids))
+ self.reset_goal_buf[env_ids] = 0
+
+ def reset_idx(self, env_ids, goal_env_ids):
+ """
+ Reset and randomize the environment
+
+ Args:
+ env_ids (tensor): The indices of the environments that need to be reset
+
+ goal_env_ids (tensor): The indices of the environments where only the goal needs to be reset
+
+ """
+ # randomization can happen only at reset time, since it can reset actor positions on GPU
+ if self.randomize:
+ self.apply_randomizations(self.randomization_params)
+
+ # generate random values
+ rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), self.num_qbsofthand_dofs * 2 + 5), device=self.device)
+
+ # randomize start object poses
+ self.reset_target_pose(env_ids)
+
+ # reset object
+ self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone()
+
+ new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids],
+ self.y_unit_tensor[env_ids])
+ if self.object_type == "pen":
+ rand_angle_y = torch.tensor(0.3)
+ new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y,
+ self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids],
+ self.z_unit_tensor[env_ids])
+
+ # self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot
+ self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like(
+ self.root_state_tensor[self.object_indices[env_ids], 7:13])
+
+ object_indices = torch.unique(torch.cat([self.object_indices[env_ids],
+ self.goal_object_indices[env_ids],
+ self.goal_object_indices[goal_env_ids]]).to(torch.int32))
+ # self.gym.set_actor_root_state_tensor_indexed(self.sim,
+ # gymtorch.unwrap_tensor(self.root_state_tensor),
+ # gymtorch.unwrap_tensor(object_indices), len(object_indices))
+
+ # reset shadow hand
+ delta_max = self.qbsofthand_dof_upper_limits - self.qbsofthand_dof_default_pos
+ delta_min = self.qbsofthand_dof_lower_limits - self.qbsofthand_dof_default_pos
+ rand_delta = delta_min + (delta_max - delta_min) * rand_floats[:, 5:5 + self.num_qbsofthand_dofs]
+
+ pos = self.qbsofthand_default_dof_pos + self.reset_dof_pos_noise * rand_delta
+
+ self.qbsofthand_dof_pos[env_ids, :] = pos
+
+ self.qbsofthand_dof_vel[env_ids, :] = self.qbsofthand_dof_default_vel + \
+ self.reset_dof_vel_noise * rand_floats[:,
+ 5 + self.num_qbsofthand_dofs:5 + self.num_qbsofthand_dofs * 2]
+
+ self.prev_targets[env_ids, :self.num_qbsofthand_dofs] = pos
+ self.cur_targets[env_ids, :self.num_qbsofthand_dofs] = pos
+
+ hand_indices = self.hand_indices[env_ids].to(torch.int32)
+
+ all_hand_indices = hand_indices.to(torch.int32)
+
+ self.gym.set_dof_position_target_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.prev_targets),
+ gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
+
+ all_indices = torch.unique(torch.cat([all_hand_indices,
+ self.object_indices[env_ids],
+ self.table_indices[env_ids],
+ self.block_indices[env_ids]]).to(torch.int32))
+
+ self.hand_positions[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 0:3]
+ self.hand_orientations[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 3:7]
+
+ self.gym.set_dof_state_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.dof_state),
+ gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
+
+ self.gym.set_actor_root_state_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.root_state_tensor),
+ gymtorch.unwrap_tensor(all_indices), len(all_indices))
+ self.progress_buf[env_ids] = 0
+ self.reset_buf[env_ids] = 0
+ self.successes[env_ids] = 0
+
+ def pre_physics_step(self, actions):
+ """
+ The pre-processing of the physics step. Determine whether the environments need to be reset,
+ and calculate the next movement of the hand from the given actions. The 21-dimensional
+ action space is shown below:
+
+ Index Description
+ 0 - 2 right hand base translation force
+ 3 - 5 right hand base torque
+ 6 - 20 right QbSoftHand actuated joint position targets
+
+ Args:
+ actions (tensor): Actions of the agents in all environments
+ """
+ env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
+ goal_env_ids = self.reset_goal_buf.nonzero(as_tuple=False).squeeze(-1)
+
+ # if only goals need reset, then call set API
+ if len(goal_env_ids) > 0 and len(env_ids) == 0:
+ self.reset_target_pose(goal_env_ids, apply_reset=True)
+ # if goals need reset in addition to other envs, call set API in reset()
+ elif len(goal_env_ids) > 0:
+ self.reset_target_pose(goal_env_ids)
+
+ if len(env_ids) > 0:
+ self.reset_idx(env_ids, goal_env_ids)
+
+ self.actions = actions.clone().to(self.device)
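+ # Action layout (21-dim): actions[:, 0:3] base translation force, actions[:, 3:6] base torque,
+ # actions[:, 6:21] position targets for the 15 actuated QbSoftHand joints.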
+ if self.use_relative_control:
+ targets = self.prev_targets[:,
+ self.actuated_dof_indices] + self.qbsofthand_dof_speed_scale * self.dt * self.actions
+ self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets,
+ self.qbsofthand_dof_lower_limits[
+ self.actuated_dof_indices],
+ self.qbsofthand_dof_upper_limits[
+ self.actuated_dof_indices])
+ else:
+ self.cur_targets[:, self.actuated_dof_indices] = scale(self.actions[:, 6:21],
+ self.qbsofthand_dof_lower_limits[
+ self.actuated_dof_indices],
+ self.qbsofthand_dof_upper_limits[
+ self.actuated_dof_indices])
+ self.cur_targets[:, self.actuated_dof_indices] = self.act_moving_average * self.cur_targets[:,
+ self.actuated_dof_indices] + (
+ 1.0 - self.act_moving_average) * self.prev_targets[
+ :,
+ self.actuated_dof_indices]
+ self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(
+ self.cur_targets[:, self.actuated_dof_indices],
+ self.qbsofthand_dof_lower_limits[self.actuated_dof_indices],
+ self.qbsofthand_dof_upper_limits[self.actuated_dof_indices])
+
+ self.apply_forces[:, 0, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000
+ self.apply_torque[:, 0, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000
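+ # The factors 100000 and 1000 scale the normalized actions into forces/torques; transition_scale and orientation_scale are set in the task config.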
+
+ self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.apply_forces),
+ gymtorch.unwrap_tensor(self.apply_torque), gymapi.ENV_SPACE)
+
+ self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices]
+ all_hand_indices = self.hand_indices.to(torch.int32)
+ self.gym.set_dof_position_target_tensor_indexed(self.sim,
+ gymtorch.unwrap_tensor(self.prev_targets),
+ gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
+
+ def post_physics_step(self):
+ """
+ The post-processing of the physics step. Compute the observations and rewards, and visualize
+ auxiliary lines for debugging when needed
+
+ """
+ self.progress_buf += 1
+ self.randomize_buf += 1
+
+ self.compute_observations()
+ self.compute_reward(self.actions)
+
+ if self.viewer and self.debug_viz:
+ # draw axes on target object
+ self.gym.clear_lines(self.viewer)
+ self.gym.refresh_rigid_body_state_tensor(self.sim)
+
+ for i in range(self.num_envs):
+ self.add_debug_lines(self.envs[i], self.block_right_handle_pos[i], self.block_right_handle_rot[i])
+ # self.add_debug_lines(self.envs[i], self.goal_pos[i], self.block_left_handle_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_ff_pos[i], self.right_hand_ff_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_mf_pos[i], self.right_hand_mf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_rf_pos[i], self.right_hand_rf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_lf_pos[i], self.right_hand_lf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.right_hand_th_pos[i], self.right_hand_th_rot[i])
+
+ # self.add_debug_lines(self.envs[i], self.left_hand_ff_pos[i], self.right_hand_ff_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_mf_pos[i], self.right_hand_mf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_rf_pos[i], self.right_hand_rf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_lf_pos[i], self.right_hand_lf_rot[i])
+ # self.add_debug_lines(self.envs[i], self.left_hand_th_pos[i], self.right_hand_th_rot[i])
+
+ def add_debug_lines(self, env, pos, rot):
+ posx = (pos + quat_apply(rot, to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy()
+ posy = (pos + quat_apply(rot, to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy()
+ posz = (pos + quat_apply(rot, to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy()
+
+ p0 = pos.cpu().numpy()
+ self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posx[0], posx[1], posx[2]], [0.85, 0.1, 0.1])
+ self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posy[0], posy[1], posy[2]], [0.1, 0.85, 0.1])
+ self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posz[0], posz[1], posz[2]], [0.1, 0.1, 0.85])
+
+ def rand_row(self, tensor, dim_needed):
+ row_total = tensor.shape[0]
+ return tensor[torch.randint(low=0, high=row_total, size=(dim_needed,)), :]
+
+ def sample_points(self, points, sample_num=1000, sample_mathed='furthest'):
+ eff_points = points[points[:, 2] > 0.04]
+ if eff_points.shape[0] < sample_num:
+ eff_points = points
+ if sample_mathed == 'random':
+ sampled_points = self.rand_row(eff_points, sample_num)
+ elif sample_mathed == 'furthest':
+ sampled_points_id = pointnet2_utils.furthest_point_sample(eff_points.reshape(1, *eff_points.shape),
+ sample_num)
+ sampled_points = eff_points.index_select(0, sampled_points_id[0].long())
+ return sampled_points
+
+ def camera_visulization(self, is_depth_image=False):
+ if is_depth_image:
+ camera_depth_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0],
+ gymapi.IMAGE_DEPTH)
+ torch_depth_tensor = gymtorch.wrap_tensor(camera_depth_tensor)
+ torch_depth_tensor = torch.clamp(torch_depth_tensor, -1, 1)
+ torch_depth_tensor = scale(torch_depth_tensor, to_torch([0], dtype=torch.float, device=self.device),
+ to_torch([256], dtype=torch.float, device=self.device))
+ camera_image = torch_depth_tensor.cpu().numpy()
+ camera_image = Im.fromarray(camera_image)
+
+ else:
+ camera_rgba_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0],
+ gymapi.IMAGE_COLOR)
+ torch_rgba_tensor = gymtorch.wrap_tensor(camera_rgba_tensor)
+ camera_image = torch_rgba_tensor.cpu().numpy()
+ camera_image = Im.fromarray(camera_image)
+
+ return camera_image
+
+
+#####################################################################
+###=========================jit functions=========================###
+#####################################################################
+
+@torch.jit.script
+def depth_image_to_point_cloud_GPU(camera_tensor, camera_view_matrix_inv, camera_proj_matrix, u, v, width: float,
+ height: float, depth_bar: float, device: torch.device):
+ # time1 = time.time()
+ depth_buffer = camera_tensor.to(device)
+
+ # Get the camera view matrix and invert it to transform points from camera to world space
+ vinv = camera_view_matrix_inv
+
+ # Get the camera projection matrix and get the necessary scaling
+ # coefficients for deprojection
+
+ proj = camera_proj_matrix
+ fu = 2 / proj[0, 0]
+ fv = 2 / proj[1, 1]
+
+ centerU = width / 2
+ centerV = height / 2
+
+ Z = depth_buffer
+ X = -(u - centerU) / width * Z * fu
+ Y = (v - centerV) / height * Z * fv
+
+ Z = Z.view(-1)
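+ # Isaac Gym depth values are negative distances along the camera axis, so keep only points within depth_bar metres.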
+ valid = Z > -depth_bar
+ X = X.view(-1)
+ Y = Y.view(-1)
+
+ position = torch.vstack((X, Y, Z, torch.ones(len(X), device=device)))[:, valid]
+ position = position.permute(1, 0)
+ position = position @ vinv
+
+ points = position[:, 0:3]
+
+ return points
+
+
+@torch.jit.script
+def compute_hand_reward(
+ rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes,
+ max_episode_length: float, object_pos, object_rot, target_pos, target_rot,
+ block_right_handle_pos, right_hand_pos, right_hand_ff_pos, right_hand_mf_pos,
+ right_hand_rf_pos, right_hand_lf_pos, right_hand_th_pos,
+ dist_reward_scale: float, rot_reward_scale: float, rot_eps: float,
+ actions, action_penalty_scale: float,
+ success_tolerance: float, reach_goal_bonus: float, fall_dist: float,
+ fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool
+):
+ """
+ Compute the reward of all environments.
+
+ Args:
+ rew_buf (tensor): The reward buffer of all environments at this time
+
+ reset_buf (tensor): The reset buffer of all environments at this time
+
+ reset_goal_buf (tensor): The only-goal reset buffer of all environments at this time
+
+ progress_buf (tensor): The progress buffer of all environments at this time
+
+ successes (tensor): The successes buffer of all environments at this time
+
+ consecutive_successes (tensor): The consecutive successes buffer of all environments at this time
+
+ max_episode_length (float): The max episode length in this environment
+
+ object_pos (tensor): The position of the object
+
+ object_rot (tensor): The rotation of the object
+
+ target_pos (tensor): The position of the target
+
+ target_rot (tensor): The rotation of the target
+
+ block_right_handle_pos (tensor): The position of the right block handle
+
+ right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, right_hand_th_pos (tensor): The positions of the five fingertips
+ of the right hand
+
+ dist_reward_scale (float): The scale of the distance reward
+
+ rot_reward_scale (float): The scale of the rotation reward
+
+ rot_eps (float): The epsilon used in the rotation distance calculation
+
+ actions (tensor): The action buffer of all environments at this time
+
+ action_penalty_scale (float): The scale of the action penalty reward
+
+ success_tolerance (float): The tolerance used to determine success
+
+ reach_goal_bonus (float): The reward given when the object reaches the goal
+
+ fall_dist (float): The distance from the hand beyond which the object is judged to have fallen
+
+ fall_penalty (float): The penalty applied when the object falls
+
+ max_consecutive_successes (int): The maximum number of consecutive successes
+
+ av_factor (float): The averaging factor used to calculate the consecutive successes
+
+ ignore_z_rot (bool): Whether to ignore rotation about the z-axis, which is usually needed
+ for some specific objects (e.g. pen)
+ """
+ # Distance from the hand to the object
+ right_goal_dist = torch.norm(target_pos - block_right_handle_pos, p=2, dim=-1)
+ # goal_dist = target_pos[:, 2] - object_pos[:, 2]
+
+ right_hand_dist = torch.norm(block_right_handle_pos - right_hand_pos, p=2, dim=-1)
+
+ right_hand_finger_dist = (torch.norm(block_right_handle_pos - right_hand_ff_pos, p=2, dim=-1) + torch.norm(
+ block_right_handle_pos - right_hand_mf_pos, p=2, dim=-1)
+ + torch.norm(block_right_handle_pos - right_hand_rf_pos, p=2, dim=-1) + torch.norm(
+ block_right_handle_pos - right_hand_lf_pos, p=2, dim=-1)
+ + torch.norm(block_right_handle_pos - right_hand_th_pos, p=2, dim=-1))
+ # Orientation alignment for the cube in hand and goal cube
+ # quat_diff = quat_mul(object_rot, quat_conjugate(target_rot))
+ # rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0))
+
+ right_hand_dist_rew = torch.exp(-10 * right_hand_finger_dist)
+
+ # rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale
+
+ action_penalty = torch.sum(actions ** 2, dim=-1)
+
+ # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty
+ # reward = torch.exp(-0.05*(up_rew * dist_reward_scale)) + torch.exp(-0.05*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.05*(left_hand_dist_rew * dist_reward_scale))
+ # up_rew = torch.zeros_like(right_hand_dist_rew)
+ # up_rew = torch.where(right_hand_finger_dist < 0.6,
+ # torch.where(left_hand_finger_dist < 0.4,
+ up_rew = torch.zeros_like(right_hand_dist_rew)
+ up_rew = torch.exp(-10 * torch.norm(block_right_handle_pos, p=2, dim=-1)) * 2
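+ # up_rew decays exponentially with the norm of block_right_handle_pos, rewarding the handle being close to the origin of its frame.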
+ # up_rew = torch.where(right_hand_finger_dist <= 0.3, torch.norm(bottle_cap_up - bottle_pos, p=2, dim=-1) * 30, up_rew)
+
+ # reward = torch.exp(-0.1*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.1*(left_hand_dist_rew * dist_reward_scale))
+ reward = right_hand_dist_rew + up_rew
+
+ resets = torch.where(right_hand_dist_rew <= 0, torch.ones_like(reset_buf), reset_buf)
+ resets = torch.where(right_hand_finger_dist >= 1.5, torch.ones_like(resets), resets)
+
+ # Find out which envs hit the goal and update successes count
+ successes = torch.where(successes == 0,
+ torch.where(torch.norm(block_right_handle_pos, p=2, dim=-1) < 0.2,
+ torch.ones_like(successes), successes), successes)
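+ # An environment is marked successful once the block handle comes within 0.2 of the origin (the same quantity used for up_rew).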
+
+ resets = torch.where(progress_buf >= max_episode_length, torch.ones_like(resets), resets)
+
+ goal_resets = torch.zeros_like(resets)
+
+ num_resets = torch.sum(resets)
+ finished_cons_successes = torch.sum(successes * resets.float())
+
+ cons_successes = torch.where(resets > 0, successes * resets, consecutive_successes).mean()
+
+ return reward, resets, goal_resets, progress_buf, successes, cons_successes
+
+
+@torch.jit.script
+def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
+ return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor),
+ quat_from_angle_axis(rand1 * np.pi, y_unit_tensor))
+
+
+@torch.jit.script
+def randomize_rotation_pen(rand0, rand1, max_angle, x_unit_tensor, y_unit_tensor, z_unit_tensor):
+ rot = quat_mul(quat_from_angle_axis(0.5 * np.pi + rand0 * max_angle, x_unit_tensor),
+ quat_from_angle_axis(rand0 * np.pi, z_unit_tensor))
+ return rot
diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py
index 601eceb45..f5476e50f 100644
--- a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py
+++ b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py
@@ -696,7 +696,7 @@ def compute_observations(self):
self.object_linvel = self.root_state_tensor[self.object_indices, 7:10]
self.object_angvel = self.root_state_tensor[self.object_indices, 10:13]
- self.block_right_handle_pos = self.rigid_body_states[:, 26 * 2 + 1, 0:3]
+ self.block_right_handle_pos = self.rigid_body_states[:, 26 * 2 + 1, 0:3] # (256, 58, 13)
self.block_right_handle_rot = self.rigid_body_states[:, 26 * 2 + 1, 3:7]
self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot,
to_torch([0, 1, 0],
diff --git a/rofunc/simulator/assets/mjcf/qbhand_no_virtual_left.xml b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_left.xml
new file mode 100644
index 000000000..d4683bb66
--- /dev/null
+++ b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_left.xml
@@ -0,0 +1,432 @@
diff --git a/rofunc/simulator/assets/mjcf/qbhand_no_virtual_right.xml b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_right.xml
new file mode 100644
index 000000000..7f7650963
--- /dev/null
+++ b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_right.xml
@@ -0,0 +1,380 @@
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBlockStack_23-12-26_15-04-30-752629/events.out.tfevents.1703574270.rofunc.7656.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBlockStack_23-12-26_15-04-30-752629/events.out.tfevents.1703574270.rofunc.7656.0
new file mode 100644
index 000000000..56aa22335
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBlockStack_23-12-26_15-04-30-752629/events.out.tfevents.1703574270.rofunc.7656.0 differ
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBottleCap_23-12-24_14-50-29-312650/events.out.tfevents.1703400629.rofunc.2157326.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBottleCap_23-12-24_14-50-29-312650/events.out.tfevents.1703400629.rofunc.2157326.0
new file mode 100644
index 000000000..c35739e5b
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBottleCap_23-12-24_14-50-29-312650/events.out.tfevents.1703400629.rofunc.2157326.0 differ
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandCatchAbreast_23-12-24_14-51-22-353264/events.out.tfevents.1703400682.rofunc.2157554.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandCatchAbreast_23-12-24_14-51-22-353264/events.out.tfevents.1703400682.rofunc.2157554.0
new file mode 100644
index 000000000..3805346fd
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandCatchAbreast_23-12-24_14-51-22-353264/events.out.tfevents.1703400682.rofunc.2157554.0 differ
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseInward_23-12-26_15-05-48-068396/events.out.tfevents.1703574348.rofunc.7930.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseInward_23-12-26_15-05-48-068396/events.out.tfevents.1703574348.rofunc.7930.0
new file mode 100644
index 000000000..861f38e04
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseInward_23-12-26_15-05-48-068396/events.out.tfevents.1703574348.rofunc.7930.0 differ
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseOutward_23-12-27_14-43-36-754709/events.out.tfevents.1703659416.rofunc.128859.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseOutward_23-12-27_14-43-36-754709/events.out.tfevents.1703659416.rofunc.128859.0
new file mode 100644
index 000000000..e94d835d6
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseOutward_23-12-27_14-43-36-754709/events.out.tfevents.1703659416.rofunc.128859.0 differ
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandOver_23-12-24_11-14-19-985779/events.out.tfevents.1703387659.rofunc.2144737.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandOver_23-12-24_11-14-19-985779/events.out.tfevents.1703387659.rofunc.2144737.0
new file mode 100644
index 000000000..b8869aa25
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandOver_23-12-24_11-14-19-985779/events.out.tfevents.1703387659.rofunc.2144737.0 differ
diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandReOrientation_23-12-27_00-45-19-364801/events.out.tfevents.1703609119.rofunc.62187.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandReOrientation_23-12-27_00-45-19-364801/events.out.tfevents.1703609119.rofunc.62187.0
new file mode 100644
index 000000000..7407138b2
Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandReOrientation_23-12-27_00-45-19-364801/events.out.tfevents.1703609119.rofunc.62187.0 differ