diff --git a/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py b/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py index 8614b6eeb..92b976a5b 100644 --- a/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py +++ b/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py @@ -97,12 +97,13 @@ def inference(custom_args): # BiShadowHandGraspAndPlace, BiShadowHandLiftUnderarm, BiShadowHandPen, BiShadowHandPointCloud, # BiShadowHandPushBlock, BiShadowHandReOrientation, BiShadowHandScissors, BiShadowHandSwingCup, # BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm - parser.add_argument("--task", type=str, default="BiShadowHandSwitch") + # QbSoftHandGrasp + parser.add_argument("--task", type=str, default="BiQbSoftHandGraspAndPlace") parser.add_argument("--agent", type=str, default="ppo") # Available agents: ppo, sac, td3, a2c parser.add_argument("--num_envs", type=int, default=256) parser.add_argument("--sim_device", type=int, default=0) parser.add_argument("--rl_device", type=int, default=gpu_id) - parser.add_argument("--headless", type=str, default="True") + parser.add_argument("--headless", type=str, default="False") parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument") parser.add_argument("--ckpt_path", type=str, default=None) custom_args = parser.parse_args() diff --git a/rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml b/rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml new file mode 100644 index 000000000..0b541fb4f --- /dev/null +++ b/rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml @@ -0,0 +1,178 @@ +name: BiShadowHandGraspAndPlace + +physics_engine: ${..physics_engine} + +# if given, will override the device setting in gym. +env: + env_name: "bi_qbsofthand_grasp_and_place" + numEnvs: ${resolve_default:4096,${...num_envs}} + envSpacing: 1.5 + episodeLength: 500 + enableDebugVis: False + cameraDebug: True + pointCloudDebug: True + aggregateMode: 1 + + stiffnessScale: 1.0 + forceLimitScale: 1.0 + useRelativeControl: False + dofSpeedScale: 20.0 + actionsMovingAverage: 1.0 + controlFrequencyInv: 1 # 60 Hz + + startPositionNoise: 0.0 + startRotationNoise: 0.0 + + resetPositionNoise: 0.0 + resetRotationNoise: 0.0 + resetDofPosRandomInterval: 0.0 + resetDofVelRandomInterval: 0.0 + + distRewardScale: 20 + transition_scale: 0.5 + orientation_scale: 0.1 + rotRewardScale: 1.0 + rotEps: 0.1 + actionPenaltyScale: -0.0002 + reachGoalBonus: 250 + fallDistance: 0.4 + fallPenalty: 0.0 + + objectType: "pot" # can be block, egg or pen + observationType: "full_state" # point_cloud or full_state + handAgentIndex: "[[0, 1, 2, 3, 4, 5]]" + asymmetric_observations: False + successTolerance: 0.1 + printNumSuccesses: False + maxConsecutiveSuccesses: 0 + + asset: + assetFileNameBlock: "urdf/objects/cube_multicolor.urdf" + assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml" + assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml" + +task: + randomize: False + randomization_params: + frequency: 600 # Define how many simulation steps between generating new randomizations + observations: + range: [0, .002] # range for the white noise + range_correlated: [0, .001 ] # range for correlated noise, refreshed with freq `frequency` + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps + schedule_steps: 40000 + actions: + range: [0., .05] + range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency` + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will linearly interpolate between no rand and max rand + schedule_steps: 40000 + sim_params: + gravity: + range: [0, 0.4] + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will linearly interpolate between no rand and max rand + schedule_steps: 40000 + actor_params: + hand: + color: True + tendon_properties: + damping: + range: [0.3, 3.0] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + stiffness: + range: [0.75, 1.5] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + dof_properties: + damping: + range: [0.3, 3.0] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + stiffness: + range: [0.75, 1.5] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + lower: + range: [0, 0.01] + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + upper: + range: [0, 0.01] + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_body_properties: + mass: + range: [0.5, 1.5] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_shape_properties: + friction: + num_buckets: 250 + range: [0.7, 1.3] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + object: + scale: + range: [0.95, 1.05] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by ``min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_body_properties: + mass: + range: [0.5, 1.5] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by ``min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_shape_properties: + friction: + num_buckets: 250 + range: [0.7, 1.3] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + +sim: + dt: 0.0166 # 1/60 s + substeps: 2 + up_axis: "z" + use_gpu_pipeline: ${eq:${...pipeline},"gpu"} + gravity: [ 0.0, 0.0, -9.81 ] + physx: + num_threads: 4 + solver_type: 1 # 0: pgs, 1: tgs + num_position_iterations: 8 + num_velocity_iterations: 0 + contact_offset: 0.002 + rest_offset: 0.0 + bounce_threshold_velocity: 0.2 + max_depenetration_velocity: 1000.0 + default_buffer_size_multiplier: 5.0 + flex: + num_outer_iterations: 5 + num_inner_iterations: 20 + warm_start: 0.8 + relaxation: 0.75 diff --git a/rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml b/rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml new file mode 100644 index 000000000..a7046816e --- /dev/null +++ b/rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml @@ -0,0 +1,179 @@ +name: QbSoftHandGrasp + +physics_engine: ${..physics_engine} + +# if given, will override the device setting in gym. +env: + env_name: "qbsofthand_grasp" + numEnvs: ${resolve_default:4096,${...num_envs}} + envSpacing: 1.5 + episodeLength: 500 + enableDebugVis: False + cameraDebug: True + pointCloudDebug: True + aggregateMode: 1 + + stiffnessScale: 1.0 + forceLimitScale: 1.0 + useRelativeControl: False + dofSpeedScale: 20.0 + actionsMovingAverage: 1.0 + controlFrequencyInv: 1 # 60 Hz + + startPositionNoise: 0.0 + startRotationNoise: 0.0 + + resetPositionNoise: 0.0 + resetRotationNoise: 0.0 + resetDofPosRandomInterval: 0.0 + resetDofVelRandomInterval: 0.0 + + distRewardScale: 20 + transition_scale: 0.5 + orientation_scale: 0.1 + rotRewardScale: 1.0 + rotEps: 0.1 + actionPenaltyScale: -0.0002 + reachGoalBonus: 250 + fallDistance: 0.4 + fallPenalty: 0.0 + + objectType: "pot" # can be block, egg or pen + observationType: "full_state" # point_cloud or full_state + handAgentIndex: "[[0, 1, 2, 3, 4, 5]]" + asymmetric_observations: False + successTolerance: 0.1 + printNumSuccesses: False + maxConsecutiveSuccesses: 0 + + asset: +# assetFileName: "mjcf/open_ai_assets/hand/shadow_hand.xml" + assetFileNameBlock: "urdf/objects/cube_multicolor.urdf" + assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml" + assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml" + +task: + randomize: False + randomization_params: + frequency: 600 # Define how many simulation steps between generating new randomizations + observations: + range: [0, .002] # range for the white noise + range_correlated: [0, .001 ] # range for correlated noise, refreshed with freq `frequency` + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps + schedule_steps: 40000 + actions: + range: [0., .05] + range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency` + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will linearly interpolate between no rand and max rand + schedule_steps: 40000 + sim_params: + gravity: + range: [0, 0.4] + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will linearly interpolate between no rand and max rand + schedule_steps: 40000 + actor_params: + hand: + color: True + tendon_properties: + damping: + range: [0.3, 3.0] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + stiffness: + range: [0.75, 1.5] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + dof_properties: + damping: + range: [0.3, 3.0] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + stiffness: + range: [0.75, 1.5] + operation: "scaling" + distribution: "loguniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + lower: + range: [0, 0.01] + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + upper: + range: [0, 0.01] + operation: "additive" + distribution: "gaussian" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_body_properties: + mass: + range: [0.5, 1.5] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_shape_properties: + friction: + num_buckets: 250 + range: [0.7, 1.3] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + object: + scale: + range: [0.95, 1.05] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by ``min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_body_properties: + mass: + range: [0.5, 1.5] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by ``min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + rigid_shape_properties: + friction: + num_buckets: 250 + range: [0.7, 1.3] + operation: "scaling" + distribution: "uniform" + schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps` + schedule_steps: 30000 + +sim: + dt: 0.0166 # 1/60 s + substeps: 2 + up_axis: "z" + use_gpu_pipeline: ${eq:${...pipeline},"gpu"} + gravity: [ 0.0, 0.0, -9.81 ] + physx: + num_threads: 4 + solver_type: 1 # 0: pgs, 1: tgs + num_position_iterations: 8 + num_velocity_iterations: 0 + contact_offset: 0.002 + rest_offset: 0.0 + bounce_threshold_velocity: 0.2 + max_depenetration_velocity: 1000.0 + default_buffer_size_multiplier: 5.0 + flex: + num_outer_iterations: 5 + num_inner_iterations: 20 + warm_start: 0.8 + relaxation: 0.75 diff --git a/rofunc/learning/RofuncRL/tasks/__init__.py b/rofunc/learning/RofuncRL/tasks/__init__.py index 4e7c65657..6265478c2 100644 --- a/rofunc/learning/RofuncRL/tasks/__init__.py +++ b/rofunc/learning/RofuncRL/tasks/__init__.py @@ -40,6 +40,8 @@ def __init__(self, env_type="isaacgym"): from .isaacgymenv.hands.shadow_hand_swing_cup import ShadowHandSwingCupTask from .isaacgymenv.hands.shadow_hand_switch import ShadowHandSwitchTask from .isaacgymenv.hands.shadow_hand_two_catch_underarm import ShadowHandTwoCatchUnderarmTask + from .isaacgymenv.hands.qbsofthand_grasp import QbSoftHandGraspTask + from .isaacgymenv.hands.bi_qbhand_grasp_and_place import BiQbSoftHandGraspAndPlaceTask self.task_map = { "Ant": AntTask, @@ -80,6 +82,8 @@ def __init__(self, env_type="isaacgym"): "BiShadowHandSwingCup": ShadowHandSwingCupTask, "BiShadowHandSwitch": ShadowHandSwitchTask, "BiShadowHandTwoCatchUnderarm": ShadowHandTwoCatchUnderarmTask, + "QbSoftHandGrasp": QbSoftHandGraspTask, + "BiQbSoftHandGraspAndPlace": BiQbSoftHandGraspAndPlaceTask, } elif env_type == "omniisaacgym": # OmniIsaacGym tasks diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py new file mode 100644 index 000000000..cb7b2e193 --- /dev/null +++ b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py @@ -0,0 +1,1541 @@ +import os +import random + +from PIL import Image as Im +from isaacgym import gymapi +from isaacgym import gymtorch + +from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * +from rofunc.utils.oslab import get_rofunc_path + + +class BiQbSoftHandGraspAndPlaceTask(VecTask): + """ + This class corresponds to the GraspAndPlace task. This environment consists of dual-hands, an + object and a bucket that requires us to pick up the object and put it into the bucket. + """ + + def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, + force_render, agent_index=[[[0, 1, 2, 3, 4, 5]], [[0, 1, 2, 3, 4, 5]]], is_multi_agent=False): + self.cfg = cfg + self.agent_index = agent_index + + self.is_multi_agent = is_multi_agent + + self.randomize = self.cfg["task"]["randomize"] + self.randomization_params = self.cfg["task"]["randomization_params"] + self.aggregate_mode = self.cfg["env"]["aggregateMode"] + + self.dist_reward_scale = self.cfg["env"]["distRewardScale"] + self.rot_reward_scale = self.cfg["env"]["rotRewardScale"] + self.action_penalty_scale = self.cfg["env"]["actionPenaltyScale"] + self.success_tolerance = self.cfg["env"]["successTolerance"] + self.reach_goal_bonus = self.cfg["env"]["reachGoalBonus"] + self.fall_dist = self.cfg["env"]["fallDistance"] + self.fall_penalty = self.cfg["env"]["fallPenalty"] + self.rot_eps = self.cfg["env"]["rotEps"] + + self.vel_obs_scale = 0.2 # scale factor of velocity based observations + self.force_torque_obs_scale = 10.0 # scale factor of velocity based observations + + self.reset_position_noise = self.cfg["env"]["resetPositionNoise"] + self.reset_rotation_noise = self.cfg["env"]["resetRotationNoise"] + self.reset_dof_pos_noise = self.cfg["env"]["resetDofPosRandomInterval"] + self.reset_dof_vel_noise = self.cfg["env"]["resetDofVelRandomInterval"] + + self.shadow_hand_dof_speed_scale = self.cfg["env"]["dofSpeedScale"] + self.use_relative_control = self.cfg["env"]["useRelativeControl"] + self.act_moving_average = self.cfg["env"]["actionsMovingAverage"] + + self.debug_viz = self.cfg["env"]["enableDebugVis"] + + self.max_episode_length = self.cfg["env"]["episodeLength"] + self.reset_time = self.cfg["env"].get("resetTime", -1.0) + self.print_success_stat = self.cfg["env"]["printNumSuccesses"] + self.max_consecutive_successes = self.cfg["env"]["maxConsecutiveSuccesses"] + self.av_factor = self.cfg["env"].get("averFactor", 0.01) + print("Averaging factor: ", self.av_factor) + + self.transition_scale = self.cfg["env"]["transition_scale"] + self.orientation_scale = self.cfg["env"]["orientation_scale"] + + control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1) + if self.reset_time > 0.0: + self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.sim_params.dt))) + print("Reset time: ", self.reset_time) + print("New episode length: ", self.max_episode_length) + + self.object_type = self.cfg["env"]["objectType"] + # assert self.object_type in ["block", "egg", "pen"] + + self.ignore_z = (self.object_type == "pen") + + self.asset_files_dict = { + "block": "urdf/objects/cube_multicolor.urdf", + "egg": "mjcf/open_ai_assets/hand/egg.xml", + "pen": "mjcf/open_ai_assets/hand/pen.xml", + # "pot": "mjcf/pot.xml", + "pot": "mjcf/bucket/100454/mobility.urdf" + } + + if "asset" in self.cfg["env"]: + self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock", + self.asset_files_dict["block"]) + self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg", + self.asset_files_dict["egg"]) + self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen", + self.asset_files_dict["pen"]) + + # can be "openai", "full_no_vel", "full", "full_state" + self.obs_type = self.cfg["env"]["observationType"] + + if not (self.obs_type in ["point_cloud", "full_state"]): + raise Exception( + "Unknown type of observations!\nobservationType should be one of: [point_cloud, full_state]") + + print("Obs type:", self.obs_type) + + self.num_point_cloud_feature_dim = 768 + self.num_obs_dict = { + "point_cloud": 425 + self.num_point_cloud_feature_dim * 3, + "point_cloud_for_distill": 425 + self.num_point_cloud_feature_dim * 3, + "full_state": 425 + } + self.num_hand_obs = 72 + 95 + 26 + 6 + self.up_axis = 'z' + + self.right_fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link", + "right_qbhand_middle_distal_link", "right_qbhand_ring_distal_link", + "right_qbhand_little_distal_link"] + self.left_fingertips = ["left_qbhand_thumb_distal_link", "left_qbhand_index_distal_link", + "left_qbhand_middle_distal_link", "left_qbhand_ring_distal_link", + "left_qbhand_little_distal_link"] + + self.num_fingertips = len(self.right_fingertips) * 2 + + self.use_vel_obs = False + self.fingertip_obs = True + self.asymmetric_obs = self.cfg["env"]["asymmetric_observations"] + + num_states = 0 + if self.asymmetric_obs: + num_states = 211 + + self.cfg["env"]["numObservations"] = self.num_obs_dict[self.obs_type] + self.cfg["env"]["numStates"] = num_states + if self.is_multi_agent: + self.num_agents = 2 + self.cfg["env"]["numActions"] = 15 + 6 + + else: + self.num_agents = 1 + self.cfg["env"]["numActions"] = (15 + 6) * 2 + + super().__init__(cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, force_render) + + if self.obs_type in ["point_cloud"]: + from PIL import Image as Im + # from pointnet2_ops import pointnet2_utils + + self.camera_debug = self.cfg["env"].get("cameraDebug", False) + self.point_cloud_debug = self.cfg["env"].get("pointCloudDebug", False) + + if self.viewer != None: + cam_pos = gymapi.Vec3(10.0, 5.0, 1.0) + cam_target = gymapi.Vec3(6.0, 5.0, 0.0) + self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) + + # get gym GPU state tensors + actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim) + dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) + rigid_body_tensor = self.gym.acquire_rigid_body_state_tensor(self.sim) + + sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) + self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6) + + dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) + self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, + self.num_hand_dofs * 2 + self.num_object_dofs * 2) + self.dof_force_tensor = self.dof_force_tensor[:, :48] + + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + # create some wrapper tensors for different slices + self.shadow_hand_default_dof_pos = torch.zeros(self.num_hand_dofs, dtype=torch.float, device=self.device) + # self.shadow_hand_default_dof_pos = to_torch([0.0, 0.0, -0, -0, -0, -0, -0, -0, + # -0, -0, -0, -0, -0, -0, -0, -0, + # -0, -0, -0, -1.04, 1.2, 0., 0, -1.57], dtype=torch.float, device=self.device) + + self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) + self.right_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_hand_dofs] + self.right_hand_dof_pos = self.right_hand_dof_state[..., 0] + self.right_hand_dof_vel = self.right_hand_dof_state[..., 1] + + self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, + self.num_hand_dofs:self.num_hand_dofs * 2] + self.left_hand_dof_pos = self.left_hand_dof_state[..., 0] + self.left_hand_dof_vel = self.left_hand_dof_state[..., 1] + + self.object_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, + self.num_hand_dofs * 2:self.num_hand_dofs * 2 + self.num_object_dofs] + self.object_dof_pos = self.object_dof_state[..., 0] + + self.rigid_body_states = gymtorch.wrap_tensor(rigid_body_tensor).view(self.num_envs, -1, 13) + self.num_bodies = self.rigid_body_states.shape[1] + + self.root_state_tensor = gymtorch.wrap_tensor(actor_root_state_tensor).view(-1, 13) + self.hand_positions = self.root_state_tensor[:, 0:3] + self.hand_orientations = self.root_state_tensor[:, 3:7] + self.hand_linvels = self.root_state_tensor[:, 7:10] + self.hand_angvels = self.root_state_tensor[:, 10:13] + self.saved_root_tensor = self.root_state_tensor.clone() + + self.num_dofs = self.gym.get_sim_dof_count(self.sim) // self.num_envs + self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device) + self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device) + + self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs, + -1) + self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + + self.reset_goal_buf = self.reset_buf.clone() + self.successes = torch.zeros(self.num_envs, dtype=torch.float, device=self.device) + self.consecutive_successes = torch.zeros(1, dtype=torch.float, device=self.device) + + self.av_factor = to_torch(self.av_factor, dtype=torch.float, device=self.device) + self.apply_forces = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float) + self.apply_torque = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float) + + self.total_successes = 0 + self.total_resets = 0 + + def create_sim(self): + """ + Allocates which device will simulate and which device will render the scene. Defines the simulation type to be used + """ + self.dt = self.sim_params.dt + self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, self.up_axis) + + self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) + self._create_ground_plane() + self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs))) + + def _create_ground_plane(self): + """ + Adds ground plane to simulation + """ + plane_params = gymapi.PlaneParams() + plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) + self.gym.add_ground(self.sim, plane_params) + + def _create_envs(self, num_envs, spacing, num_per_row): + """ + Create multiple parallel isaacgym environments + + :param num_envs: The total number of environment + :param spacing: Specifies half the side length of the square area occupied by each environment + :param num_per_row: Specify how many environments in a row + :return: + """ + lower = gymapi.Vec3(-spacing, -spacing, 0.0) + upper = gymapi.Vec3(spacing, spacing, spacing) + + # get rofunc path from rofunc package metadata + rofunc_path = get_rofunc_path() + asset_root = os.path.join(rofunc_path, "simulator/assets") + right_hand_asset_file = "mjcf/qbhand_no_virtual_right.xml" + left_hand_asset_file = "mjcf/qbhand_no_virtual_left.xml" + table_texture_files = os.path.join(asset_root, "textures/texture_stone_stone_texture_0.jpg") + table_texture_handle = self.gym.create_texture_from_file(self.sim, table_texture_files) + + if "asset" in self.cfg["env"]: + asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root) + + object_asset_file = self.asset_files_dict[self.object_type] + + # + asset_options = gymapi.AssetOptions() + asset_options.flip_visual_attachments = False + asset_options.fix_base_link = False + asset_options.collapse_fixed_joints = True + asset_options.disable_gravity = True + asset_options.thickness = 0.001 + asset_options.angular_damping = 100 + asset_options.linear_damping = 100 + if self.physics_engine == gymapi.SIM_PHYSX: + asset_options.use_physx_armature = True + asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE + right_hand_asset = self.gym.load_asset(self.sim, asset_root, right_hand_asset_file, asset_options) + left_hand_asset = self.gym.load_asset(self.sim, asset_root, left_hand_asset_file, asset_options) + + # print asset info + self.num_shadow_hand_bodies = self.gym.get_asset_rigid_body_count(right_hand_asset) + self.num_shadow_hand_shapes = self.gym.get_asset_rigid_shape_count(right_hand_asset) + self.num_hand_dofs = self.gym.get_asset_dof_count(right_hand_asset) + self.num_shadow_hand_actuators = self.gym.get_asset_actuator_count(right_hand_asset) + self.num_shadow_hand_tendons = self.gym.get_asset_tendon_count(right_hand_asset) + print("self.num_shadow_hand_bodies: ", self.num_shadow_hand_bodies) + print("self.num_shadow_hand_shapes: ", self.num_shadow_hand_shapes) + print("self.num_hand_dofs: ", self.num_hand_dofs) + print("self.num_shadow_hand_actuators: ", self.num_shadow_hand_actuators) + print("self.num_shadow_hand_tendons: ", self.num_shadow_hand_tendons) + + actuated_dof_names = [self.gym.get_asset_actuator_joint_name(right_hand_asset, i) for i in + range(self.num_shadow_hand_actuators)] + self.actuated_dof_indices = [self.gym.find_asset_dof_index(right_hand_asset, name) for name in + actuated_dof_names] + + # set shadow_hand dof properties + right_hand_dof_props = self.gym.get_asset_dof_properties(right_hand_asset) + left_hand_dof_props = self.gym.get_asset_dof_properties(left_hand_asset) + + self.shadow_hand_dof_lower_limits = [] + self.shadow_hand_dof_upper_limits = [] + self.shadow_hand_dof_default_pos = [] + self.hand_dof_default_vel = [] + self.sensors = [] + + for i in range(self.num_hand_dofs): + self.shadow_hand_dof_lower_limits.append(right_hand_dof_props['lower'][i]) + self.shadow_hand_dof_upper_limits.append(right_hand_dof_props['upper'][i]) + self.shadow_hand_dof_default_pos.append(0.0) + self.hand_dof_default_vel.append(0.0) + + self.actuated_dof_indices = to_torch(self.actuated_dof_indices, dtype=torch.long, device=self.device) + self.shadow_hand_dof_lower_limits = to_torch(self.shadow_hand_dof_lower_limits, device=self.device) + self.shadow_hand_dof_upper_limits = to_torch(self.shadow_hand_dof_upper_limits, device=self.device) + self.shadow_hand_dof_default_pos = to_torch(self.shadow_hand_dof_default_pos, device=self.device) + self.hand_dof_default_vel = to_torch(self.hand_dof_default_vel, device=self.device) + # + + # + object_asset_options = gymapi.AssetOptions() + object_asset_options.density = 500 + object_asset_options.fix_base_link = False + # object_asset_options.collapse_fixed_joints = True + # object_asset_options.disable_gravity = True + object_asset_options.use_mesh_materials = True + object_asset_options.mesh_normal_mode = gymapi.COMPUTE_PER_VERTEX + object_asset_options.override_com = True + object_asset_options.override_inertia = True + object_asset_options.vhacd_enabled = True + object_asset_options.vhacd_params = gymapi.VhacdParams() + object_asset_options.vhacd_params.resolution = 100000 + object_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE + + object_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options) + block_asset_file = "urdf/objects/cube_multicolor1.urdf" + block_asset = self.gym.load_asset(self.sim, asset_root, block_asset_file, object_asset_options) + + object_asset_options.disable_gravity = True + goal_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options) + + self.num_object_bodies = self.gym.get_asset_rigid_body_count(object_asset) + self.num_object_shapes = self.gym.get_asset_rigid_shape_count(object_asset) + + # set object dof properties + self.num_object_dofs = self.gym.get_asset_dof_count(object_asset) + object_dof_props = self.gym.get_asset_dof_properties(object_asset) + + self.object_dof_lower_limits = [] + self.object_dof_upper_limits = [] + + for i in range(self.num_object_dofs): + self.object_dof_lower_limits.append(object_dof_props['lower'][i]) + self.object_dof_upper_limits.append(object_dof_props['upper'][i]) + + self.object_dof_lower_limits = to_torch(self.object_dof_lower_limits, device=self.device) + self.object_dof_upper_limits = to_torch(self.object_dof_upper_limits, device=self.device) + # + + # + table_dims = gymapi.Vec3(1.0, 1.0, 0.6) + asset_options = gymapi.AssetOptions() + asset_options.fix_base_link = True + asset_options.flip_visual_attachments = True + asset_options.collapse_fixed_joints = True + asset_options.disable_gravity = True + asset_options.thickness = 0.001 + + table_asset = self.gym.create_box(self.sim, table_dims.x, table_dims.y, table_dims.z, gymapi.AssetOptions()) + # + + # + right_hand_start_pose = gymapi.Transform() + right_hand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8) + right_hand_start_pose.r = gymapi.Quat().from_euler_zyx(3.14159, 0, 1.57) + + left_hand_start_pose = gymapi.Transform() + left_hand_start_pose.p = gymapi.Vec3(0.55, -0.2, 0.8) + left_hand_start_pose.r = gymapi.Quat().from_euler_zyx(3.14159, 0, 1.57) + + object_start_pose = gymapi.Transform() + object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6) + object_start_pose.r = gymapi.Quat().from_euler_zyx(0, 0, 0) + pose_dx, pose_dy, pose_dz = -1.0, 0.0, -0.0 + + block_start_pose = gymapi.Transform() + block_start_pose.p = gymapi.Vec3(0.0, -0.2, 0.6) + block_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 1.57, 0) + # object_start_pose.p.x = right_hand_start_pose.p.x + pose_dx + # object_start_pose.p.y = right_hand_start_pose.p.y + pose_dy + # object_start_pose.p.z = right_hand_start_pose.p.z + pose_dz + + if self.object_type == "pen": + object_start_pose.p.z = right_hand_start_pose.p.z + 0.02 + + self.goal_displacement = gymapi.Vec3(-0., 0.0, 10) + self.goal_displacement_tensor = to_torch( + [self.goal_displacement.x, self.goal_displacement.y, self.goal_displacement.z], device=self.device) + goal_start_pose = gymapi.Transform() + goal_start_pose.p = object_start_pose.p + self.goal_displacement + + goal_start_pose.p.z -= 0.0 + + table_pose = gymapi.Transform() + table_pose.p = gymapi.Vec3(0.0, 0.0, 0.5 * table_dims.z) + table_pose.r = gymapi.Quat().from_euler_zyx(-0., 0, 0) + # + + # compute aggregate size + max_agg_bodies = self.num_shadow_hand_bodies * 2 + 3 * self.num_object_bodies + 1 + max_agg_shapes = self.num_shadow_hand_shapes * 2 + 3 * self.num_object_shapes + 1 + + self.hands = [] + self.envs = [] + + self.object_init_state = [] + self.hand_start_states = [] + + self.right_hand_indices = [] + self.left_hand_indices = [] + self.fingertip_indices = [] + self.object_indices = [] + self.goal_object_indices = [] + self.table_indices = [] + self.block_indices = [] + + self.fingertip_handles = [self.gym.find_asset_rigid_body_index(right_hand_asset, name) for name in + self.right_fingertips] + self.fingertip_left_handles = [self.gym.find_asset_rigid_body_index(left_hand_asset, name) for name + in self.left_fingertips] + + # create fingertip force sensors, if needed + sensor_pose = gymapi.Transform() + for ft_handle in self.fingertip_handles: + self.gym.create_asset_force_sensor(right_hand_asset, ft_handle, sensor_pose) + for ft_a_handle in self.fingertip_left_handles: + self.gym.create_asset_force_sensor(left_hand_asset, ft_a_handle, sensor_pose) + + if self.obs_type in ["point_cloud"]: + self.cameras = [] + self.camera_tensors = [] + self.camera_view_matrixs = [] + self.camera_proj_matrixs = [] + + self.camera_props = gymapi.CameraProperties() + self.camera_props.width = 256 + self.camera_props.height = 256 + self.camera_props.enable_tensors = True + + self.env_origin = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float) + self.pointCloudDownsampleNum = 768 + self.camera_u = torch.arange(0, self.camera_props.width, device=self.device) + self.camera_v = torch.arange(0, self.camera_props.height, device=self.device) + + self.camera_v2, self.camera_u2 = torch.meshgrid(self.camera_v, self.camera_u, indexing='ij') + + if self.point_cloud_debug: + import open3d as o3d + from bidexhands.utils.o3dviewer import PointcloudVisualizer + self.pointCloudVisualizer = PointcloudVisualizer() + self.pointCloudVisualizerInitialized = False + self.o3d_pc = o3d.geometry.PointCloud() + else: + self.pointCloudVisualizer = None + + for i in range(self.num_envs): + # create env instance + env_ptr = self.gym.create_env( + self.sim, lower, upper, num_per_row + ) + + if self.aggregate_mode >= 1: + self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True) + + # add hand - collision filter = -1 to use asset collision filters set in mjcf loader + right_hand_actor = self.gym.create_actor(env_ptr, right_hand_asset, right_hand_start_pose, "right_hand", i, + -1, 0) + left_hand_actor = self.gym.create_actor(env_ptr, left_hand_asset, + left_hand_start_pose, "left_hand", i, -1, 0) + + self.hand_start_states.append( + [right_hand_start_pose.p.x, right_hand_start_pose.p.y, right_hand_start_pose.p.z, + right_hand_start_pose.r.x, right_hand_start_pose.r.y, right_hand_start_pose.r.z, + right_hand_start_pose.r.w, + 0, 0, 0, 0, 0, 0]) + + self.gym.set_actor_dof_properties(env_ptr, right_hand_actor, right_hand_dof_props) + right_hand_idx = self.gym.get_actor_index(env_ptr, right_hand_actor, gymapi.DOMAIN_SIM) + self.right_hand_indices.append(right_hand_idx) + self.gym.set_actor_dof_properties(env_ptr, left_hand_actor, left_hand_dof_props) + left_right_hand_idx = self.gym.get_actor_index(env_ptr, left_hand_actor, gymapi.DOMAIN_SIM) + self.left_hand_indices.append(left_right_hand_idx) + + # + num_bodies = self.gym.get_actor_rigid_body_count(env_ptr, right_hand_actor) + hand_rigid_body_index = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19, 20], + [21, 22, 23, 24, 25]] + + for n in self.agent_index[0]: + colorx = random.uniform(0, 1) + colory = random.uniform(0, 1) + colorz = random.uniform(0, 1) + for m in n: + for o in hand_rigid_body_index[m]: + self.gym.set_rigid_body_color(env_ptr, right_hand_actor, o, gymapi.MESH_VISUAL, + gymapi.Vec3(colorx, colory, colorz)) + for n in self.agent_index[1]: + colorx = random.uniform(0, 1) + colory = random.uniform(0, 1) + colorz = random.uniform(0, 1) + for m in n: + for o in hand_rigid_body_index[m]: + self.gym.set_rigid_body_color(env_ptr, left_hand_actor, o, gymapi.MESH_VISUAL, + gymapi.Vec3(colorx, colory, colorz)) + # gym.set_rigid_body_texture(env, actor_handles[-1], n, gymapi.MESH_VISUAL, + # loaded_texture_handle_list[random.randint(0, len(loaded_texture_handle_list)-1)]) + # + + # create fingertip force-torque sensors + self.gym.enable_actor_dof_force_sensors(env_ptr, right_hand_actor) + self.gym.enable_actor_dof_force_sensors(env_ptr, left_hand_actor) + + # add object + object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0) + self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z, + object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z, + object_start_pose.r.w, + 0, 0, 0, 0, 0, 0]) + object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM) + self.object_indices.append(object_idx) + # self.gym.set_actor_scale(env_ptr, object_handle, 0.3) + + block_handle = self.gym.create_actor(env_ptr, block_asset, block_start_pose, "block", i, 0, 0) + block_idx = self.gym.get_actor_index(env_ptr, block_handle, gymapi.DOMAIN_SIM) + self.block_indices.append(block_idx) + + # add goal object + goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs, + 0, 0) + goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM) + self.goal_object_indices.append(goal_object_idx) + # self.gym.set_actor_scale(env_ptr, goal_handle, 0.3) + + # add table + table_handle = self.gym.create_actor(env_ptr, table_asset, table_pose, "table", i, -1, 0) + self.gym.set_rigid_body_texture(env_ptr, table_handle, 0, gymapi.MESH_VISUAL, table_texture_handle) + table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM) + self.table_indices.append(table_idx) + + if self.object_type != "block": + self.gym.set_rigid_body_color( + env_ptr, object_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98)) + self.gym.set_rigid_body_color( + env_ptr, goal_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98)) + + if self.obs_type in ["point_cloud"]: + camera_handle = self.gym.create_camera_sensor(env_ptr, self.camera_props) + self.gym.set_camera_location(camera_handle, env_ptr, gymapi.Vec3(0.25, -0., 1.0), + gymapi.Vec3(-0.24, -0., 0)) + camera_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, env_ptr, camera_handle, + gymapi.IMAGE_DEPTH) + torch_cam_tensor = gymtorch.wrap_tensor(camera_tensor) + cam_vinv = torch.inverse( + (torch.tensor(self.gym.get_camera_view_matrix(self.sim, env_ptr, camera_handle)))).to(self.device) + cam_proj = torch.tensor(self.gym.get_camera_proj_matrix(self.sim, env_ptr, camera_handle), + device=self.device) + + origin = self.gym.get_env_origin(env_ptr) + self.env_origin[i][0] = origin.x + self.env_origin[i][1] = origin.y + self.env_origin[i][2] = origin.z + self.camera_tensors.append(torch_cam_tensor) + self.camera_view_matrixs.append(cam_vinv) + self.camera_proj_matrixs.append(cam_proj) + self.cameras.append(camera_handle) + + if self.aggregate_mode > 0: + self.gym.end_aggregate(env_ptr) + + self.envs.append(env_ptr) + self.hands.append(right_hand_actor) + + self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view( + self.num_envs, 13) + self.goal_states = self.object_init_state.clone() + # self.goal_pose = self.goal_states[:, 0:7] + # self.goal_pos = self.goal_states[:, 0:3] + # self.goal_rot = self.goal_states[:, 3:7] + # self.goal_states[:, self.up_axis_idx] -= 0.04 + self.goal_init_state = self.goal_states.clone() + self.hand_start_states = to_torch(self.hand_start_states, device=self.device).view(self.num_envs, 13) + + self.fingertip_handles = to_torch(self.fingertip_handles, dtype=torch.long, device=self.device) + self.fingertip_left_handles = to_torch(self.fingertip_left_handles, dtype=torch.long, device=self.device) + + self.right_hand_indices = to_torch(self.right_hand_indices, dtype=torch.long, device=self.device) + self.left_hand_indices = to_torch(self.left_hand_indices, dtype=torch.long, device=self.device) + + self.object_indices = to_torch(self.object_indices, dtype=torch.long, device=self.device) + self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device) + self.table_indices = to_torch(self.table_indices, dtype=torch.long, device=self.device) + self.block_indices = to_torch(self.block_indices, dtype=torch.long, device=self.device) + + def compute_reward(self, actions): + """ + Compute the reward of all environment. The core function is compute_hand_reward( + self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes, + self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.block_right_handle_pos, self.block_left_handle_pos, + self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos, + self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos, self.left_hand_th_pos, + self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale, + self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty, + self.max_consecutive_successes, self.av_factor, (self.object_type == "pen") + ) + , which we will introduce in detail there + + Args: + actions (tensor): Actions of agents in the all environment + """ + self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[ + :], self.consecutive_successes[ + :] = compute_hand_reward( + self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, + self.consecutive_successes, + self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, + self.block_right_handle_pos, self.block_left_handle_pos, + self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, + self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos, + self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos, + self.left_hand_th_pos, + self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale, + self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty, + self.max_consecutive_successes, self.av_factor, (self.object_type == "pen") + ) + + self.extras['successes'] = self.successes + self.extras['consecutive_successes'] = self.consecutive_successes + + if self.print_success_stat: + self.total_resets = self.total_resets + self.reset_buf.sum() + direct_average_successes = self.total_successes + self.successes.sum() + self.total_successes = self.total_successes + (self.successes * self.reset_buf).sum() + + # The direct average shows the overall result more quickly, but slightly undershoots long term + # policy performance. + print("Direct average consecutive successes = {:.1f}".format( + direct_average_successes / (self.total_resets + self.num_envs))) + if self.total_resets > 0: + print("Post-Reset average consecutive successes = {:.1f}".format( + self.total_successes / self.total_resets)) + + def compute_observations(self): + """ + Compute the observations of all environment. The core function is self.compute_full_state(True), + which we will introduce in detail there + + """ + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + self.gym.refresh_force_sensor_tensor(self.sim) + self.gym.refresh_dof_force_tensor(self.sim) + + if self.obs_type in ["point_cloud"]: + self.gym.render_all_camera_sensors(self.sim) + self.gym.start_access_image_tensors(self.sim) + + self.object_pose = self.root_state_tensor[self.object_indices, 0:7] + self.object_pos = self.root_state_tensor[self.object_indices, 0:3] + self.object_rot = self.root_state_tensor[self.object_indices, 3:7] + self.object_linvel = self.root_state_tensor[self.object_indices, 7:10] + self.object_angvel = self.root_state_tensor[self.object_indices, 10:13] + + self.block_right_handle_pos = self.rigid_body_states[:, 16 * 2 + 1, 0:3] # (256, ?, 13) + self.block_right_handle_rot = self.rigid_body_states[:, 16 * 2 + 1, 3:7] + self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, + to_torch([0, 1, 0], + device=self.device).repeat( + self.num_envs, 1) * 0.) + self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, + to_torch([1, 0, 0], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, + to_torch([0, 0, 1], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + + self.block_left_handle_pos = self.rigid_body_states[:, 16 * 2 + 2, 0:3] + self.block_left_handle_rot = self.rigid_body_states[:, 16 * 2 + 2, 3:7] + self.block_left_handle_pos = self.block_left_handle_pos + quat_apply(self.block_left_handle_rot, + to_torch([0, 1, 0], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + self.block_left_handle_pos = self.block_left_handle_pos + quat_apply(self.block_left_handle_rot, + to_torch([1, 0, 0], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + self.block_left_handle_pos = self.block_left_handle_pos + quat_apply(self.block_left_handle_rot, + to_torch([0, 0, 1], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + + self.left_hand_pos = self.rigid_body_states[:, 3 + 16, 0:3] + self.left_hand_rot = self.rigid_body_states[:, 3 + 16, 3:7] + self.left_hand_pos = self.left_hand_pos + quat_apply(self.left_hand_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.08) + self.left_hand_pos = self.left_hand_pos + quat_apply(self.left_hand_rot, + to_torch([0, 1, 0], device=self.device).repeat( + self.num_envs, 1) * -0.02) + + self.right_hand_pos = self.rigid_body_states[:, 3, 0:3] + self.right_hand_rot = self.rigid_body_states[:, 3, 3:7] + self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.08) + self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot, + to_torch([0, 1, 0], device=self.device).repeat( + self.num_envs, 1) * -0.02) + + # + self.right_hand_th_pos = self.rigid_body_states[:, 3, 0:3] + self.right_hand_th_rot = self.rigid_body_states[:, 3, 3:7] + self.right_hand_th_pos = self.right_hand_th_pos + quat_apply(self.right_hand_th_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_ff_pos = self.rigid_body_states[:, 6, 0:3] + self.right_hand_ff_rot = self.rigid_body_states[:, 6, 3:7] + self.right_hand_ff_pos = self.right_hand_ff_pos + quat_apply(self.right_hand_ff_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_mf_pos = self.rigid_body_states[:, 9, 0:3] + self.right_hand_mf_rot = self.rigid_body_states[:, 9, 3:7] + self.right_hand_mf_pos = self.right_hand_mf_pos + quat_apply(self.right_hand_mf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_rf_pos = self.rigid_body_states[:, 12, 0:3] + self.right_hand_rf_rot = self.rigid_body_states[:, 12, 3:7] + self.right_hand_rf_pos = self.right_hand_rf_pos + quat_apply(self.right_hand_rf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_lf_pos = self.rigid_body_states[:, 15, 0:3] + self.right_hand_lf_rot = self.rigid_body_states[:, 15, 3:7] + self.right_hand_lf_pos = self.right_hand_lf_pos + quat_apply(self.right_hand_lf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + + # + + # + self.left_hand_th_pos = self.rigid_body_states[:, 3 + 16, 0:3] + self.left_hand_th_rot = self.rigid_body_states[:, 3 + 16, 3:7] + self.left_hand_th_pos = self.left_hand_th_pos + quat_apply(self.left_hand_th_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.left_hand_ff_pos = self.rigid_body_states[:, 6 + 16, 0:3] + self.left_hand_ff_rot = self.rigid_body_states[:, 6 + 16, 3:7] + self.left_hand_ff_pos = self.left_hand_ff_pos + quat_apply(self.left_hand_ff_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.left_hand_mf_pos = self.rigid_body_states[:, 9 + 16, 0:3] + self.left_hand_mf_rot = self.rigid_body_states[:, 9 + 16, 3:7] + self.left_hand_mf_pos = self.left_hand_mf_pos + quat_apply(self.left_hand_mf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.left_hand_rf_pos = self.rigid_body_states[:, 12 + 16, 0:3] + self.left_hand_rf_rot = self.rigid_body_states[:, 12 + 16, 3:7] + self.left_hand_rf_pos = self.left_hand_rf_pos + quat_apply(self.left_hand_rf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.left_hand_lf_pos = self.rigid_body_states[:, 15 + 16, 0:3] + self.left_hand_lf_rot = self.rigid_body_states[:, 15 + 16, 3:7] + self.left_hand_lf_pos = self.left_hand_lf_pos + quat_apply(self.left_hand_lf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + + # + + self.goal_pos = to_torch([-0.3, 0, 0.6], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + self.goal_rot = self.goal_states[:, 3:7] + + self.right_fingertip_state = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:13] + self.right_fingertip_pos = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:3] + self.left_fingertip_state = self.rigid_body_states[:, self.fingertip_left_handles][:, :, 0:13] + self.left_fingertip_pos = self.rigid_body_states[:, self.fingertip_left_handles][:, :, 0:3] + + if self.obs_type == "full_state": + self.compute_full_state() + elif self.obs_type == "point_cloud": + self.compute_point_cloud_observation() + + if self.asymmetric_obs: + self.compute_full_state(True) + + def compute_full_state(self, asymm_obs=False): + """ + Compute the observations of all environment. The observation is composed of three parts: + the state values of the left and right hands, and the information of objects and target. + The state values of the left and right hands were the same for each task, including hand + joint and finger positions, velocity, and force information. The detail 425-dimensional + observational space as shown in below: + + Index Description + 0 - 14 right shadow hand dof position + 15 - 29 right shadow hand dof velocity + 30 - 44 right shadow hand dof force + 45 - 109 right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 110 - 139 right shadow hand fingertip force, torque (5 x 6) + 140 - 142 right shadow hand base position + 143 - 145 right shadow hand base rotation + 146 - 171 right shadow hand actions + 172 - 1 left shadow hand dof position + 223 - 246 left shadow hand dof velocity + 247 - 270 left shadow hand dof force + 271 - 335 left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 336 - 365 left shadow hand fingertip force, torque (5 x 6) + 366 - 368 left shadow hand base position + 369 - 371 left shadow hand base rotation + 372 - 397 left shadow hand actions + 398 - 404 object pose + 405 - 407 object linear velocity + 408 - 410 object angle velocity + 411 - 413 block right handle position + 414 - 417 block right handle rotation + 418 - 420 block left handle position + 421 - 424 block left handle rotation + """ + num_ft_states = 13 * int(self.num_fingertips / 2) # 65 + num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30 + + self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.right_hand_dof_pos, + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.obs_buf[:, self.num_hand_dofs:2 * self.num_hand_dofs] = self.vel_obs_scale * self.right_hand_dof_vel + self.obs_buf[:, 2 * self.num_hand_dofs:3 * self.num_hand_dofs] \ + = self.force_torque_obs_scale * self.dof_force_tensor[:, :24] + + fingertip_obs_start = 72 # 168 = 157 + 11 + self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.right_fingertip_state.reshape( + self.num_envs, num_ft_states) + self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[ + :, + :30] + + right_hand_pose_start = fingertip_obs_start + 95 + self.obs_buf[:, right_hand_pose_start:right_hand_pose_start + 3] = self.right_hand_pos + self.obs_buf[:, right_hand_pose_start + 3:right_hand_pose_start + 4] = \ + get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[0].unsqueeze(-1) + self.obs_buf[:, right_hand_pose_start + 4:right_hand_pose_start + 5] = \ + get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[1].unsqueeze(-1) + self.obs_buf[:, right_hand_pose_start + 5:right_hand_pose_start + 6] = \ + get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[2].unsqueeze(-1) + + right_action_obs_start = right_hand_pose_start + 6 + self.obs_buf[:, right_action_obs_start:right_action_obs_start + 26] = self.actions[:, :26] + + # left_hand + left_hand_start = right_action_obs_start + 26 + self.obs_buf[:, left_hand_start:self.num_hand_dofs + left_hand_start] = unscale( + self.left_hand_dof_pos, + self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_hand_dofs + left_hand_start:2 * self.num_hand_dofs + left_hand_start] = self.vel_obs_scale * self.left_hand_dof_vel + self.obs_buf[:, + 2 * self.num_hand_dofs + left_hand_start:3 * self.num_hand_dofs + left_hand_start] = self.force_torque_obs_scale * self.dof_force_tensor[ + :, + 24:48] + + left_fingertip_obs_start = left_hand_start + 72 + self.obs_buf[:, + left_fingertip_obs_start:left_fingertip_obs_start + num_ft_states] = self.left_fingertip_state.reshape( + self.num_envs, num_ft_states) + self.obs_buf[:, left_fingertip_obs_start + num_ft_states:left_fingertip_obs_start + num_ft_states + + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[ + :, + 30:] + + left_hand_pose_start = left_fingertip_obs_start + 95 + self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos + self.obs_buf[:, left_hand_pose_start + 3:left_hand_pose_start + 4] = \ + get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[0].unsqueeze(-1) + self.obs_buf[:, left_hand_pose_start + 4:left_hand_pose_start + 5] = \ + get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[1].unsqueeze(-1) + self.obs_buf[:, left_hand_pose_start + 5:left_hand_pose_start + 6] = \ + get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[2].unsqueeze(-1) + + left_right_action_obs_start = left_hand_pose_start + 6 + self.obs_buf[:, left_right_action_obs_start:left_right_action_obs_start + 26] = self.actions[:, 26:] + + obj_obs_start = left_right_action_obs_start + 26 # 144 + self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose + self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel + self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel + self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos + self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot + self.obs_buf[:, obj_obs_start + 20:obj_obs_start + 23] = self.block_left_handle_pos + self.obs_buf[:, obj_obs_start + 23:obj_obs_start + 27] = self.block_left_handle_rot + # goal_obs_start = obj_obs_start + 13 # 157 = 144 + 13 + # self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose + # self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot)) + + def compute_point_cloud_observation(self, collect_demonstration=False): + """ + Compute the observations of all environment. The observation is composed of three parts: + the state values of the left and right hands, and the information of objects and target. + The state values of the left and right hands were the same for each task, including hand + joint and finger positions, velocity, and force information. The detail 425-dimensional + observational space as shown in below: + + Index Description + 0 - 23 right shadow hand dof position + 24 - 47 right shadow hand dof velocity + 48 - 71 right shadow hand dof force + 72 - 136 right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 137 - 166 right shadow hand fingertip force, torque (5 x 6) + 167 - 169 right shadow hand base position + 170 - 172 right shadow hand base rotation + 173 - 198 right shadow hand actions + 199 - 222 left shadow hand dof position + 223 - 246 left shadow hand dof velocity + 247 - 270 left shadow hand dof force + 271 - 335 left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 336 - 365 left shadow hand fingertip force, torque (5 x 6) + 366 - 368 left shadow hand base position + 369 - 371 left shadow hand base rotation + 372 - 397 left shadow hand actions + 398 - 404 object pose + 405 - 407 object linear velocity + 408 - 410 object angle velocity + 411 - 413 block right handle position + 414 - 417 block right handle rotation + 418 - 420 block left handle position + 421 - 424 block left handle rotation + """ + num_ft_states = 13 * int(self.num_fingertips / 2) # 65 + num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30 + + self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.right_hand_dof_pos, + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_hand_dofs:2 * self.num_hand_dofs] = self.vel_obs_scale * self.right_hand_dof_vel + self.obs_buf[:, + 2 * self.num_hand_dofs:3 * self.num_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[ + :, :24] + + fingertip_obs_start = 72 # 168 = 157 + 11 + self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.right_fingertip_state.reshape( + self.num_envs, num_ft_states) + self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[ + :, + :30] + + right_hand_pose_start = fingertip_obs_start + 95 + self.obs_buf[:, right_hand_pose_start:right_hand_pose_start + 3] = self.right_hand_pos + self.obs_buf[:, right_hand_pose_start + 3:right_hand_pose_start + 4] = \ + get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[0].unsqueeze(-1) + self.obs_buf[:, right_hand_pose_start + 4:right_hand_pose_start + 5] = \ + get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[1].unsqueeze(-1) + self.obs_buf[:, right_hand_pose_start + 5:right_hand_pose_start + 6] = \ + get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[2].unsqueeze(-1) + + right_action_obs_start = right_hand_pose_start + 6 + self.obs_buf[:, right_action_obs_start:right_action_obs_start + 26] = self.actions[:, :26] + + # left_hand + left_hand_start = right_action_obs_start + 26 + self.obs_buf[:, left_hand_start:self.num_hand_dofs + left_hand_start] = unscale( + self.left_hand_dof_pos, + self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_hand_dofs + left_hand_start:2 * self.num_hand_dofs + left_hand_start] = self.vel_obs_scale * self.left_hand_dof_vel + self.obs_buf[:, + 2 * self.num_hand_dofs + left_hand_start:3 * self.num_hand_dofs + left_hand_start] = self.force_torque_obs_scale * self.dof_force_tensor[ + :, + 24:48] + + left_fingertip_obs_start = left_hand_start + 72 + self.obs_buf[:, + left_fingertip_obs_start:left_fingertip_obs_start + num_ft_states] = self.left_fingertip_state.reshape( + self.num_envs, num_ft_states) + self.obs_buf[:, left_fingertip_obs_start + num_ft_states:left_fingertip_obs_start + num_ft_states + + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[ + :, + 30:] + + left_hand_pose_start = left_fingertip_obs_start + 95 + self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos + self.obs_buf[:, left_hand_pose_start + 3:left_hand_pose_start + 4] = \ + get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[0].unsqueeze(-1) + self.obs_buf[:, left_hand_pose_start + 4:left_hand_pose_start + 5] = \ + get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[1].unsqueeze(-1) + self.obs_buf[:, left_hand_pose_start + 5:left_hand_pose_start + 6] = \ + get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[2].unsqueeze(-1) + + left_right_action_obs_start = left_hand_pose_start + 6 + self.obs_buf[:, left_right_action_obs_start:left_right_action_obs_start + 26] = self.actions[:, 26:] + + obj_obs_start = left_right_action_obs_start + 26 # 144 + self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose + self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel + self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel + self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos + self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot + self.obs_buf[:, obj_obs_start + 20:obj_obs_start + 23] = self.block_left_handle_pos + self.obs_buf[:, obj_obs_start + 23:obj_obs_start + 27] = self.block_left_handle_rot + # goal_obs_start = obj_obs_start + 13 # 157 = 144 + 13 + # self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose + # self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot)) + point_clouds = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device) + + if self.camera_debug: + import matplotlib.pyplot as plt + self.camera_rgba_debug_fig = plt.figure("CAMERA_RGBD_DEBUG") + camera_rgba_image = self.camera_visulization(is_depth_image=False) + plt.imshow(camera_rgba_image) + plt.pause(1e-9) + + for i in range(self.num_envs): + # Here is an example. In practice, it's better not to convert tensor from GPU to CPU + points = depth_image_to_point_cloud_GPU(self.camera_tensors[i], self.camera_view_matrixs[i], + self.camera_proj_matrixs[i], self.camera_u2, self.camera_v2, + self.camera_props.width, self.camera_props.height, 10, self.device) + + if points.shape[0] > 0: + selected_points = self.sample_points(points, sample_num=self.pointCloudDownsampleNum, + sample_mathed='random') + else: + selected_points = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device) + + point_clouds[i] = selected_points + + if self.pointCloudVisualizer != None: + import open3d as o3d + points = point_clouds[0, :, :3].cpu().numpy() + # colors = plt.get_cmap()(point_clouds[0, :, 3].cpu().numpy()) + self.o3d_pc.points = o3d.utility.Vector3dVector(points) + # self.o3d_pc.colors = o3d.utility.Vector3dVector(colors[..., :3]) + + if self.pointCloudVisualizerInitialized == False: + self.pointCloudVisualizer.add_geometry(self.o3d_pc) + self.pointCloudVisualizerInitialized = True + else: + self.pointCloudVisualizer.update(self.o3d_pc) + + self.gym.end_access_image_tensors(self.sim) + point_clouds -= self.env_origin.view(self.num_envs, 1, 3) + + point_clouds_start = obj_obs_start + 27 + self.obs_buf[:, point_clouds_start:].copy_(point_clouds.view(self.num_envs, self.pointCloudDownsampleNum * 3)) + + def reset_target_pose(self, env_ids, apply_reset=False): + """ + Reset and randomize the goal pose + + Args: + env_ids (tensor): The index of the environment that needs to reset goal pose + + apply_reset (bool): Whether to reset the goal directly here, usually used + when the same task wants to complete multiple goals + + """ + rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device) + + new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) + + self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3] + # self.goal_states[env_ids, 1] -= 0.25 + self.goal_states[env_ids, 2] += 10.0 + + # self.goal_states[env_ids, 3:7] = new_rot + self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids, + 0:3] + self.goal_displacement_tensor + self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7] + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13]) + + if apply_reset: + goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32) + self.gym.set_actor_root_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.root_state_tensor), + gymtorch.unwrap_tensor(goal_object_indices), len(env_ids)) + self.reset_goal_buf[env_ids] = 0 + + def reset_idx(self, env_ids, goal_env_ids): + """ + Reset and randomize the environment + + Args: + env_ids (tensor): The index of the environment that needs to reset + + goal_env_ids (tensor): The index of the environment that only goals need reset + + """ + # randomization can happen only at reset time, since it can reset actor positions on GPU + if self.randomize: + self.apply_randomizations(self.randomization_params) + + # generate random values + rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), self.num_hand_dofs * 2 + 5), device=self.device) + + # randomize start object poses + self.reset_target_pose(env_ids) + + # reset object + self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone() + + new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) + if self.object_type == "pen": + rand_angle_y = torch.tensor(0.3) + new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y, + self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids], + self.z_unit_tensor[env_ids]) + + # self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot + self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.object_indices[env_ids], 7:13]) + + object_indices = torch.unique(torch.cat([self.object_indices[env_ids], + self.goal_object_indices[env_ids], + self.goal_object_indices[goal_env_ids]]).to(torch.int32)) + # self.gym.set_actor_root_state_tensor_indexed(self.sim, + # gymtorch.unwrap_tensor(self.root_state_tensor), + # gymtorch.unwrap_tensor(object_indices), len(object_indices)) + + # reset shadow hand + delta_max = self.shadow_hand_dof_upper_limits - self.shadow_hand_dof_default_pos + delta_min = self.shadow_hand_dof_lower_limits - self.shadow_hand_dof_default_pos + rand_delta = delta_min + (delta_max - delta_min) * rand_floats[:, 5:5 + self.num_hand_dofs] + + pos = self.shadow_hand_default_dof_pos + self.reset_dof_pos_noise * rand_delta + + self.right_hand_dof_pos[env_ids, :] = pos + self.left_hand_dof_pos[env_ids, :] = pos + + self.right_hand_dof_vel[env_ids, :] = self.hand_dof_default_vel + \ + self.reset_dof_vel_noise * rand_floats[:, + 5 + self.num_hand_dofs:5 + self.num_hand_dofs * 2] + + self.left_hand_dof_vel[env_ids, :] = self.hand_dof_default_vel + \ + self.reset_dof_vel_noise * rand_floats[:, + 5 + self.num_hand_dofs:5 + self.num_hand_dofs * 2] + + self.prev_targets[env_ids, :self.num_hand_dofs] = pos + self.cur_targets[env_ids, :self.num_hand_dofs] = pos + + self.prev_targets[env_ids, self.num_hand_dofs:self.num_hand_dofs * 2] = pos + self.cur_targets[env_ids, self.num_hand_dofs:self.num_hand_dofs * 2] = pos + + right_hand_indices = self.right_hand_indices[env_ids].to(torch.int32) + left_hand_indices = self.left_hand_indices[env_ids].to(torch.int32) + + all_hand_indices = torch.unique(torch.cat([right_hand_indices, + left_hand_indices]).to(torch.int32)) + + self.gym.set_dof_position_target_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.prev_targets), + gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices)) + + all_indices = torch.unique(torch.cat([all_hand_indices, + self.object_indices[env_ids], + self.table_indices[env_ids], + self.block_indices[env_ids]]).to(torch.int32)) + + self.hand_positions[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 0:3] + self.hand_orientations[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 3:7] + + self.gym.set_dof_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.dof_state), + gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices)) + + self.gym.set_actor_root_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.root_state_tensor), + gymtorch.unwrap_tensor(all_indices), len(all_indices)) + self.progress_buf[env_ids] = 0 + self.reset_buf[env_ids] = 0 + self.successes[env_ids] = 0 + + def pre_physics_step(self, actions): + """ + The pre-processing of the physics step. Determine whether the reset environment is needed, + and calculate the next movement of Shadowhand through the given action. The 52-dimensional + action space as shown in below: + + Index Description + 0 - 2 right shadow hand base translation + 3 - 5 right shadow hand base rotation + 6 - 20 right shadow hand actuated joint + 21 - 23 left shadow hand base translation + 24 - 26 left shadow hand base rotation + 27 - 41 left shadow hand actuated joint + + Args: + actions (tensor): Actions of agents in the all environment + """ + env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) + goal_env_ids = self.reset_goal_buf.nonzero(as_tuple=False).squeeze(-1) + + # if only goals need reset, then call set API + if len(goal_env_ids) > 0 and len(env_ids) == 0: + self.reset_target_pose(goal_env_ids, apply_reset=True) + # if goals need reset in addition to other envs, call set API in reset() + elif len(goal_env_ids) > 0: + self.reset_target_pose(goal_env_ids) + + if len(env_ids) > 0: + self.reset_idx(env_ids, goal_env_ids) + + self.actions = actions.clone().to(self.device) + if self.use_relative_control: + targets = self.prev_targets[:, + self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions + self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets, + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) + else: + self.cur_targets[:, self.actuated_dof_indices] = scale(self.actions[:, 6:21], + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) + self.cur_targets[:, self.actuated_dof_indices] = self.act_moving_average * self.cur_targets[:, + self.actuated_dof_indices] + ( + 1.0 - self.act_moving_average) * self.prev_targets[ + :, + self.actuated_dof_indices] + self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp( + self.cur_targets[:, self.actuated_dof_indices], + self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + + self.cur_targets[:, self.actuated_dof_indices + 15] = scale(self.actions[:, 27:42], + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) + self.cur_targets[:, self.actuated_dof_indices + 15] = self.act_moving_average * self.cur_targets[:, + self.actuated_dof_indices + 24] + ( + 1.0 - self.act_moving_average) * self.prev_targets[ + :, + self.actuated_dof_indices] + self.cur_targets[:, self.actuated_dof_indices + 15] = tensor_clamp( + self.cur_targets[:, self.actuated_dof_indices + 15], + self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + # self.cur_targets[:, 49] = scale(self.actions[:, 0], + # self.object_dof_lower_limits[1], self.object_dof_upper_limits[1]) + # angle_offsets = self.actions[:, 26:32] * self.dt * self.orientation_scale + + self.apply_forces[:, 1, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000 + self.apply_forces[:, 1 + 21, :] = actions[:, 21:24] * self.dt * self.transition_scale * 100000 + self.apply_torque[:, 1, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000 + self.apply_torque[:, 1 + 21, :] = self.actions[:, 24:27] * self.dt * self.orientation_scale * 1000 + + self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.apply_forces), + gymtorch.unwrap_tensor(self.apply_torque), gymapi.ENV_SPACE) + + self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices] + self.prev_targets[:, self.actuated_dof_indices + 15] = self.cur_targets[:, self.actuated_dof_indices + 15] + + # self.prev_targets[:, 49] = self.cur_targets[:, 49] + # self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.cur_targets)) + all_hand_indices = torch.unique(torch.cat([self.right_hand_indices, + self.left_hand_indices]).to(torch.int32)) + self.gym.set_dof_position_target_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.prev_targets), + gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices)) + + def post_physics_step(self): + """ + The post-processing of the physics step. Compute the observation and reward, and visualize auxiliary + lines for debug when needed + + """ + self.progress_buf += 1 + self.randomize_buf += 1 + + self.compute_observations() + self.compute_reward(self.actions) + + if self.viewer and self.debug_viz: + # draw axes on target object + self.gym.clear_lines(self.viewer) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + for i in range(self.num_envs): + self.add_debug_lines(self.envs[i], self.block_right_handle_pos[i], self.block_right_handle_rot[i]) + self.add_debug_lines(self.envs[i], self.block_left_handle_pos[i], self.block_left_handle_rot[i]) + # self.add_debug_lines(self.envs[i], self.goal_pos[i], self.block_left_handle_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_ff_pos[i], self.right_hand_ff_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_mf_pos[i], self.right_hand_mf_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_rf_pos[i], self.right_hand_rf_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_lf_pos[i], self.right_hand_lf_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_th_pos[i], self.right_hand_th_rot[i]) + + # self.add_debug_lines(self.envs[i], self.left_hand_ff_pos[i], self.right_hand_ff_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_mf_pos[i], self.right_hand_mf_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_rf_pos[i], self.right_hand_rf_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_lf_pos[i], self.right_hand_lf_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_th_pos[i], self.right_hand_th_rot[i]) + + def add_debug_lines(self, env, pos, rot): + posx = (pos + quat_apply(rot, to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() + posy = (pos + quat_apply(rot, to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() + posz = (pos + quat_apply(rot, to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() + + p0 = pos.cpu().numpy() + self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posx[0], posx[1], posx[2]], [0.85, 0.1, 0.1]) + self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posy[0], posy[1], posy[2]], [0.1, 0.85, 0.1]) + self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posz[0], posz[1], posz[2]], [0.1, 0.1, 0.85]) + + def rand_row(self, tensor, dim_needed): + row_total = tensor.shape[0] + return tensor[torch.randint(low=0, high=row_total, size=(dim_needed,)), :] + + def sample_points(self, points, sample_num=1000, sample_mathed='furthest'): + eff_points = points[points[:, 2] > 0.04] + if eff_points.shape[0] < sample_num: + eff_points = points + if sample_mathed == 'random': + sampled_points = self.rand_row(eff_points, sample_num) + elif sample_mathed == 'furthest': + sampled_points_id = pointnet2_utils.furthest_point_sample(eff_points.reshape(1, *eff_points.shape), + sample_num) + sampled_points = eff_points.index_select(0, sampled_points_id[0].long()) + return sampled_points + + def camera_visulization(self, is_depth_image=False): + if is_depth_image: + camera_depth_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0], + gymapi.IMAGE_DEPTH) + torch_depth_tensor = gymtorch.wrap_tensor(camera_depth_tensor) + torch_depth_tensor = torch.clamp(torch_depth_tensor, -1, 1) + torch_depth_tensor = scale(torch_depth_tensor, to_torch([0], dtype=torch.float, device=self.device), + to_torch([256], dtype=torch.float, device=self.device)) + camera_image = torch_depth_tensor.cpu().numpy() + camera_image = Im.fromarray(camera_image) + + else: + camera_rgba_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0], + gymapi.IMAGE_COLOR) + torch_rgba_tensor = gymtorch.wrap_tensor(camera_rgba_tensor) + camera_image = torch_rgba_tensor.cpu().numpy() + camera_image = Im.fromarray(camera_image) + + return camera_image + + +##################################################################### +###=========================jit functions=========================### +##################################################################### + +@torch.jit.script +def depth_image_to_point_cloud_GPU(camera_tensor, camera_view_matrix_inv, camera_proj_matrix, u, v, width: float, + height: float, depth_bar: float, device: torch.device): + # time1 = time.time() + depth_buffer = camera_tensor.to(device) + + # Get the camera view matrix and invert it to transform points from camera to world space + vinv = camera_view_matrix_inv + + # Get the camera projection matrix and get the necessary scaling + # coefficients for deprojection + + proj = camera_proj_matrix + fu = 2 / proj[0, 0] + fv = 2 / proj[1, 1] + + centerU = width / 2 + centerV = height / 2 + + Z = depth_buffer + X = -(u - centerU) / width * Z * fu + Y = (v - centerV) / height * Z * fv + + Z = Z.view(-1) + valid = Z > -depth_bar + X = X.view(-1) + Y = Y.view(-1) + + position = torch.vstack((X, Y, Z, torch.ones(len(X), device=device)))[:, valid] + position = position.permute(1, 0) + position = position @ vinv + + points = position[:, 0:3] + + return points + + +@torch.jit.script +def compute_hand_reward( + rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes, + max_episode_length: float, object_pos, object_rot, target_pos, target_rot, block_right_handle_pos, + block_left_handle_pos, + left_hand_pos, right_hand_pos, right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, + right_hand_th_pos, + left_hand_ff_pos, left_hand_mf_pos, left_hand_rf_pos, left_hand_lf_pos, left_hand_th_pos, + dist_reward_scale: float, rot_reward_scale: float, rot_eps: float, + actions, action_penalty_scale: float, + success_tolerance: float, reach_goal_bonus: float, fall_dist: float, + fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool +): + """ + Compute the reward of all environment. + + Args: + rew_buf (tensor): The reward buffer of all environments at this time + + reset_buf (tensor): The reset buffer of all environments at this time + + reset_goal_buf (tensor): The only-goal reset buffer of all environments at this time + + progress_buf (tensor): The porgress buffer of all environments at this time + + successes (tensor): The successes buffer of all environments at this time + + consecutive_successes (tensor): The consecutive successes buffer of all environments at this time + + max_episode_length (float): The max episode length in this environment + + object_pos (tensor): The position of the object + + object_rot (tensor): The rotation of the object + + target_pos (tensor): The position of the target + + target_rot (tensor): The rotate of the target + + block_right_handle_pos (tensor): The position of the right block handle + + block_left_handle_pos (tensor): The position of the left block handle + + left_hand_pos, right_hand_pos (tensor): The position of the bimanual hands + + right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, right_hand_th_pos (tensor): The position of the five fingers + of the right hand + + left_hand_ff_pos, left_hand_mf_pos, left_hand_rf_pos, left_hand_lf_pos, left_hand_th_pos (tensor): The position of the five fingers + of the left hand + + dist_reward_scale (float): The scale of the distance reward + + rot_reward_scale (float): The scale of the rotation reward + + rot_eps (float): The epsilon of the rotation calculate + + actions (tensor): The action buffer of all environments at this time + + action_penalty_scale (float): The scale of the action penalty reward + + success_tolerance (float): The tolerance of the success determined + + reach_goal_bonus (float): The reward given when the object reaches the goal + + fall_dist (float): When the object is far from the Shadowhand, it is judged as falling + + fall_penalty (float): The reward given when the object is fell + + max_consecutive_successes (float): The maximum of the consecutive successes + + av_factor (float): The average factor for calculate the consecutive successes + + ignore_z_rot (bool): Is it necessary to ignore the rot of the z-axis, which is usually used + for some specific objects (e.g. pen) + """ + # Distance from the hand to the object + left_goal_dist = torch.norm(target_pos - block_left_handle_pos, p=2, dim=-1) + right_goal_dist = torch.norm(target_pos - block_right_handle_pos, p=2, dim=-1) + # goal_dist = target_pos[:, 2] - object_pos[:, 2] + + right_hand_dist = torch.norm(block_right_handle_pos - right_hand_pos, p=2, dim=-1) + left_hand_dist = torch.norm(block_left_handle_pos - left_hand_pos, p=2, dim=-1) + + right_hand_finger_dist = (torch.norm(block_right_handle_pos - right_hand_ff_pos, p=2, dim=-1) + torch.norm( + block_right_handle_pos - right_hand_mf_pos, p=2, dim=-1) + + torch.norm(block_right_handle_pos - right_hand_rf_pos, p=2, dim=-1) + torch.norm( + block_right_handle_pos - right_hand_lf_pos, p=2, dim=-1) + + torch.norm(block_right_handle_pos - right_hand_th_pos, p=2, dim=-1)) + left_hand_finger_dist = (torch.norm(block_left_handle_pos - left_hand_ff_pos, p=2, dim=-1) + torch.norm( + block_left_handle_pos - left_hand_mf_pos, p=2, dim=-1) + + torch.norm(block_left_handle_pos - left_hand_rf_pos, p=2, dim=-1) + torch.norm( + block_left_handle_pos - left_hand_lf_pos, p=2, dim=-1) + + torch.norm(block_left_handle_pos - left_hand_th_pos, p=2, dim=-1)) + # Orientation alignment for the cube in hand and goal cube + # quat_diff = quat_mul(object_rot, quat_conjugate(target_rot)) + # rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0)) + + right_hand_dist_rew = torch.exp(-10 * right_hand_finger_dist) + left_hand_dist_rew = torch.exp(-10 * left_hand_finger_dist) + + # rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale + + action_penalty = torch.sum(actions ** 2, dim=-1) + + # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty + # reward = torch.exp(-0.05*(up_rew * dist_reward_scale)) + torch.exp(-0.05*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.05*(left_hand_dist_rew * dist_reward_scale)) + # up_rew = torch.zeros_like(right_hand_dist_rew) + # up_rew = torch.where(right_hand_finger_dist < 0.6, + # torch.where(left_hand_finger_dist < 0.4, + up_rew = torch.zeros_like(right_hand_dist_rew) + up_rew = torch.exp(-10 * torch.norm(block_right_handle_pos - block_left_handle_pos, p=2, dim=-1)) * 2 + # up_rew = torch.where(right_hand_finger_dist <= 0.3, torch.norm(bottle_cap_up - bottle_pos, p=2, dim=-1) * 30, up_rew) + + # reward = torch.exp(-0.1*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.1*(left_hand_dist_rew * dist_reward_scale)) + reward = right_hand_dist_rew + left_hand_dist_rew + up_rew + + resets = torch.where(right_hand_dist_rew <= 0, torch.ones_like(reset_buf), reset_buf) + resets = torch.where(right_hand_finger_dist >= 1.5, torch.ones_like(resets), resets) + resets = torch.where(left_hand_finger_dist >= 1.5, torch.ones_like(resets), resets) + + # Find out which envs hit the goal and update successes count + successes = torch.where(successes == 0, + torch.where(torch.norm(block_right_handle_pos - block_left_handle_pos, p=2, dim=-1) < 0.2, + torch.ones_like(successes), successes), successes) + + resets = torch.where(progress_buf >= max_episode_length, torch.ones_like(resets), resets) + + goal_resets = torch.zeros_like(resets) + + num_resets = torch.sum(resets) + finished_cons_successes = torch.sum(successes * resets.float()) + + cons_successes = torch.where(resets > 0, successes * resets, consecutive_successes).mean() + + return reward, resets, goal_resets, progress_buf, successes, cons_successes + + +@torch.jit.script +def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor): + return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), + quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)) + + +@torch.jit.script +def randomize_rotation_pen(rand0, rand1, max_angle, x_unit_tensor, y_unit_tensor, z_unit_tensor): + rot = quat_mul(quat_from_angle_axis(0.5 * np.pi + rand0 * max_angle, x_unit_tensor), + quat_from_angle_axis(rand0 * np.pi, z_unit_tensor)) + return rot diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py new file mode 100644 index 000000000..5d4ba45e7 --- /dev/null +++ b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py @@ -0,0 +1,1267 @@ +import os +import random + +from PIL import Image as Im +from isaacgym import gymapi +from isaacgym import gymtorch + +from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * +from rofunc.utils.oslab import get_rofunc_path +from rofunc.utils.logger.beauty_logger import beauty_print + + +class QbSoftHandGraspTask(VecTask): + """ + This class corresponds to the GraspAndPlace task. This environment consists of dual-hands, an + object and a bucket that requires us to pick up the object and put it into the bucket. + """ + + def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, + force_render, agent_index=[[[0, 1, 2, 3, 4, 5]], [[0, 1, 2, 3, 4, 5]]], is_multi_agent=False): + self.cfg = cfg + self.agent_index = agent_index + + self.is_multi_agent = is_multi_agent + + self.randomize = self.cfg["task"]["randomize"] + self.randomization_params = self.cfg["task"]["randomization_params"] + self.aggregate_mode = self.cfg["env"]["aggregateMode"] + + self.dist_reward_scale = self.cfg["env"]["distRewardScale"] + self.rot_reward_scale = self.cfg["env"]["rotRewardScale"] + self.action_penalty_scale = self.cfg["env"]["actionPenaltyScale"] + self.success_tolerance = self.cfg["env"]["successTolerance"] + self.reach_goal_bonus = self.cfg["env"]["reachGoalBonus"] + self.fall_dist = self.cfg["env"]["fallDistance"] + self.fall_penalty = self.cfg["env"]["fallPenalty"] + self.rot_eps = self.cfg["env"]["rotEps"] + + self.vel_obs_scale = 0.2 # scale factor of velocity based observations + self.force_torque_obs_scale = 10.0 # scale factor of velocity based observations + + self.reset_position_noise = self.cfg["env"]["resetPositionNoise"] + self.reset_rotation_noise = self.cfg["env"]["resetRotationNoise"] + self.reset_dof_pos_noise = self.cfg["env"]["resetDofPosRandomInterval"] + self.reset_dof_vel_noise = self.cfg["env"]["resetDofVelRandomInterval"] + + self.qbsofthand_dof_speed_scale = self.cfg["env"]["dofSpeedScale"] + self.use_relative_control = self.cfg["env"]["useRelativeControl"] + self.act_moving_average = self.cfg["env"]["actionsMovingAverage"] + + self.debug_viz = self.cfg["env"]["enableDebugVis"] + + self.max_episode_length = self.cfg["env"]["episodeLength"] + self.reset_time = self.cfg["env"].get("resetTime", -1.0) + self.print_success_stat = self.cfg["env"]["printNumSuccesses"] + self.max_consecutive_successes = self.cfg["env"]["maxConsecutiveSuccesses"] + self.av_factor = self.cfg["env"].get("averFactor", 0.01) + print("Averaging factor: ", self.av_factor) + + self.transition_scale = self.cfg["env"]["transition_scale"] + self.orientation_scale = self.cfg["env"]["orientation_scale"] + + control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1) + if self.reset_time > 0.0: + self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.sim_params.dt))) + print("Reset time: ", self.reset_time) + print("New episode length: ", self.max_episode_length) + + self.object_type = self.cfg["env"]["objectType"] + # assert self.object_type in ["block", "egg", "pen"] + + self.ignore_z = (self.object_type == "pen") + + self.asset_files_dict = { + "block": "urdf/objects/cube_multicolor.urdf", + "egg": "mjcf/open_ai_assets/hand/egg.xml", + "pen": "mjcf/open_ai_assets/hand/pen.xml", + # "pot": "mjcf/pot.xml", + "pot": "mjcf/bucket/100454/mobility.urdf" + } + + if "asset" in self.cfg["env"]: + self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock", + self.asset_files_dict["block"]) + self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg", + self.asset_files_dict["egg"]) + self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen", + self.asset_files_dict["pen"]) + + # can be "openai", "full_no_vel", "full", "full_state" + self.obs_type = self.cfg["env"]["observationType"] + + if not (self.obs_type in ["point_cloud", "full_state"]): + raise Exception( + "Unknown type of observations!\nobservationType should be one of: [point_cloud, full_state]") + + print("Obs type:", self.obs_type) + + self.num_point_cloud_feature_dim = 768 + self.num_obs_dict = { + "point_cloud": 157 + self.num_point_cloud_feature_dim * 3, + "point_cloud_for_distill": 157 + self.num_point_cloud_feature_dim * 3, + "full_state": 157 + } + self.num_hand_obs = 45 + 65 + 21 + 6 # TODO + self.up_axis = 'z' + + self.fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link", + "right_qbhand_middle_distal_link", "right_qbhand_ring_distal_link", + "right_qbhand_little_distal_link"] + + self.num_fingertips = len(self.fingertips) + + self.use_vel_obs = False + self.fingertip_obs = True + self.asymmetric_obs = self.cfg["env"]["asymmetric_observations"] + + num_states = 0 + + self.cfg["env"]["numObservations"] = self.num_obs_dict[self.obs_type] + self.cfg["env"]["numStates"] = num_states + self.cfg["env"]["numActions"] = 15 + 6 # hand dof + hand pos + hand rot + + super().__init__(cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, force_render) + + if self.obs_type in ["point_cloud"]: + from PIL import Image as Im + # from pointnet2_ops import pointnet2_utils + + self.camera_debug = self.cfg["env"].get("cameraDebug", False) + self.point_cloud_debug = self.cfg["env"].get("pointCloudDebug", False) + + if self.viewer != None: + cam_pos = gymapi.Vec3(10.0, 5.0, 1.0) + cam_target = gymapi.Vec3(6.0, 5.0, 0.0) + self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) + + # get gym GPU state tensors + actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim) + dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) + rigid_body_tensor = self.gym.acquire_rigid_body_state_tensor(self.sim) + + # sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) + # self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6) + + dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) + self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, + self.num_qbsofthand_dofs + self.num_object_dofs * 2) + self.dof_force_tensor = self.dof_force_tensor[:, :15] + + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + # create some wrapper tensors for different slices + self.qbsofthand_default_dof_pos = torch.zeros(self.num_qbsofthand_dofs, dtype=torch.float, device=self.device) + # self.qbsofthand_default_dof_pos = to_torch([0.0, 0.0, -0, -0, -0, -0, -0, -0, + # -0, -0, -0, -0, -0, -0, -0, -0, + # -0, -0, -0, -1.04, 1.2, 0., 0, -1.57], dtype=torch.float, device=self.device) + + self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) + self.qbsofthand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_qbsofthand_dofs] + self.qbsofthand_dof_pos = self.qbsofthand_dof_state[..., 0] + self.qbsofthand_dof_vel = self.qbsofthand_dof_state[..., 1] + + self.rigid_body_states = gymtorch.wrap_tensor(rigid_body_tensor).view(self.num_envs, -1, 13) # (256, 22, 13) + self.num_bodies = self.rigid_body_states.shape[1] # 22 + + self.root_state_tensor = gymtorch.wrap_tensor(actor_root_state_tensor).view(-1, 13) + self.hand_positions = self.root_state_tensor[:, 0:3] + self.hand_orientations = self.root_state_tensor[:, 3:7] + self.hand_linvels = self.root_state_tensor[:, 7:10] + self.hand_angvels = self.root_state_tensor[:, 10:13] + self.saved_root_tensor = self.root_state_tensor.clone() + + self.num_dofs = self.gym.get_sim_dof_count(self.sim) // self.num_envs + self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device) + self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device) + + self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs, + -1) + self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + + self.reset_goal_buf = self.reset_buf.clone() + self.successes = torch.zeros(self.num_envs, dtype=torch.float, device=self.device) + self.consecutive_successes = torch.zeros(1, dtype=torch.float, device=self.device) + + self.av_factor = to_torch(self.av_factor, dtype=torch.float, device=self.device) + self.apply_forces = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float) + self.apply_torque = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float) + + self.total_successes = 0 + self.total_resets = 0 + + def create_sim(self): + """ + Allocates which device will simulate and which device will render the scene. Defines the simulation type to be used + """ + self.dt = self.sim_params.dt + self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, self.up_axis) + + self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) + self._create_ground_plane() + self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs))) + + def _create_ground_plane(self): + """ + Adds ground plane to simulation + """ + plane_params = gymapi.PlaneParams() + plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) + self.gym.add_ground(self.sim, plane_params) + + def _create_envs(self, num_envs, spacing, num_per_row): + """ + Create multiple parallel isaacgym environments + + Args: + num_envs (int): The total number of environment + + spacing (float): Specifies half the side length of the square area occupied by each environment + + num_per_row (int): Specify how many environments in a row + """ + lower = gymapi.Vec3(-spacing, -spacing, 0.0) + upper = gymapi.Vec3(spacing, spacing, spacing) + + # get rofunc path from rofunc package metadata + rofunc_path = get_rofunc_path() + asset_root = os.path.join(rofunc_path, "simulator/assets") + qbsofthand_asset_file = "mjcf/qbhand_no_virtual_right.xml" + table_texture_files = os.path.join(asset_root, "textures/texture_stone_stone_texture_0.jpg") + table_texture_handle = self.gym.create_texture_from_file(self.sim, table_texture_files) + + if "asset" in self.cfg["env"]: + asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root) + qbsofthand_asset_file = self.cfg["env"]["asset"].get("assetFileName", qbsofthand_asset_file) + + object_asset_file = self.asset_files_dict[self.object_type] + + # load shadow hand_ asset + asset_options = gymapi.AssetOptions() + asset_options.flip_visual_attachments = False + asset_options.fix_base_link = False + asset_options.collapse_fixed_joints = True + asset_options.disable_gravity = True + asset_options.thickness = 0.001 + asset_options.angular_damping = 100 + asset_options.linear_damping = 100 + + if self.physics_engine == gymapi.SIM_PHYSX: + asset_options.use_physx_armature = True + asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE + + qbsofthand_asset = self.gym.load_asset(self.sim, asset_root, qbsofthand_asset_file, asset_options) + + self.num_qbsofthand_bodies = self.gym.get_asset_rigid_body_count(qbsofthand_asset) + self.num_qbsofthand_shapes = self.gym.get_asset_rigid_shape_count(qbsofthand_asset) + self.num_qbsofthand_dofs = self.gym.get_asset_dof_count(qbsofthand_asset) + self.num_qbsofthand_actuators = self.gym.get_asset_actuator_count(qbsofthand_asset) + self.num_qbsofthand_tendons = self.gym.get_asset_tendon_count(qbsofthand_asset) + beauty_print(f"self.num_qbsofthand_bodies: {self.num_qbsofthand_bodies}") + beauty_print(f"self.num_qbsofthand_shapes: {self.num_qbsofthand_shapes}") + beauty_print(f"self.num_qbsofthand_dofs: {self.num_qbsofthand_dofs}") + beauty_print(f"self.num_qbsofthand_actuators: {self.num_qbsofthand_actuators}") + beauty_print(f"self.num_qbsofthand_tendons: {self.num_qbsofthand_tendons}") + + actuated_dof_names = [self.gym.get_asset_actuator_joint_name(qbsofthand_asset, i) for i in + range(self.num_qbsofthand_actuators)] + self.actuated_dof_indices = [self.gym.find_asset_dof_index(qbsofthand_asset, name) for name in + actuated_dof_names] + beauty_print(f"self.actuated_dof_indices: {self.actuated_dof_indices}") + + # set qbsofthand dof properties + qbsofthand_dof_props = self.gym.get_asset_dof_properties(qbsofthand_asset) + + self.qbsofthand_dof_lower_limits = [] + self.qbsofthand_dof_upper_limits = [] + self.qbsofthand_dof_default_pos = [] + self.qbsofthand_dof_default_vel = [] + + for i in range(self.num_qbsofthand_dofs): + self.qbsofthand_dof_lower_limits.append(qbsofthand_dof_props['lower'][i]) + self.qbsofthand_dof_upper_limits.append(qbsofthand_dof_props['upper'][i]) + self.qbsofthand_dof_default_pos.append(0.0) + self.qbsofthand_dof_default_vel.append(0.0) + + self.actuated_dof_indices = to_torch(self.actuated_dof_indices, dtype=torch.long, device=self.device) + self.qbsofthand_dof_lower_limits = to_torch(self.qbsofthand_dof_lower_limits, device=self.device) + self.qbsofthand_dof_upper_limits = to_torch(self.qbsofthand_dof_upper_limits, device=self.device) + self.qbsofthand_dof_default_pos = to_torch(self.qbsofthand_dof_default_pos, device=self.device) + self.qbsofthand_dof_default_vel = to_torch(self.qbsofthand_dof_default_vel, device=self.device) + + # load manipulated object and goal assets + object_asset_options = gymapi.AssetOptions() + object_asset_options.density = 500 + object_asset_options.fix_base_link = False + # object_asset_options.collapse_fixed_joints = True + # object_asset_options.disable_gravity = True + object_asset_options.use_mesh_materials = True + object_asset_options.mesh_normal_mode = gymapi.COMPUTE_PER_VERTEX + object_asset_options.override_com = True + object_asset_options.override_inertia = True + object_asset_options.vhacd_enabled = True + object_asset_options.vhacd_params = gymapi.VhacdParams() + object_asset_options.vhacd_params.resolution = 100000 + object_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE + + object_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options) + block_asset_file = "urdf/objects/cube_multicolor1.urdf" + block_asset = self.gym.load_asset(self.sim, asset_root, block_asset_file, object_asset_options) + + object_asset_options.disable_gravity = True + goal_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options) + + self.num_object_bodies = self.gym.get_asset_rigid_body_count(object_asset) + self.num_object_shapes = self.gym.get_asset_rigid_shape_count(object_asset) + + # set object dof properties + self.num_object_dofs = self.gym.get_asset_dof_count(object_asset) + object_dof_props = self.gym.get_asset_dof_properties(object_asset) + + self.object_dof_lower_limits = [] + self.object_dof_upper_limits = [] + + for i in range(self.num_object_dofs): + self.object_dof_lower_limits.append(object_dof_props['lower'][i]) + self.object_dof_upper_limits.append(object_dof_props['upper'][i]) + + self.object_dof_lower_limits = to_torch(self.object_dof_lower_limits, device=self.device) + self.object_dof_upper_limits = to_torch(self.object_dof_upper_limits, device=self.device) + + # create table asset + table_dims = gymapi.Vec3(1.0, 1.0, 0.6) + asset_options = gymapi.AssetOptions() + asset_options.fix_base_link = True + asset_options.flip_visual_attachments = True + asset_options.collapse_fixed_joints = True + asset_options.disable_gravity = True + asset_options.thickness = 0.001 + + table_asset = self.gym.create_box(self.sim, table_dims.x, table_dims.y, table_dims.z, gymapi.AssetOptions()) + + qbsofthand_start_pose = gymapi.Transform() + qbsofthand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8) + qbsofthand_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 0, 1.57) + + object_start_pose = gymapi.Transform() + object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6) + object_start_pose.r = gymapi.Quat().from_euler_zyx(0, 0, 0) + + block_start_pose = gymapi.Transform() + block_start_pose.p = gymapi.Vec3(0.0, -0.2, 0.6) + block_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 1.57, 0) + + if self.object_type == "pen": + object_start_pose.p.z = qbsofthand_start_pose.p.z + 0.02 + + self.goal_displacement = gymapi.Vec3(-0., 0.0, 10) + self.goal_displacement_tensor = to_torch( + [self.goal_displacement.x, self.goal_displacement.y, self.goal_displacement.z], device=self.device) + goal_start_pose = gymapi.Transform() + goal_start_pose.p = object_start_pose.p + self.goal_displacement + + goal_start_pose.p.z -= 0.0 + + table_pose = gymapi.Transform() + table_pose.p = gymapi.Vec3(0.0, 0.0, 0.5 * table_dims.z) + table_pose.r = gymapi.Quat().from_euler_zyx(-0., 0, 0) + + # compute aggregate size + max_agg_bodies = self.num_qbsofthand_bodies + 3 * self.num_object_bodies + 1 + max_agg_shapes = self.num_qbsofthand_shapes + 3 * self.num_object_shapes + 1 + + self.qbsofthands = [] + self.envs = [] + + self.object_init_state = [] + self.hand_start_states = [] + + self.hand_indices = [] + self.fingertip_indices = [] + self.object_indices = [] + self.goal_object_indices = [] + self.table_indices = [] + self.block_indices = [] + + self.fingertip_handles = [self.gym.find_asset_rigid_body_index(qbsofthand_asset, name) for name in + self.fingertips] + + # # create fingertip force sensors, if needed + # sensor_pose = gymapi.Transform() + # for ft_handle in self.fingertip_handles: + # self.gym.create_asset_force_sensor(qbsofthand_asset, ft_handle, sensor_pose) + + if self.obs_type in ["point_cloud"]: + self.cameras = [] + self.camera_tensors = [] + self.camera_view_matrixs = [] + self.camera_proj_matrixs = [] + + self.camera_props = gymapi.CameraProperties() + self.camera_props.width = 256 + self.camera_props.height = 256 + self.camera_props.enable_tensors = True + + self.env_origin = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float) + self.pointCloudDownsampleNum = 768 + self.camera_u = torch.arange(0, self.camera_props.width, device=self.device) + self.camera_v = torch.arange(0, self.camera_props.height, device=self.device) + + self.camera_v2, self.camera_u2 = torch.meshgrid(self.camera_v, self.camera_u, indexing='ij') + + if self.point_cloud_debug: + import open3d as o3d + from bidexhands.utils.o3dviewer import PointcloudVisualizer + self.pointCloudVisualizer = PointcloudVisualizer() + self.pointCloudVisualizerInitialized = False + self.o3d_pc = o3d.geometry.PointCloud() + else: + self.pointCloudVisualizer = None + + for i in range(self.num_envs): + # create env instance + env_ptr = self.gym.create_env( + self.sim, lower, upper, num_per_row + ) + + if self.aggregate_mode >= 1: + self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True) + + # add hand - collision filter = -1 to use asset collision filters set in mjcf loader + qbsofthand_actor = self.gym.create_actor(env_ptr, qbsofthand_asset, qbsofthand_start_pose, "hand", i, 2) + + self.hand_start_states.append( + [qbsofthand_start_pose.p.x, qbsofthand_start_pose.p.y, qbsofthand_start_pose.p.z, + qbsofthand_start_pose.r.x, qbsofthand_start_pose.r.y, qbsofthand_start_pose.r.z, + qbsofthand_start_pose.r.w, + 0, 0, 0, 0, 0, 0]) + + self.gym.set_actor_dof_properties(env_ptr, qbsofthand_actor, qbsofthand_dof_props) + hand_idx = self.gym.get_actor_index(env_ptr, qbsofthand_actor, gymapi.DOMAIN_SIM) + self.hand_indices.append(hand_idx) + + # randomize colors and textures for rigid body + num_bodies = self.gym.get_actor_rigid_body_count(env_ptr, qbsofthand_actor) + hand_rigid_body_index = [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]] + + for n in self.agent_index[0]: + colorx = random.uniform(0, 1) + colory = random.uniform(0, 1) + colorz = random.uniform(0, 1) + for m in n: + for o in hand_rigid_body_index[m]: + self.gym.set_rigid_body_color(env_ptr, qbsofthand_actor, o, gymapi.MESH_VISUAL, + gymapi.Vec3(colorx, colory, colorz)) + + # # create fingertip force-torque sensors + # self.gym.enable_actor_dof_force_sensors(env_ptr, qbsofthand_actor) + + # add object + object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0) + self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z, + object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z, + object_start_pose.r.w, + 0, 0, 0, 0, 0, 0]) + object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM) + self.object_indices.append(object_idx) + # self.gym.set_actor_scale(env_ptr, object_handle, 0.3) + + block_handle = self.gym.create_actor(env_ptr, block_asset, block_start_pose, "block", i, 0, 0) + block_idx = self.gym.get_actor_index(env_ptr, block_handle, gymapi.DOMAIN_SIM) + self.block_indices.append(block_idx) + + # add goal object + goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs, + 0, 0) + goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM) + self.goal_object_indices.append(goal_object_idx) + # self.gym.set_actor_scale(env_ptr, goal_handle, 0.3) + + # add table + table_handle = self.gym.create_actor(env_ptr, table_asset, table_pose, "table", i, -1, 0) + self.gym.set_rigid_body_texture(env_ptr, table_handle, 0, gymapi.MESH_VISUAL, table_texture_handle) + table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM) + self.table_indices.append(table_idx) + + if self.object_type != "block": + self.gym.set_rigid_body_color( + env_ptr, object_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98)) + self.gym.set_rigid_body_color( + env_ptr, goal_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98)) + + if self.obs_type in ["point_cloud"]: + camera_handle = self.gym.create_camera_sensor(env_ptr, self.camera_props) + self.gym.set_camera_location(camera_handle, env_ptr, gymapi.Vec3(0.25, -0., 1.0), + gymapi.Vec3(-0.24, -0., 0)) + camera_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, env_ptr, camera_handle, + gymapi.IMAGE_DEPTH) + torch_cam_tensor = gymtorch.wrap_tensor(camera_tensor) + cam_vinv = torch.inverse( + (torch.tensor(self.gym.get_camera_view_matrix(self.sim, env_ptr, camera_handle)))).to(self.device) + cam_proj = torch.tensor(self.gym.get_camera_proj_matrix(self.sim, env_ptr, camera_handle), + device=self.device) + + origin = self.gym.get_env_origin(env_ptr) + self.env_origin[i][0] = origin.x + self.env_origin[i][1] = origin.y + self.env_origin[i][2] = origin.z + self.camera_tensors.append(torch_cam_tensor) + self.camera_view_matrixs.append(cam_vinv) + self.camera_proj_matrixs.append(cam_proj) + self.cameras.append(camera_handle) + + if self.aggregate_mode > 0: + self.gym.end_aggregate(env_ptr) + + self.envs.append(env_ptr) + self.qbsofthands.append(qbsofthand_actor) + + self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view( + self.num_envs, 13) + self.goal_states = self.object_init_state.clone() + self.goal_init_state = self.goal_states.clone() + self.hand_start_states = to_torch(self.hand_start_states, device=self.device).view(self.num_envs, 13) + + self.fingertip_handles = to_torch(self.fingertip_handles, dtype=torch.long, device=self.device) + + self.hand_indices = to_torch(self.hand_indices, dtype=torch.long, device=self.device) + self.object_indices = to_torch(self.object_indices, dtype=torch.long, device=self.device) + self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device) + self.table_indices = to_torch(self.table_indices, dtype=torch.long, device=self.device) + self.block_indices = to_torch(self.block_indices, dtype=torch.long, device=self.device) + + def compute_reward(self, actions): + """ + Compute the reward of all environment. The core function is compute_hand_reward( + self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes, + self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.block_right_handle_pos, self.block_left_handle_pos, + self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos, + self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos, self.left_hand_th_pos, + self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale, + self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty, + self.max_consecutive_successes, self.av_factor, (self.object_type == "pen") + ) + , which we will introduce in detail there + + Args: + actions (tensor): Actions of agents in the all environment + """ + self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[ + :], self.consecutive_successes[ + :] = compute_hand_reward( + self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, + self.consecutive_successes, + self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, + self.block_right_handle_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, + self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos, + self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale, + self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty, + self.max_consecutive_successes, self.av_factor, (self.object_type == "pen") + ) + + self.extras['successes'] = self.successes + self.extras['consecutive_successes'] = self.consecutive_successes + + if self.print_success_stat: + self.total_resets = self.total_resets + self.reset_buf.sum() + direct_average_successes = self.total_successes + self.successes.sum() + self.total_successes = self.total_successes + (self.successes * self.reset_buf).sum() + + # The direct average shows the overall result more quickly, but slightly undershoots long term + # policy performance. + print("Direct average consecutive successes = {:.1f}".format( + direct_average_successes / (self.total_resets + self.num_envs))) + if self.total_resets > 0: + print("Post-Reset average consecutive successes = {:.1f}".format( + self.total_successes / self.total_resets)) + + def compute_observations(self): + """ + Compute the observations of all environment. The core function is self.compute_full_state(True), + which we will introduce in detail there + + """ + self.gym.refresh_dof_state_tensor(self.sim) + self.gym.refresh_actor_root_state_tensor(self.sim) + self.gym.refresh_rigid_body_state_tensor(self.sim) + self.gym.refresh_force_sensor_tensor(self.sim) + self.gym.refresh_dof_force_tensor(self.sim) + + if self.obs_type in ["point_cloud"]: + self.gym.render_all_camera_sensors(self.sim) + self.gym.start_access_image_tensors(self.sim) + + self.object_pose = self.root_state_tensor[self.object_indices, 0:7] + self.object_pos = self.root_state_tensor[self.object_indices, 0:3] + self.object_rot = self.root_state_tensor[self.object_indices, 3:7] + self.object_linvel = self.root_state_tensor[self.object_indices, 7:10] + self.object_angvel = self.root_state_tensor[self.object_indices, 10:13] + + self.block_right_handle_pos = self.rigid_body_states[:, 16 + 1, 0:3] + self.block_right_handle_rot = self.rigid_body_states[:, 16 + 1, 3:7] + self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, + to_torch([0, 1, 0], + device=self.device).repeat( + self.num_envs, 1) * 0.) + self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, + to_torch([1, 0, 0], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, + to_torch([0, 0, 1], + device=self.device).repeat( + self.num_envs, 1) * 0.0) + + + self.right_hand_pos = self.rigid_body_states[:, 0, 0:3] + self.right_hand_rot = self.rigid_body_states[:, 0, 3:7] + self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.08) + self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot, + to_torch([0, 1, 0], device=self.device).repeat( + self.num_envs, 1) * -0.02) + + # right hand finger + self.right_hand_th_pos = self.rigid_body_states[:, 3, 0:3] + self.right_hand_th_rot = self.rigid_body_states[:, 3, 3:7] + self.right_hand_th_pos = self.right_hand_th_pos + quat_apply(self.right_hand_th_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_ff_pos = self.rigid_body_states[:, 6, 0:3] + self.right_hand_ff_rot = self.rigid_body_states[:, 6, 3:7] + self.right_hand_ff_pos = self.right_hand_ff_pos + quat_apply(self.right_hand_ff_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_mf_pos = self.rigid_body_states[:, 9, 0:3] + self.right_hand_mf_rot = self.rigid_body_states[:, 9, 3:7] + self.right_hand_mf_pos = self.right_hand_mf_pos + quat_apply(self.right_hand_mf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_rf_pos = self.rigid_body_states[:, 12, 0:3] + self.right_hand_rf_rot = self.rigid_body_states[:, 12, 3:7] + self.right_hand_rf_pos = self.right_hand_rf_pos + quat_apply(self.right_hand_rf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + self.right_hand_lf_pos = self.rigid_body_states[:, 15, 0:3] + self.right_hand_lf_rot = self.rigid_body_states[:, 15, 3:7] + self.right_hand_lf_pos = self.right_hand_lf_pos + quat_apply(self.right_hand_lf_rot, + to_torch([0, 0, 1], device=self.device).repeat( + self.num_envs, 1) * 0.02) + + self.goal_pos = to_torch([-0.3, 0, 0.6], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) + self.goal_rot = self.goal_states[:, 3:7] + + self.fingertip_state = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:13] + self.fingertip_pos = self.rigid_body_states[:, self.fingertip_handles][:, :, 0:3] + + if self.obs_type == "full_state": + self.compute_full_state() + elif self.obs_type == "point_cloud": + self.compute_point_cloud_observation() + + if self.asymmetric_obs: + self.compute_full_state(True) + + def compute_full_state(self, asymm_obs=False): + """ + Compute the observations of all environment. The observation is composed of three parts: + the state values of the left and right hands, and the information of objects and target. + The state values of the left and right hands were the same for each task, including hand + joint and finger positions, velocity, and force information. The detail 157-dimensional + observational space as shown in below: + + Index Description + 0 - 14 right shadow hand dof position + 15 - 29 right shadow hand dof velocity + 30 - 44 right shadow hand dof force + 45 - 109 right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 110 - 112 right shadow hand base position + 113 - 115 right shadow hand base rotation + 116 - 130 right shadow hand actions + 131 - 137 object pose + 138 - 140 object linear velocity + 141 - 143 object angle velocity + 144 - 146 block right handle position + 147 - 150 block right handle rotation + """ + num_ft_states = 13 * int(self.num_fingertips) # 65 + + self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos, + self.qbsofthand_dof_lower_limits, + self.qbsofthand_dof_upper_limits) + self.obs_buf[:, + self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel + self.obs_buf[:, + 2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[ + :, :15] + + fingertip_obs_start = 45 + self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape( + self.num_envs, num_ft_states) + + hand_pose_start = fingertip_obs_start + 65 + self.obs_buf[:, hand_pose_start:hand_pose_start + 3] = self.right_hand_pos + self.obs_buf[:, hand_pose_start + 3:hand_pose_start + 4] = \ + get_euler_xyz(self.hand_orientations[self.hand_indices, :])[0].unsqueeze(-1) + self.obs_buf[:, hand_pose_start + 4:hand_pose_start + 5] = \ + get_euler_xyz(self.hand_orientations[self.hand_indices, :])[1].unsqueeze(-1) + self.obs_buf[:, hand_pose_start + 5:hand_pose_start + 6] = \ + get_euler_xyz(self.hand_orientations[self.hand_indices, :])[2].unsqueeze(-1) + + action_obs_start = hand_pose_start + 6 + self.obs_buf[:, action_obs_start:action_obs_start + 21] = self.actions[:, :21] + + obj_obs_start = action_obs_start + 21 # 131 + self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose + self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel + self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel + self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos + self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot + + def compute_point_cloud_observation(self, collect_demonstration=False): + """ + Compute the observations of all environment. The observation is composed of three parts: + the state values of the left and right hands, and the information of objects and target. + The state values of the left and right hands were the same for each task, including hand + joint and finger positions, velocity, and force information. The detail 157-dimensional + observational space as shown in below: + + Index Description + 0 - 23 right shadow hand dof position + 24 - 47 right shadow hand dof velocity + 48 - 71 right shadow hand dof force + 72 - 136 right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 137 - 166 right shadow hand fingertip force, torque (5 x 6) + 167 - 169 right shadow hand base position + 170 - 172 right shadow hand base rotation + 173 - 198 right shadow hand actions + 199 - 222 left shadow hand dof position + 223 - 246 left shadow hand dof velocity + 247 - 270 left shadow hand dof force + 271 - 335 left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13) + 336 - 365 left shadow hand fingertip force, torque (5 x 6) + 366 - 368 left shadow hand base position + 369 - 371 left shadow hand base rotation + 372 - 397 left shadow hand actions + 398 - 404 object pose + 405 - 407 object linear velocity + 408 - 410 object angle velocity + 411 - 413 block right handle position + 414 - 417 block right handle rotation + 418 - 420 block left handle position + 421 - 424 block left handle rotation + """ + num_ft_states = 13 * int(self.num_fingertips / 2) # 65 + num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30 + + self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos, + self.qbsofthand_dof_lower_limits, + self.qbsofthand_dof_upper_limits) + self.obs_buf[:, + self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel + self.obs_buf[:, + 2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[ + :, :24] + + fingertip_obs_start = 72 # 168 = 157 + 11 + self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape( + self.num_envs, num_ft_states) + self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[ + :, + :30] + + hand_pose_start = fingertip_obs_start + 65 + self.obs_buf[:, hand_pose_start:hand_pose_start + 3] = self.right_hand_pos + self.obs_buf[:, hand_pose_start + 3:hand_pose_start + 4] = \ + get_euler_xyz(self.hand_orientations[self.hand_indices, :])[0].unsqueeze(-1) + self.obs_buf[:, hand_pose_start + 4:hand_pose_start + 5] = \ + get_euler_xyz(self.hand_orientations[self.hand_indices, :])[1].unsqueeze(-1) + self.obs_buf[:, hand_pose_start + 5:hand_pose_start + 6] = \ + get_euler_xyz(self.hand_orientations[self.hand_indices, :])[2].unsqueeze(-1) + + action_obs_start = hand_pose_start + 6 + self.obs_buf[:, action_obs_start:action_obs_start + 15] = self.actions[:, :15] + + point_clouds = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device) + + if self.camera_debug: + import matplotlib.pyplot as plt + self.camera_rgba_debug_fig = plt.figure("CAMERA_RGBD_DEBUG") + camera_rgba_image = self.camera_visulization(is_depth_image=False) + plt.imshow(camera_rgba_image) + plt.pause(1e-9) + + for i in range(self.num_envs): + # Here is an example. In practice, it's better not to convert tensor from GPU to CPU + points = depth_image_to_point_cloud_GPU(self.camera_tensors[i], self.camera_view_matrixs[i], + self.camera_proj_matrixs[i], self.camera_u2, self.camera_v2, + self.camera_props.width, self.camera_props.height, 10, self.device) + + if points.shape[0] > 0: + selected_points = self.sample_points(points, sample_num=self.pointCloudDownsampleNum, + sample_mathed='random') + else: + selected_points = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device) + + point_clouds[i] = selected_points + + if self.pointCloudVisualizer != None: + import open3d as o3d + points = point_clouds[0, :, :3].cpu().numpy() + # colors = plt.get_cmap()(point_clouds[0, :, 3].cpu().numpy()) + self.o3d_pc.points = o3d.utility.Vector3dVector(points) + # self.o3d_pc.colors = o3d.utility.Vector3dVector(colors[..., :3]) + + if self.pointCloudVisualizerInitialized == False: + self.pointCloudVisualizer.add_geometry(self.o3d_pc) + self.pointCloudVisualizerInitialized = True + else: + self.pointCloudVisualizer.update(self.o3d_pc) + + self.gym.end_access_image_tensors(self.sim) + point_clouds -= self.env_origin.view(self.num_envs, 1, 3) + + point_clouds_start = obj_obs_start + 27 + self.obs_buf[:, point_clouds_start:].copy_(point_clouds.view(self.num_envs, self.pointCloudDownsampleNum * 3)) + + def reset_target_pose(self, env_ids, apply_reset=False): + """ + Reset and randomize the goal pose + + Args: + env_ids (tensor): The index of the environment that needs to reset goal pose + + apply_reset (bool): Whether to reset the goal directly here, usually used + when the same task wants to complete multiple goals + + """ + rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device) + + new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) + + self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3] + # self.goal_states[env_ids, 1] -= 0.25 + self.goal_states[env_ids, 2] += 10.0 + + # self.goal_states[env_ids, 3:7] = new_rot + self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids, + 0:3] + self.goal_displacement_tensor + self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7] + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13]) + + if apply_reset: + goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32) + self.gym.set_actor_root_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.root_state_tensor), + gymtorch.unwrap_tensor(goal_object_indices), len(env_ids)) + self.reset_goal_buf[env_ids] = 0 + + def reset_idx(self, env_ids, goal_env_ids): + """ + Reset and randomize the environment + + Args: + env_ids (tensor): The index of the environment that needs to reset + + goal_env_ids (tensor): The index of the environment that only goals need reset + + """ + # randomization can happen only at reset time, since it can reset actor positions on GPU + if self.randomize: + self.apply_randomizations(self.randomization_params) + + # generate random values + rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), self.num_qbsofthand_dofs * 2 + 5), device=self.device) + + # randomize start object poses + self.reset_target_pose(env_ids) + + # reset object + self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone() + + new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) + if self.object_type == "pen": + rand_angle_y = torch.tensor(0.3) + new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y, + self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids], + self.z_unit_tensor[env_ids]) + + # self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot + self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.object_indices[env_ids], 7:13]) + + object_indices = torch.unique(torch.cat([self.object_indices[env_ids], + self.goal_object_indices[env_ids], + self.goal_object_indices[goal_env_ids]]).to(torch.int32)) + # self.gym.set_actor_root_state_tensor_indexed(self.sim, + # gymtorch.unwrap_tensor(self.root_state_tensor), + # gymtorch.unwrap_tensor(object_indices), len(object_indices)) + + # reset shadow hand + delta_max = self.qbsofthand_dof_upper_limits - self.qbsofthand_dof_default_pos + delta_min = self.qbsofthand_dof_lower_limits - self.qbsofthand_dof_default_pos + rand_delta = delta_min + (delta_max - delta_min) * rand_floats[:, 5:5 + self.num_qbsofthand_dofs] + + pos = self.qbsofthand_default_dof_pos + self.reset_dof_pos_noise * rand_delta + + self.qbsofthand_dof_pos[env_ids, :] = pos + + self.qbsofthand_dof_vel[env_ids, :] = self.qbsofthand_dof_default_vel + \ + self.reset_dof_vel_noise * rand_floats[:, + 5 + self.num_qbsofthand_dofs:5 + self.num_qbsofthand_dofs * 2] + + self.prev_targets[env_ids, :self.num_qbsofthand_dofs] = pos + self.cur_targets[env_ids, :self.num_qbsofthand_dofs] = pos + + hand_indices = self.hand_indices[env_ids].to(torch.int32) + + all_hand_indices = hand_indices.to(torch.int32) + + self.gym.set_dof_position_target_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.prev_targets), + gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices)) + + all_indices = torch.unique(torch.cat([all_hand_indices, + self.object_indices[env_ids], + self.table_indices[env_ids], + self.block_indices[env_ids]]).to(torch.int32)) + + self.hand_positions[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 0:3] + self.hand_orientations[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 3:7] + + self.gym.set_dof_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.dof_state), + gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices)) + + self.gym.set_actor_root_state_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.root_state_tensor), + gymtorch.unwrap_tensor(all_indices), len(all_indices)) + self.progress_buf[env_ids] = 0 + self.reset_buf[env_ids] = 0 + self.successes[env_ids] = 0 + + def pre_physics_step(self, actions): + """ + The pre-processing of the physics step. Determine whether the reset environment is needed, + and calculate the next movement of Shadowhand through the given action. The 52-dimensional + action space as shown in below: + + Index Description # TODO + 0 - 14 right shadow hand actuated joint + 15 - 17 right shadow hand base translation + 18 - 20 right shadow hand base rotation + + Args: + actions (tensor): Actions of agents in the all environment + """ + env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) + goal_env_ids = self.reset_goal_buf.nonzero(as_tuple=False).squeeze(-1) + + # if only goals need reset, then call set API + if len(goal_env_ids) > 0 and len(env_ids) == 0: + self.reset_target_pose(goal_env_ids, apply_reset=True) + # if goals need reset in addition to other envs, call set API in reset() + elif len(goal_env_ids) > 0: + self.reset_target_pose(goal_env_ids) + + if len(env_ids) > 0: + self.reset_idx(env_ids, goal_env_ids) + + self.actions = actions.clone().to(self.device) + if self.use_relative_control: + targets = self.prev_targets[:, + self.actuated_dof_indices] + self.qbsofthand_dof_speed_scale * self.dt * self.actions + self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets, + self.qbsofthand_dof_lower_limits[ + self.actuated_dof_indices], + self.qbsofthand_dof_upper_limits[ + self.actuated_dof_indices]) + else: + self.cur_targets[:, self.actuated_dof_indices] = scale(self.actions[:, 6:21], + self.qbsofthand_dof_lower_limits[ + self.actuated_dof_indices], + self.qbsofthand_dof_upper_limits[ + self.actuated_dof_indices]) + self.cur_targets[:, self.actuated_dof_indices] = self.act_moving_average * self.cur_targets[:, + self.actuated_dof_indices] + ( + 1.0 - self.act_moving_average) * self.prev_targets[ + :, + self.actuated_dof_indices] + self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp( + self.cur_targets[:, self.actuated_dof_indices], + self.qbsofthand_dof_lower_limits[self.actuated_dof_indices], + self.qbsofthand_dof_upper_limits[self.actuated_dof_indices]) + + self.apply_forces[:, 0, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000 + self.apply_torque[:, 0, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000 + + self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.apply_forces), + gymtorch.unwrap_tensor(self.apply_torque), gymapi.ENV_SPACE) + + self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices] + all_hand_indices = self.hand_indices.to(torch.int32) + self.gym.set_dof_position_target_tensor_indexed(self.sim, + gymtorch.unwrap_tensor(self.prev_targets), + gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices)) + + def post_physics_step(self): + """ + The post-processing of the physics step. Compute the observation and reward, and visualize auxiliary + lines for debug when needed + + """ + self.progress_buf += 1 + self.randomize_buf += 1 + + self.compute_observations() + self.compute_reward(self.actions) + + if self.viewer and self.debug_viz: + # draw axes on target object + self.gym.clear_lines(self.viewer) + self.gym.refresh_rigid_body_state_tensor(self.sim) + + for i in range(self.num_envs): + self.add_debug_lines(self.envs[i], self.block_right_handle_pos[i], self.block_right_handle_rot[i]) + # self.add_debug_lines(self.envs[i], self.goal_pos[i], self.block_left_handle_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_ff_pos[i], self.right_hand_ff_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_mf_pos[i], self.right_hand_mf_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_rf_pos[i], self.right_hand_rf_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_lf_pos[i], self.right_hand_lf_rot[i]) + # self.add_debug_lines(self.envs[i], self.right_hand_th_pos[i], self.right_hand_th_rot[i]) + + # self.add_debug_lines(self.envs[i], self.left_hand_ff_pos[i], self.right_hand_ff_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_mf_pos[i], self.right_hand_mf_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_rf_pos[i], self.right_hand_rf_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_lf_pos[i], self.right_hand_lf_rot[i]) + # self.add_debug_lines(self.envs[i], self.left_hand_th_pos[i], self.right_hand_th_rot[i]) + + def add_debug_lines(self, env, pos, rot): + posx = (pos + quat_apply(rot, to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() + posy = (pos + quat_apply(rot, to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() + posz = (pos + quat_apply(rot, to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() + + p0 = pos.cpu().numpy() + self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posx[0], posx[1], posx[2]], [0.85, 0.1, 0.1]) + self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posy[0], posy[1], posy[2]], [0.1, 0.85, 0.1]) + self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posz[0], posz[1], posz[2]], [0.1, 0.1, 0.85]) + + def rand_row(self, tensor, dim_needed): + row_total = tensor.shape[0] + return tensor[torch.randint(low=0, high=row_total, size=(dim_needed,)), :] + + def sample_points(self, points, sample_num=1000, sample_mathed='furthest'): + eff_points = points[points[:, 2] > 0.04] + if eff_points.shape[0] < sample_num: + eff_points = points + if sample_mathed == 'random': + sampled_points = self.rand_row(eff_points, sample_num) + elif sample_mathed == 'furthest': + sampled_points_id = pointnet2_utils.furthest_point_sample(eff_points.reshape(1, *eff_points.shape), + sample_num) + sampled_points = eff_points.index_select(0, sampled_points_id[0].long()) + return sampled_points + + def camera_visulization(self, is_depth_image=False): + if is_depth_image: + camera_depth_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0], + gymapi.IMAGE_DEPTH) + torch_depth_tensor = gymtorch.wrap_tensor(camera_depth_tensor) + torch_depth_tensor = torch.clamp(torch_depth_tensor, -1, 1) + torch_depth_tensor = scale(torch_depth_tensor, to_torch([0], dtype=torch.float, device=self.device), + to_torch([256], dtype=torch.float, device=self.device)) + camera_image = torch_depth_tensor.cpu().numpy() + camera_image = Im.fromarray(camera_image) + + else: + camera_rgba_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0], + gymapi.IMAGE_COLOR) + torch_rgba_tensor = gymtorch.wrap_tensor(camera_rgba_tensor) + camera_image = torch_rgba_tensor.cpu().numpy() + camera_image = Im.fromarray(camera_image) + + return camera_image + + +##################################################################### +###=========================jit functions=========================### +##################################################################### + +@torch.jit.script +def depth_image_to_point_cloud_GPU(camera_tensor, camera_view_matrix_inv, camera_proj_matrix, u, v, width: float, + height: float, depth_bar: float, device: torch.device): + # time1 = time.time() + depth_buffer = camera_tensor.to(device) + + # Get the camera view matrix and invert it to transform points from camera to world space + vinv = camera_view_matrix_inv + + # Get the camera projection matrix and get the necessary scaling + # coefficients for deprojection + + proj = camera_proj_matrix + fu = 2 / proj[0, 0] + fv = 2 / proj[1, 1] + + centerU = width / 2 + centerV = height / 2 + + Z = depth_buffer + X = -(u - centerU) / width * Z * fu + Y = (v - centerV) / height * Z * fv + + Z = Z.view(-1) + valid = Z > -depth_bar + X = X.view(-1) + Y = Y.view(-1) + + position = torch.vstack((X, Y, Z, torch.ones(len(X), device=device)))[:, valid] + position = position.permute(1, 0) + position = position @ vinv + + points = position[:, 0:3] + + return points + + +@torch.jit.script +def compute_hand_reward( + rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes, + max_episode_length: float, object_pos, object_rot, target_pos, target_rot, block_right_handle_pos + , right_hand_pos, right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, + right_hand_th_pos, + dist_reward_scale: float, rot_reward_scale: float, rot_eps: float, + actions, action_penalty_scale: float, + success_tolerance: float, reach_goal_bonus: float, fall_dist: float, + fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool +): + """ + Compute the reward of all environment. + + Args: + rew_buf (tensor): The reward buffer of all environments at this time + + reset_buf (tensor): The reset buffer of all environments at this time + + reset_goal_buf (tensor): The only-goal reset buffer of all environments at this time + + progress_buf (tensor): The porgress buffer of all environments at this time + + successes (tensor): The successes buffer of all environments at this time + + consecutive_successes (tensor): The consecutive successes buffer of all environments at this time + + max_episode_length (float): The max episode length in this environment + + object_pos (tensor): The position of the object + + object_rot (tensor): The rotation of the object + + target_pos (tensor): The position of the target + + target_rot (tensor): The rotate of the target + + block_right_handle_pos (tensor): The position of the right block handle + + right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, right_hand_th_pos (tensor): The position of the five fingers + of the right hand + + dist_reward_scale (float): The scale of the distance reward + + rot_reward_scale (float): The scale of the rotation reward + + rot_eps (float): The epsilon of the rotation calculate + + actions (tensor): The action buffer of all environments at this time + + action_penalty_scale (float): The scale of the action penalty reward + + success_tolerance (float): The tolerance of the success determined + + reach_goal_bonus (float): The reward given when the object reaches the goal + + fall_dist (float): When the object is far from the Shadowhand, it is judged as falling + + fall_penalty (float): The reward given when the object is fell + + max_consecutive_successes (float): The maximum of the consecutive successes + + av_factor (float): The average factor for calculate the consecutive successes + + ignore_z_rot (bool): Is it necessary to ignore the rot of the z-axis, which is usually used + for some specific objects (e.g. pen) + """ + # Distance from the hand to the object + right_goal_dist = torch.norm(target_pos - block_right_handle_pos, p=2, dim=-1) + # goal_dist = target_pos[:, 2] - object_pos[:, 2] + + right_hand_dist = torch.norm(block_right_handle_pos - right_hand_pos, p=2, dim=-1) + + right_hand_finger_dist = (torch.norm(block_right_handle_pos - right_hand_ff_pos, p=2, dim=-1) + torch.norm( + block_right_handle_pos - right_hand_mf_pos, p=2, dim=-1) + + torch.norm(block_right_handle_pos - right_hand_rf_pos, p=2, dim=-1) + torch.norm( + block_right_handle_pos - right_hand_lf_pos, p=2, dim=-1) + + torch.norm(block_right_handle_pos - right_hand_th_pos, p=2, dim=-1)) + # Orientation alignment for the cube in hand and goal cube + # quat_diff = quat_mul(object_rot, quat_conjugate(target_rot)) + # rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0)) + + right_hand_dist_rew = torch.exp(-10 * right_hand_finger_dist) + + # rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale + + action_penalty = torch.sum(actions ** 2, dim=-1) + + # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty + # reward = torch.exp(-0.05*(up_rew * dist_reward_scale)) + torch.exp(-0.05*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.05*(left_hand_dist_rew * dist_reward_scale)) + # up_rew = torch.zeros_like(right_hand_dist_rew) + # up_rew = torch.where(right_hand_finger_dist < 0.6, + # torch.where(left_hand_finger_dist < 0.4, + up_rew = torch.zeros_like(right_hand_dist_rew) + up_rew = torch.exp(-10 * torch.norm(block_right_handle_pos, p=2, dim=-1)) * 2 + # up_rew = torch.where(right_hand_finger_dist <= 0.3, torch.norm(bottle_cap_up - bottle_pos, p=2, dim=-1) * 30, up_rew) + + # reward = torch.exp(-0.1*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.1*(left_hand_dist_rew * dist_reward_scale)) + reward = right_hand_dist_rew + up_rew + + resets = torch.where(right_hand_dist_rew <= 0, torch.ones_like(reset_buf), reset_buf) + resets = torch.where(right_hand_finger_dist >= 1.5, torch.ones_like(resets), resets) + + # Find out which envs hit the goal and update successes count + successes = torch.where(successes == 0, + torch.where(torch.norm(block_right_handle_pos, p=2, dim=-1) < 0.2, + torch.ones_like(successes), successes), successes) + + resets = torch.where(progress_buf >= max_episode_length, torch.ones_like(resets), resets) + + goal_resets = torch.zeros_like(resets) + + num_resets = torch.sum(resets) + finished_cons_successes = torch.sum(successes * resets.float()) + + cons_successes = torch.where(resets > 0, successes * resets, consecutive_successes).mean() + + return reward, resets, goal_resets, progress_buf, successes, cons_successes + + +@torch.jit.script +def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor): + return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), + quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)) + + +@torch.jit.script +def randomize_rotation_pen(rand0, rand1, max_angle, x_unit_tensor, y_unit_tensor, z_unit_tensor): + rot = quat_mul(quat_from_angle_axis(0.5 * np.pi + rand0 * max_angle, x_unit_tensor), + quat_from_angle_axis(rand0 * np.pi, z_unit_tensor)) + return rot diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py index 601eceb45..f5476e50f 100644 --- a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py +++ b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py @@ -696,7 +696,7 @@ def compute_observations(self): self.object_linvel = self.root_state_tensor[self.object_indices, 7:10] self.object_angvel = self.root_state_tensor[self.object_indices, 10:13] - self.block_right_handle_pos = self.rigid_body_states[:, 26 * 2 + 1, 0:3] + self.block_right_handle_pos = self.rigid_body_states[:, 26 * 2 + 1, 0:3] # (256, 58, 13) self.block_right_handle_rot = self.rigid_body_states[:, 26 * 2 + 1, 3:7] self.block_right_handle_pos = self.block_right_handle_pos + quat_apply(self.block_right_handle_rot, to_torch([0, 1, 0], diff --git a/rofunc/simulator/assets/mjcf/qbhand_no_virtual_left.xml b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_left.xml new file mode 100644 index 000000000..d4683bb66 --- /dev/null +++ b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_left.xml @@ -0,0 +1,432 @@ + + + + + diff --git a/rofunc/simulator/assets/mjcf/qbhand_no_virtual_right.xml b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_right.xml new file mode 100644 index 000000000..7f7650963 --- /dev/null +++ b/rofunc/simulator/assets/mjcf/qbhand_no_virtual_right.xml @@ -0,0 +1,380 @@ + + + + + diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBlockStack_23-12-26_15-04-30-752629/events.out.tfevents.1703574270.rofunc.7656.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBlockStack_23-12-26_15-04-30-752629/events.out.tfevents.1703574270.rofunc.7656.0 new file mode 100644 index 000000000..56aa22335 Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBlockStack_23-12-26_15-04-30-752629/events.out.tfevents.1703574270.rofunc.7656.0 differ diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBottleCap_23-12-24_14-50-29-312650/events.out.tfevents.1703400629.rofunc.2157326.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBottleCap_23-12-24_14-50-29-312650/events.out.tfevents.1703400629.rofunc.2157326.0 new file mode 100644 index 000000000..c35739e5b Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandBottleCap_23-12-24_14-50-29-312650/events.out.tfevents.1703400629.rofunc.2157326.0 differ diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandCatchAbreast_23-12-24_14-51-22-353264/events.out.tfevents.1703400682.rofunc.2157554.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandCatchAbreast_23-12-24_14-51-22-353264/events.out.tfevents.1703400682.rofunc.2157554.0 new file mode 100644 index 000000000..3805346fd Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandCatchAbreast_23-12-24_14-51-22-353264/events.out.tfevents.1703400682.rofunc.2157554.0 differ diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseInward_23-12-26_15-05-48-068396/events.out.tfevents.1703574348.rofunc.7930.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseInward_23-12-26_15-05-48-068396/events.out.tfevents.1703574348.rofunc.7930.0 new file mode 100644 index 000000000..861f38e04 Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseInward_23-12-26_15-05-48-068396/events.out.tfevents.1703574348.rofunc.7930.0 differ diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseOutward_23-12-27_14-43-36-754709/events.out.tfevents.1703659416.rofunc.128859.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseOutward_23-12-27_14-43-36-754709/events.out.tfevents.1703659416.rofunc.128859.0 new file mode 100644 index 000000000..e94d835d6 Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandDoorCloseOutward_23-12-27_14-43-36-754709/events.out.tfevents.1703659416.rofunc.128859.0 differ diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandOver_23-12-24_11-14-19-985779/events.out.tfevents.1703387659.rofunc.2144737.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandOver_23-12-24_11-14-19-985779/events.out.tfevents.1703387659.rofunc.2144737.0 new file mode 100644 index 000000000..b8869aa25 Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandOver_23-12-24_11-14-19-985779/events.out.tfevents.1703387659.rofunc.2144737.0 differ diff --git a/saved_runs/RofuncRL_PPOTrainer_BiShadowHandReOrientation_23-12-27_00-45-19-364801/events.out.tfevents.1703609119.rofunc.62187.0 b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandReOrientation_23-12-27_00-45-19-364801/events.out.tfevents.1703609119.rofunc.62187.0 new file mode 100644 index 000000000..7407138b2 Binary files /dev/null and b/saved_runs/RofuncRL_PPOTrainer_BiShadowHandReOrientation_23-12-27_00-45-19-364801/events.out.tfevents.1703609119.rofunc.62187.0 differ