diff --git a/README.md b/README.md new file mode 100644 index 0000000..5bf0f9b --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# Project Name + +## Installation + +1. Set up and install the d4rl environments by following the instructions provided in the d4rl documentation until you can successfully run `import d4rl` in your Python environment. + +2. Clone the GitHub repository into a folder named 'CORL' using the following command: + + ```bash + git clone https://github.com/username/repository.git CORL + ``` + + Replace `https://github.com/username/repository.git` with the actual URL of the GitHub repository you want to clone. + +3. Run the sample Python command from the directory that contains the 'CORL' folder (i.e., outside the folder itself) to execute the desired functionality. Make sure you have the necessary dependencies installed and the Python environment properly configured. + + ```bash + python path/to/sample_script.py + ``` + + Replace `path/to/sample_script.py` with the actual path to your sample Python script. + +## Usage + +Provide instructions on how to use your project or any additional information here. 
diff --git a/abl.sh b/abl.sh index 4149da1..a92fd98 100644 --- a/abl.sh +++ b/abl.sh @@ -2,40 +2,40 @@ for seed in 15 25 35 do # sizes # cql halfcheetah ME - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original_0.5" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.5 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original_0.2" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.2 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original_0.1" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.1 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original-0.5" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.5 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original-0.2" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.2 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original-0.1" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.1 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env 
"halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels_0.5" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.5 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels_0.2" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.2 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels_0.1" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.1 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels-0.5" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.5 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels-0.2" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.2 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels-0.1" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.1 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward_0.5" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.5 - python 
CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward_0.2" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.2 - python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward_0.1" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.1 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward-0.5" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.5 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward-0.2" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.2 + python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward-0.1" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 1 --dataset_size_multiplier 0.1 # iql hopper MR - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-original-no_overlap_0.5" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.5 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-original-no_overlap_0.2" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 0 --seed 
$seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.2 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-original-no_overlap_0.1" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.1 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-original-no_overlap-0.5" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.5 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-original-no_overlap-0.2" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.2 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-original-no_overlap-0.1" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.1 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-binary_labels-no_overlap_0.5" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.5 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-binary_labels-no_overlap_0.2" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.2 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-binary_labels-no_overlap_0.1" --num_t 9000 
--len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.1 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-binary_labels-no_overlap-0.5" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.5 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-binary_labels-no_overlap-0.2" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.2 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-binary_labels-no_overlap-0.1" --num_t 9000 --len_t 20 --latent_reward 0 --bin_label 1 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.1 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-latent_reward-no_overlap_0.5" --num_t 9000 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.5 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-latent_reward-no_overlap_0.2" --num_t 9000 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.2 - python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-latent_reward-no_overlap_0.1" --num_t 9000 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.1 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name 
"iql-hopper-medium-replay-latent_reward-no_overlap-0.5" --num_t 9000 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.5 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-latent_reward-no_overlap-0.2" --num_t 9000 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.2 + python CORL/algorithms/offline/iql.py --project "Experiments_CORL" --env "hopper-medium-replay-v2" --out_name "iql-hopper-medium-replay-latent_reward-no_overlap-0.1" --num_t 9000 --len_t 20 --latent_reward 1 --bin_label 0 --seed $seed --bin_label_allow_overlap 0 --dataset_size_multiplier 0.1 # ipl halfcheetah ME - python scripts/train.py --config output_configs/config_halfcheetah-medium-expert-v2_0.5.yaml --path "saved/halfcheetah-medium-expert_0.5_$seed" - python scripts/train.py --config output_configs/config_halfcheetah-medium-expert-v2_0.2.yaml --path "saved/halfcheetah-medium-expert_0.2_$seed" - python scripts/train.py --config output_configs/config_halfcheetah-medium-expert-v2_0.1.yaml --path "saved/halfcheetah-medium-expert_0.1_$seed" + python scripts/train.py --config output_configs/config_halfcheetah-medium-expert-v2-0.5.yaml --path "saved/halfcheetah-medium-expert-0.5_$seed" + python scripts/train.py --config output_configs/config_halfcheetah-medium-expert-v2-0.2.yaml --path "saved/halfcheetah-medium-expert-0.2_$seed" + python scripts/train.py --config output_configs/config_halfcheetah-medium-expert-v2-0.1.yaml --path "saved/halfcheetah-medium-expert-0.1_$seed" # ipl hopper MR - python scripts/train.py --config output_configs/config_hopper-medium-replay-v2_0.5.yaml --path "saved/hopper-medium-replay_0.5_$seed" - python scripts/train.py --config output_configs/config_hopper-medium-replay-v2_0.2.yaml --path "saved/hopper-medium-replay_0.2_$seed" - python scripts/train.py --config 
output_configs/config_hopper-medium-replay-v2_0.1.yaml --path "saved/hopper-medium-replay_0.1_$seed" + python scripts/train.py --config output_configs/config_hopper-medium-replay-v2-0.5.yaml --path "saved/hopper-medium-replay-0.5_$seed" + python scripts/train.py --config output_configs/config_hopper-medium-replay-v2-0.2.yaml --path "saved/hopper-medium-replay-0.2_$seed" + python scripts/train.py --config output_configs/config_hopper-medium-replay-v2-0.1.yaml --path "saved/hopper-medium-replay-0.1_$seed" # other algos # combo halfcheetah ME diff --git a/algorithms/offline/ipl_helper.py b/algorithms/offline/ipl_helper.py index affc89a..fa9b680 100644 --- a/algorithms/offline/ipl_helper.py +++ b/algorithms/offline/ipl_helper.py @@ -2,6 +2,9 @@ from pbrl import multiple_bernoulli_trials_zero_one, small_d4rl_dataset def save_preference_dataset(dataset, dpref, dpref_name, num_t, len_t, multiplier): + """ + Save an IPL preference dataset from a PBRL dataset. + """ t1s, t2s, ps = dpref mus = multiple_bernoulli_trials_zero_one(ps, num_trials=1) mus = 1.0 - mus diff --git a/algorithms/offline/pbrl.py b/algorithms/offline/pbrl.py index c68c7d4..b4553bb 100644 --- a/algorithms/offline/pbrl.py +++ b/algorithms/offline/pbrl.py @@ -30,6 +30,7 @@ def generate_pbrl_dataset(dataset, num_t, pbrl_dataset_file_path="", len_t=20): t1, r1 = get_random_trajectory_reward(dataset, len_t) t2, r2 = get_random_trajectory_reward(dataset, len_t) + # p = e^r1 / (e^r1 + e^r2) = 1 / (1 + e^(r2 - r1)) one_over_p = 1.0 + np.exp(r2 - r1) if np.isnan(one_over_p): p = 0.0