# utils.py
#%% Imports
import numpy as np
import os
import random
import torch
import matplotlib.pyplot as plt
from perlin_noise import PerlinNoise
from copy import deepcopy
from datetime import datetime, timedelta, time
from wandb_setup import wandb_setup
#%% Functions
def render_and_wandb_init(opt, config_dict):
render = opt.render
log_wandb = not opt.no_wandb
wandb_run = None
if log_wandb:
wandb_run = wandb_setup(opt, config_dict)
return render, log_wandb, wandb_run
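# Hedged usage sketch (not part of the original module): render_and_wandb_init expects an
# argparse-style Namespace exposing the `render` and `no_wandb` flags used above, plus the
# config_dict defined in config.py. The parser itself lives elsewhere in the repo.
#
#     opt = parser.parse_args()
#     render, log_wandb, wandb_run = render_and_wandb_init(opt, config_dict)
#     if log_wandb:
#         wandb_run.log({"setup/render": render})   # wandb_run is the Run returned by wandb_setup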
def adjust_config_train(opt, config_dict):
"""Changes configuration of config_dict based on args."""
print("Configuration elements changed by the CLI:")
### Environment
print(" -- General environment properties --")
if opt.nb_agents != -1:
config_dict["default_env_prop"]["cluster_prop"]["nb_agents"] = opt.nb_agents
print("Setting nb_agents to {}".format(opt.nb_agents))
if opt.time_step != -1:
config_dict["default_env_prop"]["time_step"] = opt.time_step
print("Setting time_step to {}".format(opt.time_step))
## Reward
print(" -- Reward properties --")
if opt.alpha_temp != -1:
print("Setting alpha_temp to {}".format(opt.alpha_temp))
config_dict["default_env_prop"]["reward_prop"]["alpha_temp"] = opt.alpha_temp
if opt.alpha_sig != -1:
print("Setting alpha_sig to {}".format(opt.alpha_sig))
config_dict["default_env_prop"]["reward_prop"]["alpha_sig"] = opt.alpha_sig
if opt.temp_penalty_mode != "config":
print("Setting temp_penalty_mode to {}".format(opt.temp_penalty_mode))
config_dict["default_env_prop"]["reward_prop"]["temp_penalty_mode"] = opt.temp_penalty_mode
if opt.alpha_ind_L2 != -1:
print("Setting alpha_ind_L2 to {}".format(opt.alpha_ind_L2))
config_dict["default_env_prop"]["reward_prop"]["temp_penalty_parameters"]["mixture"]["alpha_ind_L2"] = opt.alpha_ind_L2
if opt.alpha_common_L2 != -1:
print("Setting alpha_common_L2 to {}".format(opt.alpha_common_L2))
config_dict["default_env_prop"]["reward_prop"]["temp_penalty_parameters"]["mixture"]["alpha_common_L2"] = opt.alpha_common_L2
if opt.alpha_common_max != -1:
print("Setting alpha_common_max to {}".format(opt.alpha_common_max))
config_dict["default_env_prop"]["reward_prop"]["temp_penalty_parameters"]["mixture"]["alpha_common_max"] = opt.alpha_common_max
## Simulator
# Outdoors
print("-- Outdoors environment --")
if opt.OD_temp_mode != "config":
print("Setting OD_temp_mode to {}".format(opt.OD_temp_mode))
config_dict["default_env_prop"]["cluster_prop"]["temp_mode"] = opt.OD_temp_mode
config_dict["default_house_prop"]["solar_gain_bool"] = not opt.no_solar_gain
print("Setting solar_gain_bool to {}".format(not opt.no_solar_gain))
# House and HVAC
print("-- HVAC properties --")
if opt.cooling_capacity != -1:
print("Setting cooling_capacity to {}".format(opt.cooling_capacity))
config_dict["default_hvac_prop"]["cooling_capacity"] = opt.cooling_capacity
if opt.lockout_duration != -1:
print("Setting lockout_duration to {}".format(opt.lockout_duration))
config_dict["default_hvac_prop"]["lockout_duration"] = opt.lockout_duration
# Noise
print("-- Noise properties --")
if opt.house_noise_mode != "config":
print("Setting house_noise_mode to {}".format(opt.house_noise_mode))
config_dict["noise_house_prop"]["noise_mode"] = opt.house_noise_mode
if opt.house_noise_mode_test == "train":
print("Setting house_noise_mode_test to {}".format(config_dict["noise_house_prop"]["noise_mode"]))
config_dict["noise_house_prop_test"]["noise_mode"] = config_dict["noise_house_prop"]["noise_mode"]
else:
print("Setting house_noise_mode_test to {}".format(opt.house_noise_mode_test))
config_dict["noise_house_prop_test"]["noise_mode"] = opt.house_noise_mode_test
if opt.hvac_noise_mode != "config":
print("Setting hvac_noise_mode to {}".format(opt.hvac_noise_mode))
config_dict["noise_hvac_prop"]["noise_mode"] = opt.hvac_noise_mode
if opt.hvac_lockout_noise != -1:
print("Setting lockout_noise to {}".format(opt.hvac_lockout_noise))
config_dict["default_hvac_prop"]["lockout_noise"] = opt.hvac_lockout_noise
if opt.hvac_noise_mode_test == "train":
print("Setting hvac_noise_mode_test to {}".format(config_dict["noise_hvac_prop_test"]["noise_mode"]))
config_dict["noise_hvac_prop_test"]["noise_mode"] = config_dict["noise_hvac_prop_test"]["noise_mode"]
else:
print("Setting hvac_noise_mode_test to {}".format(opt.hvac_noise_mode_test))
config_dict["noise_hvac_prop_test"]["noise_mode"] = opt.hvac_noise_mode_test
## Signal
print("-- Signal --")
if opt.signal_mode != "config":
print("Setting signal_mode to {}".format(opt.signal_mode))
config_dict["default_env_prop"]["power_grid_prop"]["signal_mode"] = opt.signal_mode
if opt.base_power_mode != "config":
print("Setting base_power_mode to {}".format(opt.base_power_mode))
config_dict["default_env_prop"]["power_grid_prop"]["base_power_mode"] = opt.base_power_mode
config_dict["default_env_prop"]["power_grid_prop"]["artificial_ratio"] = opt.artificial_signal_ratio
print("Setting artificial_ratio to {}".format(opt.artificial_signal_ratio))
if opt.artificial_signal_ratio_range != -1:
print("Setting artificial_signal_ratio_range to {}".format(opt.artificial_signal_ratio_range))
config_dict["default_env_prop"]["power_grid_prop"]["artificial_signal_ratio_range"] = opt.artificial_signal_ratio_range
## State
if opt.state_solar_gain != "config":
print("Setting state solar gain to {}".format(opt.state_solar_gain))
if opt.state_solar_gain == "True":
config_dict["default_env_prop"]["state_properties"]["solar_gain"] = True
elif opt.state_solar_gain == "False":
config_dict["default_env_prop"]["state_properties"]["solar_gain"] = False
else:
raise ValueError("Invalid value for state solar gain")
if opt.state_hour != "config":
print("Setting state hour to {}".format(opt.state_hour))
if opt.state_hour == "True":
config_dict["default_env_prop"]["state_properties"]["hour"] = True
elif opt.state_hour == "False":
config_dict["default_env_prop"]["state_properties"]["hour"] = False
else:
raise ValueError("Invalid value for state_hour")
if opt.state_day != "config":
print("Setting state day to {}".format(opt.state_day))
if opt.state_day == "True":
config_dict["default_env_prop"]["state_properties"]["day"] = True
elif opt.state_day == "False":
config_dict["default_env_prop"]["state_properties"]["day"] = False
else:
raise ValueError("Invalid value for state_day")
if opt.state_thermal != "config":
if opt.state_thermal == "True":
config_dict["default_env_prop"]["state_properties"]["thermal"] = True
elif opt.state_thermal == "False":
config_dict["default_env_prop"]["state_properties"]["thermal"] = False
else:
raise ValueError("Invalid value for state_thermal")
if opt.state_hvac != "config":
if opt.state_hvac == "True":
config_dict["default_env_prop"]["state_properties"]["hvac"] = True
elif opt.state_hvac == "False":
config_dict["default_env_prop"]["state_properties"]["hvac"] = False
else:
raise ValueError("Invalid value for state_hvac")
if opt.message_thermal != "config":
if opt.message_thermal == "True":
config_dict["default_env_prop"]["message_properties"]["thermal"] = True
elif opt.message_thermal == "False":
config_dict["default_env_prop"]["message_properties"]["thermal"] = False
else:
raise ValueError("Invalid value for message_thermal")
if opt.message_hvac != "config":
if opt.message_hvac == "True":
config_dict["default_env_prop"]["message_properties"]["hvac"] = True
elif opt.message_hvac == "False":
config_dict["default_env_prop"]["message_properties"]["hvac"] = False
else:
raise ValueError("Invalid value for message_hvac")
### Agent
## Agent communication constraints
print("-- Agent communication constraints --")
if opt.nb_agents_comm != -1:
print("Setting nb_agents_comm to {}".format(opt.nb_agents_comm))
config_dict["default_env_prop"]["cluster_prop"]["nb_agents_comm"] = opt.nb_agents_comm
if opt.agents_comm_mode != "config":
print("Setting agents_comm_mode to {}".format(opt.agents_comm_mode))
config_dict["default_env_prop"]["cluster_prop"]["agents_comm_mode"] = opt.agents_comm_mode
if opt.comm_defect_prob != -1:
print("Setting comm_defect_prob to {}".format(opt.comm_defect_prob))
config_dict["default_env_prop"]["cluster_prop"]["comm_defect_prob"] = opt.comm_defect_prob
agent = opt.agent_type
if agent == "ppo":
print("-- PPO agent --")
## PPO agent
# NN architecture
if opt.layers_actor != "config":
print("Setting PPO layers_actor to {}".format(opt.layers_actor))
config_dict["PPO_prop"]["actor_layers"] = opt.layers_actor
if opt.layers_critic != "config":
print("Setting PPO layers_critic to {}".format(opt.layers_critic))
config_dict["PPO_prop"]["critic_layers"] = opt.layers_critic
if opt.layers_both != "config":
print("Setting PPO layers_both to {}".format(opt.layers_both))
config_dict["PPO_prop"]["actor_layers"] = opt.layers_both
config_dict["PPO_prop"]["critic_layers"] = opt.layers_both
# NN optimization
if opt.batch_size != -1:
print("Setting PPO batch_size to {}".format(opt.batch_size))
config_dict["PPO_prop"]["batch_size"] = opt.batch_size
if opt.lr_critic != -1:
print("Setting PPO lr_critic to {}".format(opt.lr_critic))
config_dict["PPO_prop"]["lr_critic"] = opt.lr_critic
if opt.lr_actor != -1:
print("Setting PPO lr_actor to {}".format(opt.lr_actor))
config_dict["PPO_prop"]["lr_actor"] = opt.lr_actor
if opt.lr_both != -1:
print("Setting PPO lr_both to {}".format(opt.lr_both))
config_dict["PPO_prop"]["lr_critic"] = opt.lr_both
config_dict["PPO_prop"]["lr_actor"] = opt.lr_both
if opt.lr_actor != -1 or opt.lr_critic != -1:
raise ValueError("Potential conflict: both lr_both and lr_actor or lr_critic were set in the CLI")
# RL optimization
if opt.gamma != -1:
print("Setting PPO gamma to {}".format(opt.gamma))
config_dict["PPO_prop"]["gamma"] = opt.gamma
if opt.clip_param != -1:
print("Setting PPO clip_param to {}".format(opt.clip_param))
config_dict["PPO_prop"]["clip_param"] = opt.clip_param
if opt.max_grad_norm != -1:
print("Setting PPO max_grad_norm to {}".format(opt.max_grad_norm))
config_dict["PPO_prop"]["max_grad_norm"] = opt.max_grad_norm
if opt.ppo_update_time != -1:
print("Setting PPO ppo_update_time to {}".format(opt.ppo_update_time))
config_dict["PPO_prop"]["ppo_update_time"] = opt.ppo_update_time
elif agent == "dqn":
print("-- DQN agent --")
## DQN agent
# NN architecture
if opt.DQNnetwork_layers != "config":
print("Setting DQNnetwork_layers to {}".format(opt.DQNnetwork_layers))
config_dict["DQN_prop"]["network_layers"] = opt.DQNnetwork_layers
# NN optimization
if opt.batch_size != -1:
print("Setting DQN batch_size to {}".format(opt.batch_size))
config_dict["DQN_prop"]["batch_size"] = opt.batch_size
if opt.lr != -1:
print("Setting DQN_lr to {}".format(opt.lr))
config_dict["DQN_prop"]["lr"] = opt.lr
# RL optimization
if opt.gamma != -1:
print("Setting DQN gamma to {}".format(opt.gamma))
config_dict["DQN_prop"]["gamma"] = opt.gamma
if opt.tau != -1:
print("Setting DQN tau to {}".format(opt.tau))
config_dict["DQN_prop"]["tau"] = opt.tau
if opt.buffer_capacity != -1:
print("Setting DQN buffer_capacity to {}".format(opt.buffer_capacity))
config_dict["DQN_prop"]["buffer_capacity"] = opt.buffer_capacity
if opt.epsilon_decay != -1:
print("Setting DQN epsilon_decay to {}".format(opt.epsilon_decay))
config_dict["DQN_prop"]["epsilon_decay"] = opt.epsilon_decay
if opt.min_epsilon != -1:
print("Setting DQN min_epsilon to {}".format(opt.min_epsilon))
config_dict["DQN_prop"]["min_epsilon"] = opt.min_epsilon
elif agent == "tarmac":
print("-- TarMAC agent --")
## TarMAC agent
if opt.recurrent_policy == "False":
print("Setting TarMAC recurrent_policy to False")
config_dict["TarMAC_prop"]["recurrent_policy"] = False
if opt.state_size != -1:
print("Setting TarMAC state_size to {}".format(opt.state_size))
config_dict["TarMAC_prop"]["state_size"] = opt.state_size
if opt.communication_size != -1:
print("Setting TarMAC communication_size to {}".format(opt.communication_size))
config_dict["TarMAC_prop"]["communication_size"] = opt.communication_size
if opt.tarmac_communication_mode != "config":
print("Setting tarmac_communication_mode to {}".format(opt.tarmac_communication_mode))
config_dict["TarMAC_prop"]["tarmac_communication_mode"] = opt.tarmac_communication_mode
if opt.comm_num_hops != -1:
print("Setting TarMAC comm_num_hops to {}".format(opt.comm_num_hops))
config_dict["TarMAC_prop"]["comm_num_hops"] = opt.comm_num_hops
if opt.value_loss_coef != -1:
print("Setting TarMAC value_loss_coef to {}".format(opt.value_loss_coef))
config_dict["TarMAC_prop"]["value_loss_coef"] = opt.value_loss_coef
if opt.entropy_coef != -1:
print("Setting TarMAC entropy_coef to {}".format(opt.entropy_coef))
config_dict["TarMAC_prop"]["entropy_coef"] = opt.entropy_coef
if opt.max_grad_norm != -1:
print("Setting TarMAC max_grad_norm to {}".format(opt.max_grad_norm))
config_dict["TarMAC_prop"]["tarmac_max_grad_norm"] = opt.max_grad_norm
if opt.lr != -1:
print("Setting TarMAC lr to {}".format(opt.lr))
config_dict["TarMAC_prop"]["lr"] = opt.lr
if opt.eps != -1:
print("Setting TarMAC eps to {}".format(opt.eps))
config_dict["TarMAC_prop"]["tarmac_eps"] = opt.eps
if opt.gamma != -1:
print("Setting TarMAC gamma to {}".format(opt.gamma))
config_dict["TarMAC_prop"]["tarmac_gamma"] = opt.gamma
if opt.alpha != -1:
print("Setting TarMAC alpha to {}".format(opt.alpha))
config_dict["TarMAC_prop"]["tarmac_alpha"] = opt.alpha
if opt.nb_tarmac_updates != -1:
print("Setting TarMAC nb_tarmac_updates to {}".format(opt.nb_tarmac_updates))
config_dict["TarMAC_prop"]["nb_tarmac_updates"] = opt.nb_tarmac_updates
if opt.batch_size != -1:
print("Setting TarMAC batch_size to {}".format(opt.batch_size))
config_dict["TarMAC_prop"]["tarmac_batch_size"] = opt.batch_size
elif agent == "tarmac_ppo":
print("-- TarMAC PPO agent --")
## TarMAC PPO agent
if opt.actor_hidden_state_size != -1:
print("Setting TarMAC actor_hidden_state_size to {}".format(opt.actor_hidden_state_size))
config_dict["TarMAC_PPO_prop"]["actor_hidden_state_size"] = opt.actor_hidden_state_size
if opt.communication_size != -1:
print("Setting TarMAC communication_size to {}".format(opt.communication_size))
config_dict["TarMAC_PPO_prop"]["communication_size"] = opt.communication_size
if opt.key_size != -1:
print("Setting TarMAC key_size to {}".format(opt.key_size))
config_dict["TarMAC_PPO_prop"]["key_size"] = opt.key_size
if opt.comm_num_hops != -1:
print("Setting TarMAC comm_num_hops to {}".format(opt.comm_num_hops))
config_dict["TarMAC_PPO_prop"]["comm_num_hops"] = opt.comm_num_hops
if opt.number_agents_comm_tarmac != -1:
print("Setting TarMAC number_agents_comm_tarmac to {}".format(opt.number_agents_comm_tarmac))
config_dict["TarMAC_PPO_prop"]["number_agents_comm_tarmac"] = opt.number_agents_comm_tarmac
if opt.tarmac_comm_mode != "config":
print("Setting tarmac_comm_mode to {}".format(opt.tarmac_comm_mode))
config_dict["TarMAC_PPO_prop"]["tarmac_comm_mode"] = opt.tarmac_comm_mode
if opt.tarmac_comm_defect_prob != -1:
print("Setting tarmac_comm_defect_prob to {}".format(opt.tarmac_comm_defect_prob))
config_dict["TarMAC_PPO_prop"]["tarmac_comm_defect_prob"] = opt.tarmac_comm_defect_prob
if opt.lr_critic != -1:
print("Setting TarMAC lr_critic to {}".format(opt.lr_critic))
config_dict["TarMAC_PPO_prop"]["lr_critic"] = opt.lr_critic
if opt.lr_actor != -1:
print("Setting TarMAC lr_actor to {}".format(opt.lr_actor))
config_dict["TarMAC_PPO_prop"]["lr_actor"] = opt.lr_actor
if opt.lr_both != -1:
print("Setting PPO lr_both to {}".format(opt.lr_both))
config_dict["TarMAC_PPO_prop"]["lr_critic"] = opt.lr_both
config_dict["TarMAC_PPO_prop"]["lr_actor"] = opt.lr_both
if opt.lr_actor != -1 or opt.lr_critic != -1:
raise ValueError("Potential conflict: both lr_both and lr_actor or lr_critic were set in the CLI")
if opt.eps != -1:
print("Setting TarMAC eps to {}".format(opt.eps))
config_dict["TarMAC_PPO_prop"]["eps"] = opt.eps
if opt.gamma != -1:
print("Setting TarMAC gamma to {}".format(opt.gamma))
config_dict["TarMAC_PPO_prop"]["gamma"] = opt.gamma
if opt.max_grad_norm != -1:
print("Setting TarMAC max_grad_norm to {}".format(opt.max_grad_norm))
config_dict["TarMAC_PPO_prop"]["max_grad_norm"] = opt.max_grad_norm
if opt.clip_param != -1:
print("Setting TarMAC clip_param to {}".format(opt.clip_param))
config_dict["TarMAC_PPO_prop"]["clip_param"] = opt.clip_param
if opt.ppo_update_time != -1:
print("Setting TarMAC ppo_update_time to {}".format(opt.ppo_update_time))
config_dict["TarMAC_PPO_prop"]["ppo_update_time"] = opt.ppo_update_time
if opt.batch_size != -1:
print("Setting TarMAC batch_size to {}".format(opt.batch_size))
config_dict["TarMAC_PPO_prop"]["batch_size"] = opt.batch_size
if opt.critic_hidden_layer_size != -1:
print("Setting TarMAC critic_hidden_layer_size to {}".format(opt.critic_hidden_layer_size))
config_dict["TarMAC_PPO_prop"]["critic_hidden_layer_size"] = opt.critic_hidden_layer_size
if opt.with_gru != 'config':
print("Setting TarMAC with_gru to {}".format(opt.with_gru))
if opt.with_gru == "True":
config_dict["TarMAC_PPO_prop"]["with_gru"] = True
else:
config_dict["TarMAC_PPO_prop"]["with_gru"] = False
if opt.with_comm != 'config':
print("Setting TarMAC with_comm to {}".format(opt.with_comm))
if opt.with_comm == "True":
config_dict["TarMAC_PPO_prop"]["with_comm"] = True
else:
config_dict["TarMAC_PPO_prop"]["with_comm"] = False
### Sanity check: abort before training if the communication settings are inconsistent
if config_dict["TarMAC_PPO_prop"]["number_agents_comm_tarmac"] >= config_dict["default_env_prop"]["cluster_prop"]["nb_agents"] and config_dict["TarMAC_PPO_prop"]["with_comm"]:
raise ValueError("number_agents_comm_tarmac {} is greater than or equal to nb_agents {}".format(config_dict["TarMAC_PPO_prop"]["number_agents_comm_tarmac"], config_dict["default_env_prop"]["cluster_prop"]["nb_agents"]))
### Training process
if opt.nb_inter_saving_actor != -1:
print("Setting nb_inter_saving_actor to {}".format(opt.nb_inter_saving_actor))
config_dict["training_prop"]["nb_inter_saving_actor"] = opt.nb_inter_saving_actor
if opt.nb_test_logs != -1:
print("Setting nb_test_logs to {}".format(opt.nb_test_logs))
config_dict["training_prop"]["nb_test_logs"] = opt.nb_test_logs
if opt.nb_time_steps_test != -1:
print("Setting nb_time_steps_test to {}".format(opt.nb_time_steps_test))
config_dict["training_prop"]["nb_time_steps_test"] = opt.nb_time_steps_test
if opt.nb_tr_episodes != -1:
print("Setting nb_tr_episodes to {}".format(opt.nb_tr_episodes))
config_dict["training_prop"]["nb_tr_episodes"] = opt.nb_tr_episodes
if opt.nb_tr_epochs != -1:
print("Setting nb_tr_epochs to {}".format(opt.nb_tr_epochs))
config_dict["training_prop"]["nb_tr_epochs"] = opt.nb_tr_epochs
if opt.nb_tr_logs != -1:
print("Setting nb_tr_logs to {}".format(opt.nb_tr_logs))
config_dict["training_prop"]["nb_tr_logs"] = opt.nb_tr_logs
if opt.nb_time_steps != -1:
print("Setting nb_time_steps to {}".format(opt.nb_time_steps))
config_dict["training_prop"]["nb_time_steps"] = opt.nb_time_steps
def adjust_config_deploy(opt, config_dict):
if opt.nb_agents != -1:
config_dict["default_env_prop"]["cluster_prop"]["nb_agents"] = opt.nb_agents
if opt.time_step != -1:
config_dict["default_env_prop"]["time_step"] = opt.time_step
if opt.cooling_capacity != -1:
config_dict["default_hvac_prop"]["cooling_capacity"] = opt.cooling_capacity
if opt.lockout_duration != -1:
config_dict["default_hvac_prop"]["lockout_duration"] = opt.lockout_duration
if opt.MPC_rolling_horizon != -1:
config_dict["MPC_prop"]["rolling_horizon"] = opt.MPC_rolling_horizon
if opt.signal_mode != "config":
config_dict["default_env_prop"]["power_grid_prop"][
"signal_mode"
] = opt.signal_mode
if opt.house_noise_mode != "config":
config_dict["noise_house_prop"]["noise_mode"] = opt.house_noise_mode
if opt.hvac_noise_mode != "config":
config_dict["noise_hvac_prop"]["noise_mode"] = opt.hvac_noise_mode
if opt.hvac_lockout_noise != -1:
config_dict["default_hvac_prop"]["lockout_noise"] = opt.hvac_lockout_noise
if opt.OD_temp_mode != "config":
config_dict["default_env_prop"]["cluster_prop"]["temp_mode"] = opt.OD_temp_mode
config_dict["default_house_prop"]["solar_gain_bool"] = not opt.no_solar_gain
if opt.base_power_mode != "config":
config_dict["default_env_prop"]["power_grid_prop"][
"base_power_mode"
] = opt.base_power_mode
if opt.nb_agents_comm != -1:
config_dict["default_env_prop"]["cluster_prop"][
"nb_agents_comm"
] = opt.nb_agents_comm
if opt.agents_comm_mode != "config":
config_dict["default_env_prop"]["cluster_prop"][
"agents_comm_mode"
] = opt.agents_comm_mode
if opt.comm_defect_prob != -1:
print("Setting comm_defect_prob to {}".format(opt.comm_defect_prob))
config_dict["default_env_prop"]["cluster_prop"]["comm_defect_prob"] = opt.comm_defect_prob
if opt.layers_actor != "config":
config_dict["PPO_prop"]["actor_layers"] = opt.layers_actor
if opt.layers_critic != "config":
config_dict["PPO_prop"]["critic_layers"] = opt.layers_critic
if opt.layers_both != "config":
config_dict["PPO_prop"]["actor_layers"] = opt.layers_both
config_dict["PPO_prop"]["critic_layers"] = opt.layers_both
if opt.DQNnetwork_layers != "config":
config_dict["DQN_prop"]["network_layers"] = opt.DQNnetwork_layers
if opt.start_datetime_mode != "config":
config_dict["default_env_prop"]["start_datetime_mode"] = opt.start_datetime_mode
print("-- TarMAC PPO agent --")
## TarMAC PPO agent
if opt.actor_hidden_state_size != -1:
print("Setting TarMAC actor_hidden_state_size to {}".format(opt.actor_hidden_state_size))
config_dict["TarMAC_PPO_prop"]["actor_hidden_state_size"] = opt.actor_hidden_state_size
if opt.communication_size != -1:
print("Setting TarMAC communication_size to {}".format(opt.communication_size))
config_dict["TarMAC_PPO_prop"]["communication_size"] = opt.communication_size
if opt.key_size != -1:
print("Setting TarMAC key_size to {}".format(opt.key_size))
config_dict["TarMAC_PPO_prop"]["key_size"] = opt.key_size
if opt.comm_num_hops != -1:
print("Setting TarMAC comm_num_hops to {}".format(opt.comm_num_hops))
config_dict["TarMAC_PPO_prop"]["comm_num_hops"] = opt.comm_num_hops
if opt.number_agents_comm_tarmac != -1:
print("Setting TarMAC number_agents_comm_tarmac to {}".format(opt.number_agents_comm_tarmac))
config_dict["TarMAC_PPO_prop"]["number_agents_comm_tarmac"] = opt.number_agents_comm_tarmac
if opt.tarmac_comm_mode != "config":
print("Setting tarmac_comm_mode to {}".format(opt.tarmac_comm_mode))
config_dict["TarMAC_PPO_prop"]["tarmac_comm_mode"] = opt.tarmac_comm_mode
if opt.tarmac_comm_defect_prob != -1:
print("Setting tarmac_comm_defect_prob to {}".format(opt.tarmac_comm_defect_prob))
config_dict["TarMAC_PPO_prop"]["tarmac_comm_defect_prob"] = opt.tarmac_comm_defect_prob
if opt.critic_hidden_layer_size != -1:
print("Setting TarMAC critic_hidden_layer_size to {}".format(opt.critic_hidden_layer_size))
config_dict["TarMAC_PPO_prop"]["critic_hidden_layer_size"] = opt.critic_hidden_layer_size
if opt.with_gru != 'config':
print("Setting TarMAC with_gru to {}".format(opt.with_gru))
if opt.with_gru == "True":
config_dict["TarMAC_PPO_prop"]["with_gru"] = True
else:
config_dict["TarMAC_PPO_prop"]["with_gru"] = False
if opt.with_comm != 'config':
print("Setting TarMAC with_comm to {}".format(opt.with_comm))
if opt.with_comm == "True":
config_dict["TarMAC_PPO_prop"]["with_comm"] = True
else:
config_dict["TarMAC_PPO_prop"]["with_comm"] = False
## State
print("-- Agent observations --")
if opt.state_solar_gain != "config":
print("Setting state solar gain to {}".format(opt.state_solar_gain))
if opt.state_solar_gain == "True":
config_dict["default_env_prop"]["state_properties"]["solar_gain"] = True
elif opt.state_solar_gain == "False":
config_dict["default_env_prop"]["state_properties"]["solar_gain"] = False
else:
raise ValueError("Invalid value for state solar gain")
if opt.state_hour != "config":
print("Setting state hour to {}".format(opt.state_hour))
if opt.state_hour == "True":
config_dict["default_env_prop"]["state_properties"]["hour"] = True
elif opt.state_hour == "False":
config_dict["default_env_prop"]["state_properties"]["hour"] = False
else:
raise ValueError("Invalid value for state_hour")
if opt.state_day != "config":
print("Setting state day to {}".format(opt.state_day))
if opt.state_day == "True":
config_dict["default_env_prop"]["state_properties"]["day"] = True
elif opt.state_day == "False":
config_dict["default_env_prop"]["state_properties"]["day"] = False
else:
raise ValueError("Invalid value for state_day")
if opt.state_thermal != "config":
print("Setting state thermal to {}".format(opt.state_thermal))
if opt.state_thermal == "True":
config_dict["default_env_prop"]["state_properties"]["thermal"] = True
elif opt.state_thermal == "False":
config_dict["default_env_prop"]["state_properties"]["thermal"] = False
else:
raise ValueError("Invalid value for state_day")
if opt.state_hvac != "config":
if opt.state_hvac == "True":
config_dict["default_env_prop"]["state_properties"]["hvac"] = True
elif opt.state_hvac == "False":
config_dict["default_env_prop"]["state_properties"]["hvac"] = False
else:
raise ValueError("Invalid value for state_day")
if opt.message_thermal != "config":
if opt.message_thermal == "True":
config_dict["default_env_prop"]["message_properties"]["thermal"] = True
elif opt.message_thermal == "False":
config_dict["default_env_prop"]["message_properties"]["thermal"] = False
else:
raise ValueError("Invalid value for message_thermal")
if opt.message_hvac != "config":
if opt.message_hvac == "True":
config_dict["default_env_prop"]["message_properties"]["hvac"] = True
elif opt.message_hvac == "False":
config_dict["default_env_prop"]["message_properties"]["hvac"] = False
else:
raise ValueError("Invalid value for message_hvac")
config_dict["default_env_prop"]["power_grid_prop"][
"artificial_ratio"
] = opt.artificial_signal_ratio
# Applying noise on environment properties
def applyPropertyNoise(
default_env_prop,
default_house_prop,
noise_house_prop,
default_hvac_prop,
noise_hvac_prop,
):
env_properties = deepcopy(default_env_prop)
nb_agents = default_env_prop["cluster_prop"]["nb_agents"]
# Creating the houses
houses_properties = []
agent_ids = []
for i in range(nb_agents):
house_prop = deepcopy(default_house_prop)
apply_house_noise(house_prop, noise_house_prop)
house_id = i
house_prop["id"] = house_id
hvac_prop = deepcopy(default_hvac_prop)
apply_hvac_noise(hvac_prop, noise_hvac_prop)
hvac_prop["id"] = house_id
agent_ids.append(house_id)
house_prop["hvac_properties"] = hvac_prop
houses_properties.append(house_prop)
env_properties["cluster_prop"]["houses_properties"] = houses_properties
env_properties["agent_ids"] = agent_ids
env_properties["nb_hvac"] = len(agent_ids)
# Setting the date
if env_properties["start_datetime_mode"] == "random":
env_properties["start_datetime"] = get_random_date_time(
datetime.strptime(default_env_prop["start_datetime"], "%Y-%m-%d %H:%M:%S")
) # Start date and time (Y,M,D, H, min, s)
elif env_properties["start_datetime_mode"] == "fixed":
env_properties["start_datetime"] = datetime.strptime(
default_env_prop["start_datetime"], "%Y-%m-%d %H:%M:%S"
)
else:
raise ValueError(
"start_datetime_mode in default_env_prop in config.py must be 'random' or 'fixed'. Current value: {}.".format(
env_properties["start_datetime_mode"]
)
)
return env_properties
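# Sketch of the expected call site for applyPropertyNoise (an assumption based on its signature;
# the real caller lives elsewhere in the repo, e.g. in the environment constructor):
#
#     env_properties = applyPropertyNoise(
#         config_dict["default_env_prop"],
#         config_dict["default_house_prop"],
#         config_dict["noise_house_prop"],
#         config_dict["default_hvac_prop"],
#         config_dict["noise_hvac_prop"],
#     )
#     # env_properties carries one noised house+HVAC description per agent plus the start_datetime.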
# Applying noise on properties
def apply_house_noise(house_prop, noise_house_prop):
noise_house_mode = noise_house_prop["noise_mode"]
noise_house_params = noise_house_prop["noise_parameters"][noise_house_mode]
# Gaussian noise: target temp
house_prop["init_air_temp"] += np.abs(
random.gauss(0, noise_house_params["std_start_temp"])
)
house_prop["init_mass_temp"] += np.abs(
random.gauss(0, noise_house_params["std_start_temp"])
)
house_prop["target_temp"] += np.abs(
random.gauss(0, noise_house_params["std_target_temp"])
)
# Factor noise: house wall conductance, house thermal mass, air thermal mass, house mass surface conductance
factor_Ua = random.triangular(
noise_house_params["factor_thermo_low"],
noise_house_params["factor_thermo_high"],
1,
) # low, high, mode -> low <= N <= high, with max prob at mode.
house_prop["Ua"] *= factor_Ua
factor_Cm = random.triangular(
noise_house_params["factor_thermo_low"],
noise_house_params["factor_thermo_high"],
1,
) # low, high, mode -> low <= N <= high, with max prob at mode.
house_prop["Cm"] *= factor_Cm
factor_Ca = random.triangular(
noise_house_params["factor_thermo_low"],
noise_house_params["factor_thermo_high"],
1,
) # low, high, mode -> low <= N <= high, with max prob at mode.
house_prop["Ca"] *= factor_Ca
factor_Hm = random.triangular(
noise_house_params["factor_thermo_low"],
noise_house_params["factor_thermo_high"],
1,
) # low, high, mode -> low <= N <= high, with max prob at mode.
house_prop["Hm"] *= factor_Hm
def apply_hvac_noise(hvac_prop, noise_hvac_prop):
noise_hvac_mode = noise_hvac_prop["noise_mode"]
hvac_capacity = hvac_prop["cooling_capacity"]
noise_hvac_params = noise_hvac_prop["noise_parameters"][noise_hvac_mode]
hvac_prop["cooling_capacity"] = random.choices(
noise_hvac_params["cooling_capacity_list"][hvac_capacity]
)[0]
"""
# Gaussian noise: latent_cooling_fraction
hvac_prop["latent_cooling_fraction"] += random.gauss(
0, noise_hvac_params["std_latent_cooling_fraction"]
)
# Factor noise: COP, cooling_capacity
factor_COP = random.triangular(
noise_hvac_params["factor_COP_low"], noise_hvac_params["factor_COP_high"], 1
) # low, high, mode -> low <= N <= high, with max prob at mode.
hvac_prop["COP"] *= factor_COP
factor_cooling_capacity = random.triangular(
noise_hvac_params["factor_cooling_capacity_low"],
noise_hvac_params["factor_cooling_capacity_high"],
1,
) # low, high, mode -> low <= N <= high, with max prob at mode.
hvac_prop["cooling_capacity"] *= factor_cooling_capacity
"""
def get_random_date_time(start_date_time):
# Gets a uniformly sampled random date and time within a year from the start_date_time
days_in_year = 364
seconds_in_day = 60 * 60 * 24
random_days = random.randrange(days_in_year)
random_seconds = random.randrange(seconds_in_day)
random_date = start_date_time + timedelta(days=random_days, seconds=random_seconds)
return random_date
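# Example of get_random_date_time (a sketch): with a 364-day range plus up to 86399 seconds,
# the result stays strictly within one year of the start date.
#
#     start = datetime.strptime("2021-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
#     random_start = get_random_date_time(start)
#     # random_start = start + (0 to 363 days) + (0 to 86399 seconds)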
# Multi agent management
def get_actions(actors, obs_dict):
if isinstance(actors, dict): # One actor per agent
actions = {}
for agent_id in actors.keys():
actions[agent_id] = actors[agent_id].act(obs_dict)
return actions
else: # One actor for all agents (may need to change to ensure decentralized - ex: TarMAC_PPO)
actions_np = actors.act(obs_dict)
actions_dict = {}
for agent_id in obs_dict.keys():
actions_dict[agent_id] = actions_np[agent_id]
return actions_dict
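# Hypothetical sketch of get_actions with one actor per agent (SomeActor is a placeholder;
# the real actor classes live elsewhere in the repo and only need an .act(obs_dict) method):
#
#     actors = {agent_id: SomeActor(agent_id) for agent_id in obs_dict.keys()}
#     actions = get_actions(actors, obs_dict)
#     # actions maps each agent_id to the output of that agent's actor.act(obs_dict)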
def datetime2List(dt):
return [dt.year, dt.month, dt.day, dt.hour, dt.minute]
def superDict2List(SDict, id):
tmp = SDict[id].copy()
tmp["datetime"] = datetime2List(tmp["datetime"])
for k, v in tmp.items():
if not isinstance(tmp[k], list):
tmp[k] = [v]
return sum(list(tmp.values()), [])
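# Illustrative example of superDict2List (the keys below are made up for the sketch; real
# per-agent observation dicts come from the environment):
#
#     SDict = {0: {"datetime": datetime(2021, 6, 1, 12, 30), "house_temp": 21.5, "ids": [0, 1]}}
#     superDict2List(SDict, 0)
#     # -> [2021, 6, 1, 12, 30, 21.5, 0, 1]  (datetime expanded, scalars wrapped, lists concatenated in key order)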
def normStateDict(sDict, config_dict, returnDict=False):
default_house_prop = config_dict["default_house_prop"]
default_hvac_prop = config_dict["default_hvac_prop"]
default_env_prop = config_dict["default_env_prop"]
state_prop = default_env_prop["state_properties"]
result = {}
k_temp = ["house_temp", "house_mass_temp", "house_target_temp"]
k_div = ["hvac_cooling_capacity"]
if state_prop["thermal"]:
k_temp += ["OD_temp"]
k_div += [
"house_Ua",
"house_Cm",
"house_Ca",
"house_Hm",
]
if state_prop["hvac"]:
k_div += [
"hvac_COP",
"hvac_latent_cooling_fraction",
]
# k_lockdown = ['hvac_seconds_since_off', 'hvac_lockout_duration']
for k in k_temp:
# Assuming the temperatures will be between 15 to 30, centered around 20 -> between -1 and 2, centered around 0.
result[k] = (sDict[k] - 20) / 5
result["house_deadband"] = sDict["house_deadband"]
if state_prop["day"]:
day = sDict["datetime"].timetuple().tm_yday
result["sin_day"] = np.sin(day * 2 * np.pi / 365)
result["cos_day"] = np.cos(day * 2 * np.pi / 365)
if state_prop["hour"]:
hour = sDict["datetime"].hour
result["sin_hr"] = np.sin(hour * 2 * np.pi / 24)
result["cos_hr"] = np.cos(hour * 2 * np.pi / 24)
if state_prop["solar_gain"]:
result["house_solar_gain"] = sDict["house_solar_gain"] / 1000
for k in k_div:
k1 = "_".join(k.split("_")[1:])
if k1 in list(default_house_prop.keys()):
result[k] = sDict[k] / default_house_prop[k1]
elif k1 in list(default_hvac_prop.keys()):
result[k] = sDict[k] / default_hvac_prop[k1]
else:
print(k)
raise Exception("Error Key Matching.")
result["hvac_turned_on"] = 1 if sDict["hvac_turned_on"] else 0
result["hvac_lockout"] = 1 if sDict["hvac_lockout"] else 0
result["hvac_seconds_since_off"] = (
sDict["hvac_seconds_since_off"] / sDict["hvac_lockout_duration"]
)
result["hvac_lockout_duration"] = (
sDict["hvac_lockout_duration"] / sDict["hvac_lockout_duration"]
)
result["reg_signal"] = sDict["reg_signal"] / (
default_env_prop["reward_prop"]["norm_reg_sig"]
* default_env_prop["cluster_prop"]["nb_agents"]
)
result["cluster_hvac_power"] = sDict["cluster_hvac_power"] / (
default_env_prop["reward_prop"]["norm_reg_sig"]
* default_env_prop["cluster_prop"]["nb_agents"]
)
temp_messages = []
for message in sDict["message"]:
r_message = {}
r_message["current_temp_diff_to_target"] = (
message["current_temp_diff_to_target"] / 5
) # Already a difference, only need to normalize like k_temps
r_message["hvac_seconds_since_off"] = (
message["hvac_seconds_since_off"] / sDict["hvac_lockout_duration"]
)
r_message["hvac_curr_consumption"] = (
message["hvac_curr_consumption"]
/ default_env_prop["reward_prop"]["norm_reg_sig"]
)
r_message["hvac_max_consumption"] = (
message["hvac_max_consumption"]
/ default_env_prop["reward_prop"]["norm_reg_sig"]
)
if config_dict["default_env_prop"]["message_properties"]["thermal"]:
r_message["house_Ua"] = message["house_Ua"] / default_house_prop["Ua"]
r_message["house_Cm"] = message["house_Cm"] / default_house_prop["Cm"]
r_message["house_Ca"] = message["house_Ca"] / default_house_prop["Ca"]
r_message["house_Hm"] = message["house_Hm"] / default_house_prop["Hm"]
if config_dict["default_env_prop"]["message_properties"]["hvac"]:
r_message["hvac_COP"] = message["hvac_COP"] / default_hvac_prop["COP"]
r_message["hvac_latent_cooling_fraction"] = message["hvac_latent_cooling_fraction"] / default_hvac_prop["latent_cooling_fraction"]
r_message["hvac_cooling_capacity"] = message["hvac_cooling_capacity"] / default_hvac_prop["cooling_capacity"]
temp_messages.append(r_message)
if returnDict:
result["message"] = temp_messages
else: # Flatten the dictionary in a single np_array
flat_messages = []
for message in temp_messages:
flat_message = list(message.values())
flat_messages = flat_messages + flat_message
result = np.array(list(result.values()) + flat_messages)
return result
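# Hedged usage sketch for normStateDict: the per-agent observation dict comes from the
# environment, and the length of the returned vector depends on which state_properties and
# message_properties are enabled in config_dict.
#
#     obs_dict = env.reset()                                   # env is built elsewhere in the repo
#     flat_state = normStateDict(obs_dict[0], config_dict)     # np.ndarray of roughly zero-centered features
#     as_dict = normStateDict(obs_dict[0], config_dict, returnDict=True)   # keeps "message" as a list of dicts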
#%% Testing
def test_dqn_agent(agent, env, config_dict, opt, tr_time_steps):
"""
Test dqn agent on an episode of nb_test_timesteps
"""
env = deepcopy(env)
cumul_avg_reward = 0
cumul_temp_error = 0
cumul_signal_error = 0
nb_time_steps_test = config_dict["training_prop"]["nb_time_steps_test"]
obs_dict = env.reset()
with torch.no_grad():
for t in range(nb_time_steps_test):
action = {
k: agent.select_action(normStateDict(obs_dict[k], config_dict))
for k in obs_dict.keys()
}
obs_dict, rewards_dict, dones_dict, info_dict = env.step(action)
for i in range(env.nb_agents):
cumul_avg_reward += rewards_dict[i] / env.nb_agents
cumul_temp_error += (
np.abs(obs_dict[i]["house_temp"] - obs_dict[i]["house_target_temp"])
/ env.nb_agents
)
cumul_signal_error += np.abs(
obs_dict[i]["reg_signal"] - obs_dict[i]["cluster_hvac_power"]
) / (env.nb_agents**2)
mean_avg_return = cumul_avg_reward / nb_time_steps_test
mean_temp_error = cumul_temp_error / nb_time_steps_test
mean_signal_error = cumul_signal_error / nb_time_steps_test
return {
"Mean test return": mean_avg_return,
"Test mean temperature error": mean_temp_error,
"Test mean signal error": mean_signal_error,
"Training steps": tr_time_steps,
}
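# Sketch of how the returned metrics might be consumed (an assumption; the actual training loop
# lives elsewhere in the repo):
#
#     metrics = test_dqn_agent(agent, env, config_dict, opt, tr_time_steps=t)
#     if log_wandb:
#         wandb_run.log(metrics)   # keys: "Mean test return", "Test mean temperature error", ...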
def test_ppo_agent(agent, env, config_dict, opt, tr_time_steps):
"""
Test ppo agent on an episode of nb_test_timesteps, with
"""
env = deepcopy(env)
cumul_avg_reward = 0
cumul_temp_error = 0
cumul_signal_error = 0
obs_dict = env.reset()
nb_time_steps_test = config_dict["training_prop"]["nb_time_steps_test"]
with torch.no_grad():
for t in range(nb_time_steps_test):
action_and_prob = {
k: agent.select_action(normStateDict(obs_dict[k], config_dict))
for k in obs_dict.keys()
}
action = {k: action_and_prob[k][0] for k in obs_dict.keys()}
obs_dict, rewards_dict, dones_dict, info_dict = env.step(action)
for i in range(env.nb_agents):
cumul_avg_reward += rewards_dict[i] / env.nb_agents
cumul_temp_error += (
np.abs(obs_dict[i]["house_temp"] - obs_dict[i]["house_target_temp"])
/ env.nb_agents
)
cumul_signal_error += np.abs(
obs_dict[i]["reg_signal"] - obs_dict[i]["cluster_hvac_power"]
) / (env.nb_agents**2)
mean_avg_return = cumul_avg_reward / nb_time_steps_test
mean_temp_error = cumul_temp_error / nb_time_steps_test
mean_signal_error = cumul_signal_error / nb_time_steps_test
return {
"Mean test return": mean_avg_return,
"Test mean temperature error": mean_temp_error,
"Test mean signal error": mean_signal_error,
"Training steps": tr_time_steps,
}
def test_tarmac_ppo_agent(agent, env, config_dict, opt, tr_time_steps):
"""
Test ppo agent on an episode of nb_test_timesteps, with
"""
env = deepcopy(env)
cumul_avg_reward = 0
cumul_temp_error = 0
cumul_signal_error = 0
obs_dict = env.reset()
nb_time_steps_test = config_dict["training_prop"]["nb_time_steps_test"]
with torch.no_grad():
for t in range(nb_time_steps_test):
obs_all = np.array([normStateDict(obs_dict[k], config_dict) for k in obs_dict.keys()])
actions_and_probs = agent.select_actions(obs_all)
action = {k: actions_and_probs[0][k] for k in obs_dict.keys()}
obs_dict, rewards_dict, dones_dict, info_dict = env.step(action)
for i in range(env.nb_agents):
cumul_avg_reward += rewards_dict[i] / env.nb_agents
cumul_temp_error += (
np.abs(obs_dict[i]["house_temp"] - obs_dict[i]["house_target_temp"])
/ env.nb_agents
)
cumul_signal_error += np.abs(
obs_dict[i]["reg_signal"] - obs_dict[i]["cluster_hvac_power"]
) / (env.nb_agents**2)
mean_avg_return = cumul_avg_reward / nb_time_steps_test
mean_temp_error = cumul_temp_error / nb_time_steps_test
mean_signal_error = cumul_signal_error / nb_time_steps_test
return {
"Mean test return": mean_avg_return,
"Test mean temperature error": mean_temp_error,
"Test mean signal error": mean_signal_error,
"Training steps": tr_time_steps,
}
def test_tarmac_agent(agent, env, config_dict, opt, tr_time_steps, init_states, init_comms, init_masks):
"Test tarmac agent on an episode of nb_test_timesteps"
env = deepcopy(env)
cumul_avg_reward = 0
cumul_temp_error = 0
cumul_signal_error = 0
cumul_temp_offset = 0
obs_dict = env.reset()
nb_time_steps_test = config_dict["training_prop"]["nb_time_steps_test"]
obs_shape = normStateDict(obs_dict[0], config_dict).shape #(obs_size,)
obs_torch = obs_dict2obs_torch(obs_shape, obs_dict, config_dict) # [1, nb agents, obs_size]
# agent.act returns: value [1, 1], actions [1, nb_agents, 1], actions_log_prob [1, nb_agents, 1],
# states [1, nb_agents, STATE_SIZE], communications [1, nb_agents, COMMUNICATION_SIZE].
_, actions, _, states, communications, _ = agent.act(
obs_torch, init_states, init_comms, init_masks,
)
actions_dict = actionsAC2actions_dict(actions) # [1, nb_agents, 1 (action_size)]
obs_dict, _, _, _ = env.step(actions_dict)
obs = obs_dict2obs_torch(obs_shape, obs_dict, config_dict) # [1, nb_agents, obs_size]
with torch.no_grad():