"""
All settings relating to D4PG are contained in this file.
This file is copied on each run and placed in the Tensorboard directory
so that all settings are preserved for future reference.
Notes:
"""
import numpy as np
class Settings:
    #%%
    ########################
    ##### Run Settings #####
    ########################

    RUN_NAME = 'hello_world' # Use just the name. If trying to restore from file, use the name along with its timestamp
    ENVIRONMENT = 'quad1_accel' # 'quad1' for Task 1 velocity; 'quad1_accel' for Task 1 accel; 'quad1_runway' for Task 2 accel
    AGENT = '' # '' for Task 1, '_runway' for the runway experiment
    RECORD_VIDEO = True
    VIDEO_RECORD_FREQUENCY = 20 # Multiples of "CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES"
    NOISELESS_AT_TEST_TIME = True # Whether or not to test without action noise (keep at True unless debugging)
    LEARN_FROM_PIXELS = False # False = learn from state (fully observed); True = learn from pixels (partially observed)
    RESUME_TRAINING = False # If True, be sure to set "RUN_NAME" to the previous run's filename
    USE_GPU_WHEN_AVAILABLE = True # As of Nov 19, 2018, it appears better to use the CPU. Re-evaluate later
    RANDOM_SEED = 13
    #%%
    #############################
    ##### Training Settings #####
    #############################

    # Hyperparameters
    NUMBER_OF_ACTORS = 10
    NUMBER_OF_EPISODES = 8e4 # Number of episodes each agent will perform
    MAX_TRAINING_ITERATIONS = 1e8 # Maximum number of training iterations for the neural networks
    ACTOR_LEARNING_RATE = 0.0001
    CRITIC_LEARNING_RATE = 0.0001
    TARGET_NETWORK_TAU = 0.001
    NUMBER_OF_BINS = 51 # Also known as the number of atoms
    L2_REGULARIZATION = False # Whether to use (optional) L2 regularization when training the critic
    L2_REG_PARAMETER = 1e-6
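    # Note (assumption, illustrative only): TARGET_NETWORK_TAU above is the usual soft-update
    # coefficient for DDPG/D4PG target networks, i.e. on each target update
    #     target_weights = TARGET_NETWORK_TAU * online_weights + (1 - TARGET_NETWORK_TAU) * target_weights
    # Whether the learner in this repository applies exactly this rule is not shown in this file.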
    # Periodic events
    UPDATE_TARGET_NETWORKS_EVERY_NUM_ITERATIONS = 1
    UPDATE_ACTORS_EVERY_NUM_EPISODES = 1
    CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES = 5
    LOG_TRAINING_PERFORMANCE_EVERY_NUM_ITERATIONS = 100
    DISPLAY_TRAINING_PERFORMANCE_EVERY_NUM_ITERATIONS = 50000
    DISPLAY_ACTOR_PERFORMANCE_EVERY_NUM_EPISODES = 2500

    # Buffer settings
    PRIORITY_REPLAY_BUFFER = False
    PRIORITY_ALPHA = 0.6 # Controls the randomness vs prioritisation of the prioritised sampling (0.0 = uniform sampling, 1.0 = greedy prioritisation)
    PRIORITY_BETA_START = 0.4 # Starting value of beta - controls to what degree importance-sampling weights influence the gradient updates to correct for the bias introduced by priority sampling (0 = no correction, 1 = full correction)
    PRIORITY_BETA_END = 1.0 # Beta is linearly annealed from its starting value to this value throughout training
    PRIORITY_EPSILON = 0.00001 # Small value added to updated priorities to ensure no sample has a probability of 0 of being chosen
    DUMP_PRIORITY_REPLAY_BUFFER_EVER_NUM_ITERATIONS = 200 # Check whether the prioritized replay buffer is over capacity every this many iterations. If so, dump the excess data
    REPLAY_BUFFER_SIZE = 1000000
    REPLAY_BUFFER_START_TRAINING_FULLNESS = 0 # How full the buffer should be before training begins
    MINI_BATCH_SIZE = 256
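    # Note (assumption, illustrative only): with the linear annealing described above, the
    # importance-sampling exponent at training iteration t would be
    #     beta(t) = PRIORITY_BETA_START + (PRIORITY_BETA_END - PRIORITY_BETA_START) * t / MAX_TRAINING_ITERATIONS
    # and each sampled transition would be weighted by (N * P(i)) ** (-beta(t)), normalized by the
    # largest such weight in the batch. The actual schedule is implemented outside this file.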
    # Exploration noise
    UNIFORM_OR_GAUSSIAN_NOISE = False # True -> uniform noise; False -> Gaussian noise
    if UNIFORM_OR_GAUSSIAN_NOISE:
        NOISE_SCALE = 1 # 1 is best for uniform noise -> noise is scaled to the action range
    else:
        NOISE_SCALE = 1/3 # Standard deviation = 1/3 of the action range, so a 3-sigma action causes full exploration in the worst-case scenario
    NOISE_SCALE_DECAY = 0.999986137152479 # 0.999986137152479 for a 50k half-life; 0.9999 for a 7k half-life; 1 means the noise does not decay during training
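    # Note (assumption, illustrative only): Gaussian exploration noise is typically generated as
    #     noise = NOISE_SCALE * ACTION_RANGE * np.random.normal(size = ACTION_SIZE)
    # If, as the half-life comment above suggests, NOISE_SCALE is multiplied by NOISE_SCALE_DECAY
    # once per episode, the noise halves every log(0.5)/log(NOISE_SCALE_DECAY) episodes
    # (~50,000 for the value above). ACTION_RANGE and ACTION_SIZE are defined in the
    # Environment Settings section below.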
    #%%
    ####################################
    ##### Model Structure Settings #####
    ####################################

    # Whether or not to learn from pixels (defined above)
    if LEARN_FROM_PIXELS:
        # Define the properties of the convolutional layers in a list. Each dict in the list is one layer:
        #   'filters' gives the number of filters to be used
        #   'kernel_size' gives the dimensions of the filter
        #   'strides' gives the number of pixels that the filter skips while convolving
        CONVOLUTIONAL_LAYERS = [{'filters': 32, 'kernel_size': [8, 8], 'strides': [4, 4]},
                                {'filters': 64, 'kernel_size': [4, 4], 'strides': [2, 2]},
                                {'filters': 64, 'kernel_size': [3, 3], 'strides': [1, 1]}]
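        # For reference (assumes 'valid' padding and an 84x84 input, which this repository may
        # not use): each layer's output side length is floor((input - kernel) / stride) + 1,
        # giving 84 -> 20 -> 9 -> 7 for the three layers above.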
    # Fully connected layers follow the (optional) convolutional layers
    ACTOR_HIDDEN_LAYERS = [400, 300] # Number of hidden neurons in each layer
    CRITIC_HIDDEN_LAYERS = [400, 300] # Number of hidden neurons in each layer
    #%%
    #########################
    ##### Save Settings #####
    #########################

    MODEL_SAVE_DIRECTORY = 'Tensorboard/Current/' # Where to save all data
    TENSORBOARD_FILE_EXTENSION = '.tensorboard' # File extension for the tensorboard file
    SAVE_CHECKPOINT_EVERY_NUM_ITERATIONS = 10000 # How often to save the neural network parameters
    NUM_CHECKPOINT_MODELS_TO_SAVE = 5 # How many of the most recent policy models to keep before discarding
    #%%
    ##############################
    #### Environment Settings ####
    ##############################

    environment_file = __import__('environment_' + ENVIRONMENT)
    if ENVIRONMENT == 'gym':
        env = environment_file.Environment('Temporary environment', 0, CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES, VIDEO_RECORD_FREQUENCY, MODEL_SAVE_DIRECTORY) # Additional parameters needed for gym
    else:
        env = environment_file.Environment()
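    # The dynamic import above loads environment_<ENVIRONMENT>.py (e.g. environment_quad1_accel.py)
    # from the Python path; that module must define an Environment class exposing the attributes
    # read below.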
    OBSERVATION_SIZE = env.OBSERVATION_SIZE + env.AUGMENT_STATE_WITH_ACTION_LENGTH*env.ACTION_SIZE # Augmenting the state with past actions and states
    UPPER_STATE_BOUND = env.UPPER_STATE_BOUND
    LOWER_STATE_BOUND = env.LOWER_STATE_BOUND
    ACTION_SIZE = env.ACTION_SIZE
    LOWER_ACTION_BOUND = env.LOWER_ACTION_BOUND
    UPPER_ACTION_BOUND = env.UPPER_ACTION_BOUND
    NORMALIZE_STATE = env.NORMALIZE_STATE # Normalize the state on each timestep to avoid vanishing gradients
    MIN_V = env.MIN_V
    MAX_V = env.MAX_V
    DISCOUNT_FACTOR = env.DISCOUNT_FACTOR
    N_STEP_RETURN = env.N_STEP_RETURN
    TIMESTEP = env.TIMESTEP
    MAX_NUMBER_OF_TIMESTEPS = env.MAX_NUMBER_OF_TIMESTEPS # Per episode
    NUMBER_OF_QUADS = env.NUMBER_OF_QUADS
    irrelevant_states = []
    for i in range(NUMBER_OF_QUADS):
        for irrelevant_state in env.IRRELEVANT_STATES:
            irrelevant_states.append(i * 6 + irrelevant_state)
    IRRELEVANT_STATES = np.asarray(irrelevant_states)
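    # Worked example (hypothetical values): with NUMBER_OF_QUADS = 3 and
    # env.IRRELEVANT_STATES = [4, 5], the offset of 6 state elements per quad gives
    # IRRELEVANT_STATES = [4, 5, 10, 11, 16, 17].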
    TEST_ON_DYNAMICS = env.TEST_ON_DYNAMICS
    KINEMATIC_NOISE = env.KINEMATIC_NOISE
    TOTAL_STATE_SIZE = env.TOTAL_STATE_SIZE
    AUGMENT_STATE_WITH_ACTION_LENGTH = env.AUGMENT_STATE_WITH_ACTION_LENGTH
    VELOCITY_LIMIT = env.VELOCITY_LIMIT
    RUNWAY_LENGTH_ELEMENTS = env.RUNWAY_LENGTH_ELEMENTS
    RUNWAY_WIDTH_ELEMENTS = env.RUNWAY_WIDTH_ELEMENTS
    RUNWAY_LENGTH = env.RUNWAY_LENGTH
    RUNWAY_WIDTH = env.RUNWAY_WIDTH
    MINIMUM_CAMERA_ALTITUDE = env.MINIMUM_CAMERA_ALTITUDE
    MAXIMUM_CAMERA_ALTITUDE = env.MAXIMUM_CAMERA_ALTITUDE

    # Delete the test environment
    del env

    ACTION_RANGE = UPPER_ACTION_BOUND - LOWER_ACTION_BOUND # Range of each action
    STATE_MEAN = (LOWER_STATE_BOUND + UPPER_STATE_BOUND)/2.
    STATE_HALF_RANGE = (UPPER_STATE_BOUND - LOWER_STATE_BOUND)/2.
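    # Illustrative sketches only (assumptions; these are not referenced elsewhere in this file):
    # With NORMALIZE_STATE = True, a state would typically be normalized on each timestep as
    #     normalized_state = (state - STATE_MEAN) / STATE_HALF_RANGE
    # mapping each element into roughly [-1, 1]. Similarly, a D4PG/C51-style distributional
    # critic usually places its NUMBER_OF_BINS atoms evenly between MIN_V and MAX_V:
    BIN_VALUES = np.linspace(MIN_V, MAX_V, NUMBER_OF_BINS) # Assumed even spacing of the critic's value-distribution atoms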