forked from allegroai/clearml-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clearml.conf
327 lines (269 loc) · 12.7 KB
/
clearml.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# CLEARML-AGENT configuration file
api {
api_server: https://demoapi.demo.clear.ml
web_server: https://demoapp.demo.clear.ml
files_server: https://demofiles.demo.clear.ml
# Credentials are generated in the webapp, https://demoapp.demo.clear.ml/profile
# Overridden with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}
# verify host ssl certificate, set to False only if you have a very good reason
verify_certificate: True
}
agent {
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
git_user=""
git_pass=""
# Limit credentials to a single domain, for example: github.com,
# all other domains will use public access (no user/pass). Default: always send user/pass for any VCS domain
git_host=""
# Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
force_git_ssh_protocol: false
# Force a specific SSH port when converting http to ssh links (the domain is kept the same)
# force_git_ssh_port: 0
# Force a specific SSH username when converting http to ssh links (the default username is 'git')
# force_git_ssh_user: git
# unique name of this worker, if None, created based on hostname:process_id
# Overridden with os environment: CLEARML_WORKER_NAME
# worker_id: "clearml-agent-machine1:gpu0"
worker_id: ""
# worker name, replaces the hostname when creating a unique name for this worker
# Overridden with os environment: CLEARML_WORKER_ID
# worker_name: "clearml-agent-machine1"
worker_name: ""
# Set the python version to use when creating the virtual environment and launching the experiment
# Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
# The default is the python executing the clearml_agent
python_binary: ""
# ignore any requested python version (Default: False, if a Task was using a
# specific python version and the system supports multiple python the agent will use the requested python version)
# ignore_requested_python_version: true
# select python package manager:
# currently supported pip and conda
# poetry is used if pip selected and repository contains poetry.lock file
package_manager: {
# supported options: pip, conda, poetry
type: pip,
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
# pip_version: "<20"
# virtual environment inheres packages from system
system_site_packages: false,
# install with --upgrade
force_upgrade: false,
# additional artifact repositories to use when installing python packages
# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
extra_index_url: []
# additional conda channels to use when installing with conda package manager
conda_channels: ["pytorch", "conda-forge", "defaults", ]
# conda_full_env_update: false
# conda_env_as_base_docker: false
# set the priority packages to be installed before the rest of the required packages
# priority_packages: ["cython", "numpy", "setuptools", ]
# set the optional priority packages to be installed before the rest of the required packages,
# In case a package installation fails, the package will be ignored,
# and the virtual environment process will continue
# priority_optional_packages: ["pygobject", ]
# set the post packages to be installed after all the rest of the required packages
# post_packages: ["horovod", ]
# set the optional post packages to be installed after all the rest of the required packages,
# In case a package installation fails, the package will be ignored,
# and the virtual environment process will continue
# post_optional_packages: []
# set to True to support torch nightly build installation,
# notice: torch nightly builds are ephemeral and are deleted from time to time
torch_nightly: false,
},
# target folder for virtual environments builds, created when executing experiment
venvs_dir = ~/.clearml/venvs-builds
# cached virtual environment folder
venvs_cache: {
# maximum number of cached venvs
max_entries: 10
# minimum required free space to allow for cache entry, disable by passing 0 or negative value
free_space_threshold_gb: 2.0
# unmark to enable virtual environment caching
# path: ~/.clearml/venvs-cache
},
# cached git clone folder
vcs_cache: {
enabled: true,
path: ~/.clearml/vcs-cache
},
# DEPRECATED: please use `venvs_cache` and set `venvs_cache.path`
# use venv-update in order to accelerate python virtual environment building
# Still in beta, turned off by default
# venv_update: {
# enabled: false,
# },
# cached folder for specific python package download (mostly pytorch versions)
pip_download_cache {
enabled: true,
path: ~/.clearml/pip-download-cache
},
translate_ssh: true,
# reload configuration file every daemon execution
reload_config: false,
# pip cache folder mapped into docker, used for python package caching
docker_pip_cache = ~/.clearml/pip-cache
# apt cache folder mapped into docker, used for ubuntu package caching
docker_apt_cache = ~/.clearml/apt-cache
# optional arguments to pass to docker image
# these are local for this agent and will not be updated in the experiment's docker_cmd section
# extra_docker_arguments: ["--ipc=host", "-v", "/mnt/host/data:/mnt/data"]
# optional shell script to run in docker when started before the experiment is started
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
# Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0),
# for backwards compatibility reasons, true as default,
# change to false to skip installation and decrease docker spin up time
# docker_install_opencv_libs: true
# set to true in order to force "docker pull" before running an experiment using a docker image.
# This makes sure the docker image is updated.
docker_force_pull: false
default_docker: {
# default docker image to use when running in docker mode
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
# optional arguments to pass to docker image
# arguments: ["--ipc=host"]
}
# set the OS environments based on the Task's Environment section before launching the Task process.
enable_task_env: false
# CUDA versions used for Conda setup & solving PyTorch wheel packages
# it Should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
# cuda_version: 10.1
# cudnn_version: 7.6
}
sdk {
# CLEARML - default SDK configuration
storage {
cache {
# Defaults to system temp folder / cache
default_base_dir: "~/.clearml/cache"
}
direct_access: [
# Objects matching are considered to be available for direct access, i.e. they will not be downloaded
# or cached, and any download request will return a direct reference.
# Objects are specified in glob format, available for url and content_type.
{ url: "file://*" } # file-urls are always directly referenced
]
}
metrics {
# History size for debug files per metric/variant. For each metric/variant combination with an attached file
# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
# X files are stored in the upload destination for each metric/variant combination.
file_history_size: 100
# Max history size for matplotlib imshow files per plot title.
# File names for the uploaded images will be recycled in such a way that no more than
# X images are stored in the upload destination for each matplotlib plot title.
matplotlib_untitled_history_size: 100
# Limit the number of digits after the dot in plot reporting (reducing plot report size)
# plot_max_num_digits: 5
# Settings for generated debug images
images {
format: JPEG
quality: 87
subsampling: 0
}
# Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to True, each series should have its own graph)
tensorboard_single_series_per_graph: False
}
network {
metrics {
# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
# a specific iteration
file_upload_threads: 4
# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
# being sent for upload
file_upload_starvation_warning_sec: 120
}
iteration {
# Max number of retries when getting frames if the server returned an error (http code 500)
max_retries_on_server_error: 5
# Backoff factory for consecutive retry attempts.
# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
retry_backoff_factor_sec: 10
}
}
aws {
s3 {
# S3 credentials, used for read/write access by various SDK elements
# default, used for any bucket not specified below
key: ""
secret: ""
region: ""
credentials: [
# specifies key/secret credentials to use when handling s3 urls (read or write)
# {
# bucket: "my-bucket-name"
# key: "my-access-key"
# secret: "my-secret-key"
# },
# {
# # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
# host: "my-minio-host:9000"
# key: "12345678"
# secret: "12345678"
# multipart: false
# secure: false
# }
]
}
boto3 {
pool_connections: 512
max_multipart_concurrency: 16
}
}
google.storage {
# # Default project and credentials file
# # Will be used when no bucket configuration is found
# project: "clearml"
# credentials_json: "/path/to/credentials.json"
# # Specific credentials per bucket and sub directory
# credentials = [
# {
# bucket: "my-bucket"
# subdir: "path/in/bucket" # Not required
# project: "clearml"
# credentials_json: "/path/to/credentials.json"
# },
# ]
}
azure.storage {
# containers: [
# {
# account_name: "clearml"
# account_key: "secret"
# # container_name:
# }
# ]
}
log {
# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
null_log_propagate: False
task_log_buffer_capacity: 66
# disable urllib info and lower levels
disable_urllib3_info: True
}
development {
# Development-mode options
# dev task reuse window
task_reuse_time_window_in_hours: 72.0
# Run VCS repository detection asynchronously
vcs_repo_detect_async: True
# Store uncommitted git/hg source code diff in experiment manifest when training in development mode
# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
store_uncommitted_code_diff_on_train: True
# Support stopping an experiment in case it was externally stopped, status was changed or task was reset
support_stopping: True
# Default Task output_uri. if output_uri is not provided to Task.init, default_output_uri will be used instead.
default_output_uri: ""
# Development mode worker
worker {
# Status report period in seconds
report_period_sec: 2
# ping to the server - check connectivity
ping_period_sec: 30
# Log all stdout & stderr
log_stdout: True
}
}
}