forked from adidas/lakehouse-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
330 lines (294 loc) · 13.7 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
SHELL := /bin/bash -euxo pipefail
container_cli := docker
image_name := lakehouse-engine
deploy_env := dev
project_version := $(shell cat cicd/.bumpversion.cfg | grep "current_version =" | cut -f 3 -d " ")
version := $(project_version)
# Gets system information in upper case
system_information := $(shell uname -mvp | tr a-z A-Z)
meta_conf_file := cicd/meta.yaml
meta_os_conf_file := cicd/meta_os.yaml
group_id := $(shell id -g ${USER})
engine_conf_file := lakehouse_engine/configs/engine.yaml
engine_os_conf_file := lakehouse_engine/configs/engine_os.yaml
remove_files_from_os := $(engine_conf_file) $(meta_conf_file) CODEOWNERS sonar-project.properties CONTRIBUTING.md CHANGELOG.md assets/img/os_strategy.png
last_commit_msg := "$(shell git log -1 --pretty=%B)"
git_tag := $(shell git describe --tags --abbrev=0)
commits_url := $(shell cat $(meta_conf_file) | grep commits_url | cut -f 2 -d " ")
ifneq ($(project_version), $(version))
wheel_version := $(project_version)+$(subst _,.,$(subst -,.,$(version)))
project_name := lakehouse-engine-experimental
else
wheel_version := $(version)
project_name := lakehouse-engine
endif
# Add \ to make reg safe comparisons (e.g. in the perl commands)
wheel_version_reg_safe := $(subst +,\+,$(subst .,\.,$(wheel_version)))
project_version_reg_safe := $(subst .,\.,$(project_version))
# Condition to define the Python image to be built based on the machine CPU architecture.
# The base Python image only changes if the identified CPU architecture is ARM.
ifneq (,$(findstring ARM,$(system_information)))
python_image := $(shell cat $(meta_conf_file) | grep arm_python_image | cut -f 2 -d " ")
cpu_architecture := arm64
else
python_image := $(shell cat $(meta_conf_file) | grep amd_python_image | cut -f 2 -d " ")
cpu_architecture := amd64
endif
# Condition to define the spark driver memory limit to be used in the tests
# In order to change this limit you can use the spark_driver_memory parameter
# Example: make test spark_driver_memory=3g
#
# WARNING: When the tests are being run 2 spark nodes are created, so despite
# the default value being 2g, your configured docker environment should have
# extra memory for communication and overhead.
ifndef $(spark_driver_memory)
spark_driver_memory := "2g"
endif
# A requirements_full.lock file is created based on all the requirements of the project (core, dq, os, azure, sftp and cicd).
# The requirements_full.lock file is then used as a constraints file to build the other lock files so that we ensure dependencies are consistent and compatible
# with each other, otherwise, the the installations would likely fail.
# Moreover, the requirement_full.lock file is also used in the dockerfile to install all project dependencies.
full_requirements := -o requirements_full.lock requirements.txt requirements_os.txt requirements_dq.txt requirements_azure.txt requirements_sftp.txt requirements_cicd.txt
requirements := -o requirements.lock requirements.txt -c requirements_full.lock
os_requirements := -o requirements_os.lock requirements_os.txt -c requirements_full.lock
dq_requirements = -o requirements_dq.lock requirements_dq.txt -c requirements_full.lock
azure_requirements = -o requirements_azure.lock requirements_azure.txt -c requirements_full.lock
sftp_requirements = -o requirements_sftp.lock requirements_sftp.txt -c requirements_full.lock
os_deployment := False
container_user_dir := /home/appuser
trust_git_host := ssh -oStrictHostKeyChecking=no -i $(container_user_dir)/.ssh/id_rsa [email protected]
ifeq ($(os_deployment), True)
build_src_dir := tmp_os/lakehouse-engine
else
build_src_dir := .
endif
build-image:
$(container_cli) build \
--build-arg USER_ID=$(shell id -u ${USER}) \
--build-arg GROUP_ID=$(group_id) \
--build-arg PYTHON_IMAGE=$(python_image) \
--build-arg CPU_ARCHITECTURE=$(cpu_architecture) \
-t $(image_name):$(version) . -f cicd/Dockerfile
build-image-windows:
$(container_cli) build \
--build-arg PYTHON_IMAGE=$(python_image) \
--build-arg CPU_ARCHITECTURE=$(cpu_architecture) \
-t $(image_name):$(version) . -f cicd/Dockerfile
# The build target is used to build the wheel package.
# It makes usage of some `perl` commands to change the project wheel version in the pyproject.toml file,
# whenever the goal is to release a package for testing, instead of an official release.
# Ex: if you run 'make build-image version=feature-x-1276, and the current project version is 1.20.0, the generated wheel will be: lakehouse_engine_experimental-1.20.0+feature.x.1276-py3-none-any,
# while for the official 1.20.0 release, the wheel will be: lakehouse_engine-1.20.0-py3-none-any.
build:
perl -pi -e 's/version = "$(project_version_reg_safe)"/version = "$(wheel_version)"/g' pyproject.toml && \
perl -pi -e 's/name = "lakehouse-engine"/name = "$(project_name)"/g' pyproject.toml && \
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'python -m build --wheel $(build_src_dir)' && \
perl -pi -e 's/version = "$(wheel_version_reg_safe)"/version = "$(project_version)"/g' pyproject.toml && \
perl -pi -e 's/name = "$(project_name)"/name = "lakehouse-engine"/g' pyproject.toml
deploy: build
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
-v $(artifactory_credentials_file):$(container_user_dir)/.pypirc \
$(image_name):$(version) \
/bin/bash -c 'twine upload -r artifactory dist/$(subst -,_,$(project_name))-$(wheel_version)-py3-none-any.whl --skip-existing'
docs:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'cd $(build_src_dir) && python ./cicd/code_doc/render_doc.py'
# mypy incremental mode is used by default, so in case there is any cache related issue,
# you can modify the command to include --no-incremental flag or you can delete mypy_cache folder.
lint:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'flake8 --docstring-convention google --config=cicd/flake8.conf lakehouse_engine tests cicd/code_doc/render_doc.py \
&& mypy --no-incremental lakehouse_engine tests'
# useful to print and use make variables. Usage: make print-variable var=variable_to_print.
print-variable:
@echo $($(var))
style:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c '''isort lakehouse_engine tests cicd/code_doc/render_doc.py && \
black lakehouse_engine tests cicd/code_doc/render_doc.py'''
terminal:
$(container_cli) run \
-it \
--rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash
# Can use test only: ```make test test_only="tests/feature/test_delta_load_record_mode_cdc.py"```.
# You can also hack it by doing ```make test test_only="-rx tests/feature/test_delta_load_record_mode_cdc.py"```
# to show complete output even of passed tests.
# We also fix the coverage filepaths, using perl, so that report has the correct paths
test:
$(container_cli) run \
--rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c "pytest \
--junitxml=artefacts/tests.xml \
--cov-report xml --cov-report xml:artefacts/coverage.xml \
--cov-report term-missing --cov=lakehouse_engine \
--log-cli-level=INFO --color=yes -x -v \
--spark_driver_memory=$(spark_driver_memory) $(test_only)" && \
perl -pi -e 's/filename=\"/filename=\"lakehouse_engine\//g' artefacts/coverage.xml
test-security:
$(container_cli) run \
--rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'bandit -c cicd/bandit.yaml -r lakehouse_engine tests'
#####################################
##### Dependency Management Targets #####
#####################################
audit-dep-safety:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'pip-audit -r cicd/requirements_full.lock --desc on -f json --fix --dry-run -o artefacts/safety_analysis.json'
# This target will build the lock files to be used for building the wheel and delivering it.
build-lock-files:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'cd cicd && pip-compile --resolver=backtracking $(full_requirements) && \
pip-compile --resolver=backtracking $(requirements) && \
pip-compile --resolver=backtracking $(os_requirements) && \
pip-compile --resolver=backtracking $(dq_requirements) && \
pip-compile --resolver=backtracking $(azure_requirements) && \
pip-compile --resolver=backtracking $(sftp_requirements)'
# We test the dependencies to check if they need to be updated because requirements.txt files have changed.
# On top of that, we also test if we will be able to install the base and the extra packages together,
# as their lock files are built separately and therefore dependency constraints might be too restricted.
# If that happens, pip install will fail because it cannot solve the dependency resolution process, and therefore
# we need to pin those conflict dependencies in the requirements.txt files to a version that fits both the base and
# extra packages.
test-deps:
@GIT_STATUS="$$(git status --porcelain --ignore-submodules cicd/)"; \
if [ ! "x$$GIT_STATUS" = "x" ]; then \
echo "!!! Requirements lists has been updated but lock file was not rebuilt !!!"; \
echo "!!! Run make build-lock-files !!!"; \
echo -e "$${GIT_STATUS}"; \
git diff cicd/; \
exit 1; \
fi
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'pip install -e .[azure,dq,sftp,os] --dry-run --ignore-installed'
# This will update the transitive dependencies even if there were no changes in the requirements files.
# This should be a recurrent activity to make sure transitive dependencies are kept up to date.
upgrade-lock-files:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'cd cicd && pip-compile --resolver=backtracking --upgrade $(full_requirements) && \
pip-compile --resolver=backtracking --upgrade $(requirements) && \
pip-compile --resolver=backtracking --upgrade $(os_requirements) && \
pip-compile --resolver=backtracking --upgrade $(dq_requirements) && \
pip-compile --resolver=backtracking --upgrade $(azure_requirements) && \
pip-compile --resolver=backtracking --upgrade $(sftp_requirements)'
#####################################
##### GitHub Deployment Targets #####
#####################################
prepare-github-repo:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
-v $(git_credentials_file):$(container_user_dir)/.ssh/id_rsa \
$(image_name):$(version) \
/bin/bash -c """mkdir -p tmp_os/$(repository); \
cd tmp_os/$(repository); \
git init -b master; \
git config pull.rebase false; \
git config user.email '[email protected]'; \
git config user.name 'Lakehouse Engine'; \
$(trust_git_host); \
git remote add origin [email protected]:adidas/$(repository).git; \
git pull origin master --tags"""
sync-to-github: prepare-github-repo
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
-v $(git_credentials_file):$(container_user_dir)/.ssh/id_rsa \
$(image_name):$(version) \
/bin/bash -c """cd tmp_os/lakehouse-engine; \
rsync -r --exclude=.git --exclude=.*cache* --exclude=venv --exclude=dist --exclude=tmp_os /app/ . ; \
rm $(remove_files_from_os); \
mv $(engine_os_conf_file) $(engine_conf_file); \
mv $(meta_os_conf_file) $(meta_conf_file); \
mv CONTRIBUTING_OS.md CONTRIBUTING.md; \
$(trust_git_host); \
git add . ; \
git commit -m "'${last_commit_msg}'"; \
git tag -a $(git_tag) -m 'Release $(git_tag)' ; \
git push origin master --follow-tags;"""
deploy-docs-to-github: docs prepare-github-repo
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
-v $(git_credentials_file):$(container_user_dir)/.ssh/id_rsa \
$(image_name):$(version) \
/bin/bash -c """cp -r tmp_os/lakehouse-engine/artefacts/docs/* tmp_os/lakehouse-engine-docs/ ; \
cd tmp_os/lakehouse-engine-docs; \
$(trust_git_host); \
git add . ; \
git commit -m 'Lakehouse Engine $(version) documentation'; \
git push origin master ; \
cd .. && rm -rf tmp_os/lakehouse-engine-docs"""
deploy-to-pypi: build
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
-v $(pypi_credentials_file):$(container_user_dir)/.pypirc \
$(image_name):$(version) \
/bin/bash -c 'twine upload tmp_os/lakehouse-engine/dist/lakehouse_engine-$(project_version)-py3-none-any.whl --skip-existing'
deploy-to-pypi-and-clean: deploy-to-pypi
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'rm -rf tmp_os/lakehouse-engine'
###########################
##### Release Targets #####
###########################
create-changelog:
echo "# Changelog - $(shell date +"%Y-%m-%d") v$(shell cat cicd/.bumpversion.cfg | grep "current_version =" | cut -f 3 -d " ")" > CHANGELOG.md && \
echo "All notable changes to this project will be documented in this file automatically. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)." >> CHANGELOG.md && \
echo "" >> CHANGELOG.md && \
git log --no-decorate --pretty=format:"#### [%cs] [%(describe)]%n [%h]($(commits_url)%H) %s" -n 1000 >> CHANGELOG.md
bump-up-version:
$(container_cli) run --rm \
-w /app \
-v "$$PWD":/app \
$(image_name):$(version) \
/bin/bash -c 'bump2version --config-file cicd/.bumpversion.cfg $(increment)'
prepare-release: bump-up-version create-changelog
echo "Prepared version and changelog to release!"
commit-release:
git commit -a -m 'Create release $(version)' && \
git tag -a 'v$(version)' -m 'Release $(version)'
push-release:
git push --follow-tags
delete-tag:
git push --delete origin $(tag)
.PHONY: $(MAKECMDGOALS)