forked from ocaml/merlin
-
Notifications
You must be signed in to change notification settings - Fork 0
504 lines (444 loc) · 20.1 KB
/
fuzzy-ci.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
name: Fuzzy CI

on:
  pull_request:
    branches:
      - master
    types:
      - opened
      - synchronize
      - reopened
      - unlabeled
      - labeled
    # Pull requests that only touch these paths do not trigger the workflow.
    paths-ignore:
      - '**.md'
      - '**.txt'
      - '.git*'
      - 'doc/**'
      - 'emacs/**'
      - 'vim/**'
      - '**/emacs-lint.yml'
      - 'bench/**'
      - 'upstream/**'
      - 'tests/**'

env:
  # Artifact names: several jobs refer to the same artifacts, so the names
  # are defined once here to keep them consistent.
  BASE_BRANCH_ARTIFACT_NAME: 'base-branch-data-${{ github.event.pull_request.base.sha }}-pr${{ github.event.pull_request.number }}'
  MERGE_BRANCH_ARTIFACT_NAME: 'merge-branch-data-${{ github.event.pull_request.base.sha }}-${{ github.event.pull_request.head.sha }}-pr${{ github.event.pull_request.number }}'
  DIFF_ARTIFACT_NAME: 'diff-${{ github.event.pull_request.base.sha }}-${{ github.event.pull_request.head.sha }}'

  # File names: these also have to be consistent across jobs.
  FULL_DIFF_FILE: 'full_responses.diff'
  DISTILLED_DIFF_FILE: 'distilled_data.diff'
  # FULL_DATA_FILE and DISTILLED_DATA_FILE must be the names of the files
  # generated by `merl-an behavior`.
  FULL_DATA_FILE: 'full_responses.json'
  DISTILLED_DATA_FILE: 'distilled_data.json'

  # Short-hands for GitHub API endpoints.
  GH_API_COMMENTS: '${{ github.event.pull_request.comments_url }}'
  GH_API_LABELS: '${{ github.event.pull_request.issue_url }}/labels'
  GH_API_ARTIFACTS: '${{ github.event.pull_request.base.repo.url }}/actions/artifacts'

  # Short-hands for browser URLs.
  ACTIONS_RUNS_ENDPOINT: '${{ github.event.repository.html_url }}/actions/runs'
  CURRENT_ACTION_URL: '${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}'

  # Irmin is the code base `ocamlmerlin` is tested on. Its version and the
  # merl-an version are fixed so that results are reproducible.
  IRMIN_VERSION: '3.9.0'
  # TODO: Release merl-an and install a certain version instead of pinning it
  # to a certain commit.
  MERL_AN_SHA: '6411f0d3847e8b7e66362bcb1f9345a5d3e851ca'

  # Compiler version used on each of the two branches; it is also part of
  # Irmin's build cache key. Bump the matching entry whenever the compiler
  # version is bumped on that branch.
  merge_branch_COMPILER_VERSION: 'ocaml-base-compiler.4.14.1'
  base_branch_COMPILER_VERSION: 'ocaml-base-compiler.4.14.1'

jobs:
# Job `data` (an entry of the workflow's `jobs:` mapping).
#
# Runs once per matrix entry (`merge_branch`, `base_branch`). Each run checks
# out the corresponding commit, builds and installs Merlin from it, prepares a
# pruned Irmin checkout, runs `merl-an behavior` on it and uploads the data
# directory as an artifact. The `merge_branch` run additionally compiles the
# diff tool and uploads it for the next job.
data:
  name: Generate data
  runs-on: ubuntu-22.04
  # Of the label events, only the removal of the approval label triggers a
  # new data run.
  if: >
    github.event.action == 'opened' ||
    github.event.action == 'synchronize' ||
    github.event.action == 'reopened' ||
    (
      github.event.action == 'unlabeled' &&
      github.event.label.name == 'fuzzy-diff-looks-good'
    )
  env:
    data_dir: data
  strategy:
    matrix:
      commit: ["merge_branch", "base_branch"]
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    - name: Checking out ${{ matrix.commit }}
      env:
        base_branch_sha: ${{ github.event.pull_request.base.sha }}
        merge_branch_sha: ${{ github.sha }}
      run: |
        # The expression is substituted before the shell runs, so the line
        # below reads `sha=$merge_branch_sha` or `sha=$base_branch_sha`,
        # i.e. it selects one of the two variables defined in `env` above.
        sha=$${{ matrix.commit }}_sha
        echo "Check out $sha"
        git checkout $sha
    - name: Get desired compiler version
      id: compiler
      run: |
        # Same trick as above: resolves to the workflow-level variable
        # `merge_branch_COMPILER_VERSION` or `base_branch_COMPILER_VERSION`.
        v=$${{ matrix.commit }}_COMPILER_VERSION
        echo "version=$v" | tee -a $GITHUB_OUTPUT
    - name: Install OCaml
      uses: ocaml/setup-ocaml@v3
      with:
        ocaml-compiler: ${{ steps.compiler.outputs.version }}
        dune-cache: true
    - name: Install merlin dependencies
      run: |
        opam pin menhirLib 20201216 --no-action
        opam install . --deps-only --yes
    - name: Install merlin
      run: |
        # Running `subst` to have the current commit in the data produced by `merl-an`
        opam exec -- dune subst
        opam exec -- dune build -p merlin-lib,dot-merlin-reader,merlin
        opam exec -- dune install -p merlin-lib,dot-merlin-reader,merlin
    - name: Pull irmin and its deps from cache if possible
      uses: actions/cache@v4
      id: irmin-cache
      with:
        path: irmin/
        # The lock file lives under `.github/`; `hashFiles` resolves patterns
        # relative to the workspace root, so the path must include that
        # directory or the hash is empty and lock-file changes never
        # invalidate the cache.
        key: os${{ runner.os }}+arch${{ runner.arch }}+${{ hashFiles('.github/fuzzy-ci-helpers/irmin.3.9.0.opam.locked') }}+${{ env.IRMIN_VERSION }}+${{ steps.compiler.outputs.version }}
    # The following Irmin preparation steps only run on a cache miss.
    - name: Download Irmin tarball
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/mirage/irmin/releases/download/$IRMIN_VERSION/irmin-$IRMIN_VERSION.tbz
    - name: Create irmin dir
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: mkdir -p irmin
    - name: Decompress Irmin tarball
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: tar xvf irmin-$IRMIN_VERSION.tbz -C irmin --strip-components=1
    - name: Get Irmin's lock files
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: |
        cp .github/fuzzy-ci-helpers/irmin.3.9.0.opam.locked irmin/irmin.opam.locked
    - name: Install opam monorepo
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: opam install opam-monorepo --yes
    - name: Pull in Irmin's dependencies
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: |
        git checkout ${{ github.sha }}
        opam monorepo pull --lockfile=irmin.opam.locked --yes
      working-directory: irmin
    - name: Prune Irmin
      if: steps.irmin-cache.outputs.cache-hit != 'true'
      run: |
        # Keep only the parts of Irmin listed below; everything else under
        # `test/` and `src/` is deleted, as are `examples/` and `bench/`.
        rm -r examples/ bench/
        find test/ -mindepth 1 -maxdepth 1 -type d -not -name 'irmin-pack' -exec rm -r {} \;
        find src/ -mindepth 1 -maxdepth 1 -type d \
          -not -name 'irmin-pack' \
          -not -name 'irmin' \
          -not -name 'irmin-tezos' \
          -not -name ppx_irmin \
          -not -name irmin_test \
          -not -name irmin-test \
          -exec rm -r {} \;
      working-directory: irmin
    - name: Build Irmin
      run: |
        opam exec -- dune build @check
      working-directory: irmin
    - name: Pull merl-an from cache if possible
      uses: actions/cache@v4
      id: merl-an-cache
      with:
        path: /usr/local/bin/merl-an
        # `with:` inputs are not passed through a shell, so the pinned commit
        # has to be read from the `env` context; a literal `$MERL_AN_SHA`
        # would make the key constant and keep serving a stale binary.
        key: os${{ runner.os }}+arch${{ runner.arch }}+merl-an-sha${{ env.MERL_AN_SHA }}
    - name: Install merl-an
      if: steps.merl-an-cache.outputs.cache-hit != 'true'
      run: opam pin -y merl-an https://github.com/pitag-ha/merl-an.git#$MERL_AN_SHA
    - name: Add merl-an to /usr/local/bin/
      if: steps.merl-an-cache.outputs.cache-hit != 'true'
      run: opam exec -- cp $GITHUB_WORKSPACE/_opam/bin/merl-an /usr/local/bin/merl-an
    - name: Create data set of Merlin responses
      run: |
        # Note: The parameters with most influence on the execution time are
        # `--sample-size`: Number of samples per file defined by `--project` (and per local query).
        # `--project`: List of dirs/files to create samples on. In the case of a dir, all ml(i) files recursively in the dir are used.
        # `--queries`: The `ocamlmerlin` queries that are being run.
        opam exec -- merl-an behavior \
          --queries=type-enclosing,occurrences,locate,complete-prefix,errors \
          --sample-size=30 \
          --data=${{ env.data_dir }} \
          --merlin=ocamlmerlin \
          --project=irmin/src/irmin,irmin/src/irmin-pack,irmin/test/irmin-pack
    - name: Remove varying components from data
      run: |
        # TODO: This could be done on the `merl-an` side
        # Drops the `heap_mbytes`, `timings` and `cache` fields of every
        # response, then replaces the data file with the filtered version.
        jq '.responses |= map(del(.heap_mbytes, .timings, .cache))' \
          ${{ env.data_dir }}/$FULL_DATA_FILE > temp.json && \
          mv temp.json ${{ env.data_dir }}/$FULL_DATA_FILE
    - name: Create name for data artifact
      id: artifact_name
      env:
        base_branch_artifact_name: ${{ env.BASE_BRANCH_ARTIFACT_NAME }}
        merge_branch_artifact_name: ${{ env.MERGE_BRANCH_ARTIFACT_NAME }}
      # Resolves to `$merge_branch_artifact_name` or
      # `$base_branch_artifact_name`, depending on the matrix entry.
      run: echo "name=$${{ matrix.commit }}_artifact_name" >> $GITHUB_OUTPUT
    - name: Upload data
      uses: actions/upload-artifact@v4
      with:
        name: ${{ steps.artifact_name.outputs.name }}
        path: ${{ env.data_dir }}
    - name: Compile diff tool
      if: ${{ matrix.commit == 'merge_branch' }}
      run: |
        # Taking advantage that ocamlopt is installed on this runner: compile the diff tool here and share it with the next job where it's needed.
        # All jobs in this workflow declare the same runner OS, so the binary is expected to run there as well.
        opam exec -- ocamlopt -o create_diff .github/fuzzy-ci-helpers/create_diff.ml
    - name: Upload diff tool
      if: ${{ matrix.commit == 'merge_branch' }}
      uses: actions/upload-artifact@v4
      with:
        name: diff_tool
        path: create_diff
# Job `diff` (an entry of the workflow's `jobs:` mapping).
#
# Downloads the data artifacts of both branches plus the pre-compiled diff
# tool, pairs up the entries of the two data sets and pipes them through
# `create_diff`, then uploads the diff directory as an artifact.
diff:
  name: Generate diffs
  runs-on: ubuntu-22.04
  needs: [data]
  outputs:
    # The key is spelled `diff_exits`; the `output` job reads it under
    # exactly this name.
    diff_exits: ${{ steps.full_responses_diff.outputs.diff_exists }}
  env:
    base_data_dir: base_data
    merge_data_dir: merge_data
    diff_dir: diff
  steps:
    - name: Download base branch data
      uses: actions/download-artifact@v4
      with:
        name: ${{ env.BASE_BRANCH_ARTIFACT_NAME }}
        path: ${{ env.base_data_dir }}
    - name: Download merge branch data
      uses: actions/download-artifact@v4
      with:
        name: ${{ env.MERGE_BRANCH_ARTIFACT_NAME }}
        path: ${{ env.merge_data_dir }}
    - name: Create diff dir
      run: mkdir -p "$diff_dir"
    - name: Download diff tool
      uses: actions/download-artifact@v4
      with:
        name: diff_tool
    - name: Give diff tool execute permissions
      run: chmod +x create_diff
    - name: Generate full responses diff
      id: full_responses_diff
      run: |
        # For every index of the base data set, emit a header line and the
        # entry for each branch, separated by the markers that `create_diff`
        # is given as its first two arguments.
        jq -r -n \
          --slurpfile base "$base_data_dir/$FULL_DATA_FILE" \
          --slurpfile merge "$merge_data_dir/$FULL_DATA_FILE" \
          'def labelled($who; $entry):
             ($who + ": " + $entry.cmd + " (id=" + ($entry.sample_id | tostring) + ")"), $entry;
           range($base|length) as $k |
             labelled("base branch"; $base[$k]),
             "--input-separator--",
             labelled("merge branch"; $merge[$k]),
             "--diff-cmd-separator--"' \
          | ./create_diff "--input-separator--" "--diff-cmd-separator--" "$diff_dir/$FULL_DIFF_FILE"
        # A diff "exists" when the resulting file is non-empty.
        if [ -s "$diff_dir/$FULL_DIFF_FILE" ]; then
          diff_exists=true
        else
          diff_exists=false
        fi
        echo "diff_exists=$diff_exists" | tee -a "$GITHUB_OUTPUT"
    - name: Generate distilled data diff
      # Without a full responses diff there cannot be a distilled data diff
      # either, so this step is skipped in that case.
      if: steps.full_responses_diff.outputs.diff_exists == 'true'
      run: |
        jq -r -n \
          --slurpfile base "$base_data_dir/$DISTILLED_DATA_FILE" \
          --slurpfile merge "$merge_data_dir/$DISTILLED_DATA_FILE" \
          'def labelled($who; $entry):
             ($who + ": " + $entry.cmd + " (id=" + ($entry.sample_id | tostring) + ")"), $entry;
           range($base|length) as $k |
             labelled("base branch"; $base[$k]),
             "--input-separator--",
             labelled("merge branch"; $merge[$k]),
             "--diff-cmd-separator--"' \
          | ./create_diff "--input-separator--" "--diff-cmd-separator--" "$diff_dir/$DISTILLED_DIFF_FILE"
    - name: Upload diff(s)
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.DIFF_ARTIFACT_NAME }}
        path: ${{ env.diff_dir }}
# Job `output` (an entry of the workflow's `jobs:` mapping).
#
# Decides whether the run passes: it compares the sha256 of the current full
# responses diff with the hash recorded in the latest approval comment (if
# the PR carries the approval label), forwards a "delete label" instruction
# when the two differ, and fails whenever an unapproved diff exists.
output:
  name: Evaluate diffs
  runs-on: ubuntu-22.04
  needs: diff
  env:
    earlier_diff_was_approved: ${{ contains(github.event.pull_request.labels.*.name, 'fuzzy-diff-looks-good') }}
    # The `diff` job publishes this output under the spelling `diff_exits`.
    current_diff_exists: ${{ needs.diff.outputs.diff_exits }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Download current diff(s)
      if: ${{ env.current_diff_exists == 'true' }}
      uses: actions/download-artifact@v4
      with:
        name: ${{ env.DIFF_ARTIFACT_NAME }}
    - name: Retreive hash of approved diff
      if: ${{ env.earlier_diff_was_approved == 'true' }}
      id: approved_diff
      run: |
        # Walk through all pages of PR comments and keep the most recent
        # comment by github-actions[bot] that starts with the first 50 bytes
        # of msg.txt.
        msg_start=$(head -c 50 .github/fuzzy-ci-helpers/msg.txt)
        next_page_endpoint="$GH_API_COMMENTS?per_page=100&page=1"
        latest_comment="{}"
        while [ -n "$next_page_endpoint" ]; do
          # The best match found so far is passed in as `$latest`, so a match
          # from an earlier page survives pages without any match. (`{}` has
          # no `created_at` and therefore loses against every real comment.)
          latest_comment=$(
            curl -s -D "headers.txt" -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" "$next_page_endpoint" |
            jq --arg msg_start "$msg_start" --argjson latest "$latest_comment" '
              map(
                select(
                  (.body | startswith($msg_start)) and .user.login == "github-actions[bot]"
                )
              ) + [$latest] | max_by(.created_at)'
          )
          # Keep the accumulator valid JSON even if a page produced no output.
          [ -n "$latest_comment" ] || latest_comment="{}"
          # Follow the `rel="next"` entry of the `link:` response header, if any.
          next_page_endpoint=$(
            rg '^link:' headers.txt |
            tr ',' '\n' |
            rg 'rel="next"' |
            cut -d'<' -f2 |
            cut -d'>' -f1
          )
        done
        hash=$(echo "$latest_comment" | jq '.body' -r | grep '256-sha' | awk '{print $NF}')
        # The value is stored wrapped in single quotes; the `Return` step
        # expands it unquoted inside a `[ ... ]` test.
        echo "hash='$hash'" | tee -a $GITHUB_OUTPUT
    - name: Analyze current diff
      id: current_diff
      run: |
        hash=$(sha256sum "$FULL_DIFF_FILE" | awk '{print $1}')
        # Stored in the same single-quoted form as the approved hash so that
        # the two outputs can be compared as strings.
        echo "hash='$hash'" | tee -a $GITHUB_OUTPUT
    - name: Write instruction to delete PR label
      # When this workflow is triggered by a PR from a fork, it doesn't have
      # the permissions to delete PR labels. Instead, we forward the
      # instruction to delete the label to fuzzy-ci-privileged.yml.
      if: ${{ env.earlier_diff_was_approved == 'true' && steps.approved_diff.outputs.hash != steps.current_diff.outputs.hash }}
      run: |
        echo ${{ steps.approved_diff.outputs.hash }}
        echo ${{ steps.current_diff.outputs.hash }}
        mkdir -p ./forward
        jq -n \
          --arg instruction "delete_label" \
          --arg endpoint "$GH_API_LABELS" \
          '{instruction: $instruction, endpoint: $endpoint}' > ./forward/instruction.json
    - name: Upload instruction to delete label
      if: ${{ env.earlier_diff_was_approved == 'true' && steps.approved_diff.outputs.hash != steps.current_diff.outputs.hash }}
      uses: actions/upload-artifact@v4
      with:
        name: forwarded_instructions
        path: forward/
    - name: Return
      id: return
      run: |
        # Prints the first 100 lines of each diff file to the job log.
        print_head_of_diffs () {
          echo "--------beginning of full responses diff head--------"
          head -n 100 "$FULL_DIFF_FILE"
          echo "--------end of full responses diff head--------"
          echo "--------beginning of distilled data diff head--------"
          head -n 100 "$DISTILLED_DIFF_FILE"
          echo "--------end of distilled data diff head--------"
        }
        LABEL_NAME=$(cat .github/fuzzy-ci-helpers/label_name.txt)
        # `earlier_diff_was_approved` and `current_diff_exists` hold the
        # strings `true`/`false`, which are run as the shell builtins of the
        # same name.
        if $earlier_diff_was_approved; then
          echo "Earlier diff was approved."
          if [ ${{ steps.current_diff.outputs.hash }} == ${{ steps.approved_diff.outputs.hash }} ]; then
            echo "This diff has been approved earlier. Everything ok."
            exit 0
          else
            print_head_of_diffs
            # The messages are passed as arguments rather than as the format
            # string, so `%` or `\` in the interpolated values is printed
            # verbatim.
            printf '%s\n' \
              "The diff has changed since it was approved. So I'm removing the $LABEL_NAME label. If the new diff looks good, please set the label again." \
              "There's a head of the new diffs printed above. The whole diffs can be downloaded from $CURRENT_ACTION_URL ." \
              "Previous sha256: ${{ steps.approved_diff.outputs.hash }}" \
              "Current sha256: ${{ steps.current_diff.outputs.hash }}"
            echo "delete_label=true" >> $GITHUB_OUTPUT
            exit 1
          fi
        else
          if $current_diff_exists; then
            print_head_of_diffs
            printf '%s\n' \
              "There's a head of the diffs printed above. The diffs can be downloaded from $CURRENT_ACTION_URL ." \
              "If it looks good, please set the $LABEL_NAME label on the PR."
            exit 1
          else
            echo "No diff. All good."
            exit 0
          fi
        fi
# Job `approve` (an entry of the workflow's `jobs:` mapping).
#
# Runs when the `fuzzy-diff-looks-good` label is added to the PR. It looks up
# the diff artifact for the PR's current base/head commits. If there is none,
# it forwards a "delete label" instruction and fails. Otherwise it downloads
# the diff, hashes the full responses diff and forwards a "comment"
# instruction carrying that hash.
approve:
  name: Approve diff
  if: >
    github.event_name == 'pull_request' &&
    github.event.action == 'labeled' &&
    github.event.label.name == 'fuzzy-diff-looks-good'
  runs-on: ubuntu-22.04
  steps:
    - name: Retreive diff artifact meta-data
      id: diff_metadata
      run: |
        # Ask the API for artifacts with exactly this name instead of reading
        # only the first page of all repository artifacts, where the diff
        # artifact may not appear.
        all_artifacts=$(
          curl -sSL -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
            "$GH_API_ARTIFACTS?per_page=100&name=$DIFF_ARTIFACT_NAME"
        )
        # The name is passed via `--arg` rather than spliced into the jq
        # program. `first(...)` yields nothing when no artifact matches,
        # which leaves `id` empty below.
        diff_artifact=$(echo "$all_artifacts" | jq --arg name "$DIFF_ARTIFACT_NAME" 'first(.artifacts[] | select(.name == $name))')
        id=$(echo "$diff_artifact" | jq ".id")
        echo "id=$id" | tee -a $GITHUB_OUTPUT
        workflow_run=$(echo "$diff_artifact" | jq ".workflow_run | .id")
        echo "workflow_run=$workflow_run" | tee -a $GITHUB_OUTPUT
        if [ -z "$id" ]; then
          echo "exists=false" | tee -a $GITHUB_OUTPUT
        else
          echo "exists=true" | tee -a $GITHUB_OUTPUT
        fi
    - name: Write instruction to delete PR label
      # When this workflow is triggered by a PR from a fork, it doesn't have
      # the permissions to delete PR labels. Instead, we forward the
      # instruction to delete the label to fuzzy-ci-privileged.yml.
      if: ${{ steps.diff_metadata.outputs.exists == 'false' }}
      run: |
        mkdir -p ./forward
        jq -n \
          --arg instruction "delete_label" \
          --arg endpoint "$GH_API_LABELS" \
          '{instruction: $instruction, endpoint: $endpoint}' > ./forward/instruction.json
    - name: Upload instruction to delete label
      if: ${{ steps.diff_metadata.outputs.exists == 'false' }}
      uses: actions/upload-artifact@v4
      with:
        name: forwarded_instructions
        path: forward/
    - name: Fail due to diff not existing yet
      if: ${{ steps.diff_metadata.outputs.exists == 'false' }}
      run: |
        printf "You seem to have tried to approve a diff that doesn't exist yet.\nWait for the diff to have been generated and then try again."
        exit 1
    - name: Download diff
      env:
        id: ${{ steps.diff_metadata.outputs.id }}
      run: |
        # Doing this manually, since actions/download-artifact only works on the same workflow run on which the artifact was uploaded
        # `-O` names the output after the last URL segment, i.e. `zip`.
        curl -sSLO -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" "$GH_API_ARTIFACTS/$id/zip" -D headers.txt
    - name: Unzip downloaded diff
      run: |
        # On failure, dump the response headers and body to help debugging.
        unzip zip || (echo "Download of diff artifact failed" && cat headers.txt && cat zip && exit 1)
    - name: Compute full responses diff hash
      id: diff_hash
      run: |
        hash=$(sha256sum "$FULL_DIFF_FILE" | awk '{print $1}')
        echo "hash=$hash" | tee -a $GITHUB_OUTPUT
    - name: Write instruction to comment on PR
      # When this workflow is triggered by a PR from a fork, it doesn't have
      # the permissions to comment on PRs. Instead, we forward the
      # instruction to comment on the PR to fuzzy-ci-privileged.yml.
      env:
        approved_diffs_workflow_run: ${{ steps.diff_metadata.outputs.workflow_run }}
        approved_diffs_hash: ${{ steps.diff_hash.outputs.hash }}
      run: |
        mkdir -p ./forward
        jq -n \
          --arg instruction "comment" \
          --arg endpoint "$GH_API_COMMENTS" \
          --arg artifacts_url "$ACTIONS_RUNS_ENDPOINT/$approved_diffs_workflow_run" \
          --arg hash "$approved_diffs_hash" \
          '{instruction: $instruction, endpoint: $endpoint, artifacts_url: $artifacts_url, hash: $hash}' > ./forward/instruction.json
    - name: Upload instruction to comment on PR
      uses: actions/upload-artifact@v4
      with:
        name: forwarded_instructions
        path: forward/