Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: autolabel A #17166

Closed
wants to merge 55 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
6536d53
feat: add autolabelling of PRs
adomani Aug 18, 2024
64fdaad
fix test
adomani Aug 18, 2024
7a1df6e
Update Mathlib/Tactic/Autolabels/EnvExtension.lean
adomani Aug 30, 2024
2a9d807
Comments to address Jon's review
adomani Sep 5, 2024
a7f9140
Merge remote-tracking branch 'origin/master' into adomani/autolabels_…
adomani Sep 5, 2024
8869066
update deprecations, use simple extension, sort output
adomani Sep 5, 2024
e6872d2
remove cache get step
adomani Sep 11, 2024
c93053b
use tee
adomani Sep 15, 2024
f8fbcd3
remove clutter
adomani Sep 15, 2024
5b7e9c4
fix
adomani Sep 15, 2024
5db3a3b
reimplementation
joneugster Sep 23, 2024
935552e
add gh functionality
joneugster Sep 23, 2024
0ec6a5b
add env
joneugster Sep 23, 2024
d22a5b5
debug
joneugster Sep 23, 2024
2514dee
fix
joneugster Sep 23, 2024
039e3b9
what's the checkout dance
joneugster Sep 23, 2024
58ec715
debug
joneugster Sep 23, 2024
2b37c61
fix
joneugster Sep 23, 2024
387eaf2
simplify logic
joneugster Sep 23, 2024
c7331a0
cleanup workflow
joneugster Sep 23, 2024
468e4d9
delete old approach
joneugster Sep 23, 2024
91a3909
doc
joneugster Sep 23, 2024
52027f1
use 'gh pr'
joneugster Sep 23, 2024
7daf92f
fix label mapping
joneugster Sep 23, 2024
6e1520e
code review by adomani
joneugster Sep 24, 2024
11323cd
code review by grunweg
joneugster Sep 24, 2024
5ebde10
unicode fun
joneugster Sep 24, 2024
8a87f44
error on multiple arguments
joneugster Sep 24, 2024
2a9a253
adding tests; incorporating feedback
joneugster Sep 24, 2024
703923a
implement autodetection of folder name
joneugster Sep 24, 2024
bf008de
typo
joneugster Sep 24, 2024
e7d4ad4
Apply suggestions from code review
joneugster Sep 24, 2024
d282ed6
test workflow failure
joneugster Sep 24, 2024
b0d8c74
undo test
joneugster Sep 24, 2024
f858411
add test ensuring all content folders are covered
joneugster Sep 24, 2024
2334e69
move test to executable call
joneugster Sep 24, 2024
46c4c05
test
joneugster Sep 24, 2024
87b7b0a
undo tests
joneugster Sep 24, 2024
31030f0
remove action triggering on push
joneugster Sep 24, 2024
dc82420
cleanup
joneugster Sep 24, 2024
f82c300
Update scripts/autolabel.lean
joneugster Sep 24, 2024
d38a6a2
turn error/warnings into github annotations
joneugster Sep 24, 2024
b039a37
fix
joneugster Sep 24, 2024
382dd5d
remove workflow_dispatch
joneugster Sep 24, 2024
47588c7
update documentation about requiring gh
joneugster Sep 25, 2024
66f4bee
update "CI" label definiton
joneugster Sep 25, 2024
d2da3fe
Apply suggestions from code review
joneugster Sep 25, 2024
0a8735f
Apply suggestions from code review
joneugster Sep 26, 2024
dd53deb
update line number of github annotations
joneugster Sep 25, 2024
516f65c
update github annotations
joneugster Sep 26, 2024
ba612b1
fixes
joneugster Sep 26, 2024
dba7335
typos
joneugster Sep 26, 2024
7756396
more verbose error message
joneugster Sep 26, 2024
8fdf59c
trigger workflow when modifying itself
joneugster Sep 26, 2024
78199af
initial test
joneugster Sep 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/add_label_from_diff.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: Autolabel PRs

on:
pull_request:
types: [opened]
push:
paths:
- scripts/autolabel.lean
- .github/workflows/add_label_from_diff.yaml

jobs:
add_topic_label:
name: Add topic label
runs-on: ubuntu-latest
# Don't run on forks, where we wouldn't have permissions to add the label anyway.
if: github.repository == 'leanprover-community/mathlib4'
permissions:
issues: write
checks: write
pull-requests: write
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: install elan
run: |
set -o pipefail
curl -sSfL https://github.com/leanprover/elan/releases/download/v3.1.1/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz
./elan-init -y --default-toolchain none
echo "$HOME/.elan/bin" >> "${GITHUB_PATH}"
- name: lake exe autolabel
run: |
# the checkout dance, to avoid a detached head
git checkout master
git checkout -
lake exe autolabel "$NUMBER"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_REPO: ${{ github.repository }}
NUMBER: ${{ github.event.number }}
1 change: 1 addition & 0 deletions Mathlib.lean
Original file line number Diff line number Diff line change
Expand Up @@ -3475,6 +3475,7 @@ import Mathlib.NumberTheory.Zsqrtd.Basic
import Mathlib.NumberTheory.Zsqrtd.GaussianInt
import Mathlib.NumberTheory.Zsqrtd.QuadraticReciprocity
import Mathlib.NumberTheory.Zsqrtd.ToReal
import Mathlib.Numerology.Everything
import Mathlib.Order.Antichain
import Mathlib.Order.Antisymmetrization
import Mathlib.Order.Atoms
Expand Down
Empty file.
10 changes: 10 additions & 0 deletions lakefile.lean
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ lean_lib docs where
## Executables provided by Mathlib
-/

/--
`lake exe autolabel 150100` adds a topic label to PR `150100` if there is a unique choice.
This requires GitHub CLI `gh` to be installed!

Calling `lake exe autolabel` without a PR number will print the result without applying
any labels online.
-/
lean_exe autolabel where
srcDir := "scripts"

/-- `lake exe cache get` retrieves precompiled `.olean` files from a central server. -/
lean_exe cache where
root := `Cache.Main
Expand Down
310 changes: 310 additions & 0 deletions scripts/autolabel.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
/-

Check warning on line 1 in scripts/autolabel.lean

View workflow job for this annotation

GitHub Actions / Add topic label

Incomplete `AutoLabel.mathlibLabels`

scripts/autolabel.lean: the following paths inside `Mathlib/` are not covered by any label: #[Mathlib/Numerology] Please modify `AutoLabel.mathlibLabels` accordingly!
Copyright (c) 2024 Damiano Testa. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Jon Eugster, Damiano Testa
-/
import Lean.Elab.Command

/-!
# Automatic labelling of PRs

This file contains the script to automatically assign a GitHub label to a PR.

## Label definition

The mapping from GitHub labels to Mathlib folders is done in this file and
needs to be updated here if necessary:

* `AutoLabel.mathlibLabels` contains an assignment of GitHub labels to folders inside
the mathlib repository. If no folder is specified, a label like `t-set-theory` will be
interpreted as matching the folder `"Mathlib" / "SetTheory"`.
* `AutoLabel.mathlibUnlabelled` contains subfolders of `Mathlib/` which are deliberately
left without topic label.

## lake exe autolabel

`lake exe autolabel` uses `git diff --name-only origin/master...HEAD` to determine which
files have been modifed and then finds all labels which should be added based on these changes.
These are printed for testing purposes.

`lake exe autolabel [NUMBER]` will further try to add the applicable labels
to the PR specified. This requires the **GitHub CLI** `gh` to be installed!
Example: `lake exe autolabel 10402` for PR #10402.

For the time being, the script only adds a label if it finds a **single unique label**
which would apply. If multiple labels are found, nothing happens.

## Workflow

There is a mathlib workflow `.github/workflows/add_label_from_diff.yaml` which executes
this script automatically.

Currently it is set to run only one time when a PR is created.

## Tests

Additionally, the script does a few consistency checks:

- it ensures all paths in specified in `AutoLabel.mathlibLabels` exist
- It makes sure all subfolders of `Mathlib/` belong to at least one label.
There is `AutoLabel.mathlibUnlabelled` to add exceptions for this test.

-/

open Lean System

namespace AutoLabel

/--
A `Label` consists of the
* The `label` field is the actual GitHub label name.
* The `dirs` field is the array of all "root paths" such that a modification in a file contained
in one of these paths should be labelled with `label`.
* The `exclusions` field is the array of all "root paths" that are excluded, among the
ones that start with the ones in `dirs`.
Any modifications to a file in an excluded path is ignored for the purposes of labelling.
-/
structure Label where
/-- The label name as it appears on GitHub -/
label : String
/-- Array of paths which fall under this label. e.g. `"Mathlib" / "Algebra"`.

For a label of the form `t-set-theory` this defaults to `#["Mathlib" / "SetTheory"]`. -/
dirs : Array FilePath := if label.startsWith "t-" then
#["Mathlib" / ("".intercalate (label.splitOn "-" |>.drop 1 |>.map .capitalize))]
else #[]
/-- Array of paths which should be excluded.
Any modifications to a file in an excluded path are ignored for the purposes of labelling. -/
exclusions : Array FilePath := #[]
deriving BEq, Hashable

/--
Mathlib labels and their corresponding folders. Add new labels and folders here!
-/
def mathlibLabels : Array Label := #[
{ label := "t-algebra",
dirs := #[
"Mathlib" / "Algebra",
"Mathlib" / "FieldTheory",
"Mathlib" / "RingTheory",
"Mathlib" / "GroupTheory",
"Mathlib" / "RepresentationTheory",
"Mathlib" / "LinearAlgebra"] },
{ label := "t-algebraic-geometry",
dirs := #[
"Mathlib" / "AlgebraicGeometry",
"Mathlib" / "Geometry" / "RingedSpace"] },
{ label := "t-analysis" },
{ label := "t-category-theory" },
{ label := "t-combinatorics" },
{ label := "t-computability" },
{ label := "t-condensed" },
{ label := "t-data" },
{ label := "t-differential-geometry",
dirs := #["Mathlib" / "Geometry" / "Manifold"] },
{ label := "t-dynamics" },
{ label := "t-euclidean-geometry",
dirs := #["Mathlib" / "Geometry" / "Euclidean"] },
{ label := "t-linter",
dirs := #["Mathlib" / "Tactic" / "Linter"] },
{ label := "t-logic",
dirs := #[
"Mathlib" / "Logic",
"Mathlib" / "ModelTheory"] },
{ label := "t-measure-probability",
dirs := #[
"Mathlib" / "MeasureTheory",
"Mathlib" / "Probability",
"Mathlib" / "InformationTheory"] },
{ label := "t-meta",
dirs := #[
"Mathlib" / "Control",
"Mathlib" / "Lean",
"Mathlib" / "Mathport",
"Mathlib" / "Tactic",
"Mathlib" / "Util"],
exclusions := #["Mathlib" / "Tactic" / "Linter"] },
{ label := "t-number-theory" },
{ label := "t-order" },
{ label := "t-set-theory" },
{ label := "t-topology",
dirs := #[
"Mathlib" / "Topology",
"Mathlib" / "AlgebraicTopology"] },
{ label := "CI",
dirs := #[".github"] },
{ label := "IMO",
dirs := #["Archive" / "Imo"] } ]

/-- Exceptions inside `Mathlib/` which are not covered by any label. -/
def mathlibUnlabelled : Array FilePath := #[
"Mathlib" / "Deprecated",
"Mathlib" / "Init",
"Mathlib" / "Testing",
"Mathlib" / "Std" ]

/-- Checks if the folder `path` lies inside the folder `dir`. -/
def _root_.System.FilePath.isPrefixOf (dir path : FilePath) : Bool :=
-- use `dir / ""` to prevent partial matching of folder names
(dir / "").normalize.toString.isPrefixOf (path / "").normalize.toString

/--
Return all names of labels in `mathlibLabels` which match
at least one of the `files`.

* `files`: array of relative paths starting from the mathlib root directory.
-/
def getMatchingLabels (files : Array FilePath) : Array String :=
let applicable := mathlibLabels.filter fun label ↦
-- first exclude all files the label excludes,
-- then see if any file remains included by the label
let notExcludedFiles := files.filter fun file ↦
label.exclusions.all (!·.isPrefixOf file)
label.dirs.any (fun dir ↦ notExcludedFiles.any (dir.isPrefixOf ·))
-- return sorted list of label names
applicable.map (·.label) |>.qsort (· < ·)

/-!
Testing the functionality of the declarations defined in this script
-/
section Tests

-- Test `FilePath.isPrefixOf`
#guard ("Mathlib" / "Algebra" : FilePath).isPrefixOf ("Mathlib" / "Algebra" / "Basic.lean")

-- Test `FilePath.isPrefixOf` does not trigger on partial prefixes
#guard ! ("Mathlib" / "Algebra" : FilePath).isPrefixOf ("Mathlib" / "AlgebraicGeometry")

#guard getMatchingLabels #[] == #[]
-- Test default value for `label.dirs` works
#guard getMatchingLabels #["Mathlib" / "SetTheory" / "ZFC"] == #["t-set-theory"]
-- Test exclusion
#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Abel.lean"] == #["t-meta"]
#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Linter" / "Lint.lean"] == #["t-linter"]
#guard getMatchingLabels #[
"Mathlib" / "Tactic"/ "Linter" / "Lint.lean",
"Mathlib" / "Tactic" / "Abel.lean" ] == #["t-linter", "t-meta"]

/-- Testing function to ensure the labels defined in `mathlibLabels` cover all
subfolders of `Mathlib/`. -/
partial def findUncoveredPaths (path : FilePath) (exceptions : Array FilePath := #[]) :
IO <| Array FilePath := do
let mut notMatched : Array FilePath := #[]
-- all directories inside `path`
let subDirs ← (← path.readDir).map (·.path) |>.filterM (do FilePath.isDir ·)
for dir in subDirs do
-- if the sub directory is not matched by a label,
-- we go recursively into it
if (getMatchingLabels #[dir]).size == 0 then
notMatched := notMatched ++ (← findUncoveredPaths dir exceptions)
-- a directory should be flagged if none of its sub-directories is matched by a label
-- note: we assume here the base directory, i.e. "Mathlib" is never matched by a label,
-- therefore we skip this test.
if notMatched.size == subDirs.size then
if exceptions.contains path then
return #[]
else
return #[path]
else
return notMatched

end Tests

/--
Create a message which GitHub CI parses as annotation and displays at the specified file.

Note: `file` is duplicated below so that it is also visible in the plain text output.

* `type`: "error" or "warning"
* `file`: file where the annotation should be displayed
* `title`: title of the annotation
* `message`: annotation message
-/
def githubAnnotation (type file title message : String) : String :=
s!"::{type} file={file},title={title}::{file}: {message}"

end AutoLabel

open IO AutoLabel in

/-- `args` is expected to have length 0 or 1, where the first argument is the PR number.

If a PR number is provided, the script requires GitHub CLI `gh` to be installed in order
to add the label to the PR.

## Exit codes:

- `0`: success
- `1`: invalid arguments provided
- `2`: invalid labels defined
- `3`: ~labels do not cover all of `Mathlib/`~ (unused; only emitting warning)
-/
unsafe def main (args : List String): IO Unit := do
if args.length > 1 then
println s!"::error:: autolabel: invalid number of arguments ({args.length}), \
expected at most 1. Please run without arguments or provide the target PR's \
number as a single argument!"
IO.Process.exit 1
let prNumber? := args[0]?

-- test: validate that all paths in `mathlibLabels` actually exist
let mut valid := true
for label in mathlibLabels do
for dir in label.dirs do
unless ← FilePath.pathExists dir do
-- print github annotation error
println <| AutoLabel.githubAnnotation "error" "scripts/autolabel.lean"
s!"Misformatted `{ ``AutoLabel.mathlibLabels }`"
s!"directory '{dir}' does not exist but is included by label '{label.label}'. \
Please update `{ ``AutoLabel.mathlibLabels }`!"
valid := false
for dir in label.exclusions do
unless ← FilePath.pathExists dir do
-- print github annotation error
println <| AutoLabel.githubAnnotation "error" "scripts/autolabel.lean"
s!"Misformatted `{ ``AutoLabel.mathlibLabels }`"
s!"directory '{dir}' does not exist but is excluded by label '{label.label}'. \
Please update `{ ``AutoLabel.mathlibLabels }`!"
valid := false
unless valid do
IO.Process.exit 2

-- test: validate that the labels cover all of the `Mathlib/` folder
let notMatchedPaths ← findUncoveredPaths "Mathlib" (exceptions := mathlibUnlabelled)
if notMatchedPaths.size > 0 then
-- print github annotation warning
-- note: only emitting a warning because the workflow is only triggered on the first commit
-- of a PR and could therefore lead to unexpected behaviour if a folder was created later.
println <| AutoLabel.githubAnnotation "warning" "scripts/autolabel.lean"
s!"Incomplete `{ ``AutoLabel.mathlibLabels }`"
s!"the following paths inside `Mathlib/` are not covered \
by any label: {notMatchedPaths} Please modify `AutoLabel.mathlibLabels` accordingly!"
-- IO.Process.exit 3

-- get the modified files
let gitDiff ← IO.Process.run {
cmd := "git",
args := #["diff", "--name-only", "origin/master...HEAD"] }
let modifiedFiles : Array FilePath := (gitDiff.splitOn "\n").toArray.map (⟨·⟩)

-- find labels covering the modified files
let labels := getMatchingLabels modifiedFiles

match labels with
| #[] =>
println s!"No applicable labels found!"
| #[label] =>
println s!"Exactly one label found: {label}"
match prNumber? with
| some n =>
let _ ← IO.Process.run {
cmd := "gh",
args := #["pr", "edit", n, "--add-label", label] }
println s!"Added label: {label}"
| none =>
println s!"No PR-number provided, skipping adding labels. \
(call `lake exe autolabel 150602` to add the labels to PR `150602`)"
| labels =>
println s!"Multiple labels found: {labels}"
println s!"Not adding any label."
IO.Process.exit 0
Loading