diff --git a/.github/workflows/add_label_from_diff.yaml b/.github/workflows/add_label_from_diff.yaml
new file mode 100644
index 0000000000000..271ac1b95ce59
--- /dev/null
+++ b/.github/workflows/add_label_from_diff.yaml
@@ -0,0 +1,42 @@
+name: Autolabel PRs
+
+on:
+  pull_request:
+    types: [opened]
+  push:
+    paths:
+      - scripts/autolabel.lean
+      - .github/workflows/add_label_from_diff.yaml
+
+jobs:
+  add_topic_label:
+    name: Add topic label
+    runs-on: ubuntu-latest
+    # Don't run on forks, where we wouldn't have permissions to add the label anyway.
+    if: github.repository == 'leanprover-community/mathlib4'
+    permissions:
+      issues: write
+      checks: write
+      pull-requests: write
+      contents: read
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    - name: install elan
+      run: |
+        set -o pipefail
+        curl -sSfL https://github.com/leanprover/elan/releases/download/v3.1.1/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz
+        ./elan-init -y --default-toolchain none
+        echo "$HOME/.elan/bin" >> "${GITHUB_PATH}"
+    - name: lake exe autolabel
+      run: |
+        # the checkout dance, to avoid a detached head
+        git checkout master
+        git checkout -
+        lake exe autolabel "$NUMBER"
+      env:
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GH_REPO: ${{ github.repository }}
+        NUMBER: ${{ github.event.number }}
diff --git a/Mathlib.lean b/Mathlib.lean
index 3442f8704e094..ddf844378e225 100644
--- a/Mathlib.lean
+++ b/Mathlib.lean
@@ -3475,6 +3475,7 @@ import Mathlib.NumberTheory.Zsqrtd.Basic
 import Mathlib.NumberTheory.Zsqrtd.GaussianInt
 import Mathlib.NumberTheory.Zsqrtd.QuadraticReciprocity
 import Mathlib.NumberTheory.Zsqrtd.ToReal
+import Mathlib.Numerology.Everything
 import Mathlib.Order.Antichain
 import Mathlib.Order.Antisymmetrization
 import Mathlib.Order.Atoms
diff --git a/Mathlib/Numerology/Everything.lean b/Mathlib/Numerology/Everything.lean
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/lakefile.lean b/lakefile.lean
index 0b406c4741783..1af1f1e8b764a 100644
--- a/lakefile.lean
+++ b/lakefile.lean
@@ -80,6 +80,16 @@ lean_lib docs where
 ## Executables provided by Mathlib
 -/
 
+/--
+`lake exe autolabel 150100` adds a topic label to PR `150100` if there is a unique choice.
+This requires GitHub CLI `gh` to be installed!
+
+Calling `lake exe autolabel` without a PR number will print the result without applying
+any labels online.
+-/
+lean_exe autolabel where
+  srcDir := "scripts"
+
 /-- `lake exe cache get` retrieves precompiled `.olean` files from a central server. -/
 lean_exe cache where
   root := `Cache.Main
diff --git a/scripts/autolabel.lean b/scripts/autolabel.lean
new file mode 100644
index 0000000000000..ccf7d7321c35e
--- /dev/null
+++ b/scripts/autolabel.lean
@@ -0,0 +1,325 @@
+/-
+Copyright (c) 2024 Damiano Testa. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Jon Eugster, Damiano Testa
+-/
+import Lean.Elab.Command
+
+/-!
+# Automatic labelling of PRs
+
+This file contains the script to automatically assign a GitHub label to a PR.
+
+## Label definition
+
+The mapping from GitHub labels to Mathlib folders is done in this file and
+needs to be updated here if necessary:
+
+* `AutoLabel.mathlibLabels` contains an assignment of GitHub labels to folders inside
+  the mathlib repository. If no folder is specified, a label like `t-set-theory` will be
+  interpreted as matching the folder `"Mathlib" / "SetTheory"`.
+* `AutoLabel.mathlibUnlabelled` contains subfolders of `Mathlib/` which are deliberately
+  left without topic label.
+
+## lake exe autolabel
+
+`lake exe autolabel` uses `git diff --name-only origin/master...HEAD` to determine which
+files have been modified and then finds all labels which should be added based on these changes.
+These are printed for testing purposes.
+
+`lake exe autolabel [NUMBER]` will further try to add the applicable labels
+to the PR specified. This requires the **GitHub CLI** `gh` to be installed!
+Example: `lake exe autolabel 10402` for PR #10402.
+
+For the time being, the script only adds a label if it finds a **single unique label**
+which would apply. If multiple labels are found, nothing happens.
+
+## Workflow
+
+There is a mathlib workflow `.github/workflows/add_label_from_diff.yaml` which executes
+this script automatically.
+
+Currently it is set to run only one time when a PR is created.
+
+## Tests
+
+Additionally, the script does a few consistency checks:
+
+- It ensures all paths specified in `AutoLabel.mathlibLabels` exist.
+- It makes sure all subfolders of `Mathlib/` belong to at least one label.
+  There is `AutoLabel.mathlibUnlabelled` to add exceptions for this test.
+
+-/
+
+open Lean System
+
+namespace AutoLabel
+
+/--
+A `Label` consists of the following fields.
+* The `label` field is the actual GitHub label name.
+* The `dirs` field is the array of all "root paths" such that a modification in a file contained
+  in one of these paths should be labelled with `label`.
+* The `exclusions` field is the array of all "root paths" that are excluded, among the
+  ones that start with the ones in `dirs`.
+  Any modifications to a file in an excluded path are ignored for the purposes of labelling.
+-/
+structure Label where
+  /-- The label name as it appears on GitHub -/
+  label : String
+  /-- Array of paths which fall under this label. e.g. `"Mathlib" / "Algebra"`.
+
+  For a label of the form `t-set-theory` this defaults to `#["Mathlib" / "SetTheory"]`. -/
+  dirs : Array FilePath := if label.startsWith "t-" then
+      #["Mathlib" / ("".intercalate (label.splitOn "-" |>.drop 1 |>.map .capitalize))]
+    else #[]
+  /-- Array of paths which should be excluded.
+  Any modifications to a file in an excluded path are ignored for the purposes of labelling. -/
+  exclusions : Array FilePath := #[]
+  deriving BEq, Hashable
+
+/--
+Mathlib labels and their corresponding folders. Add new labels and folders here!
+-/
+def mathlibLabels : Array Label := #[
+  { label := "t-algebra",
+    dirs := #[
+      "Mathlib" / "Algebra",
+      "Mathlib" / "FieldTheory",
+      "Mathlib" / "RingTheory",
+      "Mathlib" / "GroupTheory",
+      "Mathlib" / "RepresentationTheory",
+      "Mathlib" / "LinearAlgebra"] },
+  { label := "t-algebraic-geometry",
+    dirs := #[
+      "Mathlib" / "AlgebraicGeometry",
+      "Mathlib" / "Geometry" / "RingedSpace"] },
+  { label := "t-analysis" },
+  { label := "t-category-theory" },
+  { label := "t-combinatorics" },
+  { label := "t-computability" },
+  { label := "t-condensed" },
+  { label := "t-data" },
+  { label := "t-differential-geometry",
+    dirs := #["Mathlib" / "Geometry" / "Manifold"] },
+  { label := "t-dynamics" },
+  { label := "t-euclidean-geometry",
+    dirs := #["Mathlib" / "Geometry" / "Euclidean"] },
+  { label := "t-linter",
+    dirs := #["Mathlib" / "Tactic" / "Linter"] },
+  { label := "t-logic",
+    dirs := #[
+      "Mathlib" / "Logic",
+      "Mathlib" / "ModelTheory"] },
+  { label := "t-measure-probability",
+    dirs := #[
+      "Mathlib" / "MeasureTheory",
+      "Mathlib" / "Probability",
+      "Mathlib" / "InformationTheory"] },
+  { label := "t-meta",
+    dirs := #[
+      "Mathlib" / "Control",
+      "Mathlib" / "Lean",
+      "Mathlib" / "Mathport",
+      "Mathlib" / "Tactic",
+      "Mathlib" / "Util"],
+    exclusions := #["Mathlib" / "Tactic" / "Linter"] },
+  { label := "t-number-theory" },
+  { label := "t-order" },
+  { label := "t-set-theory" },
+  { label := "t-topology",
+    dirs := #[
+      "Mathlib" / "Topology",
+      "Mathlib" / "AlgebraicTopology"] },
+  { label := "CI",
+    dirs := #[".github"] },
+  { label := "IMO",
+    dirs := #["Archive" / "Imo"] } ]
+
+/-- Exceptions inside `Mathlib/` which are not covered by any label. -/
+def mathlibUnlabelled : Array FilePath := #[
+    "Mathlib" / "Deprecated",
+    "Mathlib" / "Init",
+    "Mathlib" / "Testing",
+    "Mathlib" / "Std" ]
+
+/-- Checks if the folder `path` lies inside the folder `dir`. -/
+def _root_.System.FilePath.isPrefixOf (dir path : FilePath) : Bool :=
+  -- use `dir / ""` to prevent partial matching of folder names
+  (dir / "").normalize.toString.isPrefixOf (path / "").normalize.toString
+
+/--
+Return all names of labels in `mathlibLabels` which match
+at least one of the `files`.
+
+* `files`: array of relative paths starting from the mathlib root directory.
+-/
+def getMatchingLabels (files : Array FilePath) : Array String :=
+  let applicable := mathlibLabels.filter fun label ↦
+    -- first exclude all files the label excludes,
+    -- then see if any file remains included by the label
+    let notExcludedFiles := files.filter fun file ↦
+      label.exclusions.all (!·.isPrefixOf file)
+    label.dirs.any (fun dir ↦ notExcludedFiles.any (dir.isPrefixOf ·))
+  -- return sorted list of label names
+  applicable.map (·.label) |>.qsort (· < ·)
+
+/-!
+Testing the functionality of the declarations defined in this script
+-/
+section Tests
+
+-- Test `FilePath.isPrefixOf`
+#guard ("Mathlib" / "Algebra" : FilePath).isPrefixOf ("Mathlib" / "Algebra" / "Basic.lean")
+
+-- Test `FilePath.isPrefixOf` does not trigger on partial prefixes
+#guard ! ("Mathlib" / "Algebra" : FilePath).isPrefixOf ("Mathlib" / "AlgebraicGeometry")
+
+#guard getMatchingLabels #[] == #[]
+-- Test default value for `label.dirs` works
+#guard getMatchingLabels #["Mathlib" / "SetTheory" / "ZFC"] == #["t-set-theory"]
+-- Test exclusion
+#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Abel.lean"] == #["t-meta"]
+#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Linter" / "Lint.lean"] == #["t-linter"]
+#guard getMatchingLabels #[
+  "Mathlib" / "Tactic"/ "Linter" / "Lint.lean",
+  "Mathlib" / "Tactic" / "Abel.lean" ] == #["t-linter", "t-meta"]
+
+/-- Testing function to ensure the labels defined in `mathlibLabels` cover all
+subfolders of `Mathlib/`.
+
+Returns `#[path]` if every sub-directory of `path` is uncovered as a whole (or `#[]` if
+`path` is then listed in `exceptions`); otherwise returns the uncovered paths found inside
+those sub-directories of `path` that no label matches. -/
+partial def findUncoveredPaths (path : FilePath) (exceptions : Array FilePath := #[]) :
+    IO <| Array FilePath := do
+  let mut notMatched : Array FilePath := #[]
+  -- stays `true` only while every sub-directory seen so far is uncovered as a whole
+  let mut allUncovered := true
+  -- all directories inside `path`
+  let subDirs ← (← path.readDir).map (·.path) |>.filterM (do FilePath.isDir ·)
+  for dir in subDirs do
+    -- if the sub directory is not matched by a label,
+    -- we go recursively into it
+    if (getMatchingLabels #[dir]).size == 0 then
+      let uncovered ← findUncoveredPaths dir exceptions
+      -- `dir` is uncovered as a whole exactly when the recursion reports `dir` itself;
+      -- comparing the sizes of the arrays instead can match by coincidence
+      allUncovered := allUncovered && uncovered == #[dir]
+      notMatched := notMatched ++ uncovered
+    else
+      allUncovered := false
+  -- a directory should be flagged if none of its sub-directories is matched by a label
+  -- note: we assume here the base directory, i.e. "Mathlib" is never matched by a label,
+  -- therefore we skip this test.
+  if allUncovered then
+    if exceptions.contains path then
+      return #[]
+    else
+      return #[path]
+  else
+    return notMatched
+
+end Tests
+
+/--
+Create a message which GitHub CI parses as annotation and displays at the specified file.
+
+Note: `file` is duplicated below so that it is also visible in the plain text output.
+
+* `type`: "error" or "warning"
+* `file`: file where the annotation should be displayed
+* `title`: title of the annotation
+* `message`: annotation message
+-/
+def githubAnnotation (type file title message : String) : String :=
+  s!"::{type} file={file},title={title}::{file}: {message}"
+
+end AutoLabel
+
+open IO AutoLabel in
+
+/-- `args` is expected to have length 0 or 1, where the first argument is the PR number.
+An empty first argument is treated like a missing one.
+
+If a PR number is provided, the script requires GitHub CLI `gh` to be installed in order
+to add the label to the PR.
+
+## Exit codes:
+
+- `0`: success
+- `1`: invalid arguments provided
+- `2`: invalid labels defined
+- `3`: ~labels do not cover all of `Mathlib/`~ (unused; only emitting warning)
+-/
+unsafe def main (args : List String): IO Unit := do
+  if args.length > 1 then
+    println s!"::error:: autolabel: invalid number of arguments ({args.length}), \
+    expected at most 1. Please run without arguments or provide the target PR's \
+    number as a single argument!"
+    IO.Process.exit 1
+  -- An empty argument counts as "no PR number": the workflow always passes `"$NUMBER"`,
+  -- which is presumably empty when the run was not triggered by a PR (e.g. on `push`).
+  let prNumber? := (args[0]?).filter (· != "")
+
+  -- test: validate that all paths in `mathlibLabels` actually exist
+  let mut valid := true
+  for label in mathlibLabels do
+    for dir in label.dirs do
+      unless ← FilePath.pathExists dir do
+        -- print github annotation error
+        println <| AutoLabel.githubAnnotation "error" "scripts/autolabel.lean"
+          s!"Misformatted `{ ``AutoLabel.mathlibLabels }`"
+          s!"directory '{dir}' does not exist but is included by label '{label.label}'. \
+          Please update `{ ``AutoLabel.mathlibLabels }`!"
+        valid := false
+    for dir in label.exclusions do
+      unless ← FilePath.pathExists dir do
+        -- print github annotation error
+        println <| AutoLabel.githubAnnotation "error" "scripts/autolabel.lean"
+          s!"Misformatted `{ ``AutoLabel.mathlibLabels }`"
+          s!"directory '{dir}' does not exist but is excluded by label '{label.label}'. \
+          Please update `{ ``AutoLabel.mathlibLabels }`!"
+        valid := false
+  unless valid do
+    IO.Process.exit 2
+
+  -- test: validate that the labels cover all of the `Mathlib/` folder
+  let notMatchedPaths ← findUncoveredPaths "Mathlib" (exceptions := mathlibUnlabelled)
+  if notMatchedPaths.size > 0 then
+    -- print github annotation warning
+    -- note: only emitting a warning because the workflow is only triggered on the first commit
+    -- of a PR and could therefore lead to unexpected behaviour if a folder was created later.
+    println <| AutoLabel.githubAnnotation "warning" "scripts/autolabel.lean"
+      s!"Incomplete `{ ``AutoLabel.mathlibLabels }`"
+      s!"the following paths inside `Mathlib/` are not covered \
+      by any label: {notMatchedPaths} Please modify `AutoLabel.mathlibLabels` accordingly!"
+    -- IO.Process.exit 3
+
+  -- get the modified files
+  let gitDiff ← IO.Process.run {
+    cmd := "git",
+    args := #["diff", "--name-only", "origin/master...HEAD"] }
+  let modifiedFiles : Array FilePath := (gitDiff.splitOn "\n").toArray.map (⟨·⟩)
+
+  -- find labels covering the modified files
+  let labels := getMatchingLabels modifiedFiles
+
+  match labels with
+  | #[] =>
+    println s!"No applicable labels found!"
+  | #[label] =>
+    println s!"Exactly one label found: {label}"
+    match prNumber? with
+    | some n =>
+      let _ ← IO.Process.run {
+        cmd := "gh",
+        args := #["pr", "edit", n, "--add-label", label] }
+      println s!"Added label: {label}"
+    | none =>
+      println s!"No PR-number provided, skipping adding labels. \
+      (call `lake exe autolabel 150602` to add the labels to PR `150602`)"
+  | labels =>
+    println s!"Multiple labels found: {labels}"
+    println s!"Not adding any label."
+  IO.Process.exit 0