Merge pull request #4303 from ASuciuX/test/mutants-filter-pr-next

Mutants run on PR differences
stacks-network · Jan 30, 2024 · 409a7b3 · 409a7b3
2 parents 43d8695 + 654a222
commit 409a7b3
Show file tree

Hide file tree

Showing 2 changed files with 234 additions and 0 deletions.
diff --git a/.github/workflows/pr-differences-mutants.yml b/.github/workflows/pr-differences-mutants.yml
@@ -0,0 +1,139 @@
+# Workflow that runs mutation testing on the differences introduced by a pull request.
+name: PR Differences Mutants
+
+on:
+  pull_request:
+    # Trigger only when the PR touches Rust source files.
+    paths: ['**.rs']
+    types: [opened, reopened, synchronize, ready_for_review]
+
+concurrency:
+  # Always cancel duplicate jobs
+  cancel-in-progress: true
+  # One group per PR head ref, falling back to the ref and then to the run id.
+  group: pr-differences-${{ github.head_ref || github.ref || github.run_id }}
+
+jobs:
+  # Decide which package groups need a mutation run - big (`stacks-node`/`stackslib`)
+  # and/or small (others) - and whether each group runs with or without shards.
+  check-big-packages-and-shards:
+    name: Check Packages and Shards
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: stacks-network/actions/stacks-core/mutation-testing/check-packages-and-shards@main
+        id: check_packages_and_shards
+
+    # Re-export the step outputs so that dependent jobs can read them through `needs`.
+    outputs:
+      run_small_packages: ${{ steps.check_packages_and_shards.outputs.run_small_packages }}
+      small_packages_with_shards: ${{ steps.check_packages_and_shards.outputs.small_packages_with_shards }}
+      run_big_packages: ${{ steps.check_packages_and_shards.outputs.run_big_packages }}
+      big_packages_with_shards: ${{ steps.check_packages_and_shards.outputs.big_packages_with_shards }}
+
+  # Mutation testing on the small packages whose functions were modified by the PR.
+  # Single job, no shards.
+  pr-differences-mutants-small-normal:
+    name: Mutation Testing - Normal, Small
+    runs-on: ubuntu-latest
+    needs: check-big-packages-and-shards
+
+    # Run only when small packages are affected and sharding was not requested.
+    if: >-
+      needs.check-big-packages-and-shards.outputs.run_small_packages == 'true' &&
+      needs.check-big-packages-and-shards.outputs.small_packages_with_shards == 'false'
+
+    steps:
+      - name: Run mutants on diffs
+        uses: stacks-network/actions/stacks-core/mutation-testing/pr-differences@main
+        with:
+          package-dimension: small
+
+  # Mutation testing on the small packages whose functions were modified by the PR.
+  # Work is split across a strategy matrix of shards.
+  pr-differences-mutants-small-shards:
+    name: Mutation Testing - Shards, Small
+    runs-on: ubuntu-latest
+    needs: check-big-packages-and-shards
+
+    # Run only when small packages are affected and sharding was requested.
+    if: >-
+      needs.check-big-packages-and-shards.outputs.run_small_packages == 'true' &&
+      needs.check-big-packages-and-shards.outputs.small_packages_with_shards == 'true'
+
+    strategy:
+      # A failing shard must not cancel the remaining shards.
+      fail-fast: false
+      matrix:
+        shard: [0, 1, 2, 3]
+
+    steps:
+      - name: Run mutants on diffs
+        uses: stacks-network/actions/stacks-core/mutation-testing/pr-differences@main
+        with:
+          package-dimension: small
+          shard: ${{ matrix.shard }}
+
+  # Mutation testing - Execute on PR on big packages that have functions modified (normal run, no shards)
+  pr-differences-mutants-big-normal:
+    name: Mutation Testing - Normal, Big
+
+    needs: check-big-packages-and-shards
+
+    # Run only when big packages are affected and sharding was not requested.
+    if: ${{ needs.check-big-packages-and-shards.outputs.run_big_packages == 'true' && needs.check-big-packages-and-shards.outputs.big_packages_with_shards == 'false' }}
+
+    runs-on: ubuntu-latest
+
+    steps:
+      # Step name matches the one used by the other three mutation jobs.
+      - name: Run mutants on diffs
+        # Environment variables set for this step only.
+        env:
+          BITCOIND_TEST: 1
+          RUST_BACKTRACE: full
+        uses: stacks-network/actions/stacks-core/mutation-testing/pr-differences@main
+        with:
+          package-dimension: "big"
+
+  # Mutation testing on the big packages whose functions were modified by the PR.
+  # Work is split across a strategy matrix of shards.
+  pr-differences-mutants-big-shards:
+    name: Mutation Testing - Shards, Big
+    runs-on: ubuntu-latest
+    needs: check-big-packages-and-shards
+
+    # Run only when big packages are affected and sharding was requested.
+    if: >-
+      needs.check-big-packages-and-shards.outputs.run_big_packages == 'true' &&
+      needs.check-big-packages-and-shards.outputs.big_packages_with_shards == 'true'
+
+    strategy:
+      # A failing shard must not cancel the remaining shards.
+      fail-fast: false
+      matrix:
+        shard: [0, 1, 2, 3, 4, 5, 6, 7]
+
+    steps:
+      - name: Run mutants on diffs
+        uses: stacks-network/actions/stacks-core/mutation-testing/pr-differences@main
+        with:
+          package-dimension: big
+          shard: ${{ matrix.shard }}
+        # Environment variables set for this step only.
+        env:
+          BITCOIND_TEST: 1
+          RUST_BACKTRACE: full
+
+  # Output the mutants and fail the workflow if there are missed/timeout/unviable mutants
+  output-mutants:
+    name: Output Mutants
+
+    runs-on: ubuntu-latest
+
+    needs:
+      - check-big-packages-and-shards
+      - pr-differences-mutants-small-normal
+      - pr-differences-mutants-small-shards
+      - pr-differences-mutants-big-normal
+      - pr-differences-mutants-big-shards
+
+    # The four mutation jobs above are gated by mutually exclusive conditions, so on
+    # every run at least one of them is skipped. A job whose `needs` contains a
+    # skipped (or failed) job is itself skipped unless its `if` uses a status check
+    # function, hence the explicit condition: run unless the workflow was cancelled,
+    # provided the check job produced the outputs consumed below.
+    if: ${{ !cancelled() && needs.check-big-packages-and-shards.result == 'success' }}
+
+    steps:
+      - name: Output Mutants
+        uses: stacks-network/actions/stacks-core/mutation-testing/output-pr-mutants@main
+        with:
+          big_packages: ${{ needs.check-big-packages-and-shards.outputs.run_big_packages }}
+          shards_for_big_packages: ${{ needs.check-big-packages-and-shards.outputs.big_packages_with_shards }}
+          small_packages: ${{ needs.check-big-packages-and-shards.outputs.run_small_packages }}
+          shards_for_small_packages: ${{ needs.check-big-packages-and-shards.outputs.small_packages_with_shards }}
diff --git a/docs/ci-release.md b/docs/ci-release.md
@@ -228,4 +228,99 @@ ex: Branch is named `develop` and the PR is numbered `113`
   - `stacks-core:2.1.0.0.0`
   - `stacks-core:latest`
 
+## Mutation Testing
+
+When a new Pull Request (PR) is submitted, this feature evaluates the quality of the tests added or modified in the PR.
+It checks the new and altered functions through mutation testing. 
+Mutation testing involves making small changes (mutations) to the code to check if the tests can detect these changes.
+
+The mutations are run with or without a [GitHub Actions matrix](https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs).
+The matrix is used when there is a large number of mutations to run ([see the documentation for the specific cases](https://github.com/stacks-network/actions/blob/main/stacks-core/mutation-testing/check-packages-and-shards/README.md#outputs)).
+We utilize a matrix strategy with shards to enable parallel execution in GitHub Actions.
+This approach allows for the concurrent execution of multiple jobs across various runners.
+The total workload is divided across all shards, effectively reducing the overall duration of a workflow because the time taken is approximately the total time divided by the number of shards (+ initial build & test time).
+This is particularly advantageous for large packages that have significant build and test times, as it enhances efficiency and speeds up the process.
+
+Because the duration of mutation testing depends directly on the existing tests, some packages, such as `stackslib` and `stacks-node`, are slower (due to the number of tests or the time it takes to run them).
+The mutations for these packages are run separately from the others, with one or more parallel jobs, depending on the number of mutations found.
+
+Once all the jobs have finished testing mutants, the last job collects all the tested mutations from the previous jobs, combines them, and outputs them to the `Summary` section of the workflow, at the bottom of the page.
+There, you can find all mutants grouped by category, with links to the functions they tested and a short description of how to fix the issue.
+The PR should only be approved/merged after all the mutants tested are in the `Caught` category.
+
+### Time required to run the workflow based on mutants outcome and packages' size
+
+- Small packages typically completed in under 30 minutes, aided by the use of shards.
+- Large packages like stackslib and stacks-node initially required about 20-25 minutes for build and test processes.
+    - Each "missed" and "caught" mutant took approximately 15 minutes. Using shards, this meant about 50-55 minutes for processing around 32 mutants (10-16 functions modified). Every additional 8 mutants added another 15 minutes to the runtime.
+    - "Unviable" mutants, which are functions lacking a Default implementation for their returned struct type, took less than a minute each.
+    - "Timeout" mutants typically required more time. However, these should be marked to be skipped (by adding a skip flag to their header) since they indicate functions unable to proceed in their test workflow with mutated values, as opposed to the original implementations.
+
+File:
+
+- [PR Differences Mutants](../.github/workflows/pr-differences-mutants.yml)
+
+### Mutant Outcomes
+
+- caught — A test failed with this mutant applied. 
+This is a good sign about test coverage.
+
+- missed — No test failed with this mutation applied, which seems to indicate a gap in test coverage. 
+Or, it may be that the mutant is indistinguishable from the correct code.
+In any case, you may wish to add a better test.
+
+- unviable — The attempted mutation doesn't compile. 
+This is inconclusive about test coverage, since the function's return structure may not implement `Default::default()` (one of the mutations applied), hence causing the compile to fail. 
+It is recommended to add a `Default` implementation for the return structures of these functions; only mark the function to be skipped as a last resort.
+
+- timeout — The mutation caused the test suite to run for a long time, until it was eventually killed.
+You might want to investigate the cause and only mark the function to be skipped if necessary.
+
+### Skipping Mutations
+
+Some functions may be inherently hard to cover with tests, for example if:
+
+- Generated mutants cause tests to hang.
+- You've chosen to test the functionality by human inspection or some higher-level integration tests.
+- The function has side effects or performance characteristics that are hard to test.
+- You've decided that the function is not important to test.
+
+To mark functions as skipped, so they are not mutated:
+
+- Add a Cargo dependency of the [mutants](https://crates.io/crates/mutants) crate, version `0.0.3` or later (this must be a regular `dependency`, not a `dev-dependency`, because the annotation will be on non-test code) and mark functions with `#[mutants::skip]`, or
+
+- You can avoid adding the dependency by using the slightly longer `#[cfg_attr(test, mutants::skip)]`.
+
+### Example
+
+```rust
+use std::time::{Duration, Instant};
+
+/// Returns true if the program should stop
+#[cfg_attr(test, mutants::skip)] // Returning false would cause a hang
+fn should_stop() -> bool {
+    true
+}
+
+/// Prints an increasing counter until `should_stop()` returns true,
+/// panicking if the loop has been running for more than five minutes.
+pub fn controlled_loop() {
+    let start = Instant::now();
+    for i in 0.. {
+        println!("{}", i);
+        if should_stop() {
+            break;
+        }
+        // Safety net: abort with a panic instead of looping forever.
+        if start.elapsed() > Duration::from_secs(60 * 5) {
+            panic!("timed out");
+        }
+    }
+}
+
+// Test module: its single test only checks that `controlled_loop` returns.
+mod test {
+    #[test]
+    fn controlled_loop_terminates() {
+        super::controlled_loop()
+    }
+}
+```
+
 ---