regression tests & benchmark

epolack · epolack · commit d50e4afa3fee · 2024-02-26T10:05:43.000+01:00
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -21,11 +21,11 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - {mode: stable,  os: ubuntu-latest,  payload: noslow-example }
-          - {mode: stable,  os: macOS-latest,   payload: noslow         }
-          - {mode: stable,  os: windows-latest, payload: noslow         }
-          - {mode: stable,  os: ubuntu-latest,  payload: noslow-mpi     }
-          - {mode: nightly, os: ubuntu-latest,  payload: noslow         }
+          - {mode: stable,  os: ubuntu-latest,  payload: example-noslow-noregression }
+          - {mode: stable,  os: macOS-latest,   payload: noslow-noregression         }
+          - {mode: stable,  os: windows-latest, payload: noslow-noregression         }
+          - {mode: stable,  os: ubuntu-latest,  payload: mpi-noslow-noregression     }
+          - {mode: nightly, os: ubuntu-latest,  payload: noslow-noregression         }
     env:
       GKS_ENCODING: utf8
       GKSwstype: 100       # Needed for Plots-related tests
diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml
@@ -0,0 +1,98 @@
+name: Regression
+on:
+  push:
+    branches:
+      - master
+    tags: ['*']
+  pull_request:
+  schedule:
+    - cron:  '0 4 * * 6'  # Run every Saturday
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
+jobs:
+  test:
+    name: Benchmarking ${{ matrix.description }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {description: run,  payload: benchmarks.jl  }
+          - {description: load, payload: load.jl }
+    steps:
+      # Remove older benchmark comment; org/repo are taken from the workflow context.
+      - name: pr-deleter
+        uses: maheshrayas/action-pr-comment-delete@v3.0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          org: ${{ github.repository_owner }}
+          repo: ${{ github.event.repository.name }}
+          user: github-actions[bot]
+          issue: ${{github.event.number}}
+
+      - uses: actions/checkout@v4
+
+      - name: Setup Julia stable
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1.9'
+          arch: x64
+
+      - uses: julia-actions/cache@v1
+        with:
+          include-matrix: false
+      - uses: julia-actions/julia-buildpkg@v1
+
+      - name: Install dependencies
+        run: |
+          julia --project=benchmark -e '
+          using Pkg
+          Pkg.develop(PackageSpec(; path=pwd()))
+          Pkg.instantiate()'
+
+      - name: Run benchmarks against master
+        # Remove baseline once merged. Regression tests will only work after this is merged
+        # in master.
+        run: |
+          julia --project=benchmark -e "
+            using BenchmarkCI
+            baseline = \"HEAD\"
+            script = \"\$(pwd())/benchmark/${{ matrix.payload }}\"
+            BenchmarkCI.judge(; baseline, script, retune=true)"
+        if: ${{ github.event_name == 'pull_request' }}
+
+      - name: Run benchmarks against last release
+        run: |
+          julia --project=benchmark -e "
+            import Pkg
+            baseline = \"v\" * Pkg.TOML.parsefile(\"Project.toml\")[\"version\"]
+            script = \"\$(pwd())/benchmark/${{ matrix.payload }}\"
+            using BenchmarkCI
+            BenchmarkCI.judge(; baseline, script, retune=true)"
+        # Push payloads have no `push` key, so test the pushed ref via `github.ref`.
+        if: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/master' }}
+
+      - name: Print judgement
+        run: |
+          julia --project=benchmark -e '
+            using BenchmarkCI
+            BenchmarkCI.displayjudgement()'
+
+      - name: Post results
+        run: |
+          julia --project=benchmark -e '
+            using BenchmarkCI
+            BenchmarkCI.postjudge()'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Is report successful  # fails if >1 output line has ':x:'; NOTE(review): why 1, not 0? presumably a legend line — confirm
+        run: |
+          res=$(julia --project=benchmark -e '
+                  using BenchmarkCI
+                  BenchmarkCI.displayjudgement()' | grep --count ':x:')
+          if [[ $res -gt 1 ]]; then exit 1; fi
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,5 @@ Manifest.toml
 /LocalPreferences.toml
 .vscode
 .CondaPkg
+/.benchmarkci
+/benchmark/**/*.json
diff --git a/Project.toml b/Project.toml
@@ -128,6 +128,7 @@ ASEconvert = "3da9722f-58c2-4165-81be-b4d7253e8fd2"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44"
 AtomsIOPython = "9e4c859b-2281-48ef-8059-f50fe53c37b0"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
@@ -150,4 +151,4 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 wannier90_jll = "c5400fa0-8d08-52c2-913f-1e3f656c1ce9"
 
 [targets]
-test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"]
+test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "BenchmarkTools", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"]
diff --git a/benchmark/Project.toml b/benchmark/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
+Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
+UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a"
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
@@ -0,0 +1,6 @@
+using BenchmarkTools
+using TestItemRunner
+
+const SUITE = BenchmarkGroup()
+
+@run_package_tests filter=ti->(:regression ∈ ti.tags)
diff --git a/benchmark/humongous/Project.toml b/benchmark/humongous/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
diff --git a/benchmark/humongous/benchmarks.jl b/benchmark/humongous/benchmarks.jl
@@ -0,0 +1,21 @@
+using BenchmarkTools
+using TestItemRunner
+
+function run_scenario(scenario, complexity)
+    scenario_filter(i) = occursin(string(scenario), i.filename) && complexity ∈ i.tags
+    @run_package_tests filter=scenario_filter
+end
+
+all_scenarios() = [:AlSiO2H, :Cr19, :Fe2MnAl, :Mn2RuGa, :WFe]
+function make_suite(; scenarios=all_scenarios(), complexity=:debug)
+    # Validate the requested complexity tag and scenario names before building anything.
+    @assert complexity ∈ [:debug, :small, :full]
+    @assert all(scenarios .∈ Ref(all_scenarios()))
+    group = BenchmarkGroup()
+    for name in scenarios  # one benchmark entry per requested scenario
+        group[name] = @benchmarkable run_scenario($name, $complexity)
+    end
+    return group
+end
+
+const SUITE = make_suite(; scenarios=[:AlSiO2H])
diff --git a/benchmark/humongous/run.jl b/benchmark/humongous/run.jl
@@ -0,0 +1,43 @@
+ROOTPATH = abspath(joinpath(@__DIR__, "../.."))
+import Pkg
+Pkg.activate(@__DIR__)
+if !isfile(joinpath(@__DIR__, "Manifest.toml"))
+    Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH))
+    Pkg.instantiate()
+end
+
+import BenchmarkCI
+import LibGit2
+
+"""
+Judge `target` against `baseline` with BenchmarkCI, using a temporary copy of `script`. Launch with
+```julia
+julia --project=benchmark/humongous -e '
+   include("benchmark/humongous/run.jl")
+   run_benchmark()'
+```
+"""
+function run_benchmark(; retune=false, baseline="origin/master", target="HEAD",
+                       script=nothing)
+    mktempdir(mktempdir()) do repo_dir  # TestItemRunner needs access to parent directory as well.
+        project = joinpath(ROOTPATH, "benchmark", "humongous")
+        # Workaround to be able to benchmark releases before the use of PkgBenchmark.
+        # WARN: In this case, we need PkgBenchmark to be installed globally.
+        if isnothing(script)
+            # We run the default benchmark.
+            script = joinpath(project, "benchmarks.jl")
+        else
+            occursin(ROOTPATH, abspath(script)) &&  # NOTE(review): substring match, not a prefix check
+                error("Script should be outside the repository.")
+        end
+        script_copy = joinpath(repo_dir, "benchmarks.jl")
+        cp(script, script_copy)
+        # Clone the DFTK-testproblems repository into <repo_dir>/test.
+        LibGit2.clone("https://github.com/epolack/DFTK-testproblems",
+                      joinpath(repo_dir, "test"))
+        # Run the comparison on the copied script, then display the judgement.
+        BenchmarkCI.judge(; baseline, target, retune, script=script_copy, project)
+
+        BenchmarkCI.displayjudgement()
+    end
+end
diff --git a/benchmark/load.jl b/benchmark/load.jl
@@ -0,0 +1,14 @@
+using BenchmarkTools
+
+const SUITE = BenchmarkGroup()
+
+julia_cmd = unsafe_string(Base.JLOptions().julia_bin)  # each benchmark spawns this Julia binary in a fresh process
+SUITE["load"] = @benchmarkable run(`$julia_cmd \
+                                        --startup-file=no \
+                                        --project=$(Base.active_project()) \
+                                        -e 'using DFTK'`)
+SUITE["precompilation"] =  # times a fresh process that rebuilds DFTK's compile cache
+    @benchmarkable run(`$julia_cmd \
+                           --startup-file=no \
+                           --project=$(Base.active_project()) \
+                           -e 'Base.compilecache(Base.identify_package("DFTK"))'`)
diff --git a/benchmark/regression/testcases.jl b/benchmark/regression/testcases.jl
diff --git a/benchmark/run.jl b/benchmark/run.jl
diff --git a/test/runtests_runner.jl b/test/runtests_runner.jl