Skip to content

Update recipes #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 5, 2025
2 changes: 1 addition & 1 deletion amg2023/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ version: 0.1.0
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
appVersion: "0.0.0"

# Common minicluster code
dependencies:
Expand Down
8 changes: 7 additions & 1 deletion base-template/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,10 @@ version: 0.1.0
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
appVersion: "0.0.0"

# Common minicluster code
dependencies:
- name: base-template
version: 0.1.0
repository: file://../base-template
20 changes: 16 additions & 4 deletions base-template/templates/_flux-minicluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,39 @@ spec:
logging:
quiet: {{ if .Values.logging.quiet }}true{{ else }}false{{ end }}

{{ if .Values.minicluster.serviceAccountName }}pod:
serviceAccountName: {{ .Values.minicluster.serviceAccountName }}{{ end }}

# This disables installing flux via the view
flux:
container:
disable: {{ if .Values.minicluster.addFlux }}false{{ else }}true{{ end }}
{{ if .Values.flux }}{{ if .Values.flux.image }}image: {{ .Values.flux.image }}{{ end }}{{ end }}

containers:
- image: "{{ default "ghcr.io/converged-computing/metric-lammps-cpu:zen4-reax" .Values.minicluster.image }}"
command: /bin/bash /tmp/run_${app}.sh
{{ if .Values.minicluster.volumeName }}volumes:
{{ .Values.minicluster.volumeName }}:
path: {{ default "/shared" .Values.minicluster.volumePath }}
{{ if .Values.minicluster.volumeClaim }}claimName: {{ .Values.minicluster.volumeClaim }}{{ end }}
{{ end }}
{{ if .Values.minicluster.workdir }}workingDir: {{ .Values.minicluster.workdir }}{{ end }}
{{ if .Values.minicluster.pullAlways }}pullAlways: true{{ end }}
launcher: true
securityContext:
privileged: {{ if .Values.minicluster.privileged }}true{{ else }}false{{ end }}
# Extra Linux capabilities for the container; emitted only when the value is set.
# The value is placed inside a flow list, so a single name or a comma-separated
# string of names both render as a list (assumes a string value - TODO confirm).
{{ if .Values.minicluster.addCapabilities }}addCapabilities: [{{ .Values.minicluster.addCapabilities }}]{{ end }}
resources:
limits:
nvidia.com/gpu: "{{ .Values.minicluster.gpus }}"
nvidia.com/gpu: "{{ default 0 .Values.minicluster.gpus }}"
commands:
{{ if .Values.minicluster.commands_broker_pre }}brokerPre: {{ .Values.minicluster.commands_broker_pre }}{{ end }}
{{ if .Values.minicluster.commands_init }}init: {{ .Values.minicluster.commands_init }}{{ end }}
post: |
{{ include "chart.fluxfinish" . }}

pre: |
{{ include "chart.gpus" . }}
{{ if .Values.minicluster.gpus }}{{ include "chart.gpus" . }}{{ end }}
cat <<EOF >> /tmp/run_${app}.sh
#!/bin/bash
set -euo pipefail
Expand All @@ -44,5 +56,5 @@ spec:
{{ include "chart.fluxpost" . }}
{{ include "chart.savelogs" . }}
EOF
cat /tmp/run_${app}.sh
cat /tmp/run_${app}.sh
{{ end }}
15 changes: 11 additions & 4 deletions base-template/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
do
echo "FLUX-RUN START $app-iter-\$i"
flux run --setattr=user.study_id=$app-iter-\$i -N{{ if .Values.experiment.nodes }}{{ .Values.experiment.nodes }}{{ else }}1{{ end }} {{ if .Values.experiment.tasks }}-n {{ .Values.experiment.tasks }}{{ end }} {{ include "chart.fluxopts" . }} ${apprun} |& tee /tmp/${app}.out
echo "FLUX-RUN END $app-iter-\$i"
{{ if .Values.minicluster.commands_post_iteration }}{{ .Values.minicluster.commands_post_iteration }};{{ end }}
echo "FLUX-RUN END $app-iter-\$i"
done
{{- end }}

Expand All @@ -63,7 +64,7 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}


{{/* Flux Shared Options */}}
{{- define "chart.fluxopts" -}}-o cpu-affinity={{ default "per-task" .Values.experiment.cpu_affinity }} -o gpu-affinity={{ default "off" .Values.experiment.gpu_affinity }} {{ if .Values.experiment.run_threads }}--env OMP_NUM_THREADS={{ .Values.experiment.run_threads }}{{ end }} {{ if .Values.experiment.cores_per_task }}--cores-per-task {{ .Values.experiment.cores_per_task }}{{ end }} {{ if .Values.experiment.exclusive }}--exclusive{{ end }}{{- end }}
{{- define "chart.fluxopts" -}}-o cpu-affinity={{ default "per-task" .Values.experiment.cpu_affinity }} -o gpu-affinity={{ default "off" .Values.experiment.gpu_affinity }} {{ if .Values.experiment.run_threads }}--env OMP_NUM_THREADS={{ .Values.experiment.run_threads }}{{ end }} {{ if .Values.experiment.cores_per_task }}--cores-per-task {{ .Values.experiment.cores_per_task }}{{ end }} {{ if .Values.minicluster.gpus }} -g {{ .Values.minicluster.gpus }}{{ end }} {{ if .Values.experiment.exclusive }}--exclusive{{ end }}{{- end }}

{{/* Flux Run with Pairs
Iterations is not relevant for this one
Expand Down Expand Up @@ -96,8 +97,14 @@ Iterations is not relevant for this one
{{/* Flux GPUs */}}
{{- define "chart.gpus" -}}
{{ if .Values.minicluster.gpus }}procs=$(nproc); procs=$((procs - 1));
gpus={{ .Values.minicluster.gpus }}; gpus=$((gpus - 1)); {{ $gpus := (.Values.minicluster.gpus | int) }}
{{ $gpus := (subf $gpus 1 | int) }}flux R encode --hosts=${hosts} --cores=0-${procs} --gpu=0-${gpus} > ${viewroot}/etc/flux/system/R
gpus={{ .Values.minicluster.gpus }};
if [[ "$gpus" == "1" ]]; then
gpus=0;
else
gpus=$((gpus - 1)); gpus=0-$gpus
fi
{{ $gpus := (.Values.minicluster.gpus | int) }}
flux R encode --hosts=${hosts} --cores=0-${procs} --gpu=${gpus} > ${viewroot}/etc/flux/system/R
cat ${viewroot}/etc/flux/system/R || true
export CUDA_VISIBLE_DEVICES=0{{ range untilStep 1 $gpus 1 }},{{ . }}{{ end }}{{ end }}
{{- end }}
Expand Down
23 changes: 23 additions & 0 deletions bdas/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions bdas/Chart.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dependencies:
- name: base-template
repository: file://../base-template
version: 0.1.0
digest: sha256:ff1f39a86c81f3bc7ca008f4cc1ac05d23a412aeae78f289b73f3c945cbf6e82
generated: "2025-02-09T12:47:20.383924591-07:00"
30 changes: 30 additions & 0 deletions bdas/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
apiVersion: v2
name: chart
description: A Helm chart to deploy an HPC app with Flux in Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.0.0"

# Common minicluster code, pulled in from the sibling base-template chart.
# The version/repository fields belong to the dependency entry, so they are
# nested under it rather than repeated as top-level chart keys.
dependencies:
  - name: base-template
    version: 0.1.0
    repository: file://../base-template
4 changes: 4 additions & 0 deletions bdas/templates/flux-minicluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# MiniCluster manifest for the bdas chart: renders the shared
# base-template.flux-minicluster definition, then adds an environment section.
# apprun is the command line "<binary> <benchmark> <rows> <cols>" built from
# the bdas values; binary and benchmark fall back to Rscript and princomp.r.
# The remaining environment entries come from base-template.environment.
{{ include "base-template.flux-minicluster" . }}
environment:
apprun: '{{ default "Rscript" .Values.bdas.binary }} {{ default "/opt/bdas/benchmarks/r/princomp.r" .Values.bdas.benchmark }} {{ .Values.bdas.rows }} {{ .Values.bdas.cols }}'
{{- include "base-template.environment" . }}
43 changes: 43 additions & 0 deletions bdas/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Default values for experiment
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Logging (quiet will hide flux setup)
logging:
  quiet: true

# Options for the flux run itself
experiment:
  iterations: 1
  # Optional: run_threads is exported as OMP_NUM_THREADS for the run
  # run_threads: 3
  # Optional: passed to flux as --cores-per-task
  # cores_per_task: 3
  nodes: 1
  tasks: 2

env:
  app: "bdas"

# Arguments used to build the benchmark command line
bdas:
  binary: Rscript
  # Other files in this directory, both take rows/cols
  # kmeans.r princomp.r svm.r utils.r
  benchmark: /opt/bdas/benchmarks/r/princomp.r
  rows: 250
  cols: 50

minicluster:
  # Container image
  image: "ghcr.io/converged-computing/metric-bdas:latest"

  # Interactive MiniCluster?
  interactive: false

  # Number of NVIDIA gpus
  gpus: 0

  # Working directory for the application container
  workdir: /opt/bdas/benchmarks/r

  # MiniCluster size
  size: 1

  # Add flux on the fly (set to false if Flux is already in the container)
  addFlux: true
23 changes: 23 additions & 0 deletions cfdscope/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions cfdscope/Chart.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dependencies:
- name: base-template
repository: file://../base-template
version: 0.1.0
digest: sha256:ff1f39a86c81f3bc7ca008f4cc1ac05d23a412aeae78f289b73f3c945cbf6e82
generated: "2025-02-09T12:47:20.383924591-07:00"
30 changes: 30 additions & 0 deletions cfdscope/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
apiVersion: v2
name: chart
description: A Helm chart to deploy an HPC app with Flux in Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.0.0"

# Common minicluster code, pulled in from the sibling base-template chart.
# The version/repository fields belong to the dependency entry, so they are
# nested under it rather than repeated as top-level chart keys.
dependencies:
  - name: base-template
    version: 0.1.0
    repository: file://../base-template
4 changes: 4 additions & 0 deletions cfdscope/templates/flux-minicluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# MiniCluster manifest for the cfdscope chart: renders the shared
# base-template.flux-minicluster definition, then adds an environment section.
# apprun is the cfdscope command line; every flag value is read from the
# cfdscope values with no template-side default, so each key must be set.
# The remaining environment entries come from base-template.environment.
{{ include "base-template.flux-minicluster" . }}
environment:
apprun: '{{ .Values.cfdscope.binary }} --domain-size {{ .Values.cfdscope.domain_size }} --cell-size {{ .Values.cfdscope.cell_size }} --end-time {{ .Values.cfdscope.end_time }} --lid-speed {{ .Values.cfdscope.lid_speed }} --step-size {{ .Values.cfdscope.step_size }} --output-prefix {{ .Values.cfdscope.output_prefix }} --output-format {{ .Values.cfdscope.output_format }} --preconditioner {{ .Values.cfdscope.preconditioner }}'
{{- include "base-template.environment" . }}
56 changes: 56 additions & 0 deletions cfdscope/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Default values for experiment
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Logging (quiet will hide flux setup)
logging:
  quiet: true

# Options for the flux run itself
experiment:
  iterations: 1
  # Optional: run_threads is exported as OMP_NUM_THREADS for the run
  # run_threads: 3
  # Optional: passed to flux as --cores-per-task
  # cores_per_task: 3
  nodes: 1
  # tasks: 2

env:
  app: "cfdscope"

# Each key below is passed to the binary as the matching command-line flag
# (see the option list that follows). Every key appears exactly once.
cfdscope:
  binary: cfdscope
  domain_size: 64
  cell_size: 1.0
  end_time: 5.0
  lid_speed: 10
  step_size: 0.4
  output_prefix: fields
  output_format: csv
  preconditioner: dic

# Command-line options, for reference:
# -l, --log-level arg Log level (trace, debug, info, warn, err, critical or off) (default: info)
# -d, --domain-size arg Number of the simulation cells along all three (default 20)
# -c, --cell-size arg Size of each simulation cell (default: 1.0)
# -e, --end-time arg Simulation duration (seconds) (default: 5.0)
# -s, --step-size arg Simulation step size (seconds) (default: 0.4)
# -u, --lid-speed arg Lid speed (cells/second) (default: 10)
# -o, --output-prefix arg Output file prefix (default: fields)
# -f, --output-format arg Output file format (csv, raw) (default: csv)
# -p, --preconditioner arg Preconditioner type (none, jacobi, dic) (default dic)
# -h, --help Print usage

minicluster:
  # Container image
  image: "ghcr.io/rse-ops/cfdscope:flux"

  # Interactive MiniCluster?
  interactive: false

  # Number of NVIDIA gpus
  gpus: 0

  # MiniCluster size
  size: 1

  # Add flux on the fly (set to false if Flux is already in the container)
  addFlux: false
23 changes: 23 additions & 0 deletions chatterbug/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions chatterbug/Chart.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dependencies:
- name: base-template
repository: file://../base-template
version: 0.1.0
digest: sha256:ff1f39a86c81f3bc7ca008f4cc1ac05d23a412aeae78f289b73f3c945cbf6e82
generated: "2025-02-09T12:47:20.383924591-07:00"
Loading