converged-computing · vsoch · May 5, 2025 · Apr 23, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/amg2023/Chart.yaml b/amg2023/Chart.yaml
@@ -21,7 +21,7 @@ version: 0.1.0
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "1.16.0"
+appVersion: "0.0.0"
 
 # Common minicluster code
 dependencies:

diff --git a/base-template/Chart.yaml b/base-template/Chart.yaml
@@ -21,4 +21,7 @@ version: 0.1.0
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "1.16.0"
+appVersion: "0.0.0"
+
+# base-template is the shared chart the application charts list as their dependency,
+# so it must not list itself here.
diff --git a/base-template/templates/_flux-minicluster.yaml b/base-template/templates/_flux-minicluster.yaml
@@ -12,27 +12,39 @@ spec:
   logging:
     quiet: {{ if .Values.logging.quiet }}true{{ else }}false{{ end }}
 
+  {{ if .Values.minicluster.serviceAccountName }}pod:
+    serviceAccountName: {{  .Values.minicluster.serviceAccountName }}{{ end }}
+
   # This disables installing flux via the view
   flux:
     container:
       disable: {{ if .Values.minicluster.addFlux }}false{{ else }}true{{ end }}
+      {{ if .Values.flux }}{{ if .Values.flux.image }}image: {{ .Values.flux.image }}{{ end }}{{ end }}
 
   containers:
   - image: "{{ default "ghcr.io/converged-computing/metric-lammps-cpu:zen4-reax" .Values.minicluster.image }}"
     command: /bin/bash /tmp/run_${app}.sh
+    {{ if .Values.minicluster.volumeName }}{{/* optional container volume, keyed by volumeName */}}volumes:
+      {{ .Values.minicluster.volumeName }}:
+        path: {{ default "/shared" .Values.minicluster.volumePath }}
+        {{ if .Values.minicluster.volumeClaim }}claimName: {{ .Values.minicluster.volumeClaim }}{{ end }}
+    {{ end }}
+    {{ if .Values.minicluster.workdir }}workingDir: {{ .Values.minicluster.workdir }}{{ end }}
+    {{ if .Values.minicluster.pullAlways }}pullAlways: true{{ end }}
     launcher: true
     securityContext:
       privileged: {{ if .Values.minicluster.privileged }}true{{ else }}false{{ end }}
+      {{ if .Values.minicluster.addCapabilities }}{{/* string or list; list items are comma-joined */}}addCapabilities: [{{ join ", " .Values.minicluster.addCapabilities }}]{{ end }}
     resources:
       limits:
-        nvidia.com/gpu: "{{ .Values.minicluster.gpus }}"
+        nvidia.com/gpu: "{{ default 0  .Values.minicluster.gpus }}"
     commands:
+      {{ if .Values.minicluster.commands_broker_pre }}brokerPre: {{ .Values.minicluster.commands_broker_pre }}{{ end }}
       {{ if .Values.minicluster.commands_init }}init: {{ .Values.minicluster.commands_init }}{{ end }}
       post: |
          {{ include "chart.fluxfinish" . }}
-
       pre: |
-         {{ include "chart.gpus" . }}
+         {{ if .Values.minicluster.gpus }}{{ include "chart.gpus" . }}{{ end }}
          cat <<EOF >> /tmp/run_${app}.sh
          #!/bin/bash
          set -euo pipefail
@@ -44,5 +56,5 @@ spec:
          {{ include "chart.fluxpost" . }}
          {{ include "chart.savelogs" . }}
          EOF
-         cat /tmp/run_${app}.sh         
+         cat /tmp/run_${app}.sh
 {{ end }}
diff --git a/base-template/templates/_helpers.tpl b/base-template/templates/_helpers.tpl
@@ -42,7 +42,8 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
          do
            echo "FLUX-RUN START $app-iter-\$i"
            flux run --setattr=user.study_id=$app-iter-\$i -N{{ if .Values.experiment.nodes }}{{ .Values.experiment.nodes }}{{ else }}1{{ end }} {{ if .Values.experiment.tasks }}-n {{ .Values.experiment.tasks }}{{ end }} {{ include "chart.fluxopts" . }} ${apprun} |& tee /tmp/${app}.out
-             echo "FLUX-RUN END $app-iter-\$i"
+           {{ if .Values.minicluster.commands_post_iteration }}{{ .Values.minicluster.commands_post_iteration }};{{ end }}
+            echo "FLUX-RUN END $app-iter-\$i"
          done
 {{- end }}
 
@@ -63,7 +64,7 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
 
 
 {{/* Flux Shared Options */}}
-{{- define "chart.fluxopts" -}}-o cpu-affinity={{ default "per-task" .Values.experiment.cpu_affinity }} -o gpu-affinity={{ default "off" .Values.experiment.gpu_affinity }} {{ if .Values.experiment.run_threads }}--env OMP_NUM_THREADS={{ .Values.experiment.run_threads }}{{ end }} {{ if .Values.experiment.cores_per_task }}--cores-per-task {{ .Values.experiment.cores_per_task }}{{ end }} {{ if .Values.experiment.exclusive }}--exclusive{{ end }}{{- end }}
+{{- define "chart.fluxopts" -}}-o cpu-affinity={{ default "per-task" .Values.experiment.cpu_affinity }} -o gpu-affinity={{ default "off" .Values.experiment.gpu_affinity }} {{ if .Values.experiment.run_threads }}--env OMP_NUM_THREADS={{ .Values.experiment.run_threads }}{{ end }} {{ if .Values.experiment.cores_per_task }}--cores-per-task {{ .Values.experiment.cores_per_task }}{{ end }} {{ if .Values.minicluster.gpus }} -g {{ .Values.minicluster.gpus }}{{ end }} {{ if .Values.experiment.exclusive }}--exclusive{{ end }}{{- end }}
 
 {{/* Flux Run with Pairs 
 Iterations is not relevant for this one
@@ -96,8 +97,14 @@ Iterations is not relevant for this one
 {{/* Flux GPUs */}}
 {{- define "chart.gpus" -}}
          {{ if .Values.minicluster.gpus }}procs=$(nproc); procs=$((procs - 1));   
-         gpus={{ .Values.minicluster.gpus }}; gpus=$((gpus - 1)); {{ $gpus := (.Values.minicluster.gpus | int) }}
-         {{ $gpus := (subf $gpus 1 | int) }}flux R encode --hosts=${hosts} --cores=0-${procs} --gpu=0-${gpus} > ${viewroot}/etc/flux/system/R
+         {{ $gpus := (.Values.minicluster.gpus | int) }}
+         {{/* One GPU is encoded as id 0, several as the id range 0-(count-1) */}}
+         gpus={{ .Values.minicluster.gpus }}
+         case "$gpus" in
+             1) gpus=0 ;;
+             *) gpus=0-$((gpus - 1)) ;;
+         esac
+         flux R encode --hosts=${hosts} --cores=0-${procs} --gpu=${gpus} > ${viewroot}/etc/flux/system/R
          cat ${viewroot}/etc/flux/system/R || true
          export CUDA_VISIBLE_DEVICES=0{{ range untilStep 1 $gpus 1 }},{{ . }}{{ end }}{{ end }}
 {{- end }}

diff --git a/bdas/.helmignore b/bdas/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/bdas/Chart.lock b/bdas/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: base-template
+  repository: file://../base-template
+  version: 0.1.0
+digest: sha256:ff1f39a86c81f3bc7ca008f4cc1ac05d23a412aeae78f289b73f3c945cbf6e82
+generated: "2025-02-09T12:47:20.383924591-07:00"
diff --git a/bdas/Chart.yaml b/bdas/Chart.yaml
@@ -0,0 +1,30 @@
+apiVersion: v2
+name: chart
+description: A Helm chart to deploy an HPC app with Flux in Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "0.0.0"
+
+# Common minicluster code
+dependencies:
+- name: base-template
+  version: 0.1.0
+  repository: file://../base-template
diff --git a/bdas/templates/flux-minicluster.yaml b/bdas/templates/flux-minicluster.yaml
@@ -0,0 +1,4 @@
+{{ include "base-template.flux-minicluster" . }}
+    environment:
+      apprun: '{{ default "Rscript" .Values.bdas.binary }} {{ default "/opt/bdas/benchmarks/r/princomp.r" .Values.bdas.benchmark }} {{ .Values.bdas.rows }} {{ .Values.bdas.cols }}'
+      {{- include "base-template.environment" . }}
diff --git a/bdas/values.yaml b/bdas/values.yaml
@@ -0,0 +1,43 @@
+# Default values for experiment
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# Logging (quiet will hide flux setup)
+logging:
+  quiet: true
+
+experiment:
+  iterations: 1
+  # num_threads: 3
+  # cores_per_task: 3
+  nodes: 1
+  tasks: 2
+
+env:
+  app: "bdas"
+
+bdas:
+  binary: Rscript
+  # Other files in this directory, both take rows/cols
+  # kmeans.r  princomp.r  svm.r  utils.r
+  benchmark: /opt/bdas/benchmarks/r/princomp.r
+  rows: 250
+  cols: 50
+
+minicluster:
+  # Container image
+  image: "ghcr.io/converged-computing/metric-bdas:latest"
+
+  # Interactive MiniCluster?
+  interactive: false
+
+  # Number of NVIDIA gpus
+  gpus: 0
+
+  workdir: /opt/bdas/benchmarks/r
+
+  # MiniCluster size
+  size: 1
+
+  # Add flux on the fly (set to false if Flux is already in the container)
+  addFlux: true
diff --git a/cfdscope/.helmignore b/cfdscope/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/cfdscope/Chart.lock b/cfdscope/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: base-template
+  repository: file://../base-template
+  version: 0.1.0
+digest: sha256:ff1f39a86c81f3bc7ca008f4cc1ac05d23a412aeae78f289b73f3c945cbf6e82
+generated: "2025-02-09T12:47:20.383924591-07:00"
diff --git a/cfdscope/Chart.yaml b/cfdscope/Chart.yaml
@@ -0,0 +1,30 @@
+apiVersion: v2
+name: chart
+description: A Helm chart to deploy an HPC app with Flux in Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "0.0.0"
+
+# Common minicluster code
+dependencies:
+- name: base-template
+  version: 0.1.0
+  repository: file://../base-template
diff --git a/cfdscope/templates/flux-minicluster.yaml b/cfdscope/templates/flux-minicluster.yaml
@@ -0,0 +1,4 @@
+{{ include "base-template.flux-minicluster" . }}
+    environment:
+      apprun: '{{ .Values.cfdscope.binary }} --domain-size {{ .Values.cfdscope.domain_size }} --cell-size {{ .Values.cfdscope.cell_size }} --end-time {{ .Values.cfdscope.end_time }} --lid-speed {{ .Values.cfdscope.lid_speed }} --step-size {{ .Values.cfdscope.step_size }} --output-prefix {{ .Values.cfdscope.output_prefix }} --output-format {{ .Values.cfdscope.output_format }} --preconditioner {{ .Values.cfdscope.preconditioner }}'
+      {{- include "base-template.environment" . }}
diff --git a/cfdscope/values.yaml b/cfdscope/values.yaml
@@ -0,0 +1,56 @@
+# Default values for experiment
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# Logging (quiet will hide flux setup)
+logging:
+  quiet: true
+
+experiment:
+  iterations: 1
+  # num_threads: 3
+  # cores_per_task: 3
+  nodes: 1
+  # tasks: 2
+
+env:
+  app: "cfdscope"
+
+cfdscope:
+  # Executable name, followed by one value per command-line flag of the app
+  binary: cfdscope
+  domain_size: 64
+  cell_size: 1.0
+  end_time: 5.0
+  lid_speed: 10
+  step_size: 0.4
+  output_prefix: fields
+  output_format: csv
+  preconditioner: dic
+
+#  -l, --log-level arg       Log level (trace, debug, info, warn, err, critical or off) (default: info)
+#  -d, --domain-size arg     Number of simulation cells along each of the three axes (default: 20)
+#  -c, --cell-size arg       Size of each simulation cell (default: 1.0)
+#  -e, --end-time arg        Simulation duration (seconds) (default: 5.0)
+#  -s, --step-size arg       Simulation step size (seconds) (default: 0.4)
+#  -u, --lid-speed arg       Lid speed (cells/second) (default: 10)
+#  -o, --output-prefix arg   Output file prefix (default: fields)
+#  -f, --output-format arg   Output file format (csv, raw) (default: csv)
+#  -p, --preconditioner arg  Preconditioner type (none, jacobi, dic) (default: dic)
+#  -h, --help                Print usage
+
+minicluster:
+  # Container image
+  image: "ghcr.io/rse-ops/cfdscope:flux"
+
+  # Interactive MiniCluster?
+  interactive: false
+
+  # Number of NVIDIA gpus
+  gpus: 0
+
+  # MiniCluster size
+  size: 1
+
+  # Add flux on the fly (set to false if Flux is already in the container)
+  addFlux: false
diff --git a/chatterbug/.helmignore b/chatterbug/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/chatterbug/Chart.lock b/chatterbug/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: base-template
+  repository: file://../base-template
+  version: 0.1.0
+digest: sha256:ff1f39a86c81f3bc7ca008f4cc1ac05d23a412aeae78f289b73f3c945cbf6e82
+generated: "2025-02-09T12:47:20.383924591-07:00"