Skip to content

Commit

Permalink
Reorganize the snap files for added clarity (#13)
Browse files Browse the repository at this point in the history
* drop wget dependency - curl is already available
* separate installing dcgm in its own part
* add comments around all parts
* move the hostengine service script under bin/ for consistency
* split local files into scripts/ and files/ to avoid shipping the
  configure_sources.sh script
* move proftester cleanup under the dcgm part
  • Loading branch information
aieri authored Sep 17, 2024
1 parent 0f17eee commit cddf5da
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 35 deletions.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fi


echo "Architecture is $SYSTEM_ARCH. Downloading cuda-keyring package..."
wget "https://developer.download.nvidia.com/compute/cuda/repos/$DISTRIBUTION/$ARCH/$CUDA_PKG"
curl --remote-name "https://developer.download.nvidia.com/compute/cuda/repos/$DISTRIBUTION/$ARCH/$CUDA_PKG"

# Run the checksum verification and install cuda-keyring if valid
if echo "$SHA256SUM $CUDA_PKG" | sha256sum --check --status; then
Expand Down
73 changes: 39 additions & 34 deletions snap/snapcraft.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: dcgm
base: core24
adopt-info: dcgm-exporter
adopt-info: dcgm
summary: Snap for NVIDIA DCGM and DCGM exporter
license: Apache-2.0
contact: [email protected]
Expand All @@ -22,7 +22,7 @@ title: NVIDIA DCGM

apps:
dcgm-exporter:
command: run_dcgm_exporter.sh
command: bin/run_dcgm_exporter.sh
plugs:
- network-bind
- opengl
Expand All @@ -37,7 +37,7 @@ apps:
- network-bind
- opengl
nv-hostengine:
command: run_nv_hostengine.sh
command: bin/run_nv_hostengine.sh
plugs:
- network-bind
- opengl
Expand All @@ -47,54 +47,59 @@ apps:
DCGM_HOME_DIR: "${SNAP_COMMON}"

parts:
wrapper:
plugin: dump
build-packages:
- wget
- dpkg
source: snap/local
# This is a workaround for package-repositories not supporting multiple architectures
# See https://forum.snapcraft.io/t/can-package-repositories-depend-on-architecture/27820
cuda-sources:
plugin: nil
source: snap/local/scripts
override-pull: |
craftctl default
./configure_sources.sh
override-build: |
craftctl default
chmod +x run_nv_hostengine.sh
chmod +x run_dcgm_exporter.sh
dcgm-exporter:
# This is the actual DCGM software. We don't build from source because the build
# process is very cumbersome and requires Docker.
# The deb is pulled from the sources configured in the cuda-sources part.
dcgm:
after:
- wrapper
plugin: go
- cuda-sources
plugin: nil
stage-packages: [datacenter-gpu-manager=1:3.3.7]
build-snaps:
- go
source: https://github.com/NVIDIA/dcgm-exporter.git
source-type: git
source-tag: 3.3.7-3.5.0
# override build to set custom csv file
override-build: |
craftctl default
mkdir -p $SNAPCRAFT_PART_INSTALL/etc/dcgm-exporter
cp etc/default-counters.csv etc/dcp-metrics-included.csv $SNAPCRAFT_PART_INSTALL/etc/dcgm-exporter/
# override prime to set version
override-prime: |
craftctl default
# Locate dcgm .deb file
DEB_FILE=$(ls $HOME/parts/dcgm-exporter/stage_packages/datacenter-gpu-manager_*.deb)
DEB_FILE=$(ls $HOME/parts/dcgm/stage_packages/datacenter-gpu-manager_*.deb)
# Extract the version from the .deb file
DCGM_VERSION=$(dpkg-deb -f "$DEB_FILE" Version)
# Set the Snap version to the same as dcgm deb file
craftctl set version="${DCGM_VERSION#1:}"
cleanup:
after:
- dcgm-exporter
plugin: nil
source: snap/local/scripts
override-prime: |
# Remove dcgmproftesters and related libraries
$CRAFT_PROJECT_DIR/snap/local/scripts/remove_dcgmproftester.sh
# This is the DCGM exporter
dcgm-exporter:
plugin: go
build-snaps:
- go
source: https://github.com/NVIDIA/dcgm-exporter.git
source-type: git
source-tag: 3.3.7-3.5.0
# override build to also install the metrics CSV files under etc/dcgm-exporter
override-build: |
craftctl default
./scripts/remove_dcgmproftester.sh
mkdir -p $SNAPCRAFT_PART_INSTALL/etc/dcgm-exporter
cp etc/default-counters.csv etc/dcp-metrics-included.csv $SNAPCRAFT_PART_INSTALL/etc/dcgm-exporter/
# wrappers supporting snap options
wrapper:
plugin: dump
source: snap/local/files
organize:
run_nv_hostengine.sh: bin/
run_dcgm_exporter.sh: bin/

layout:
/etc/dcgm-exporter:
Expand Down

0 comments on commit cddf5da

Please sign in to comment.