Running NVIDIA GPU-enabled containers in GNU Guix

Figure 1: gnu-with-nvidia.jpeg, generated by the author using DALL-E

After trying in vain to get PyTorch with CUDA support to build on GNU Guix (there's a python-pytorch-with-cuda11 package on the guix-science-nonfree channel), I decided to have a stab at this nonguix issue and see if I could get the nvidia-container-toolkit running on Guix, which would allow me to run a containerised version of PyTorch. TL;DR: I managed to get everything working and can now easily run (NVIDIA) GPU-enabled Docker containers, but with one rather ugly (and probably unsafe) hack… For now it's good enough for my use case, but any input on how to do this the Right Way™ would be appreciated!

Patching libnvidia-container

Building libnvidia-container appeared simple enough, but it kept throwing errors when I tried to start a container. Fortunately, the good folk over at Nix already had a working solution that pointed me in the right direction. It seems that libnvidia-container uses the ld.so cache to find the NVIDIA libraries it needs at run time. This doesn't seem to work on Guix (or Nix), so the fix involved writing a small patch that provides the path to the NVIDIA libraries manually. Out of convenience I opted for /run/current-system/profile/lib as the base path for the libraries, though it might be neater to provide /gnu/store paths instead (I'm not sure). If you'd like to follow along, you'll need to save the following patch as libnvidia-container.patch alongside the Guix package definitions (below).

diff --git a/src/ldcache.c b/src/ldcache.c
index 38bab05..ba922d9 100644
--- a/src/ldcache.c
+++ b/src/ldcache.c
@@ -108,40 +108,28 @@ ldcache_close(struct ldcache *ctx)

 int
 ldcache_resolve(struct ldcache *ctx, uint32_t arch, const char *root, const char * const libs[],
-    char *paths[], size_t size, ldcache_select_fn select, void *select_ctx)
+               char *paths[], size_t size)
 {
-        char path[PATH_MAX];
-        struct header_libc6 *h;
-        int override;
-
-        h = (struct header_libc6 *)ctx->ptr;
-        memset(paths, 0, size * sizeof(*paths));
-
-        for (uint32_t i = 0; i < h->nlibs; ++i) {
-                int32_t flags = h->libs[i].flags;
-                char *key = (char *)ctx->ptr + h->libs[i].key;
-                char *value = (char *)ctx->ptr + h->libs[i].value;
-
-                if (!(flags & LD_ELF) || (flags & LD_ARCH_MASK) != arch)
-                        continue;
-
-                for (size_t j = 0; j < size; ++j) {
-                        if (!str_has_prefix(key, libs[j]))
-                                continue;
-                        if (path_resolve(ctx->err, path, root, value) < 0)
-                                return (-1);
-                        if (paths[j] != NULL && str_equal(paths[j], path))
-                                continue;
-                        if ((override = select(ctx->err, select_ctx, root, paths[j], path)) < 0)
-                                return (-1);
-                        if (override) {
-                                free(paths[j]);
-                                paths[j] = xstrdup(ctx->err, path);
-                                if (paths[j] == NULL)
-                                        return (-1);
-                        }
-                        break;
-                }
-        }
-        return (0);
+  char path[PATH_MAX];
+  char dir[PATH_MAX] = "/run/current-system/profile/lib"; // Is it neater to refer to the /gnu/store path?
+  char lib[PATH_MAX];
+
+  memset(paths, 0, size * sizeof(*paths));
+
+  for (size_t j = 0; j < size; ++j) {
+
+    if (!strncmp(libs[j], "libvdpau_nvidia.so", 100))
+      strcat(dir, "/vdpau");
+    snprintf(lib, 100, "%s/%s", dir, libs[j]);
+
+    if (path_resolve_full(ctx->err, path, "/", lib) < 0)
+      return (-1);
+    if (!file_exists(ctx->err, path))
+      continue;
+    paths[j] = xstrdup(ctx->err, path);
+    if (paths[j] == NULL)
+      return (-1);
+
+  }
+  return (0);
 }
diff --git a/src/ldcache.h b/src/ldcache.h
index 33d78dd..95b603e 100644
--- a/src/ldcache.h
+++ b/src/ldcache.h
@@ -50,6 +50,6 @@ void ldcache_init(struct ldcache *, struct error *, const char *);
 int  ldcache_open(struct ldcache *);
 int  ldcache_close(struct ldcache *);
 int  ldcache_resolve(struct ldcache *, uint32_t, const char *, const char * const [],
-    char *[], size_t, ldcache_select_fn, void *);
+    char *[], size_t);

 #endif /* HEADER_LDCACHE_H */
diff --git a/src/nvc_info.c b/src/nvc_info.c
index 85c9a4d..2464299 100644
--- a/src/nvc_info.c
+++ b/src/nvc_info.c
@@ -216,15 +216,15 @@ find_library_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_
         if (path_resolve_full(err, path, root, ldcache) < 0)
                 return (-1);
         ldcache_init(&ld, err, path);
-        if (ldcache_open(&ld) < 0)
-                return (-1);
+        //if (ldcache_open(&ld) < 0)
+        //        return (-1);

         info->nlibs = size;
         info->libs = array_new(err, size);
         if (info->libs == NULL)
                 goto fail;
         if (ldcache_resolve(&ld, LIB_ARCH, root, libs,
-            info->libs, info->nlibs, select_libraries_fn, info) < 0)
+            info->libs, info->nlibs) < 0)
                 goto fail;

         info->nlibs32 = size;
@@ -232,13 +232,13 @@ find_library_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_
         if (info->libs32 == NULL)
                 goto fail;
         if (ldcache_resolve(&ld, LIB32_ARCH, root, libs,
-            info->libs32, info->nlibs32, select_libraries_fn, info) < 0)
+            info->libs32, info->nlibs32) < 0)
                 goto fail;
         rv = 0;

  fail:
-        if (ldcache_close(&ld) < 0)
-                return (-1);
+        //if (ldcache_close(&ld) < 0)
+        //        return (-1);
         return (rv);
 }

Guix package definitions

Now we can specify the required Guix packages. Credit to Connor Clark and Luciano Laratelli for the initial work.

(define-module (babelfish packages nvidia-docker) ;; adapt module name for your own use
  #:use-module (guix gexp)
  #:use-module (guix git-download)
  #:use-module (guix utils)
  #:use-module (guix build utils)
  #:use-module (guix packages)
  #:use-module (guix build-system go)
  #:use-module (guix build-system gnu)
  #:use-module (guix licenses)
  #:use-module (gnu packages)
  #:use-module (gnu packages docker)
  #:use-module (gnu packages commencement)
  #:use-module (gnu packages onc-rpc)
  #:use-module (gnu packages golang)
  #:use-module (gnu packages check)
  #:use-module (gnu packages base)
  #:use-module (gnu packages curl)
  #:use-module (gnu packages version-control)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages gcc)
  #:use-module (gnu packages tls)
  #:use-module (gnu packages elf)
  #:use-module (gnu packages m4)
  #:use-module (gnu packages linux)
  #:use-module (nongnu packages nvidia))

(define-public nvidia-modprobe
  (package
    (name "nvidia-modprobe")
    (version "550.54.14")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/NVIDIA/nvidia-modprobe")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1a7q03pnwk3wa0p57whwv2mvz60bv77vvvaljqzwnscpyf94q548"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (add-before 'build 'set-correct-cflags
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (setenv "CFLAGS" "-fPIC")
              (display "setting CFLAGS\n")
              (substitute* "modprobe-utils/nvidia-modprobe-utils.c"
                (("^static int nvidia_cap_get_device_file_attrs")
                 "int nvidia_cap_get_device_file_attrs"))
              )
            )
          (add-after 'build 'build-static-link-libraries
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (invoke "ar" "rcs" "_out/Linux_x86_64/libnvidia-modprobe-utils.a" "_out/Linux_x86_64/nvidia-modprobe-utils.o" "_out/Linux_x86_64/pci-sysfs.o")
              (copy-recursively "_out/Linux_x86_64/" (string-append (assoc-ref %outputs "out") "/lib"))))
          (delete 'check)
          (add-after 'patch-source-shebangs 'replace-prefix
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (setenv "CC" "gcc")
              (setenv "PREFIX" (assoc-ref %outputs "out"))
              (copy-recursively "modprobe-utils/" (string-append (assoc-ref %outputs "out") "/include"))
              #true) ; must return true for success
            ))
      #:tests? #f))
    (native-inputs
     (list gcc-toolchain m4))
    (synopsis "Load the NVIDIA kernel module and create NVIDIA character device files")
    (description "Load the NVIDIA kernel module and create NVIDIA character device files")
    (home-page "https://github.com/NVIDIA/nvidia-modprobe")
    (license gpl2)))

(define-public libnvidia-container
  (package
    (name "libnvidia-container")
    (version "1.13.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/NVIDIA/libnvidia-container")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (patches (search-patches "libnvidia-container.patch"))
              (sha256
               (base32
                "0rzvh1zhh8pi5xjzaq3nmyzpcvjy41gq8w36dp1ai11a6j2lpa99"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (delete 'build)
          (delete 'check)
          (add-after 'unpack 'ensure-writable-source
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (setenv "HOME" "/tmp")
              (make-file-writable "src/ldcache.c")
              (make-file-writable "src/ldcache.h")
              (make-file-writable "src/nvc_info.c")))
          (add-after 'patch-source-shebangs 'replace-prefix
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (substitute* "Makefile"
                (("/usr/local") (assoc-ref outputs "out"))  ;this overrides the prefix
                (("debug??libdir?") "debug")  ;ensure debug files get installed in the correct subdir
                ((".*nvidia-modprobe.mk.*") "\n")
                (("^all: shared static tools")
                 "all: shared tools")
                ((".*LIB_STATIC.*libdir.*$") ""))
              (substitute* "mk/nvcgo.mk"
                ((".*-rf.*")
                 "\tmkdir -p ${SRCS_DIR} && echo \"sources dir: ${SRCS_DIR}\"\n")
                (("CURDIR./src/..PREFIX.")
                 "CURDIR)/src/$(PREFIX)/*")) ;deleting sources fails
              (substitute* "src/cli/libnvc.c"
                (("libnvidia-ml.so.1")
                 "/run/current-system/profile/lib/libnvidia-ml.so.1"))
              (substitute* "src/nvc_internal.h"
                (("libnvidia-ml.so.1")
                 "/run/current-system/profile/lib/libnvidia-ml.so.1"))
              (setenv "C_INCLUDE_PATH" (string-append (getenv "C_INCLUDE_PATH") ":" (string-append #$libtirpc "/include/tirpc")))
              (setenv "LIBRARY_PATH" (string-append (getenv "LIBRARY_PATH") ":" (string-append #$libtirpc "/lib")))
              (setenv "LDFLAGS" (string-append (or (getenv "LDFLAGS") "") " -ltirpc -lseccomp -lcap -Wl,-rpath=" (assoc-ref outputs "out") "/lib"))
              (setenv "CFLAGS" (string-append (or (getenv "CFLAGS") "") " -DWITH_TIRPC -g"))
              (substitute* "Makefile"
                (("^WITH_LIBELF.*no")
                 "WITH_LIBELF ?= yes"))
              (substitute* "mk/common.mk"
                (("^REVISION.*")
                 (string-append "REVISION ?= " #$version "\n" "CC := gcc\n")))
              #true) ; must return true for success
            ))
      #:tests? #f))
    (native-inputs
     (list libseccomp nvidia-modprobe which libtirpc libcap libelf git-minimal curl tar coreutils docker go gcc-toolchain rpcsvc-proto pkgconf))
    (synopsis "Build and run containers leveraging NVIDIA GPUs")
    (description "The NVIDIA Container Toolkit allows users to build and run GPU accelerated containers. The toolkit includes a container runtime library and utilities to automatically configure containers to leverage NVIDIA GPUs.")
    (home-page "https://github.com/NVIDIA/nvidia-container-toolkit")
    (license asl2.0)))

(define-public nvidia-container-toolkit
  (package
    (name "nvidia-container-toolkit")
    (version "1.13.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/NVIDIA/nvidia-container-toolkit")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "01gh57jfpcv07c4442lbf9wiy0l1iwl85ig9drpp0637gbkzgwa4"))))
    (build-system go-build-system)
    (arguments
     (list
      #:import-path "github.com/NVIDIA/nvidia-container-toolkit"
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'fix-paths
            (lambda* (#:key inputs outputs #:allow-other-keys)
              (substitute* "src/github.com/NVIDIA/nvidia-container-toolkit/internal/config/config.go"
(("/usr/bin")
                 "/run/current-system/profile/bin"))))
          (replace 'build
            (lambda arguments
              (for-each
               (lambda (directory)
                 (apply (assoc-ref %standard-phases 'build)
                        (append arguments (list #:import-path directory))))
               '("github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk"
                 "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime"
                 "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime-hook")))))
      #:tests? #f
      #:install-source? #f))
    (propagated-inputs
     (list libnvidia-container))
    (synopsis "Build and run containers leveraging NVIDIA GPUs")
    (description "The NVIDIA Container Toolkit allows users to build and run GPU accelerated containers. The toolkit includes a container runtime library and utilities to automatically configure containers to leverage NVIDIA GPUs.")
    (home-page "https://github.com/NVIDIA/nvidia-container-toolkit")
    (license asl2.0)))
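
For reference, building and installing these packages could look roughly like the following. This is only a sketch: it assumes the module and the patch live together in a directory that you pass to Guix with -L, and /path/to/channel is a placeholder for that directory (the patch needs to sit somewhere search-patches can find it, for instance at the root of that same directory).

# Sketch: build the toolkit from the local directory containing
# babelfish/packages/nvidia-docker.scm and libnvidia-container.patch.
guix build -L /path/to/channel nvidia-container-toolkit

# On Guix System, add nvidia-container-toolkit to the operating-system
# 'packages' field and reconfigure so it ends up in the system profile:
sudo guix system reconfigure -L /path/to/channel /etc/config.scm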

Enabling nvidia-container-runtime in Docker

Once nvidia-container-toolkit is built and installed, Docker needs to be made aware of the new runtime. To do so, modify the docker-service-type in your system configuration and provide a config-file as follows:

(service docker-service-type
         (docker-configuration
          (config-file (local-file "files/daemon.json"))))

where daemon.json contains the following:

{
    "runtimes": {
        "nvidia": {
            "args": [],
            "path": "/run/current-system/profile/bin/nvidia-container-runtime"
        }
    }
}
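
After reconfiguring, restart the Docker daemon and check that it picked up the new runtime. The commands below are just a sketch; the Shepherd service name and the exact output may differ on your system.

# Restart the Docker daemon after reconfiguring (the Shepherd service should be
# called dockerd; adjust if yours is named differently):
sudo herd restart dockerd

# List the runtimes the daemon knows about; "nvidia" should appear here:
docker info --format '{{json .Runtimes}}'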

Running an NVIDIA GPU-enabled container

Now comes the ugly bit… nvidia-container-toolkit seems to bind-mount certain binaries (such as nvidia-smi) into the container. Since these were installed with Guix, the binaries look for their own dependencies under /gnu/store. The "correct" way to fix this would seem to be to identify all the binaries mounted this way and recursively bind-mount their dependencies into the container from libnvidia-container. I'm not sure there's a quick way to accomplish that, so in the meantime the very ugly workaround is to bind-mount the entire /gnu/store directory into each GPU-enabled container you want to run. The following verifies that the approach works. We mount /gnu/store with the ro flag (read-only) to make sure nothing in the container can modify the store.

docker run --name pytorch -it --rm -v /gnu/store:/gnu/store:ro --runtime=nvidia -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=all --gpus all pytorch/pytorch:latest

Then, in a Python session inside the container, check whether we have access to the GPU:

Python 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.cuda.is_available()
True

Caveats

So how unsafe is this? Well, to my mind we're circumventing a level of isolation here and giving the containers full access to all the installed programs, libraries, etc. on the system. Provided the container doesn't run as root (or as your own user), you're effectively "just" increasing the attack surface of the container with everything under /gnu/store (of course, if the container is running as root you have much larger problems). That is, should any of the programs, scripts, or libraries there have a vulnerability that allows privilege escalation, a potential attacker would have all the tools on the system at their disposal to craft and execute an exploit. Of course, the attacker would first have to gain a shell in the container, but that's certainly not impossible. All in all, this setup works fine for playing around with GPU-enabled containers in Guix, but I would strongly caution against exposing any of the containers you run this way to the open internet.