From ccdb3e6dff0d9a75c0b488598a69e7a50c900d24 Mon Sep 17 00:00:00 2001
From: Evan Lezar
Date: Tue, 25 Nov 2025 15:57:29 +0100
Subject: [PATCH] Handle --gpus flag using CDI

This change switches to using CDI to handle the --gpus flag, removing
the custom implementation that invoked the nvidia-container-cli
directly. That mechanism did not align with existing implementations.

Signed-off-by: Evan Lezar
---
 docs/gpu.md                                 | 40 +++++++++++---
 pkg/cmd/container/run_linux.go              | 61 ++++++---------------
 pkg/composer/serviceparser/serviceparser.go | 15 +++--
 3 files changed, 62 insertions(+), 54 deletions(-)

diff --git a/docs/gpu.md b/docs/gpu.md
index 009170c1a37..cc254452502 100644
--- a/docs/gpu.md
+++ b/docs/gpu.md
@@ -9,8 +9,8 @@ nerdctl provides docker-compatible NVIDIA GPU support.
 
 - NVIDIA Drivers
   - Same requirement as when you use GPUs on Docker. For details, please refer to [the doc by NVIDIA](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#pre-requisites).
-- `nvidia-container-cli`
-  - containerd relies on this CLI for setting up GPUs inside container. You can install this via [`libnvidia-container` package](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/arch-overview.html#libnvidia-container).
+- The NVIDIA Container Toolkit
+  - containerd relies on the NVIDIA Container Toolkit to make GPUs usable inside a container. You can install the NVIDIA Container Toolkit by following the [official installation instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
 
 ## Options for `nerdctl run --gpus`
 
@@ -27,23 +27,24 @@ You can also pass detailed configuration to `--gpus` option as a list of key-val
 
 - `count`: number of GPUs to use. `all` exposes all available GPUs.
 - `device`: IDs of GPUs to use. UUID or numbers of GPUs can be specified.
-- `capabilities`: [Driver capabilities](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities). If unset, use default driver `utility`, `compute`.
 
 The following example exposes a specific GPU to the container.
 
 ```
-nerdctl run -it --rm --gpus '"capabilities=utility,compute",device=GPU-3a23c669-1f69-c64e-cf85-44e9b07e7a2a' nvidia/cuda:12.3.1-base-ubuntu20.04 nvidia-smi
+nerdctl run -it --rm --gpus 'device=GPU-3a23c669-1f69-c64e-cf85-44e9b07e7a2a' nvidia/cuda:12.3.1-base-ubuntu20.04 nvidia-smi
 ```
 
+Note that although `capabilities` options may be provided, these are ignored when processing the GPU request.
+
 ## Fields for `nerdctl compose`
 
 `nerdctl compose` also supports GPUs following [compose-spec](https://github.com/compose-spec/compose-spec/blob/master/deploy.md#devices).
 
-You can use GPUs on compose when you specify some of the following `capabilities` in `services.demo.deploy.resources.reservations.devices`.
+You can use GPUs on compose when you specify the `driver` as `nvidia` or one or
+more of the following `capabilities` in `services.demo.deploy.resources.reservations.devices`.
 
 - `gpu`
 - `nvidia`
-- all allowed capabilities for `nerdctl run --gpus`
 
 Available fields are the same as `nerdctl run --gpus`.
 
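As a rough, standalone sketch of where the documented `--gpus` forms end up (illustrative only, not code from this patch; `gpuRequest` stands in for nerdctl's `GPUReq` as produced by `ParseGPUOptCSV`), every requested GPU becomes a fully-qualified CDI device name in the `nvidia.com/gpu` class:

```go
package main

import "fmt"

// gpuRequest is a stand-in for nerdctl's GPUReq: the parsed form of the
// --gpus value documented above (field names assumed for illustration).
type gpuRequest struct {
	DeviceIDs []string // from device=...
	Count     int      // from count=...; -1 means "all"
}

// cdiDeviceNames mirrors the mapping introduced by this patch: explicit
// device IDs are used verbatim, a negative count means all GPUs, and
// otherwise GPU indices 0..count-1 are requested.
func cdiDeviceNames(req gpuRequest) []string {
	ids := req.DeviceIDs
	switch {
	case len(ids) > 0:
		// keep the explicit UUIDs or indices
	case req.Count < 0:
		ids = []string{"all"}
	default:
		for i := 0; i < req.Count; i++ {
			ids = append(ids, fmt.Sprintf("%d", i))
		}
	}
	names := make([]string, 0, len(ids))
	for _, id := range ids {
		names = append(names, "nvidia.com/gpu="+id)
	}
	return names
}

func main() {
	fmt.Println(cdiDeviceNames(gpuRequest{Count: -1})) // [nvidia.com/gpu=all]
	fmt.Println(cdiDeviceNames(gpuRequest{Count: 2}))  // [nvidia.com/gpu=0 nvidia.com/gpu=1]
	fmt.Println(cdiDeviceNames(gpuRequest{DeviceIDs: []string{"GPU-3a23c669-1f69-c64e-cf85-44e9b07e7a2a"}}))
}
```

The same rules appear in the `pkg/cmd/container/run_linux.go` hunk further down in this patch.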
@@ -59,12 +60,37 @@ services:
     resources:
       reservations:
         devices:
-        - capabilities: ["utility"]
+        - driver: nvidia
           count: all
 ```
 
 ## Trouble Shooting
 
+### `nerdctl run --gpus` fails due to an unresolvable CDI device
+
+If the required CDI specifications for NVIDIA devices are not available on the
+system, the `nerdctl run` command will fail with an error similar to: `CDI device injection failed: unresolvable CDI devices nvidia.com/gpu=all` (the
+exact error message will depend on the device(s) requested).
+
+This should be the same error message that is reported when the `--device` flag
+is used to request a CDI device:
+```
+nerdctl run --device=nvidia.com/gpu=all
+```
+
+Ensure that the NVIDIA Container Toolkit (>= v1.18.0 is recommended) is installed and the requested CDI devices are present in the output of `nvidia-ctk cdi list`:
+
+```
+$ nvidia-ctk cdi list
+INFO[0000] Found 3 CDI devices
+nvidia.com/gpu=0
+nvidia.com/gpu=GPU-3eb87630-93d5-b2b6-b8ff-9b359caf4ee2
+nvidia.com/gpu=all
+```
+
+See the NVIDIA Container Toolkit [CDI documentation](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html) for more information.
+
+
 ### `nerdctl run --gpus` fails when using the Nvidia gpu-operator
 
 If the Nvidia driver is installed by the [gpu-operator](https://github.com/NVIDIA/gpu-operator).The `nerdctl run` will fail with the error message `(FATA[0000] exec: "nvidia-container-cli": executable file not found in $PATH)`.
diff --git a/pkg/cmd/container/run_linux.go b/pkg/cmd/container/run_linux.go
index 3280d3e532d..851307d905e 100644
--- a/pkg/cmd/container/run_linux.go
+++ b/pkg/cmd/container/run_linux.go
@@ -25,7 +25,6 @@ import (
 	"github.com/opencontainers/runtime-spec/specs-go"
 
 	containerd "github.com/containerd/containerd/v2/client"
-	"github.com/containerd/containerd/v2/contrib/nvidia"
 	"github.com/containerd/containerd/v2/core/containers"
 	"github.com/containerd/containerd/v2/pkg/oci"
 	"github.com/containerd/log"
@@ -99,7 +98,7 @@ func setPlatformOptions(ctx context.Context, client *containerd.Client, id, uts
 	if options.Sysctl != nil {
 		opts = append(opts, WithSysctls(strutil.ConvertKVStringsToMap(options.Sysctl)))
 	}
-	gpuOpt, err := parseGPUOpts(options.GPUs)
+	gpuOpt, err := parseGPUOpts(options.GOptions.CDISpecDirs, options.GPUs)
 	if err != nil {
 		return nil, err
 	}
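The call site above now passes `options.GOptions.CDISpecDirs` through to `parseGPUOpts`. nerdctl's existing `withCDIDevices` helper (referenced in the next hunk, but not modified by this patch) is what ultimately consumes those directories. The sketch below only approximates that flow using the CDI cache API from `tags.cncf.io/container-device-interface`; the helper name and exact API usage are assumptions, not code from this change:

```go
package main

import (
	"fmt"

	"github.com/opencontainers/runtime-spec/specs-go"
	"tags.cncf.io/container-device-interface/pkg/cdi"
)

// injectCDIDevices is a hypothetical helper: it resolves fully-qualified CDI
// device names (e.g. "nvidia.com/gpu=all") against the configured spec
// directories and edits the OCI runtime spec accordingly.
func injectCDIDevices(ociSpec *specs.Spec, specDirs []string, devices ...string) error {
	if len(devices) == 0 {
		return nil
	}
	// Build a CDI cache from the spec directories (e.g. /etc/cdi, /var/run/cdi);
	// this is where the CDISpecDirs passed above come into play.
	cache, err := cdi.NewCache(cdi.WithSpecDirs(specDirs...))
	if err != nil {
		return fmt.Errorf("failed to create CDI cache: %w", err)
	}
	// Names that cannot be resolved against any spec surface as errors like
	// the "unresolvable CDI devices" message described in docs/gpu.md above.
	if _, err := cache.InjectDevices(ociSpec, devices...); err != nil {
		return fmt.Errorf("CDI device injection failed: %w", err)
	}
	return nil
}

func main() {
	spec := &specs.Spec{}
	if err := injectCDIDevices(spec, []string{"/etc/cdi", "/var/run/cdi"}, "nvidia.com/gpu=all"); err != nil {
		fmt.Println(err)
	}
}
```

If none of the spec directories contain an NVIDIA CDI specification, this injection step is what produces the error covered in the troubleshooting section above.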
@@ -262,60 +261,36 @@ func withOOMScoreAdj(score int) oci.SpecOpts {
 	}
 }
 
-func parseGPUOpts(value []string) (res []oci.SpecOpts, _ error) {
+func parseGPUOpts(cdiSpecDirs []string, value []string) (res []oci.SpecOpts, _ error) {
 	for _, gpu := range value {
-		gpuOpt, err := parseGPUOpt(gpu)
+		req, err := ParseGPUOptCSV(gpu)
 		if err != nil {
 			return nil, err
 		}
-		res = append(res, gpuOpt)
+		res = append(res, withCDIDevices(cdiSpecDirs, req.toCDIDeviceIDS()...))
 	}
 	return res, nil
 }
 
-func parseGPUOpt(value string) (oci.SpecOpts, error) {
-	req, err := ParseGPUOptCSV(value)
-	if err != nil {
-		return nil, err
+func (req *GPUReq) toCDIDeviceIDS() []string {
+	var cdiDeviceIDs []string
+	for _, id := range req.normalizeDeviceIDs() {
+		cdiDeviceIDs = append(cdiDeviceIDs, "nvidia.com/gpu="+id)
 	}
+	return cdiDeviceIDs
+}
 
-	var gpuOpts []nvidia.Opts
-
+func (req *GPUReq) normalizeDeviceIDs() []string {
 	if len(req.DeviceIDs) > 0 {
-		gpuOpts = append(gpuOpts, nvidia.WithDeviceUUIDs(req.DeviceIDs...))
-	} else if req.Count > 0 {
-		var devices []int
-		for i := 0; i < req.Count; i++ {
-			devices = append(devices, i)
-		}
-		gpuOpts = append(gpuOpts, nvidia.WithDevices(devices...))
-	} else if req.Count < 0 {
-		gpuOpts = append(gpuOpts, nvidia.WithAllDevices)
+		return req.DeviceIDs
 	}
-
-	str2cap := make(map[string]nvidia.Capability)
-	for _, c := range nvidia.AllCaps() {
-		str2cap[string(c)] = c
-	}
-	var nvidiaCaps []nvidia.Capability
-	for _, c := range req.Capabilities {
-		if cp, isNvidiaCap := str2cap[c]; isNvidiaCap {
-			nvidiaCaps = append(nvidiaCaps, cp)
-		}
+	if req.Count < 0 {
+		return []string{"all"}
 	}
-	if len(nvidiaCaps) != 0 {
-		gpuOpts = append(gpuOpts, nvidia.WithCapabilities(nvidiaCaps...))
-	} else {
-		// Add "utility", "compute" capability if unset.
-		// Please see also: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
-		gpuOpts = append(gpuOpts, nvidia.WithCapabilities(nvidia.Utility, nvidia.Compute))
-	}
-
-	if rootlessutil.IsRootless() {
-		// "--no-cgroups" option is needed to nvidia-container-cli in rootless environment
-		// Please see also: https://github.com/moby/moby/issues/38729#issuecomment-463493866
-		gpuOpts = append(gpuOpts, nvidia.WithNoCgroups)
+	var ids []string
+	for i := 0; i < req.Count; i++ {
+		ids = append(ids, fmt.Sprintf("%d", i))
 	}
-
-	return nvidia.WithGPUs(gpuOpts...), nil
+	return ids
 }
diff --git a/pkg/composer/serviceparser/serviceparser.go b/pkg/composer/serviceparser/serviceparser.go
index 804250f80ec..534acabcfd9 100644
--- a/pkg/composer/serviceparser/serviceparser.go
+++ b/pkg/composer/serviceparser/serviceparser.go
@@ -30,7 +30,6 @@ import (
 
 	"github.com/compose-spec/compose-go/v2/types"
 
-	"github.com/containerd/containerd/v2/contrib/nvidia"
 	"github.com/containerd/log"
 
 	"github.com/containerd/nerdctl/v2/pkg/identifiers"
@@ -262,9 +261,17 @@ func getMemLimit(svc types.ServiceConfig) (types.UnitBytes, error) {
 func getGPUs(svc types.ServiceConfig) (reqs []string, _ error) {
 	// "gpu" and "nvidia" are also allowed capabilities (but not used as nvidia driver capabilities)
 	// https://github.com/moby/moby/blob/v20.10.7/daemon/nvidia_linux.go#L37
-	capset := map[string]struct{}{"gpu": {}, "nvidia": {}}
-	for _, c := range nvidia.AllCaps() {
-		capset[string(c)] = struct{}{}
+	capset := map[string]struct{}{
+		"gpu": {}, "nvidia": {},
+		// Allow the list of capabilities here (excluding "all" and "none")
+		// https://github.com/NVIDIA/nvidia-container-toolkit/blob/ff7c2d4866a7d46d1bf2a83590b263e10ec99cb5/internal/config/image/capabilities.go#L28-L38
+		"compat32": {},
+		"compute":  {},
+		"display":  {},
+		"graphics": {},
+		"ngx":      {},
+		"utility":  {},
+		"video":    {},
 	}
 	if svc.Deploy != nil && svc.Deploy.Resources.Reservations != nil {
 		for _, dev := range svc.Deploy.Resources.Reservations.Devices {
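The capability set above is what `getGPUs` consults for compose `deploy.resources.reservations.devices` entries; the rest of `getGPUs` is untouched by this patch and not shown here. A standalone illustration of the membership check follows (the helper name and the exact error behaviour are assumptions for the example, not code from this change):

```go
package main

import "fmt"

// allowedGPUCapabilities mirrors the set accepted by getGPUs: the generic
// "gpu"/"nvidia" markers plus the NVIDIA driver capabilities listed in the
// nvidia-container-toolkit (excluding "all" and "none").
var allowedGPUCapabilities = map[string]struct{}{
	"gpu": {}, "nvidia": {},
	"compat32": {}, "compute": {}, "display": {}, "graphics": {},
	"ngx": {}, "utility": {}, "video": {},
}

// validateComposeGPUCapabilities reports the first capability in a compose
// device reservation that is not in the allowed set.
func validateComposeGPUCapabilities(caps []string) error {
	for _, c := range caps {
		if _, ok := allowedGPUCapabilities[c]; !ok {
			return fmt.Errorf("unsupported GPU capability %q", c)
		}
	}
	return nil
}

func main() {
	fmt.Println(validateComposeGPUCapabilities([]string{"gpu", "utility"})) // <nil>
	fmt.Println(validateComposeGPUCapabilities([]string{"all"}))            // unsupported GPU capability "all"
}
```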