From 94ce80a4e569786f00690a8df9fe2225de141ae0 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 22 Jul 2025 12:32:08 +0200 Subject: [PATCH 1/7] [no-relnote] Fix typo in test Signed-off-by: Evan Lezar --- internal/platform-support/tegra/csv_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/platform-support/tegra/csv_test.go b/internal/platform-support/tegra/csv_test.go index fa717a646..1fcda971b 100644 --- a/internal/platform-support/tegra/csv_test.go +++ b/internal/platform-support/tegra/csv_test.go @@ -213,10 +213,10 @@ func TestDiscovererFromCSVFiles(t *testing.T) { } } -func setGetTargetsFromCSVFiles(ovverride map[csv.MountSpecType][]string) func() { +func setGetTargetsFromCSVFiles(override map[csv.MountSpecType][]string) func() { original := getTargetsFromCSVFiles getTargetsFromCSVFiles = func(logger logger.Interface, files []string) map[csv.MountSpecType][]string { - return ovverride + return override } return func() { From e7a1e659314f533eec25eceabd00a11f59585eca Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 27 Nov 2023 15:01:02 +0100 Subject: [PATCH 2/7] Create .so and SONAME symlinks for driver libraries This change ensures that .so and SONAME symlinks are created for driver libraries in the container. Signed-off-by: Evan Lezar --- CHANGELOG.md | 1 + internal/discover/symlinks.go | 136 +++++++++++++++- internal/discover/symlinks_test.go | 174 +++++++++++++++++++++ internal/platform-support/tegra/csv.go | 1 + pkg/nvcdi/driver-nvml.go | 1 + tests/e2e/nvidia-container-toolkit_test.go | 108 +++++++++++-- 6 files changed, 409 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0aeb5f86..83cb821a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## v1.18.0-rc.2 +- Ensure that .so symlinks are created for driver libraries in the container - Load settings from config.toml file during CDI generation - Use securejoin to resolve /proc - Refactor nvml CDI spec generation for consistency diff --git a/internal/discover/symlinks.go b/internal/discover/symlinks.go index a9cd811ad..e4aa26e66 100644 --- a/internal/discover/symlinks.go +++ b/internal/discover/symlinks.go @@ -17,11 +17,26 @@ package discover import ( + "debug/elf" "fmt" + "os" "path/filepath" + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" ) +type Symlink struct { + target string + link string +} + +func (s *Symlink) String() string { + return fmt.Sprintf("%s::%s", s.target, s.link) +} + type additionalSymlinks struct { + logger logger.Interface Discover version string hookCreator HookCreator @@ -29,11 +44,12 @@ type additionalSymlinks struct { // WithDriverDotSoSymlinks decorates the provided discoverer. // A hook is added that checks for specific driver symlinks that need to be created. -func WithDriverDotSoSymlinks(mounts Discover, version string, hookCreator HookCreator) Discover { +func WithDriverDotSoSymlinks(logger logger.Interface, mounts Discover, version string, hookCreator HookCreator) Discover { if version == "" { version = "*.*" } return &additionalSymlinks{ + logger: logger, Discover: mounts, hookCreator: hookCreator, version: version, @@ -60,7 +76,14 @@ func (d *additionalSymlinks) Hooks() ([]Hook, error) { } processedPaths[mount.Path] = true - for _, link := range d.getLinksForMount(mount.Path) { + linksForMount := d.getLinksForMount(mount.Path) + soSymlinks, err := d.getDotSoSymlinks(mount.HostPath, mount.Path) + if err != nil { + d.logger.Warningf("Failed to get soname symlinks for %+v: %v", mount, err) + } + linksForMount = append(linksForMount, soSymlinks...) + + for _, link := range linksForMount { if processedLinks[link] { continue } @@ -110,3 +133,112 @@ func (d additionalSymlinks) isDriverLibrary(libraryName string, filename string) match, _ := filepath.Match(pattern, filename) return match } + +func (d *additionalSymlinks) getDotSoSymlinks(hostLibraryPath string, libraryContainerPath string) ([]string, error) { + hostLibraryDir := filepath.Dir(hostLibraryPath) + containerLibraryDir, libraryName := filepath.Split(libraryContainerPath) + if !d.isDriverLibrary("*", libraryName) { + return nil, nil + } + + soname, err := getSoname(hostLibraryPath) + if err != nil { + return nil, err + } + + var soSymlinks []string + // Create the SONAME -> libraryName symlink. + // If the soname matches the library path, or the expected SONAME link does + // not exist on the host, we do not create it in the container. + if soname != libraryName && d.linkExistsInDir(hostLibraryDir, soname) { + s := Symlink{ + target: libraryName, + link: filepath.Join(containerLibraryDir, soname), + } + soSymlinks = append(soSymlinks, s.String()) + } + + soTarget := soname + if soTarget == "" { + soTarget = libraryName + } + // Create the .so -> SONAME symlink. + // If the .so link name matches the SONAME link, or the expected .so link + // does not exist on the host, we do not create it in the container. + if soLink := getSoLink(soTarget); soLink != soTarget && d.linkExistsInDir(hostLibraryDir, soLink) { + s := Symlink{ + target: soTarget, + link: filepath.Join(containerLibraryDir, soLink), + } + soSymlinks = append(soSymlinks, s.String()) + } + return soSymlinks, nil +} + +func (d *additionalSymlinks) linkExistsInDir(dir string, link string) bool { + if link == "" { + return false + } + linkPath := filepath.Join(dir, link) + exists, err := linkExists(linkPath) + if err != nil { + d.logger.Warningf("Failed to check symlink %q: %v", linkPath, err) + return false + } + return exists +} + +// linkExists returns true if the specified symlink exists. +// We use a function variable here to allow this to be overridden for testing. +var linkExists = func(linkPath string) (bool, error) { + info, err := os.Lstat(linkPath) + if os.IsNotExist(err) { + return false, nil + } + if err != nil { + return false, err + } + // The linkPath is a symlink. + if info.Mode()&os.ModeSymlink != 0 { + return true, nil + } + + return false, nil +} + +// getSoname returns the soname for the specified library path. +// We use a function variable here to allow this to be overridden for testing. +var getSoname = func(libraryPath string) (string, error) { + lib, err := elf.Open(libraryPath) + if err != nil { + return "", err + } + defer lib.Close() + + sonames, err := lib.DynString(elf.DT_SONAME) + if err != nil { + return "", err + } + if len(sonames) > 1 { + return "", fmt.Errorf("multiple SONAMEs detected for %v: %v", libraryPath, sonames) + } + if len(sonames) == 0 { + return filepath.Base(libraryPath), nil + } + return sonames[0], nil +} + +// getSoLink returns the filename for the .so symlink that should point to the +// soname symlink for the specified library. +// If the soname / library name does not end in a `.so[.*]` then an empty string +// is returned. +func getSoLink(soname string) string { + ext := filepath.Ext(soname) + if ext == "" { + return "" + } + if ext == ".so" { + return soname + } + return getSoLink(strings.TrimSuffix(soname, ext)) +} diff --git a/internal/discover/symlinks_test.go b/internal/discover/symlinks_test.go index e9a6c6f1e..c5ed66685 100644 --- a/internal/discover/symlinks_test.go +++ b/internal/discover/symlinks_test.go @@ -17,12 +17,15 @@ package discover import ( + "strings" "testing" + testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) func TestWithWithDriverDotSoSymlinks(t *testing.T) { + logger, _ := testlog.NewNullLogger() testCases := []struct { description string discover Discover @@ -315,6 +318,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { d := WithDriverDotSoSymlinks( + logger, tc.discover, tc.version, hookCreator, @@ -334,3 +338,173 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { }) } } + +func TestGetDotSoSymlinks(t *testing.T) { + testCases := []struct { + description string + hostLibraryPath string + containerLibraryPath string + getSonameFunc func(string) (string, error) + linkExistsFunc func(string) (bool, error) + expectedError error + expectedSymlinks []string + }{ + { + description: "libcuda.soname links", + hostLibraryPath: "/usr/lib/libcuda.so.999.88.77", + getSonameFunc: func(s string) (string, error) { + return "libcuda.so.1", nil + }, + expectedError: nil, + expectedSymlinks: []string{ + "libcuda.so.999.88.77::/usr/lib/libcuda.so.1", + "libcuda.so.1::/usr/lib/libcuda.so", + }, + }, + { + description: "libcuda.soname links uses container path", + hostLibraryPath: "/usr/lib/libcuda.so.999.88.77", + containerLibraryPath: "/some/container/path/libcuda.so.999.88.77", + getSonameFunc: func(s string) (string, error) { + return "libcuda.so.1", nil + }, + expectedError: nil, + expectedSymlinks: []string{ + "libcuda.so.999.88.77::/some/container/path/libcuda.so.1", + "libcuda.so.1::/some/container/path/libcuda.so", + }, + }, + { + description: "equal soname uses library path", + hostLibraryPath: "/usr/lib/libcuda.so.999.88.77", + getSonameFunc: func(s string) (string, error) { + return "libcuda.so.999.88.77", nil + }, + expectedError: nil, + expectedSymlinks: []string{ + "libcuda.so.999.88.77::/usr/lib/libcuda.so", + }, + }, + { + description: "nonexistent symlink is ignored", + hostLibraryPath: "/usr/lib/libcuda.so.999.88.77", + getSonameFunc: func(s string) (string, error) { + return "libcuda.so.1", nil + }, + expectedError: nil, + linkExistsFunc: func(s string) (bool, error) { + return strings.HasSuffix(s, "libcuda.so.1"), nil + }, + expectedSymlinks: []string{ + "libcuda.so.999.88.77::/usr/lib/libcuda.so.1", + }, + }, + { + description: "soname is skipped", + hostLibraryPath: "/usr/lib/libcuda.so.999.88.77", + getSonameFunc: func(s string) (string, error) { + return "", nil + }, + expectedError: nil, + linkExistsFunc: func(s string) (bool, error) { + return strings.HasSuffix(s, "libcuda.so"), nil + }, + expectedSymlinks: []string{ + "libcuda.so.999.88.77::/usr/lib/libcuda.so", + }, + }, + } + + for _, tc := range testCases { + if tc.containerLibraryPath == "" { + tc.containerLibraryPath = tc.hostLibraryPath + } + if tc.linkExistsFunc == nil { + tc.linkExistsFunc = func(string) (bool, error) { + return true, nil + } + } + + t.Run(tc.description, func(t *testing.T) { + defer setGetSoname(tc.getSonameFunc)() + defer setLinkExists(tc.linkExistsFunc)() + + sut := &additionalSymlinks{version: "*.*"} + symlinks, err := sut.getDotSoSymlinks(tc.hostLibraryPath, tc.containerLibraryPath) + + if tc.expectedError == nil { + require.NoError(t, err) + } else { + require.EqualError(t, err, tc.expectedError.Error()) + } + + require.EqualValues(t, tc.expectedSymlinks, symlinks) + }) + } +} + +func TestGetSoLink(t *testing.T) { + testCases := []struct { + description string + input string + expectedSoLink string + }{ + { + description: "empty string", + input: "", + expectedSoLink: "", + }, + { + description: "cuda driver library", + input: "libcuda.so.999.88.77", + expectedSoLink: "libcuda.so", + }, + { + description: "beta cuda driver library", + input: "libcuda.so.999.88", + expectedSoLink: "libcuda.so", + }, + { + description: "no .so in libname", + input: "foo.bar.baz", + expectedSoLink: "", + }, + { + description: "multiple .so in libname", + input: "foo.so.so.566", + expectedSoLink: "foo.so.so", + }, + { + description: "no suffix after so", + input: "foo.so", + expectedSoLink: "foo.so", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + + soLink := getSoLink(tc.input) + + require.Equal(t, tc.expectedSoLink, soLink) + }) + } +} + +func setGetSoname(override func(string) (string, error)) func() { + original := getSoname + getSoname = override + + return func() { + getSoname = original + } +} + +func setLinkExists(override func(string) (bool, error)) func() { + original := linkExists + linkExists = override + + return func() { + linkExists = original + } +} diff --git a/internal/platform-support/tegra/csv.go b/internal/platform-support/tegra/csv.go index ca760ec5c..edb7fdc48 100644 --- a/internal/platform-support/tegra/csv.go +++ b/internal/platform-support/tegra/csv.go @@ -52,6 +52,7 @@ func (o tegraOptions) newDiscovererFromCSVFiles() (discover.Discover, error) { // We create a discoverer for mounted libraries and add additional .so // symlinks for the driver. libraries := discover.WithDriverDotSoSymlinks( + o.logger, discover.NewMounts( o.logger, o.symlinkLocator, diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 51592ff5c..6fe7b9c79 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -100,6 +100,7 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover var discoverers []discover.Discover driverDotSoSymlinksDiscoverer := discover.WithDriverDotSoSymlinks( + l.logger, libraries, version, l.hookCreator, diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index 749e5afef..7922a13d0 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -29,6 +29,8 @@ import ( // Integration tests for Docker runtime var _ = Describe("docker", Ordered, ContinueOnFailure, func() { var runner Runner + var hostDriverVersion string + var hostDriverMajor string // Install the NVIDIA Container Toolkit BeforeAll(func(ctx context.Context) { @@ -50,6 +52,15 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { err = installer.Install() Expect(err).ToNot(HaveOccurred()) } + + driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"") + Expect(err).ToNot(HaveOccurred()) + parts := strings.SplitN(driverOutput, ":", 2) + Expect(parts).To(HaveLen(2)) + + hostDriverVersion = strings.TrimSpace(parts[1]) + Expect(hostDriverVersion).ToNot(BeEmpty()) + hostDriverMajor = strings.SplitN(hostDriverVersion, ".", 2)[0] }) // GPUs are accessible in a container: Running nvidia-smi -L inside the @@ -184,16 +195,7 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { compatDriverVersion := strings.TrimPrefix(filepath.Base(compatOutput), "libcuda.so.") compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0] - driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"") - Expect(err).ToNot(HaveOccurred()) - parts := strings.SplitN(driverOutput, ":", 2) - Expect(parts).To(HaveLen(2)) - - hostDriverVersion := strings.TrimSpace(parts[1]) - Expect(hostDriverVersion).ToNot(BeEmpty()) - driverMajor := strings.SplitN(hostDriverVersion, ".", 2)[0] - - if driverMajor >= compatMajor { + if hostDriverMajor >= compatMajor { GinkgoLogr.Info("CUDA Forward Compatibility tests require an older driver version", "hostDriverVersion", hostDriverVersion, "compatDriverVersion", compatDriverVersion) Skip("CUDA Forward Compatibility tests require an older driver version") } @@ -241,6 +243,8 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { BeforeAll(func(ctx context.Context) { _, _, err := runner.Run("docker pull ubuntu") Expect(err).ToNot(HaveOccurred()) + _, _, err = runner.Run("docker pull busybox") + Expect(err).ToNot(HaveOccurred()) }) It("should include libcuda.so in the ldcache", func(ctx context.Context) { @@ -257,6 +261,90 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"})) }) + + It("should include .so and SONAME symlinks", func(ctx context.Context) { + symlinkOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all busybox ls -l /usr/lib/x86_64-linux-gnu/ | awk '{print $1, $9, $11}'") + Expect(err).ToNot(HaveOccurred()) + Expect(symlinkOutput).ToNot(BeEmpty()) + + // This produces output similar to: + // We check this to ensure that we have valid driver library symlink + // chains. + // lrwxrwxrwx libcuda.so libcuda.so.1 + // lrwxrwxrwx libcuda.so.1 libcuda.so.570.133.20 + // -rw-r--r-- libcuda.so.570.133.20 + // lrwxrwxrwx libcudadebugger.so libcudadebugger.so.1 + // lrwxrwxrwx libcudadebugger.so.1 libcudadebugger.so.570.133.20 + // -rw-r--r-- libcudadebugger.so.570.133.20 + // lrwxrwxrwx libnvidia-ml.so libnvidia-ml.so.1 + // lrwxrwxrwx libnvidia-ml.so.1 libnvidia-ml.so.570.133.20 + // -rw-r--r-- libnvidia-ml.so.570.133.20 + // lrwxrwxrwx libnvidia-nvvm.so libnvidia-nvvm.so.4 + // lrwxrwxrwx libnvidia-nvvm.so.4 libnvidia-nvvm.so.570.133.20 + // -rw-r--r-- libnvidia-nvvm.so.570.133.20 + // lrwxrwxrwx libnvidia-opencl.so libnvidia-opencl.so.1 + // lrwxrwxrwx libnvidia-opencl.so.1 libnvidia-opencl.so.570.133.20 + // -rw-r--r-- libnvidia-opencl.so.570.133.20 + // -rw-r--r-- libnvidia-pkcs11-openssl3.so.570.133.20 + // -rw-r--r-- libnvidia-pkcs11.so.570.133.20 + // lrwxrwxrwx libnvidia-ptxjitcompiler.so libnvidia-ptxjitcompiler.so.1 + // lrwxrwxrwx libnvidia-ptxjitcompiler.so.1 libnvidia-ptxjitcompiler.so.570.133.20 + // -rw-r--r-- libnvidia-ptxjitcompiler.so.570.133.20 + + symlinkOutputLines := strings.Split(symlinkOutput, "\n") + symlinks := make(map[string][]string) + var soSymlink string + for _, line := range symlinkOutputLines { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + soSymlink = "" + continue + } + // We only consider links and regular files. + if trimmed[0] != 'l' && trimmed[0] != '-' { + soSymlink = "" + continue + } + fmt.Printf("trimmed = %q\n", trimmed) + parts := strings.SplitN(trimmed, " ", 3) + permissionString := parts[0] + + // If the line represents a symlink we may have to handle a new symlink chain. + if strings.HasPrefix(permissionString, "l") && soSymlink == "" { + if strings.HasSuffix(parts[1], ".so") { + soSymlink = parts[1] + } + } + + // Keep track of the symlink chain. + symlinks[soSymlink] = append(symlinks[soSymlink], parts[1:]...) + + // We assume a regular file is the end of a symlink chain. + if strings.HasPrefix(permissionString, "-") { + soSymlink = "" + } + } + + // The symlink chains have the pattern: + // [A.so, A.so.1, A.so.1, A.so.driverVersion, A.so.driverVersion] + // A has the suffix .so. + Expect(symlinks).ToNot(BeEmpty()) + for soSymlink, chain := range symlinks { + if soSymlink == "" { + continue + } + Expect(chain).To(HaveLen(5)) + for _, c := range chain { + Expect(c).To(HavePrefix(soSymlink)) + } + Expect(chain[0]).To(HaveSuffix(".so")) + Expect(chain[1]).To(Equal(chain[2])) + Expect(chain[3]).To(Equal(chain[4])) + Expect(chain[3]).To(HaveSuffix(hostDriverVersion)) + Expect(chain[4]).To(HaveSuffix(hostDriverVersion)) + } + Expect(symlinks).To(And(HaveKey("libcuda.so"), HaveKey("libnvidia-ml.so"))) + }) }) When("Running containers with shared mount propagation", Ordered, func() { From 5b74b44f8a77c5faf5a3b111383948e4e881ef63 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 21 Jul 2025 12:08:15 +0200 Subject: [PATCH 3/7] Remove create-soname-symlinks hook This change removes the create-soname-symlinks hook introduced in v1.18.0-rc.1. Instead we rely on explicitly creating the .so -> SONAME -> .so.RM_VERSION symlink chain through the create-symlink hook. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/commands/commands.go | 2 - .../create-soname-symlinks/soname-symlinks.go | 166 ------------------ .../toolkit/toolkit_test.go | 9 - cmd/nvidia-ctk/cdi/generate/generate_test.go | 27 --- internal/discover/hooks.go | 3 - internal/discover/ldconfig.go | 7 +- internal/discover/ldconfig_test.go | 24 --- internal/ldconfig/ldconfig.go | 23 --- pkg/nvcdi/api.go | 3 - 9 files changed, 1 insertion(+), 263 deletions(-) delete mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go index 16efc098f..470755690 100644 --- a/cmd/nvidia-cdi-hook/commands/commands.go +++ b/cmd/nvidia-cdi-hook/commands/commands.go @@ -20,7 +20,6 @@ import ( "github.com/urfave/cli/v3" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" - createsonamesymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks" symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat" disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification" @@ -36,7 +35,6 @@ func New(logger logger.Interface) []*cli.Command { symlinks.NewCommand(logger), chmod.NewCommand(logger), cudacompat.NewCommand(logger), - createsonamesymlinks.NewCommand(logger), disabledevicenodemodification.NewCommand(logger), } } diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go deleted file mode 100644 index ab1beb388..000000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go +++ /dev/null @@ -1,166 +0,0 @@ -/** -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import ( - "context" - "errors" - "fmt" - "log" - "os" - - "github.com/moby/sys/reexec" - "github.com/urfave/cli/v3" - - "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" - "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" - "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" -) - -const ( - reexecUpdateLdCacheCommandName = "reexec-create-soname-symlinks" -) - -type command struct { - logger logger.Interface -} - -type options struct { - folders []string - ldconfigPath string - containerSpec string -} - -func init() { - reexec.Register(reexecUpdateLdCacheCommandName, createSonameSymlinksHandler) - if reexec.Init() { - os.Exit(0) - } -} - -// NewCommand constructs an create-soname-symlinks command with the specified logger -func NewCommand(logger logger.Interface) *cli.Command { - c := command{ - logger: logger, - } - return c.build() -} - -// build the create-soname-symlinks command -func (m command) build() *cli.Command { - cfg := options{} - - // Create the 'create-soname-symlinks' command - c := cli.Command{ - Name: "create-soname-symlinks", - Usage: "Create soname symlinks libraries in specified directories", - Before: func(ctx context.Context, cmd *cli.Command) (context.Context, error) { - return ctx, m.validateFlags(cmd, &cfg) - }, - Action: func(ctx context.Context, cmd *cli.Command) error { - return m.run(cmd, &cfg) - }, - Flags: []cli.Flag{ - &cli.StringSliceFlag{ - Name: "folder", - Usage: "Specify a directory to generate soname symlinks in. Can be specified multiple times", - Destination: &cfg.folders, - }, - &cli.StringFlag{ - Name: "ldconfig-path", - Usage: "Specify the path to ldconfig on the host", - Destination: &cfg.ldconfigPath, - Value: "/sbin/ldconfig", - }, - &cli.StringFlag{ - Name: "container-spec", - Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN", - Destination: &cfg.containerSpec, - }, - }, - } - - return &c -} - -func (m command) validateFlags(_ *cli.Command, cfg *options) error { - if cfg.ldconfigPath == "" { - return errors.New("ldconfig-path must be specified") - } - return nil -} - -func (m command) run(_ *cli.Command, cfg *options) error { - s, err := oci.LoadContainerState(cfg.containerSpec) - if err != nil { - return fmt.Errorf("failed to load container state: %v", err) - } - - containerRootDir, err := s.GetContainerRoot() - if err != nil || containerRootDir == "" || containerRootDir == "/" { - return fmt.Errorf("failed to determined container root: %v", err) - } - - cmd, err := ldconfig.NewRunner( - reexecUpdateLdCacheCommandName, - cfg.ldconfigPath, - containerRootDir, - cfg.folders..., - ) - if err != nil { - return err - } - - return cmd.Run() -} - -// createSonameSymlinksHandler wraps createSonameSymlinks with error handling. -func createSonameSymlinksHandler() { - if err := createSonameSymlinks(os.Args); err != nil { - log.Printf("Error updating ldcache: %v", err) - os.Exit(1) - } -} - -// createSonameSymlinks ensures that soname symlinks are created in the -// specified directories. -// It is invoked from a reexec'd handler and provides namespace isolation for -// the operations performed by this hook. At the point where this is invoked, -// we are in a new mount namespace that is cloned from the parent. -// -// args[0] is the reexec initializer function name -// args[1] is the path of the ldconfig binary on the host -// args[2] is the container root directory -// The remaining args are directories where soname symlinks need to be created. -func createSonameSymlinks(args []string) error { - if len(args) < 3 { - return fmt.Errorf("incorrect arguments: %v", args) - } - hostLdconfigPath := args[1] - containerRootDirPath := args[2] - - ldconfig, err := ldconfig.New( - hostLdconfigPath, - containerRootDirPath, - ) - if err != nil { - return fmt.Errorf("failed to construct ldconfig runner: %w", err) - } - - return ldconfig.CreateSonameSymlinks(args[3:]...) -} diff --git a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go index 0eaa7af38..2798c29b9 100644 --- a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go @@ -98,15 +98,6 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false - - hookName: createContainer - path: {{ .toolkitRoot }}/nvidia-cdi-hook - args: - - nvidia-cdi-hook - - create-soname-symlinks - - --folder - - /lib/x86_64-linux-gnu - env: - - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: {{ .toolkitRoot }}/nvidia-cdi-hook args: diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index ddce01c65..4fbf48a91 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -102,15 +102,6 @@ containerEdits: - --host-driver-version=999.88.77 env: - NVIDIA_CTK_DEBUG=false - - hookName: createContainer - path: /usr/bin/nvidia-cdi-hook - args: - - nvidia-cdi-hook - - create-soname-symlinks - - --folder - - /lib/x86_64-linux-gnu - env: - - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -188,15 +179,6 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false - - hookName: createContainer - path: /usr/bin/nvidia-cdi-hook - args: - - nvidia-cdi-hook - - create-soname-symlinks - - --folder - - /lib/x86_64-linux-gnu - env: - - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -274,15 +256,6 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false - - hookName: createContainer - path: /usr/bin/nvidia-cdi-hook - args: - - nvidia-cdi-hook - - create-soname-symlinks - - --folder - - /lib/x86_64-linux-gnu - env: - - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index 8020e7bc6..36e13c787 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -46,9 +46,6 @@ const ( // An UpdateLDCacheHook is the hook used to update the ldcache in the // container. This allows injected libraries to be discoverable. UpdateLDCacheHook = HookName("update-ldcache") - // A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks - // for injected libraries exist in the container. - CreateSonameSymlinksHook = HookName("create-soname-symlinks") defaultNvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" ) diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index 15356de8b..54ac608eb 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -61,12 +61,7 @@ func (d ldconfig) Hooks() ([]Hook, error) { args = append(args, "--folder", f) } - h := Merge( - d.hookCreator.Create(CreateSonameSymlinksHook, args...), - d.hookCreator.Create(UpdateLDCacheHook, args...), - ) - - return h.Hooks() + return d.hookCreator.Create(UpdateLDCacheHook, args...).Hooks() } // getLibraryPaths extracts the library dirs from the specified mounts diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 30576a7b2..3d49bd89a 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -44,12 +44,6 @@ func TestLDCacheUpdateHook(t *testing.T) { { description: "empty mounts", expectedHooks: []Hook{ - { - Lifecycle: "createContainer", - Path: testNvidiaCDIHookPath, - Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"}, - Env: []string{"NVIDIA_CTK_DEBUG=false"}, - }, { Lifecycle: "createContainer", Path: testNvidiaCDIHookPath, @@ -80,12 +74,6 @@ func TestLDCacheUpdateHook(t *testing.T) { }, }, expectedHooks: []Hook{ - { - Lifecycle: "createContainer", - Path: testNvidiaCDIHookPath, - Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, - Env: []string{"NVIDIA_CTK_DEBUG=false"}, - }, { Lifecycle: "createContainer", Path: testNvidiaCDIHookPath, @@ -103,12 +91,6 @@ func TestLDCacheUpdateHook(t *testing.T) { }, }, expectedHooks: []Hook{ - { - Lifecycle: "createContainer", - Path: testNvidiaCDIHookPath, - Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"}, - Env: []string{"NVIDIA_CTK_DEBUG=false"}, - }, { Lifecycle: "createContainer", Path: testNvidiaCDIHookPath, @@ -121,12 +103,6 @@ func TestLDCacheUpdateHook(t *testing.T) { description: "explicit ldconfig path is passed", ldconfigPath: testLdconfigPath, expectedHooks: []Hook{ - { - Lifecycle: "createContainer", - Path: testNvidiaCDIHookPath, - Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath}, - Env: []string{"NVIDIA_CTK_DEBUG=false"}, - }, { Lifecycle: "createContainer", Path: testNvidiaCDIHookPath, diff --git a/internal/ldconfig/ldconfig.go b/internal/ldconfig/ldconfig.go index f3db1a77a..2b45425a3 100644 --- a/internal/ldconfig/ldconfig.go +++ b/internal/ldconfig/ldconfig.go @@ -69,29 +69,6 @@ func New(ldconfigPath string, inRoot string) (*Ldconfig, error) { return l, nil } -// CreateSonameSymlinks uses ldconfig to create the soname symlinks in the -// specified directories. -func (l *Ldconfig) CreateSonameSymlinks(directories ...string) error { - if len(directories) == 0 { - return nil - } - ldconfigPath, err := l.prepareRoot() - if err != nil { - return err - } - - args := []string{ - filepath.Base(ldconfigPath), - // Explicitly disable updating the LDCache. - "-N", - // Specify -n to only process the specified directories. - "-n", - } - args = append(args, directories...) - - return SafeExec(ldconfigPath, args, nil) -} - func (l *Ldconfig) UpdateLDCache(directories ...string) error { ldconfigPath, err := l.prepareRoot() if err != nil { diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index eea7120e5..e600fb219 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -63,9 +63,6 @@ const ( EnableCudaCompatHook = discover.EnableCudaCompatHook // An UpdateLDCacheHook is used to update the ldcache in the container. UpdateLDCacheHook = discover.UpdateLDCacheHook - // A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks - // for injected libraries exist in the container. - CreateSonameSymlinksHook = discover.CreateSonameSymlinksHook // Deprecated: Use CreateSymlinksHook instead. HookCreateSymlinks = CreateSymlinksHook From 37fde66094b095dea3e909814357079bec420f8f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 21 Jul 2025 12:33:04 +0200 Subject: [PATCH 4/7] [no-relnote] Don't read ldconfig path from config Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/cdi/generate/generate.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index a06fbac1d..054237737 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -173,7 +173,6 @@ func (m command) build() *cli.Command { Destination: &opts.ldconfigPath, Sources: cli.NewValueSourceChain( cli.EnvVar("NVIDIA_CTK_CDI_GENERATE_LDCONFIG_PATH"), - m.config.ValueFrom("nvidia-container-cli.ldconfig"), ), }, &cli.StringFlag{ From 0dab248d3d82659539ab2dfb4d1a6b10cd444dde Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 22 Jul 2025 15:57:13 +0200 Subject: [PATCH 5/7] [no-relnote] Allow local tests Signed-off-by: Evan Lezar --- tests/e2e/Makefile | 8 ++++++++ tests/e2e/e2e_test.go | 10 ++++++---- tests/e2e/nvidia-container-toolkit_test.go | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index d70e8dd08..9dc60d3df 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -23,6 +23,14 @@ GINKGO_BIN := $(CURDIR)/bin/ginkgo test: $(GINKGO_BIN) $(GINKGO_BIN) $(GINKGO_ARGS) -v --json-report ginkgo.json ./tests/e2e/... +# test-preinstalled runs the test cases against the version of the toolkit that +# is already installed (and configured for docker) on the host. +.PHONY: test-preinstalled +test-preinstalled: test +test-preinstalled: E2E_INSTALL_CTK = false +test-preinstalled: E2E_SSH_HOST = + + $(GINKGO_BIN): mkdir -p $(CURDIR)/bin GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index d6b80dd50..0975ee18b 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -69,11 +69,13 @@ func getTestEnv() { } - sshKey = getRequiredEnvvar[string]("E2E_SSH_KEY") - sshUser = getRequiredEnvvar[string]("E2E_SSH_USER") - sshHost = getRequiredEnvvar[string]("E2E_SSH_HOST") + sshHost = getEnvVarOrDefault("E2E_SSH_HOST", "") + if sshHost != "" { + sshKey = getRequiredEnvvar[string]("E2E_SSH_KEY") + sshUser = getRequiredEnvvar[string]("E2E_SSH_USER") + sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22") + } - sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22") } // getRequiredEnvvar returns the specified envvar if set or raises an error. diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index 7922a13d0..e1433a293 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -398,7 +398,7 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(err).ToNot(HaveOccurred()) _, _, err = runner.Run(`docker build -t firmware-test \ - --build-arg RM_VERSION="$(basename $(ls -d /lib/firmware/nvidia/*.*))" \ + --build-arg RM_VERSION="` + hostDriverVersion + `" \ --build-arg CURRENT_DIR="` + outputDir + `" \ - < Date: Tue, 22 Jul 2025 22:47:23 +0200 Subject: [PATCH 6/7] [no-relnote] Remove CUDA repo references Signed-off-by: Evan Lezar --- deployments/container/Dockerfile | 7 +++++++ tests/e2e/nvidia-container-toolkit_test.go | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/deployments/container/Dockerfile b/deployments/container/Dockerfile index 2ac74cf9e..a87c532c8 100644 --- a/deployments/container/Dockerfile +++ b/deployments/container/Dockerfile @@ -18,6 +18,8 @@ ARG VERSION="N/A" FROM nvcr.io/nvidia/cuda:12.9.1-base-ubi9 AS build +RUN rm -f /etc/yum.repos.d/cuda.repo && rm -f /etc/ld.so.conf.d/nvidia.conf + RUN dnf install -y \ wget make git gcc \ && \ @@ -86,6 +88,8 @@ COPY LICENSE /licenses/ # The debpackages stage is used to extract the contents of deb packages. FROM nvcr.io/nvidia/cuda:12.9.1-base-ubuntu20.04 AS debpackages +RUN rm -f /etc/apt/sources.list.d/cuda.list + ARG TARGETARCH ARG PACKAGE_DIST_DEB=ubuntu18.04 @@ -104,6 +108,9 @@ RUN set -eux; \ # The rpmpackages stage is used to extract the contents of the rpm packages. FROM nvcr.io/nvidia/cuda:12.9.1-base-ubi9 AS rpmpackages + +RUN rm -f /etc/yum.repos.d/cuda.repo && rm -f /etc/ld.so.conf.d/nvidia.conf + RUN dnf install -y cpio ARG TARGETARCH diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index e1433a293..7922a13d0 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -398,7 +398,7 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(err).ToNot(HaveOccurred()) _, _, err = runner.Run(`docker build -t firmware-test \ - --build-arg RM_VERSION="` + hostDriverVersion + `" \ + --build-arg RM_VERSION="$(basename $(ls -d /lib/firmware/nvidia/*.*))" \ --build-arg CURRENT_DIR="` + outputDir + `" \ - < Date: Wed, 23 Jul 2025 12:09:05 +0200 Subject: [PATCH 7/7] [no-relnote] Fix minor typo in README Signed-off-by: Evan Lezar --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 231f53647..90129d1a1 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ ## Introduction -The NVIDIA Container Toolkit allows users to build and run GPU accelerated containers. The toolkit includes a container runtime [library](https://github.com/NVIDIA/libnvidia-container) and utilities to automatically configure containers to leverage NVIDIA GPUs. +The NVIDIA Container Toolkit allows users to build and run GPU-accelerated containers. The toolkit includes a container runtime [library](https://github.com/NVIDIA/libnvidia-container) and utilities to automatically configure containers to leverage NVIDIA GPUs. Product documentation including an architecture overview, platform support, and installation and usage guides can be found in the [documentation repository](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html).