Version in base suite: 525.85.05-1 Base version: nvidia-persistenced_525.85.05-1 Target version: nvidia-persistenced_535.171.04-1~deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/contrib/n/nvidia-persistenced/nvidia-persistenced_525.85.05-1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/contrib/n/nvidia-persistenced/nvidia-persistenced_535.171.04-1~deb12u1.dsc debian/changelog | 26 ++++++++++++++++ debian/control | 5 +-- debian/copyright | 4 +- debian/salsa-ci.yml | 3 - nv-ioctl-numa.h | 1 nvidia-numa.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++- nvidia-numa.h | 3 + nvidia-persistenced.c | 17 ++++++++++ version.mk | 2 - 9 files changed, 130 insertions(+), 9 deletions(-) diff -Nru nvidia-persistenced-525.85.05/debian/changelog nvidia-persistenced-535.171.04/debian/changelog --- nvidia-persistenced-525.85.05/debian/changelog 2023-02-09 09:47:31.000000000 +0000 +++ nvidia-persistenced-535.171.04/debian/changelog 2024-03-28 16:03:44.000000000 +0000 @@ -1,3 +1,29 @@ +nvidia-persistenced (535.171.04-1~deb12u1) bookworm; urgency=medium + + * Rebuild for bookworm. + + -- Andreas Beckmann Thu, 28 Mar 2024 17:03:44 +0100 + +nvidia-persistenced (535.171.04-1) unstable; urgency=medium + + * New upstream release. + + -- Andreas Beckmann Mon, 25 Mar 2024 10:51:19 +0100 + +nvidia-persistenced (530.41.03-1) unstable; urgency=medium + + * New upstream release. + * Switch B-D from pkg-config to pkgconf. + + -- Andreas Beckmann Tue, 19 Mar 2024 17:59:21 +0100 + +nvidia-persistenced (525.147.05-1) unstable; urgency=medium + + * New upstream release. + * Update the list of supported drivers. + + -- Andreas Beckmann Fri, 26 Jan 2024 23:34:41 +0100 + nvidia-persistenced (525.85.05-1) unstable; urgency=medium * New upstream release. diff -Nru nvidia-persistenced-525.85.05/debian/control nvidia-persistenced-535.171.04/debian/control --- nvidia-persistenced-525.85.05/debian/control 2023-02-09 09:47:31.000000000 +0000 +++ nvidia-persistenced-535.171.04/debian/control 2024-03-28 16:03:44.000000000 +0000 @@ -6,7 +6,7 @@ Andreas Beckmann , Build-Depends: debhelper-compat (= 13), - pkg-config, + pkgconf, libtirpc-dev, m4, Rules-Requires-Root: no @@ -21,8 +21,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: - libnvidia-cfg1 [!i386 !armhf !ppc64el] - | libnvidia-tesla-cfg1 [amd64 arm64 ppc64el] + libnvidia-cfg1 [!i386 !armhf] | libnvidia-tesla-470-cfg1 [amd64 arm64 ppc64el] | libnvidia-cfg.so.1 | libnvidia-cfg1-any, diff -Nru nvidia-persistenced-525.85.05/debian/copyright nvidia-persistenced-535.171.04/debian/copyright --- nvidia-persistenced-525.85.05/debian/copyright 2023-02-09 09:47:31.000000000 +0000 +++ nvidia-persistenced-535.171.04/debian/copyright 2024-03-28 16:03:44.000000000 +0000 @@ -9,12 +9,12 @@ NVIDIA drivers in non-free. Files: * -Copyright: Copyright (C) 2004-2022 NVIDIA Corporation +Copyright: Copyright (C) 2004-2023 NVIDIA Corporation License: Expat Files: debian/* Copyright: - © 2014-2023 Andreas Beckmann + © 2014-2024 Andreas Beckmann License: Expat License: Expat diff -Nru nvidia-persistenced-525.85.05/debian/salsa-ci.yml nvidia-persistenced-535.171.04/debian/salsa-ci.yml --- nvidia-persistenced-525.85.05/debian/salsa-ci.yml 2023-02-09 09:47:31.000000000 +0000 +++ nvidia-persistenced-535.171.04/debian/salsa-ci.yml 2024-03-28 16:03:44.000000000 +0000 @@ -1,7 +1,6 @@ --- include: - - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml - - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/recipes/debian.yml variables: SALSA_CI_COMPONENTS: 'main contrib non-free' diff -Nru nvidia-persistenced-525.85.05/nv-ioctl-numa.h nvidia-persistenced-535.171.04/nv-ioctl-numa.h --- nvidia-persistenced-525.85.05/nv-ioctl-numa.h 2023-01-14 01:39:35.000000000 +0000 +++ nvidia-persistenced-535.171.04/nv-ioctl-numa.h 2024-03-19 21:42:41.000000000 +0000 @@ -62,6 +62,7 @@ uint64_t memblock_size __aligned(8); uint64_t numa_mem_addr __aligned(8); uint64_t numa_mem_size __aligned(8); + uint8_t use_auto_online; nv_offline_addresses_t offline_addresses __aligned(8); } nv_ioctl_numa_info_t; diff -Nru nvidia-persistenced-525.85.05/nvidia-numa.c nvidia-persistenced-535.171.04/nvidia-numa.c --- nvidia-persistenced-525.85.05/nvidia-numa.c 2023-01-14 01:39:35.000000000 +0000 +++ nvidia-persistenced-535.171.04/nvidia-numa.c 2024-03-19 21:42:41.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -51,6 +51,7 @@ #define MEMORY_PATH_FMT "/sys/devices/system/memory" #define MEMORY_HARD_OFFLINE_PATH_FMT MEMORY_PATH_FMT "/hard_offline_page" #define MEMORY_PROBE_PATH_FMT MEMORY_PATH_FMT "/probe" +#define AUTO_ONLINE_PATH MEMORY_PATH_FMT "/auto_online_blocks" #define MEMBLK_FILE_FMT "memory%d" #define MEMBLK_DIR_PATH_FMT MEMORY_PATH_FMT "/" MEMBLK_FILE_FMT #define MEMBLK_STATE_PATH_FMT MEMBLK_DIR_PATH_FMT "/state" @@ -59,6 +60,9 @@ #define STATE_ONLINE "online" #define VALID_MOVABLE_STATE "Movable" +#define SYSFS_NVIDIA_DIR "/sys/bus/pci/drivers/nvidia/" +#define SYSFS_ID_PATH SYSFS_NVIDIA_DIR "%s/%s" + #ifndef NV_IS_ALIGNED #define NV_IS_ALIGNED(v, gran) (0 == ((v) & ((gran) - 1))) #endif @@ -708,6 +712,10 @@ goto driver_fail; } + /* handle case where auto online/offline should be used for NUMA memory */ + if (numa_info_params.use_auto_online) + goto done; + /* Check if numa status from RM is valid */ switch (numa_info_params.status) { @@ -834,6 +842,7 @@ syslog(LOG_NOTICE, "NUMA: Memory onlining completed!\n"); done: numa_info->fd = fd; + numa_info->use_auto_online = numa_info_params.use_auto_online; return NVPD_SUCCESS; online_failed: @@ -864,6 +873,10 @@ return NVPD_ERR_NUMA_FAILURE; } + /* handle case where auto online/offline should be used for NUMA memory */ + if (numa_info->use_auto_online) + goto done; + status = offline_memory(fd); if (status < 0) { syslog_device(device_pci_info, @@ -873,7 +886,70 @@ return NVPD_ERR_NUMA_FAILURE; } +done: close(fd); numa_info->fd = -1; + numa_info->use_auto_online = 0; + return NVPD_SUCCESS; +} + +static int +read_int_from_file(char *devicename, char *id_file) +{ + FILE *fp; + char filename[PATH_MAX]; + unsigned int id; + + sprintf(filename, SYSFS_ID_PATH, devicename, id_file); + + fp = fopen(filename, "r"); + if (fp == NULL) + return -1; + if (fscanf(fp, "%x", &id) < 0) + return -1; + fclose(fp); + + return id; +} +/* + * Handle setup for systems with GPUs that require Auto-online of NUMA memory + */ +NvPdStatus setup_numa_auto_online(void) +{ + DIR *nvidia; + struct dirent *device; + int vendor_id, device_id; + int status; + + nvidia = opendir(SYSFS_NVIDIA_DIR); + if (nvidia == NULL) { + printf("Failed to open %s\n", SYSFS_NVIDIA_DIR); + syslog(LOG_ERR, "NUMA: Failed to open %s\n", SYSFS_NVIDIA_DIR); + return NVPD_ERR_DEVICE_NOT_FOUND; + } + + // Scans devices owned by the NVIDIA driver... + while ((device = readdir(nvidia)) != NULL) { + if (device->d_type != DT_LNK) + continue; + + vendor_id = read_int_from_file(device->d_name, "vendor"); + if (vendor_id != 0x10de) + continue; + + device_id = read_int_from_file(device->d_name, "device"); + + // Check for GH180, which requires auto-online + if (device_id >= 0x2340 && device_id <= 0x237f) { + syslog(LOG_INFO, "NUMA: Enabling NUMA memory Auto-Online due to GPU requirement\n"); + status = write_string_to_file(AUTO_ONLINE_PATH, BRING_ONLINE_CMD, strlen(BRING_ONLINE_CMD)); + if (status < 0) { + syslog(LOG_ERR, "NUMA: Failed to enable NUMA memory Auto-Online\n"); + return NVPD_ERR_NUMA_FAILURE; + } + return NVPD_SUCCESS; + } + } + return NVPD_SUCCESS; } diff -Nru nvidia-persistenced-525.85.05/nvidia-numa.h nvidia-persistenced-535.171.04/nvidia-numa.h --- nvidia-persistenced-525.85.05/nvidia-numa.h 2023-01-14 01:39:35.000000000 +0000 +++ nvidia-persistenced-535.171.04/nvidia-numa.h 2024-03-19 21:42:41.000000000 +0000 @@ -31,10 +31,13 @@ { int fd; NvCfgPciDevice *pci_info; + uint8_t use_auto_online; } NvNumaDevice; NvPdStatus nvNumaOnlineMemory(NvNumaDevice *numa_info); NvPdStatus nvNumaOfflineMemory(NvNumaDevice *numa_info); +NvPdStatus setup_numa_auto_online(void); + #endif diff -Nru nvidia-persistenced-525.85.05/nvidia-persistenced.c nvidia-persistenced-535.171.04/nvidia-persistenced.c --- nvidia-persistenced-525.85.05/nvidia-persistenced.c 2023-01-14 01:39:35.000000000 +0000 +++ nvidia-persistenced-535.171.04/nvidia-persistenced.c 2024-03-19 21:42:41.000000000 +0000 @@ -518,6 +518,8 @@ { char *lib_path; int status = 0; + NvCfgBool success; + NvCfgPciDevice *nv_cfg_devices; if (nvidia_cfg_path != NULL) { lib_path = nvstrcat(nvidia_cfg_path, "/", NVIDIA_CFG_LIB, NULL); @@ -550,6 +552,16 @@ return NVPD_ERR_DRIVER; } + /* Make a call to get_pci_devices for the side-effect of creating the device files */ + success = nv_cfg_api.get_pci_devices(&num_devices, &nv_cfg_devices); + if (!success) { + syslog(LOG_ERR, "Failed to query NVIDIA devices. Please ensure that " + "the NVIDIA device files (/dev/nvidia*) exist, and " + "that user %u has read and write permissions for " + "those files.", getuid()); + return NVPD_ERR_DRIVER; + } + return NVPD_SUCCESS; } @@ -902,6 +914,11 @@ if (status != NVPD_SUCCESS) { goto shutdown; } + + status = setup_numa_auto_online(); + if (status != NVPD_SUCCESS) { + goto shutdown; + } status = setup_devices(options.persistence_mode); if (status != NVPD_SUCCESS) { diff -Nru nvidia-persistenced-525.85.05/version.mk nvidia-persistenced-535.171.04/version.mk --- nvidia-persistenced-525.85.05/version.mk 2023-01-14 01:39:35.000000000 +0000 +++ nvidia-persistenced-535.171.04/version.mk 2024-03-19 21:42:41.000000000 +0000 @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 525.85.05 +NVIDIA_VERSION = 535.171.04 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))