Version in base suite: 535.216.01-1~deb12u1 Base version: nvidia-open-gpu-kernel-modules_535.216.01-1~deb12u1 Target version: nvidia-open-gpu-kernel-modules_535.247.01-1~deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/contrib/n/nvidia-open-gpu-kernel-modules/nvidia-open-gpu-kernel-modules_535.216.01-1~deb12u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/contrib/n/nvidia-open-gpu-kernel-modules/nvidia-open-gpu-kernel-modules_535.247.01-1~deb12u1.dsc README.md | 9 debian/changelog | 59 ++ debian/control | 2 debian/copyright | 2 debian/nvidia-kernel-dkms.dkms.in | 2 debian/patches/0001-backport-NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL-ch.patch | 36 + debian/patches/fix-warnings.patch | 166 ++++++ debian/patches/implicit-function-declaration.patch | 25 debian/patches/kernel-flags.patch | 40 + debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch | 6 debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch | 91 --- debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch | 37 - debian/patches/module/0052-backport-uvm-warning-fixes-from-560.28.03.patch | 257 ++++++++++ debian/patches/module/0053-fix-more-warnings.patch | 116 ++++ debian/patches/module/0054-fix-more-uvm-warnings.patch | 37 + debian/patches/module/0058-backport-warning-fixes-from-565.57.01.patch | 44 + debian/patches/module/0059-backport-uvm-warning-fixes-from-550.90.07.patch | 41 + debian/patches/module/0060-backport-module_import_ns_takes_string_literal-chang.patch | 54 ++ debian/patches/module/0062-Support-BTF-generation-for-non-release-builds.patch | 155 ++++++ debian/patches/module/bashisms.patch | 2 debian/patches/module/cc_version_check-gcc5.patch | 2 debian/patches/module/conftest-verbose.patch | 14 debian/patches/module/fragile-ARCH.patch | 36 - debian/patches/module/series | 10 debian/patches/module/use-kbuild-compiler.patch | 9 debian/patches/module/use-kbuild-flags.patch | 8 
debian/patches/series | 3 debian/patches/series-manual | 7 debian/rules | 19 debian/rules.defs | 2 kernel-open/Kbuild | 21 kernel-open/Makefile | 31 + kernel-open/common/inc/nvmisc.h | 36 + kernel-open/conftest.sh | 252 +++++++++ kernel-open/nvidia-drm/nvidia-drm-drv.c | 7 kernel-open/nvidia-drm/nvidia-drm.Kbuild | 2 kernel-open/nvidia-modeset/nvidia-modeset-linux.c | 5 kernel-open/nvidia-modeset/nvidia-modeset.Kbuild | 4 kernel-open/nvidia-uvm/nvidia-uvm.Kbuild | 1 kernel-open/nvidia-uvm/uvm.c | 3 kernel-open/nvidia-uvm/uvm_hmm.c | 20 kernel-open/nvidia-uvm/uvm_kvmalloc.c | 2 kernel-open/nvidia-uvm/uvm_linux.h | 2 kernel-open/nvidia-uvm/uvm_mmu.h | 2 kernel-open/nvidia-uvm/uvm_pmm_gpu.c | 2 kernel-open/nvidia-uvm/uvm_va_space_mm.c | 6 kernel-open/nvidia/internal_crypt_lib.h | 4 kernel-open/nvidia/libspdm_ecc.c | 136 ++++- kernel-open/nvidia/nv-mmap.c | 4 kernel-open/nvidia/nvidia.Kbuild | 9 kernel-open/nvidia/os-mlock.c | 56 +- src/common/inc/nvBldVer.h | 20 src/common/inc/nvUnixVersion.h | 2 src/common/inc/nvVer.h | 2 src/common/inc/nvlog_defs.h | 5 src/common/nvswitch/kernel/ls10/link_ls10.c | 36 + src/common/nvswitch/kernel/smbpbi_nvswitch.c | 4 src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h | 2 src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080ecc.h | 43 - src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h | 25 src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h | 26 + src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h | 29 + src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h | 2 src/common/sdk/nvidia/inc/nverror.h | 4 src/common/sdk/nvidia/inc/nvmisc.h | 36 + src/common/uproc/os/common/include/liblogdecode.h | 2 src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h | 2 src/nvidia/arch/nvalloc/common/inc/inforom/ifrstruct.h | 2 src/nvidia/arch/nvalloc/common/inc/nvcst.h | 5 src/nvidia/arch/nvalloc/common/inc/nvpcie.h | 2 src/nvidia/generated/g_all_dcl_pb.c | 14 src/nvidia/generated/g_all_dcl_pb.h | 8 src/nvidia/generated/g_intr_nvoc.h | 15 
src/nvidia/generated/g_nv_name_released.h | 1 src/nvidia/generated/g_nvdebug_pb.h | 4 src/nvidia/generated/g_rs_resource_nvoc.h | 17 src/nvidia/generated/g_subdevice_nvoc.c | 79 +-- src/nvidia/generated/g_subdevice_nvoc.h | 8 src/nvidia/inc/libraries/nvport/string.h | 6 src/nvidia/inc/libraries/resserv/rs_resource.h | 17 src/nvidia/src/kernel/diagnostics/journal.c | 27 + src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c | 2 src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c | 22 src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c | 61 ++ src/nvidia/src/kernel/gpu/intr/intr.c | 43 + src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_gpu_kernel.c | 41 + src/nvidia/src/kernel/mem_mgr/mem.c | 2 src/nvidia/src/kernel/platform/chipset/chipset_info.c | 11 src/nvidia/src/kernel/platform/chipset/chipset_pcie.c | 2 src/nvidia/src/kernel/rmapi/alloc_free.c | 41 + src/nvidia/src/kernel/virtualization/kernel_hostvgpudeviceapi.c | 2 src/nvidia/src/libraries/nvport/string/string_generic.c | 31 - src/nvidia/src/libraries/resserv/src/rs_server.c | 1 version.mk | 2 94 files changed, 2168 insertions(+), 434 deletions(-) diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/README.md nvidia-open-gpu-kernel-modules-535.247.01/README.md --- nvidia-open-gpu-kernel-modules-535.216.01/README.md 2024-09-17 18:01:16.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/README.md 2025-03-26 13:46:49.000000000 +0000 @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 535.216.01. +version 535.247.01. ## How to Build @@ -17,7 +17,7 @@ Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -535.216.01 driver release. This can be achieved by installing +535.247.01 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. 
E.g., @@ -180,7 +180,7 @@ ## Compatible GPUs The open-gpu-kernel-modules can be used on any Turing or later GPU -(see the table below). However, in the 535.216.01 release, +(see the table below). However, in the 535.247.01 release, GeForce and Workstation support is still considered alpha-quality. To enable use of the open kernel modules on GeForce and Workstation GPUs, @@ -188,7 +188,7 @@ parameter to 1. For more details, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/535.216.01/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/535.247.01/README/kernel_open.html In the below table, if three IDs are listed, the first is the PCI Device ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI @@ -749,6 +749,7 @@ | NVIDIA H800 | 2324 10DE 17A8 | | NVIDIA H20 | 2329 10DE 198B | | NVIDIA H20 | 2329 10DE 198C | +| NVIDIA H20-3e | 232C 10DE 2063 | | NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 | | NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 | | NVIDIA H100 PCIe | 2331 10DE 1626 | diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/changelog nvidia-open-gpu-kernel-modules-535.247.01/debian/changelog --- nvidia-open-gpu-kernel-modules-535.216.01/debian/changelog 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/changelog 2025-05-08 20:10:43.000000000 +0000 @@ -1,3 +1,62 @@ +nvidia-open-gpu-kernel-modules (535.247.01-1~deb12u1) bookworm; urgency=medium + + * Rebuild for bookworm. + + -- Andreas Beckmann Thu, 08 May 2025 22:10:43 +0200 + +nvidia-open-gpu-kernel-modules (535.247.01-1) unstable; urgency=medium + + * New upstream LTS and Tesla branch release 535.247.01 (2025-04-17). + * Fixed CVE-2025-23244. (Closes: #1104076) + https://nvidia.custhelp.com/app/answers/detail/a_id/5630 + * Sync with src:nvidia-graphics-drivers. 
+ + -- Andreas Beckmann Wed, 07 May 2025 21:25:12 +0200 + +nvidia-open-gpu-kernel-modules (535.230.02-1) unstable; urgency=medium + + * New upstream LTS and Tesla branch release 535.230.02 (2025-01-16). + * Fixed CVE-2024-0150, CVE-2024-0147, CVE-2024-53869, CVE-2024-0131, + CVE-2024-0149. (Closes: #1093916) + https://nvidia.custhelp.com/app/answers/detail/a_id/5614 + * Sync with src:nvidia-graphics-drivers. + + -- Andreas Beckmann Sat, 03 May 2025 20:16:34 +0200 + +nvidia-open-gpu-kernel-modules (535.216.03-4) unstable; urgency=medium + + * Do not add -mfunction-return=thunk-extern flag, breaks backwards + compatibility with kernels built without this flag. + * Apply both patch sets manually. + + -- Andreas Beckmann Mon, 14 Apr 2025 21:06:39 +0200 + +nvidia-open-gpu-kernel-modules (535.216.03-3) unstable; urgency=medium + + * Backport NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL and + NV_CRYPTO_AKCIPHER_VERIFY_PRESENT changes from 550.144.03 and + NV_FOLIO_TEST_SWAPCACHE_PRESENT changes from 565.57.01 to fix open kernel + module build for Linux 6.13. + * Let pahole ignore language c++11 for BTF generation. (Closes: #1098812) + * Fix warnings during open module build. + * Build with more kernel hardening flags. + * Sync with src:nvidia-graphics-drivers. + * Bump Standards-Version to 4.7.2. No changes needed. + + -- Andreas Beckmann Wed, 02 Apr 2025 22:07:13 +0200 + +nvidia-open-gpu-kernel-modules (535.216.03-2) unstable; urgency=medium + + * Sync with src:nvidia-graphics-drivers. (Closes: #1090361) + + -- Andreas Beckmann Thu, 20 Feb 2025 02:32:44 +0100 + +nvidia-open-gpu-kernel-modules (535.216.03-1) unstable; urgency=medium + + * New upstream Tesla branch release 535.216.03 (2024-11-19). + + -- Andreas Beckmann Wed, 27 Nov 2024 09:38:37 +0100 + nvidia-open-gpu-kernel-modules (535.216.01-1~deb12u1) bookworm; urgency=medium * Rebuild for bookworm. 
diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/control nvidia-open-gpu-kernel-modules-535.247.01/debian/control --- nvidia-open-gpu-kernel-modules-535.216.01/debian/control 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/control 2025-05-08 20:10:43.000000000 +0000 @@ -12,7 +12,7 @@ linux-headers-amd64 [amd64] , linux-headers-arm64 [arm64] , Rules-Requires-Root: no -Standards-Version: 4.7.0 +Standards-Version: 4.7.2 Homepage: https://github.com/NVIDIA/open-gpu-kernel-modules Vcs-Browser: https://salsa.debian.org/nvidia-team/nvidia-open-gpu-kernel-modules Vcs-Git: https://salsa.debian.org/nvidia-team/nvidia-open-gpu-kernel-modules.git diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/copyright nvidia-open-gpu-kernel-modules-535.247.01/debian/copyright --- nvidia-open-gpu-kernel-modules-535.216.01/debian/copyright 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/copyright 2025-05-08 20:10:43.000000000 +0000 @@ -48,7 +48,7 @@ Files: debian/* Copyright: - © 2022-2024 Andreas Beckmann + © 2022-2025 Andreas Beckmann License: Expat License: Expat diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/nvidia-kernel-dkms.dkms.in nvidia-open-gpu-kernel-modules-535.247.01/debian/nvidia-kernel-dkms.dkms.in --- nvidia-open-gpu-kernel-modules-535.216.01/debian/nvidia-kernel-dkms.dkms.in 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/nvidia-kernel-dkms.dkms.in 2025-05-08 20:10:43.000000000 +0000 @@ -13,7 +13,7 @@ MAKE[0]="env NV_VERBOSE=1 \ make ${parallel_jobs+-j$parallel_jobs} modules KERNEL_UNAME=${kernelver}" -CLEAN="make KERNEL_UNAME=${kernelver} clean" +CLEAN="true" BUILT_MODULE_NAME[0]="nvidia" DEST_MODULE_NAME[0]="$PACKAGE_NAME" diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/0001-backport-NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL-ch.patch 
nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/0001-backport-NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL-ch.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/0001-backport-NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL-ch.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/0001-backport-NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL-ch.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,36 @@ +From c673df2c0eea09ba213c5431b6dd3753b66edc21 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Fri, 21 Feb 2025 11:18:37 +0100 +Subject: [PATCH] backport NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL changes + from 550.144.03 + +--- + kernel-open/nvidia/nv-frontend.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/kernel-open/nvidia/nv-frontend.c b/kernel-open/nvidia/nv-frontend.c +index f5b871e6..b411a662 100644 +--- a/kernel-open/nvidia/nv-frontend.c ++++ b/kernel-open/nvidia/nv-frontend.c +@@ -37,14 +37,16 @@ MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER); + * ("module: add support for symbol namespaces") in 5.4 + */ + #if defined(MODULE_IMPORT_NS) +- + /* + * DMA_BUF namespace is added by commit id 16b0314aa746 + * ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in 5.16 + */ ++#if defined(NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL) ++MODULE_IMPORT_NS("DMA_BUF"); ++#else + MODULE_IMPORT_NS(DMA_BUF); +- + #endif ++#endif // defined(MODULE_IMPORT_NS) + + static NvU32 nv_num_instances; + +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/fix-warnings.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/fix-warnings.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/fix-warnings.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/fix-warnings.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,166 @@ +Author: Andreas Beckmann +Description: fix warnings 
during open module build + +--- a/kernel-open/nvidia/libspdm_ec.c ++++ b/kernel-open/nvidia/libspdm_ec.c +@@ -33,6 +33,7 @@ static bool lkca_ecdsa_sign(void *ec_con + return false; + } + ++static + bool libspdm_ec_set_pub_key(void *ec_context, const uint8_t *public_key, + size_t public_key_size) + { +@@ -43,6 +44,7 @@ bool libspdm_ec_set_pub_key(void *ec_con + return lkca_ec_set_pub_key(ec_context, public_key, public_key_size); + } + ++static + bool libspdm_ec_get_pub_key(void *ec_context, uint8_t *public_key, + size_t *public_key_size) + { +@@ -57,6 +59,7 @@ bool libspdm_ec_get_pub_key(void *ec_con + return lkca_ec_get_pub_key(ec_context, public_key, public_key_size); + } + ++static + bool libspdm_ec_check_key(const void *ec_context) + { + /* TBD*/ +--- a/kernel-open/nvidia/libspdm_hkdf_sha.c ++++ b/kernel-open/nvidia/libspdm_hkdf_sha.c +@@ -26,6 +26,7 @@ + + #include "internal_crypt_lib.h" + ++static + bool libspdm_hkdf_sha256_extract_and_expand(const uint8_t *key, size_t key_size, + const uint8_t *salt, size_t salt_size, + const uint8_t *info, size_t info_size, +@@ -54,6 +55,7 @@ bool libspdm_hkdf_sha256_expand(const ui + out, out_size); + } + ++static + bool libspdm_hkdf_sha384_extract_and_expand(const uint8_t *key, size_t key_size, + const uint8_t *salt, size_t salt_size, + const uint8_t *info, size_t info_size, +@@ -82,6 +84,7 @@ bool libspdm_hkdf_sha384_expand(const ui + out, out_size); + } + ++static + bool libspdm_hkdf_sha512_extract_and_expand(const uint8_t *key, size_t key_size, + const uint8_t *salt, size_t salt_size, + const uint8_t *info, size_t info_size, +--- a/kernel-open/nvidia/libspdm_rand.c ++++ b/kernel-open/nvidia/libspdm_rand.c +@@ -31,6 +31,7 @@ bool libspdm_random_bytes(uint8_t *outpu + } + + // This is specifically allowed by spdm ++static + bool libspdm_random_seed(const uint8_t *seed, size_t seed_size) + { + return true; +--- a/kernel-open/nvidia/libspdm_x509.c ++++ b/kernel-open/nvidia/libspdm_x509.c +@@ -32,6 +32,7 @@ + #include + 
#endif + ++static + bool libspdm_x509_construct_certificate(const uint8_t *cert, size_t cert_size, + uint8_t **single_x509_cert) + { +@@ -39,17 +40,20 @@ bool libspdm_x509_construct_certificate( + return false; + } + ++static + bool libspdm_x509_construct_certificate_stack(uint8_t **x509_stack, ...) + { + LIBSPDM_ASSERT(false); + return false; + } + ++static + void libspdm_x509_free(void *x509_cert) + { + LIBSPDM_ASSERT(false); + } + ++static + void libspdm_x509_stack_free(void *x509_stack) + { + LIBSPDM_ASSERT(false); +@@ -108,6 +112,7 @@ bool libspdm_x509_get_subject_name(const + return false; + } + ++static + bool libspdm_x509_get_common_name(const uint8_t *cert, size_t cert_size, + char *common_name, + size_t *common_name_size) +@@ -116,6 +121,7 @@ bool libspdm_x509_get_common_name(const + return false; + } + ++static + bool + libspdm_x509_get_organization_name(const uint8_t *cert, size_t cert_size, + char *name_buffer, +@@ -337,6 +343,7 @@ bool libspdm_x509_get_cert_from_cert_cha + return false; + } + ++static + bool libspdm_x509_get_tbs_cert(const uint8_t *cert, size_t cert_size, + uint8_t **tbs_cert, size_t *tbs_cert_size) + { +@@ -367,6 +374,7 @@ bool libspdm_x509_get_issuer_name(const + return false; + } + ++static + bool + libspdm_x509_get_issuer_common_name(const uint8_t *cert, size_t cert_size, + char *common_name, +@@ -376,6 +384,7 @@ libspdm_x509_get_issuer_common_name(cons + return false; + } + ++static + bool + libspdm_x509_get_issuer_orgnization_name(const uint8_t *cert, size_t cert_size, + char *name_buffer, +@@ -385,6 +394,7 @@ libspdm_x509_get_issuer_orgnization_name + return false; + } + ++static + bool libspdm_x509_get_signature_algorithm(const uint8_t *cert, + size_t cert_size, uint8_t *oid, + size_t *oid_size) +--- a/src/common/unix/nvidia-push/interface/nvidia-push-utils.h ++++ b/src/common/unix/nvidia-push/interface/nvidia-push-utils.h +@@ -67,7 +67,7 @@ void nvPushAcquireTimelineSemaphore( + NvU64 val); + + NvBool nvPushDecodeMethod(NvU32 
header, NvU32 *count); +-void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 object[NV_MAX_SUBDEVICES]); ++void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 *object); + void nvPushSetSubdeviceMask(NvPushChannelPtr p, NvU32 mask); + void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count); + +--- a/src/common/unix/nvidia-push/src/nvidia-push.c ++++ b/src/common/unix/nvidia-push/src/nvidia-push.c +@@ -971,7 +971,7 @@ static NvU32 GetSetObjectHandle(NvPushCh + } + + // Issue a SET_OBJECT method on the specified subchannel. +-void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 object[NV_MAX_SUBDEVICES]) ++void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 *object) + { + const NvPushDeviceRec *pDevice = p->pDevice; + const NvU32 oldSubDevMask = p->currentSubDevMask; diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/implicit-function-declaration.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/implicit-function-declaration.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/implicit-function-declaration.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/implicit-function-declaration.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,25 @@ +Author: Andreas Beckmann +Description: move -Werror=implicit-function-declaration to CC_ONLY_CFLAGS + avoid warning on C++ compilation since upstream build system uses CFLAGS for C++, too: + cc1plus: warning: ‘-Werror=’ argument ‘-Werror=implicit-function-declaration’ is not valid for C++ + +--- a/src/nvidia-modeset/Makefile ++++ b/src/nvidia-modeset/Makefile +@@ -151,6 +151,7 @@ CFLAGS += $(CONDITIONAL_CFLAGS) + CC_ONLY_CFLAGS += -Wimplicit + CC_ONLY_CFLAGS += -Wstrict-prototypes + CC_ONLY_CFLAGS += -Wmissing-prototypes ++CC_ONLY_CFLAGS += -Werror=implicit-function-declaration + CC_ONLY_CFLAGS += -std=gnu11 + + CXX_ONLY_CFLAGS += -std=gnu++11 +--- a/src/nvidia/Makefile ++++ b/src/nvidia/Makefile +@@ -182,6 
+182,7 @@ endif + + CFLAGS += $(CONDITIONAL_CFLAGS) + ++CC_ONLY_CFLAGS += -Werror=implicit-function-declaration + CC_ONLY_CFLAGS += --std=gnu11 + CXX_ONLY_CFLAGS += --std=gnu++11 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/kernel-flags.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/kernel-flags.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/kernel-flags.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/kernel-flags.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,40 @@ +Author: Andreas Beckmann +Description: build with more kernel hardening flags + fixes objtool warning "'naked' return found in MITIGATION_RETHUNK build" + +--- a/src/nvidia-modeset/Makefile ++++ b/src/nvidia-modeset/Makefile +@@ -90,6 +90,7 @@ ifeq ($(TARGET_ARCH),x86_64) + CFLAGS += -mno-sse + CFLAGS += -mno-sse2 + CFLAGS += -mno-3dnow ++ CFLAGS += -mno-avx + endif + + ifeq ($(TARGET_ARCH),aarch64) +@@ -144,6 +145,7 @@ ifeq ($(TARGET_ARCH),x86_64) + CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -fno-jump-tables) + CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch=thunk-extern) + CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch-register) ++ CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mfunction-return=thunk-extern) + endif + + CFLAGS += $(CONDITIONAL_CFLAGS) +--- a/src/nvidia/Makefile ++++ b/src/nvidia/Makefile +@@ -85,6 +85,7 @@ ifeq ($(TARGET_ARCH),x86_64) + CFLAGS += -mno-sse + CFLAGS += -mno-sse2 + CFLAGS += -mno-3dnow ++ CFLAGS += -mno-avx + endif + + ifeq ($(TARGET_ARCH),aarch64) +@@ -178,6 +179,7 @@ ifeq ($(TARGET_ARCH),x86_64) + CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -fno-jump-tables) + CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch-register) + CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mindirect-branch=thunk-extern) ++ CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mfunction-return=thunk-extern) + endif + + CFLAGS += $(CONDITIONAL_CFLAGS) diff -Nru 
nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch 2025-05-08 20:10:43.000000000 +0000 @@ -1,4 +1,4 @@ -From 3a03c9a15522c69286f9a94d5395430af8d3f628 Mon Sep 17 00:00:00 2001 +From 575dab59a98c0c6938124763f1d08052c922e159 Mon Sep 17 00:00:00 2001 From: Andreas Beckmann Date: Mon, 31 Oct 2022 14:40:42 +0100 Subject: [PATCH] conftest.sh: remove empty lines from uts_release output @@ -8,10 +8,10 @@ 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conftest.sh b/conftest.sh -index 7f0478ea..33ec54b3 100755 +index 60277da8..70a3d5af 100755 --- a/conftest.sh +++ b/conftest.sh -@@ -5930,7 +5930,7 @@ compile_test() { +@@ -5985,7 +5985,7 @@ compile_test() { echo "#include UTS_RELEASE" > conftest$$.c diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,91 +0,0 @@ -From b28cea9c8f2fcb649e2930cc02ac8cfc5e8c7806 Mon Sep 17 00:00:00 2001 -From: Andreas Beckmann -Date: Thu, 1 Feb 2024 14:58:45 -0500 -Subject: [PATCH] import 
pfn_valid() w/o GPL rcu_read_lock/unlock from v6.8-rc3 - -linux-6.1.76, 6.6.15, and 6.7.3 have modified the non-ARCH-specific -pfn_valid() to use __rcu_read_lock/unlock[1] that is marked GPL and -cannot be used here[2][3][4] unless using the open source variant. - -pfn_valid() has been changed to use GPL rcu_read_lock/unlock by -"mm/sparsemem: fix race in accessing memory_section->usage" in Linux - v6.8-rc1 (5ec8e8ea8b7783fab150cf86404fc38cb4db8800) -which has been backported to Linux - v6.7.3 (3a01daace71b521563c38bbbf874e14c3e58adb7) - v6.6.15 (70064241f2229f7ba7b9599a98f68d9142e81a97) - v6.1.76 (68ed9e33324021e9d6b798e9db00ca3093d2012a) - v5.10.210 (90ad17575d26874287271127d43ef3c2af876cea) - -pfn_valid() has been further changed to use non-GPL -rcu_read_lock_sched()/rcu_read_unlock_sched() by -"mm, kmsan: fix infinite recursion due to RCU critical section" in Linux - v6.8-rc3 (f6564fce256a3944aa1bc76cb3c40e792d97c1eb) -which has been backported to Linux - v6.7.4 (5a33420599fa0288792537e6872fd19cc8607ea6) - v6.6.16 (6335c0cdb2ea0ea02c999e04d34fd84f69fb27ff) - v6.1.77 (dc904345e3771aa01d0b8358b550802fdc6fe00b) -but not (yet) to Linux v5.10.x - -As a workaround, use the v6.8-rc3 implementation for all kernels -having only the first patch until NVIDIA makes a fixed release -(that no longer will be using pfn_valid[5]). 
- -[1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/include/linux/mmzone.h?h=v6.7.3&id=3a01daace71b521563c38bbbf874e14c3e58adb7 -[2] https://bugs.gentoo.org/923456 -[3] https://forums.developer.nvidia.com/t/280908 -[4] https://github.com/NVIDIA/open-gpu-kernel-modules/issues/594 -[5] https://github.com/NVIDIA/open-gpu-kernel-modules/issues/594#issuecomment-1916197641 - -Bug-Debian: https://bugs.debian.org/1062932 -Origin: gentoo, https://github.com/gentoo/gentoo/blob/c64caf53/x11-drivers/nvidia-drivers/files/nvidia-drivers-470.223.02-gpl-pfn_valid.patch ---- - common/inc/nv-linux.h | 33 +++++++++++++++++++++++++++++++++ - 1 file changed, 33 insertions(+) - -diff --git a/common/inc/nv-linux.h b/common/inc/nv-linux.h -index 94106b3e..18901239 100644 ---- a/common/inc/nv-linux.h -+++ b/common/inc/nv-linux.h -@@ -1947,6 +1947,39 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv) - #define NV_GET_UNUSED_FD_FLAGS(flags) (-1) - #endif - -+#if (!defined(CONFIG_HAVE_ARCH_PFN_VALID)) && \ -+ ((LINUX_VERSION_CODE == KERNEL_VERSION(6,7,3)) || \ -+ (LINUX_VERSION_CODE == KERNEL_VERSION(6,6,15)) || \ -+ (LINUX_VERSION_CODE == KERNEL_VERSION(6,1,76)) || \ -+ ((LINUX_VERSION_CODE >= KERNEL_VERSION(5,10,210)) && (LINUX_VERSION_CODE < KERNEL_VERSION(5,11,0)))) -+ -+/* Linux v6.8-rc3 pfn_valid version without GPL rcu_read_lock/unlock() */ -+static inline int nv_pfn_valid(unsigned long pfn) -+{ -+ struct mem_section *ms; -+ int ret; -+ -+ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn) -+ return 0; -+ -+ if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) -+ return 0; -+ ms = __pfn_to_section(pfn); -+ rcu_read_lock_sched(); -+ if (!valid_section(ms)) { -+ rcu_read_unlock_sched(); -+ return 0; -+ } -+ ret = early_section(ms) || pfn_section_valid(ms, pfn); -+ rcu_read_unlock_sched(); -+ -+ return ret; -+} -+ -+#else -+# define nv_pfn_valid pfn_valid -+#endif -+ - #define MODULE_BASE_NAME "nvidia" - #define MODULE_INSTANCE_NUMBER 0 - #define MODULE_INSTANCE_STRING "" 
--- -2.39.5 - diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -From 9e266458d8dc9e07123cbc46a887a5f3a9744cf2 Mon Sep 17 00:00:00 2001 -From: Andreas Beckmann -Date: Sun, 13 Oct 2024 08:52:51 +0200 -Subject: [PATCH] let the virt_addr_valid() macro use nv_pfn_valid() on ppc64el - ---- - common/inc/nv-linux.h | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/common/inc/nv-linux.h b/common/inc/nv-linux.h -index 18901239..433e5587 100644 ---- a/common/inc/nv-linux.h -+++ b/common/inc/nv-linux.h -@@ -1947,6 +1947,7 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv) - #define NV_GET_UNUSED_FD_FLAGS(flags) (-1) - #endif - -+#if defined(NVCPU_PPC64LE) - #if (!defined(CONFIG_HAVE_ARCH_PFN_VALID)) && \ - ((LINUX_VERSION_CODE == KERNEL_VERSION(6,7,3)) || \ - (LINUX_VERSION_CODE == KERNEL_VERSION(6,6,15)) || \ -@@ -1976,8 +1977,10 @@ static inline int nv_pfn_valid(unsigned long pfn) - return ret; - } - --#else --# define nv_pfn_valid pfn_valid -+// let the virt_addr_valid() macro use nv_pfn_valid() -+#define pfn_valid nv_pfn_valid -+ -+#endif - #endif - - #define MODULE_BASE_NAME "nvidia" --- -2.39.5 - diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0052-backport-uvm-warning-fixes-from-560.28.03.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0052-backport-uvm-warning-fixes-from-560.28.03.patch --- 
nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0052-backport-uvm-warning-fixes-from-560.28.03.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0052-backport-uvm-warning-fixes-from-560.28.03.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,257 @@ +From 4cad785a66fe11ea3c70e8436b159bdf4dd84608 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Sun, 8 Dec 2024 22:54:59 +0100 +Subject: [PATCH] backport uvm warning fixes from 560.28.03 + +--- + nvidia-uvm/uvm_channel_test.c | 8 ++++---- + nvidia-uvm/uvm_gpu_semaphore.c | 2 +- + nvidia-uvm/uvm_hopper_fault_buffer.c | 1 + + nvidia-uvm/uvm_maxwell_mmu.c | 1 + + nvidia-uvm/uvm_migrate_pageable.c | 6 +++++- + nvidia-uvm/uvm_mmu.c | 2 +- + nvidia-uvm/uvm_page_tree_test.c | 2 +- + nvidia-uvm/uvm_pascal_fault_buffer.c | 7 ------- + nvidia-uvm/uvm_pascal_mmu.c | 1 + + nvidia-uvm/uvm_pmm_sysmem_test.c | 7 +++++-- + nvidia-uvm/uvm_pmm_test.c | 10 +++++----- + nvidia-uvm/uvm_populate_pageable.c | 2 +- + 12 files changed, 26 insertions(+), 23 deletions(-) + +diff --git a/nvidia-uvm/uvm_channel_test.c b/nvidia-uvm/uvm_channel_test.c +index c4521d22..f3cfd494 100644 +--- a/nvidia-uvm/uvm_channel_test.c ++++ b/nvidia-uvm/uvm_channel_test.c +@@ -684,7 +684,7 @@ done: + // This test verifies that concurrent pushes using the same channel pool + // select different channels, when the Confidential Computing feature is + // enabled. 
+-NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space) ++static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space) + { + NV_STATUS status = NV_OK; + uvm_channel_pool_t *pool; +@@ -746,7 +746,7 @@ error: + return status; + } + +-NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space) ++static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space) + { + uvm_gpu_t *gpu; + +@@ -785,7 +785,7 @@ NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space) + return NV_OK; + } + +-NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space) ++static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space) + { + uvm_gpu_t *gpu; + +@@ -833,7 +833,7 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space) + return NV_OK; + } + +-NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space) ++static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space) + { + NV_STATUS status = NV_OK; + uvm_gpu_t *gpu; +diff --git a/nvidia-uvm/uvm_gpu_semaphore.c b/nvidia-uvm/uvm_gpu_semaphore.c +index f014dfad..64b005e9 100644 +--- a/nvidia-uvm/uvm_gpu_semaphore.c ++++ b/nvidia-uvm/uvm_gpu_semaphore.c +@@ -507,7 +507,7 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_ + return true; + } + +-bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore) ++static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore) + { + uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu; + +diff --git a/nvidia-uvm/uvm_hopper_fault_buffer.c b/nvidia-uvm/uvm_hopper_fault_buffer.c +index 64262b9b..4a259cfe 100644 +--- a/nvidia-uvm/uvm_hopper_fault_buffer.c ++++ b/nvidia-uvm/uvm_hopper_fault_buffer.c +@@ -21,6 +21,7 @@ + + *******************************************************************************/ + ++#include "uvm_hal.h" + #include "uvm_hal_types.h" + #include "hwref/hopper/gh100/dev_fault.h" + 
+diff --git a/nvidia-uvm/uvm_maxwell_mmu.c b/nvidia-uvm/uvm_maxwell_mmu.c +index d2107844..6610833d 100644 +--- a/nvidia-uvm/uvm_maxwell_mmu.c ++++ b/nvidia-uvm/uvm_maxwell_mmu.c +@@ -38,6 +38,7 @@ + #include "uvm_forward_decl.h" + #include "uvm_gpu.h" + #include "uvm_mmu.h" ++#include "uvm_hal.h" + #include "uvm_push_macros.h" + #include "hwref/maxwell/gm107/dev_mmu.h" + +diff --git a/nvidia-uvm/uvm_migrate_pageable.c b/nvidia-uvm/uvm_migrate_pageable.c +index 6b95e203..ddf6265d 100644 +--- a/nvidia-uvm/uvm_migrate_pageable.c ++++ b/nvidia-uvm/uvm_migrate_pageable.c +@@ -507,7 +507,7 @@ static NV_STATUS migrate_vma_copy_pages(struct vm_area_struct *vma, + return NV_OK; + } + +-void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages) ++static void migrate_vma_cleanup_pages(unsigned long *dst, unsigned long npages) + { + unsigned long i; + +@@ -553,6 +553,7 @@ void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_ + migrate_vma_cleanup_pages(args->dst, state->num_pages); + } + ++#if defined(CONFIG_MIGRATE_VMA_HELPER) + void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma, + const unsigned long *src, + unsigned long *dst, +@@ -571,6 +572,7 @@ void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma, + + uvm_migrate_vma_alloc_and_copy(&args, (migrate_vma_state_t *) private); + } ++#endif + + void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state) + { +@@ -642,6 +644,7 @@ void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_stat + UVM_ASSERT(!bitmap_intersects(state->populate_pages_mask, state->allocation_failed_mask, state->num_pages)); + } + ++#if defined(CONFIG_MIGRATE_VMA_HELPER) + void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma, + const unsigned long *src, + const unsigned long *dst, +@@ -660,6 +663,7 @@ void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma, + + 
uvm_migrate_vma_finalize_and_map(&args, (migrate_vma_state_t *) private); + } ++#endif + + static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *state) + { +diff --git a/nvidia-uvm/uvm_mmu.c b/nvidia-uvm/uvm_mmu.c +index 6379fd2d..818b8dfc 100644 +--- a/nvidia-uvm/uvm_mmu.c ++++ b/nvidia-uvm/uvm_mmu.c +@@ -2287,7 +2287,7 @@ bool uvm_mmu_gpu_needs_dynamic_sysmem_mapping(uvm_gpu_t *gpu) + return uvm_gpu_is_virt_mode_sriov_heavy(gpu); + } + +-NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu) ++static NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu) + { + NvU32 page_size; + NvU64 size; +diff --git a/nvidia-uvm/uvm_page_tree_test.c b/nvidia-uvm/uvm_page_tree_test.c +index 782ac511..ae8e91ea 100644 +--- a/nvidia-uvm/uvm_page_tree_test.c ++++ b/nvidia-uvm/uvm_page_tree_test.c +@@ -1511,7 +1511,7 @@ static uvm_mmu_page_table_alloc_t fake_table_alloc(uvm_aperture_t aperture, NvU6 + // Queries the supported page sizes of the GPU(uvm_gpu_t) and fills the + // page_sizes array up to MAX_NUM_PAGE_SIZE. 
Returns the number of elements in + // page_sizes; +-size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes) ++static size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes) + { + unsigned long page_size_log2; + unsigned long page_sizes_bitvec; +diff --git a/nvidia-uvm/uvm_pascal_fault_buffer.c b/nvidia-uvm/uvm_pascal_fault_buffer.c +index fb02ef28..7479e313 100644 +--- a/nvidia-uvm/uvm_pascal_fault_buffer.c ++++ b/nvidia-uvm/uvm_pascal_fault_buffer.c +@@ -325,10 +325,3 @@ NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu) + { + return NVB069_FAULT_BUF_SIZE; + } +- +-void uvm_hal_pascal_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu, +- void *fault_packet, +- uvm_fault_buffer_entry_t *buffer_entry) +-{ +- UVM_ASSERT_MSG(false, "fault_buffer_parse_non_replayable_entry called on Pascal GPU\n"); +-} +diff --git a/nvidia-uvm/uvm_pascal_mmu.c b/nvidia-uvm/uvm_pascal_mmu.c +index 52db0469..e67a7037 100644 +--- a/nvidia-uvm/uvm_pascal_mmu.c ++++ b/nvidia-uvm/uvm_pascal_mmu.c +@@ -36,6 +36,7 @@ + #include "uvm_global.h" + #include "uvm_gpu.h" + #include "uvm_mmu.h" ++#include "uvm_hal.h" + #include "uvm_push_macros.h" + #include "uvm_pascal_fault_buffer.h" + #include "hwref/pascal/gp100/dev_fault.h" +diff --git a/nvidia-uvm/uvm_pmm_sysmem_test.c b/nvidia-uvm/uvm_pmm_sysmem_test.c +index ec4a630d..b5440a8b 100644 +--- a/nvidia-uvm/uvm_pmm_sysmem_test.c ++++ b/nvidia-uvm/uvm_pmm_sysmem_test.c +@@ -1067,7 +1067,9 @@ done: + return status; + } + +-NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus) ++static NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, ++ uvm_va_space_t *va_space, ++ const uvm_processor_mask_t *test_gpus) + { + NV_STATUS status = NV_OK; + uvm_cpu_chunk_t **split_chunks; +@@ -1163,7 +1165,8 @@ done_free: + return status; + } + +-NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus) ++static 
NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, ++ const uvm_processor_mask_t *test_gpus) + { + uvm_cpu_chunk_t *chunk; + uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes(); +diff --git a/nvidia-uvm/uvm_pmm_test.c b/nvidia-uvm/uvm_pmm_test.c +index 4c1ee667..71369ea8 100644 +--- a/nvidia-uvm/uvm_pmm_test.c ++++ b/nvidia-uvm/uvm_pmm_test.c +@@ -906,11 +906,11 @@ NV_STATUS uvm_test_pmm_check_leak(UVM_TEST_PMM_CHECK_LEAK_PARAMS *params, struct + return status; + } + +-NV_STATUS __test_pmm_async_alloc_type(uvm_va_space_t *va_space, +- uvm_gpu_t *gpu, +- size_t num_chunks, +- uvm_pmm_gpu_memory_type_t mem_type, +- size_t work_iterations) ++static NV_STATUS __test_pmm_async_alloc_type(uvm_va_space_t *va_space, ++ uvm_gpu_t *gpu, ++ size_t num_chunks, ++ uvm_pmm_gpu_memory_type_t mem_type, ++ size_t work_iterations) + { + NV_STATUS status; + NV_STATUS tracker_status = NV_OK; +diff --git a/nvidia-uvm/uvm_populate_pageable.c b/nvidia-uvm/uvm_populate_pageable.c +index 9c90032e..9e96471a 100644 +--- a/nvidia-uvm/uvm_populate_pageable.c ++++ b/nvidia-uvm/uvm_populate_pageable.c +@@ -53,7 +53,7 @@ static bool is_write_populate(struct vm_area_struct *vma, uvm_populate_permissio + } + } + +-NV_STATUS uvm_handle_fault(struct vm_area_struct *vma, unsigned long start, unsigned long vma_num_pages, bool write) ++static NV_STATUS uvm_handle_fault(struct vm_area_struct *vma, unsigned long start, unsigned long vma_num_pages, bool write) + { + NV_STATUS status = NV_OK; + +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0053-fix-more-warnings.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0053-fix-more-warnings.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0053-fix-more-warnings.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0053-fix-more-warnings.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,116 @@ 
+From 23b987c0d375bf84ea2c1bbe0a16fd6974e3d5c0 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Sun, 8 Dec 2024 22:13:22 +0100 +Subject: [PATCH] fix more warnings + +--- + common/inc/nv-linux.h | 8 ++++++++ + common/inc/nv-proto.h | 2 +- + nvidia/libspdm_aead.c | 2 ++ + nvidia/nv-frontend.c | 2 -- + nvidia/nv-frontend.h | 3 +++ + 5 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/common/inc/nv-linux.h b/common/inc/nv-linux.h +index 433e5587..54b2ce17 100644 +--- a/common/inc/nv-linux.h ++++ b/common/inc/nv-linux.h +@@ -598,21 +598,27 @@ static NvBool nv_numa_node_has_memory(int node_id) + { \ + (ptr) = kmalloc(size, NV_GFP_KERNEL); \ + if (ptr) \ ++ { \ + NV_MEMDBG_ADD(ptr, size); \ ++ } \ + } + + #define NV_KZALLOC(ptr, size) \ + { \ + (ptr) = kzalloc(size, NV_GFP_KERNEL); \ + if (ptr) \ ++ { \ + NV_MEMDBG_ADD(ptr, size); \ ++ } \ + } + + #define NV_KMALLOC_ATOMIC(ptr, size) \ + { \ + (ptr) = kmalloc(size, NV_GFP_ATOMIC); \ + if (ptr) \ ++ { \ + NV_MEMDBG_ADD(ptr, size); \ ++ } \ + } + + #if defined(__GFP_RETRY_MAYFAIL) +@@ -627,7 +633,9 @@ static NvBool nv_numa_node_has_memory(int node_id) + { \ + (ptr) = kmalloc(size, NV_GFP_NO_OOM); \ + if (ptr) \ ++ { \ + NV_MEMDBG_ADD(ptr, size); \ ++ } \ + } + + #define NV_KFREE(ptr, size) \ +diff --git a/common/inc/nv-proto.h b/common/inc/nv-proto.h +index 815107d2..b016e42e 100644 +--- a/common/inc/nv-proto.h ++++ b/common/inc/nv-proto.h +@@ -43,7 +43,7 @@ void nv_procfs_remove_gpu (nv_linux_state_t *); + + int nvidia_mmap (struct file *, struct vm_area_struct *); + int nvidia_mmap_helper (nv_state_t *, nv_linux_file_private_t *, nvidia_stack_t *, struct vm_area_struct *, void *); +-int nv_encode_caching (pgprot_t *, NvU32, NvU32); ++int nv_encode_caching (pgprot_t *, NvU32, nv_memory_type_t); + void nv_revoke_gpu_mappings_locked(nv_state_t *); + + NvUPtr nv_vm_map_pages (struct page **, NvU32, NvBool, NvBool); +diff --git a/nvidia/libspdm_aead.c b/nvidia/libspdm_aead.c +index 3844da48..18d23ba2 
100644 +--- a/nvidia/libspdm_aead.c ++++ b/nvidia/libspdm_aead.c +@@ -38,6 +38,7 @@ struct lkca_aead_ctx + }; + #endif + ++static + int libspdm_aead_prealloc(void **context, char const *alg) + { + #ifndef USE_LKCA +@@ -168,6 +169,7 @@ static int lkca_aead_internal(struct crypto_aead *aead, + } + #endif + ++static + int libspdm_aead_prealloced(void *context, + const uint8_t *key, size_t key_size, + const uint8_t *iv, size_t iv_size, +diff --git a/nvidia/nv-frontend.c b/nvidia/nv-frontend.c +index eb7a2e08..0bdbe02a 100644 +--- a/nvidia/nv-frontend.c ++++ b/nvidia/nv-frontend.c +@@ -48,8 +48,6 @@ struct semaphore nv_module_table_lock; + // minor number table + nvidia_module_t *nv_minor_num_table[NV_FRONTEND_CONTROL_DEVICE_MINOR_MAX + 1]; + +-int nvidia_init_module(void); +-void nvidia_exit_module(void); + + /* EXPORTS to Linux Kernel */ + +diff --git a/nvidia/nv-frontend.h b/nvidia/nv-frontend.h +index 1ce72a0b..ee31f1d5 100644 +--- a/nvidia/nv-frontend.h ++++ b/nvidia/nv-frontend.h +@@ -44,4 +44,7 @@ int nvidia_frontend_remove_device(nvidia_module_t *, nv_linux_state_t *); + + extern nvidia_module_t *nv_minor_num_table[]; + ++int nvidia_init_module(void); ++void nvidia_exit_module(void); ++ + #endif +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0054-fix-more-uvm-warnings.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0054-fix-more-uvm-warnings.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0054-fix-more-uvm-warnings.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0054-fix-more-uvm-warnings.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,37 @@ +From 6499352d257956c9458fe5076f1a42d9e44a3ddb Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Tue, 10 Dec 2024 20:08:50 +0100 +Subject: [PATCH] fix more uvm warnings + +--- + nvidia-uvm/uvm_gpu.c | 1 + + nvidia-uvm/uvm_tools.c | 1 + + 2 files changed, 2 
insertions(+) + +diff --git a/nvidia-uvm/uvm_gpu.c b/nvidia-uvm/uvm_gpu.c +index 9070f444..f7874a9e 100644 +--- a/nvidia-uvm/uvm_gpu.c ++++ b/nvidia-uvm/uvm_gpu.c +@@ -1881,6 +1881,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid) + return uvm_gpu_get_by_uuid_locked(gpu_uuid); + } + ++static + uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id_locked(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id) + { + uvm_gpu_t *gpu; +diff --git a/nvidia-uvm/uvm_tools.c b/nvidia-uvm/uvm_tools.c +index 3619a7ce..569c454b 100644 +--- a/nvidia-uvm/uvm_tools.c ++++ b/nvidia-uvm/uvm_tools.c +@@ -35,6 +35,7 @@ + #include "uvm_range_group.h" + #include "uvm_mem.h" + #include "nv_speculation_barrier.h" ++#include "uvm_tools_init.h" + + // We limit the number of times a page can be retained by the kernel + // to prevent the user from maliciously passing UVM tools the same page +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0058-backport-warning-fixes-from-565.57.01.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0058-backport-warning-fixes-from-565.57.01.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0058-backport-warning-fixes-from-565.57.01.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0058-backport-warning-fixes-from-565.57.01.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,44 @@ +From cfac14a1b72a623b2021713b354056681936efe7 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Sun, 2 Feb 2025 18:54:55 +0100 +Subject: [PATCH] backport warning fixes from 565.57.01 + +--- + nvidia-uvm/uvm_ats_sva.c | 4 ++-- + nvidia/nvlink_proto.h | 1 - + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/nvidia-uvm/uvm_ats_sva.c b/nvidia-uvm/uvm_ats_sva.c +index d4b9e2c4..a1256f26 100644 +--- a/nvidia-uvm/uvm_ats_sva.c ++++ b/nvidia-uvm/uvm_ats_sva.c +@@ -127,12 +127,12 @@ static NvU32 smmu_vintf_read32(void __iomem 
*smmu_cmdqv_base, int reg) + + // We always use VCMDQ127 for the WAR + #define VCMDQ 127 +-void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val) ++static void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val) + { + iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg); + } + +-NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg) ++static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg) + { + return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg); + } +diff --git a/nvidia/nvlink_proto.h b/nvidia/nvlink_proto.h +index 356001e4..3e3cffef 100644 +--- a/nvidia/nvlink_proto.h ++++ b/nvidia/nvlink_proto.h +@@ -45,7 +45,6 @@ void nvswitch_exit (void); + */ + int tegrashim_init (void); + void tegrashim_exit (void); +-NvlStatus tegrashim_init_device (struct pci_dev *); + #endif + + #endif /* _NVLINK_PROTO_H_ */ +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0059-backport-uvm-warning-fixes-from-550.90.07.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0059-backport-uvm-warning-fixes-from-550.90.07.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0059-backport-uvm-warning-fixes-from-550.90.07.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0059-backport-uvm-warning-fixes-from-550.90.07.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,41 @@ +From a8ca828b7ae6e3cd6fd7d3ae2a4fda04ab1c980d Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Wed, 5 Feb 2025 01:54:41 +0100 +Subject: [PATCH] backport uvm warning fixes from 550.90.07 + +--- + nvidia-uvm/uvm_map_external.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/nvidia-uvm/uvm_map_external.c b/nvidia-uvm/uvm_map_external.c +index 83992951..7c1982c9 100644 +--- a/nvidia-uvm/uvm_map_external.c ++++ b/nvidia-uvm/uvm_map_external.c +@@ -39,6 +39,7 @@ 
+ #include "uvm_pte_batch.h" + #include "uvm_tlb_batch.h" + #include "nv_uvm_interface.h" ++#include "nv_uvm_types.h" + + #include "uvm_pushbuffer.h" + +@@ -101,11 +102,11 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range, + + pte_buffer->va_range = va_range; + pte_buffer->gpu = gpu; +- pte_buffer->mapping_info.cachingType = map_rm_params->caching_type; +- pte_buffer->mapping_info.mappingType = map_rm_params->mapping_type; +- pte_buffer->mapping_info.formatType = map_rm_params->format_type; +- pte_buffer->mapping_info.elementBits = map_rm_params->element_bits; +- pte_buffer->mapping_info.compressionType = map_rm_params->compression_type; ++ pte_buffer->mapping_info.cachingType = (UvmRmGpuCachingType) map_rm_params->caching_type; ++ pte_buffer->mapping_info.mappingType = (UvmRmGpuMappingType) map_rm_params->mapping_type; ++ pte_buffer->mapping_info.formatType = (UvmRmGpuFormatType) map_rm_params->format_type; ++ pte_buffer->mapping_info.elementBits = (UvmRmGpuFormatElementBits) map_rm_params->element_bits; ++ pte_buffer->mapping_info.compressionType = (UvmRmGpuCompressionType) map_rm_params->compression_type; + if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL) + pte_buffer->mapping_info.mappingPageSize = page_size; + +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0060-backport-module_import_ns_takes_string_literal-chang.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0060-backport-module_import_ns_takes_string_literal-chang.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0060-backport-module_import_ns_takes_string_literal-chang.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0060-backport-module_import_ns_takes_string_literal-chang.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,54 @@ +From a46fa4405ec31fbf621696fd99e5dd6d200af156 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Fri, 21 Feb 
2025 10:21:42 +0100 +Subject: [PATCH] backport module_import_ns_takes_string_literal changes from + 550.144.03 + +--- + conftest.sh | 17 +++++++++++++++++ + nvidia/nvidia.Kbuild | 1 + + 2 files changed, 18 insertions(+) + +diff --git a/conftest.sh b/conftest.sh +index 70a3d5af..bf61ef7b 100755 +--- a/conftest.sh ++++ b/conftest.sh +@@ -5517,6 +5517,23 @@ compile_test() { + else + echo "#undef NV_OF_PROPERTY_READ_VARIABLE_U32_ARRAY_PRESENT" | append_conftest "functions" + fi ++ ;; ++ ++ module_import_ns_takes_string_literal) ++ # ++ # Determine if the MODULE_IMPORT_NS macro takes a string literal ++ # or constant. ++ # ++ # Commit cdd30ebb1b9f ("module: Convert symbol namespace to ++ # string literal") changed MODULE_IMPORT_NS to take a string ++ # literal in Linux kernel v6.13. ++ # ++ CODE=" ++ #include <linux/module.h> ++ ++ MODULE_IMPORT_NS(DMA_BUF);" ++ ++ compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL" "" "functions" + ;; + + devm_of_platform_populate) +diff --git a/nvidia/nvidia.Kbuild b/nvidia/nvidia.Kbuild +index c88538fd..6a72a6c2 100644 +--- a/nvidia/nvidia.Kbuild ++++ b/nvidia/nvidia.Kbuild +@@ -194,6 +194,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_task_ioprio + NV_CONFTEST_FUNCTION_COMPILE_TESTS += mdev_set_iommu_device + NV_CONFTEST_FUNCTION_COMPILE_TESTS += offline_and_remove_memory + NV_CONFTEST_FUNCTION_COMPILE_TESTS += crypto_tfm_ctx_aligned ++NV_CONFTEST_FUNCTION_COMPILE_TESTS += module_import_ns_takes_string_literal + + NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_of_node_to_nid + NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_sme_active +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0062-Support-BTF-generation-for-non-release-builds.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0062-Support-BTF-generation-for-non-release-builds.patch ---
nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0062-Support-BTF-generation-for-non-release-builds.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/0062-Support-BTF-generation-for-non-release-builds.patch 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,155 @@ +From d70daaaafa22f244641c0b2010ed89de7da2102f Mon Sep 17 00:00:00 2001 +From: Rahul Rameshbabu +Date: Fri, 3 Jan 2025 13:13:31 -0800 +Subject: [PATCH] Support BTF generation for non-release builds + +Generating BTF has a number of benefits for developers such as live debug of +running code without the need to instrument the codebase at compile time or +being able to mutate code execution paths arbitrarily through kfuncs. These +benefits can be used by both our internal developers and company-external +consumers of the open kernel modules wishing to utilize BTF information for +debugging the NVIDIA kernel modules. An added benefit to this change is that +the debug/develop drivers will work out of the box on distributions that do +BTF generation of their kernel images and ship the BTF information in their +base kernel package. + +NVKMS builds kernel components with C++ linkage for its DisplayPort +library. This leads to C++ DWARF tags being included in the debuginfo of +nvidia-modeset.ko. When the linux kernel source directory includes a copy +of vmlinux with BTF information generated, any modules built against that +kernel will require generating BTF information as a target. pahole is used +for BTF information generation. While pahole does support a number of +languages in theory, a number of DWARF tags are not handled by the utility +aside from the C language DWARF tags. This leads to the BTF information +generation target failing for nvidia-modeset.ko due to the C++ dplib DWARF +symbols contained in the module. This failure leads to general NVIDIA +driver installation failures through the NVIDIA installer path.
+ +Using PAHOLE="pahole --lang_exclude=c++,rust" will not work since the +kernel build system will append a "--lang_exclude=rust" later in the +invocation. pahole overwrites previous flags with conflicting flags passed +later in the invocation. Instead, use awk to wrap the pahole invocation to +manipulate the arguments. awk is already a build system requirement for the +linux kernel, so no new dependencies are added. If "--lang_exclude=rust" is +present in the invocation, convert the flag to "--lang_exclude=c++,rust". +The workaround is designed to enable both closed and open kernel module +builds to succeed when BTF generation is enabled. However, the BTF +generated for the proprietary kernel module builds is limited in comparison +to the open kernel module builds, due to more complex DWARF expression +handling issues in the proprietary builds. + +Cannot use a GNU Make multiline variable to store the awk program due to +the multiline variable causing issues with the sed command used to generate +Makefiles from Makefile.template. 
+ /bin/sh: -c: line 2: unexpected EOF while looking for matching `"' + make-4.3[3]: *** [Makefile:150: modules] Error 2 + make-4.3[2]: *** [/home/binary-eater/Documents/graphics/bugfix_main/drivers/resman/arch/nvalloc/unix/kernel-interface.nvmk:593: build-kernel-modules] Error 2 + make-4.3[1]: *** [/home/binary-eater/Documents/graphics/bugfix_main/drivers/resman/unix.nvmk:331: kernel-interface.submake.build] Error 2 + make-4.3: *** [/home/binary-eater/Documents/graphics/bugfix_main/drivers/common/build/nvCommonRules.nvmk:197: resman.build] Error 2 + +Example kfuncs available from generated BTF: + sudo bpftrace -l '*prime*' + kfunc:nvidia_drm:__nv_drm_gem_nvkms_memory_prime_get_sg_table + kfunc:nvidia_drm:__nv_drm_gem_nvkms_prime_dup + kfunc:nvidia_drm:__nv_drm_gem_nvkms_prime_vmap + kfunc:nvidia_drm:__nv_drm_gem_nvkms_prime_vunmap + kfunc:nvidia_drm:__nv_drm_gem_user_memory_prime_get_sg_table + kfunc:nvidia_drm:__nv_drm_gem_user_memory_prime_vmap + kfunc:nvidia_drm:__nv_drm_gem_user_memory_prime_vunmap + kfunc:nvidia_drm:__nv_drm_prime_fence_context_destroy + kfunc:nvidia_drm:nv_drm_gem_prime_fence_attach_ioctl + kfunc:nvidia_drm:nv_drm_gem_prime_fence_event + kfunc:nvidia_drm:nv_drm_gem_prime_fence_op_enable_signaling + kfunc:nvidia_drm:nv_drm_gem_prime_fence_op_get_timeline_name + kfunc:nvidia_drm:nv_drm_gem_prime_fence_op_release + kfunc:nvidia_drm:nv_drm_gem_prime_fence_op_wait + kfunc:nvidia_drm:nv_drm_gem_prime_force_fence_signal + kfunc:nvidia_drm:nv_drm_gem_prime_get_sg_table + kfunc:nvidia_drm:nv_drm_gem_prime_import + kfunc:nvidia_drm:nv_drm_gem_prime_import_sg_table + kfunc:nvidia_drm:nv_drm_gem_prime_vmap + kfunc:nvidia_drm:nv_drm_gem_prime_vunmap + kfunc:nvidia_drm:nv_drm_prime_fence_context_create_ioctl + + sudo bpftrace -l '*nvkms*' + kfunc:nvidia_modeset:nvkms_alloc + kfunc:nvidia_modeset:nvkms_alloc_ref_ptr + kfunc:nvidia_modeset:nvkms_alloc_timer + kfunc:nvidia_modeset:nvkms_alloc_timer_with_ref_ptr + 
kfunc:nvidia_modeset:nvkms_allow_write_combining + kfunc:nvidia_modeset:nvkms_call_rm + kfunc:nvidia_modeset:nvkms_close + kfunc:nvidia_modeset:nvkms_close_from_kapi + kfunc:nvidia_modeset:nvkms_close_gpu + kfunc:nvidia_modeset:nvkms_close_pm_unlocked + kfunc:nvidia_modeset:nvkms_config_file_open + kfunc:nvidia_modeset:nvkms_copyin + kfunc:nvidia_modeset:nvkms_copyout + kfunc:nvidia_modeset:nvkms_debug_force_color_space + kfunc:nvidia_modeset:nvkms_dec_ref + kfunc:nvidia_modeset:nvkms_disable_hdmi_frl + + +Example of limited kfuncs available from kernel module builds lacking BTF: + sudo bpftrace -l '*nvkms*' + + +Signed-off-by: Rahul Rameshbabu + +Ignore c++11 in addition to c++. + +Origin: other, https://github.com/NVIDIA/open-gpu-kernel-modules/issues/756 +Origin: other, https://github.com/Binary-Eater/open-gpu-kernel-modules/commit/854449a7b76cdb4ad17919a1c8a662a4ff5b943d +--- + Makefile | 28 +++++++++++++++++++++++++++- + 1 file changed, 27 insertions(+), 1 deletion(-) + +diff --git a/Makefile b/Makefile +index 72672c2a..40a9792e 100644 +--- a/Makefile ++++ b/Makefile +@@ -71,6 +71,31 @@ else + CC ?= cc + LD ?= ld + OBJDUMP ?= objdump ++ AWK ?= awk ++ # Bake the following awk program in a string. The program is needed to add C++ ++ # to the languages excluded from BTF generation. ++ # ++ # Also, unconditionally return success (0) from the awk program, rather than ++ # propagating pahole's return status (with 'exit system(pahole_cmd)'), to ++ # work around a DW_TAG_rvalue_reference_type error in ++ # kernel/nvidia-modeset.ko.
++ # ++ # BEGIN { ++ # pahole_cmd = "pahole" ++ # for (i = 1; i < ARGC; i++) { ++ # if (ARGV[i] ~ /--lang_exclude=/) { ++ # pahole_cmd = pahole_cmd sprintf(" %s,c++,c++11", ARGV[i]) ++ # } else { ++ # pahole_cmd = pahole_cmd sprintf(" %s", ARGV[i]) ++ # } ++ # } ++ # system(pahole_cmd) ++ # } ++ PAHOLE_AWK_PROGRAM = BEGIN { pahole_cmd = \"pahole\"; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /--lang_exclude=/) { pahole_cmd = pahole_cmd sprintf(\" %s,c++,c++11\", ARGV[i]); } else { pahole_cmd = pahole_cmd sprintf(\" %s\", ARGV[i]); } } system(pahole_cmd); } ++ # If scripts/pahole-flags.sh is not present in the kernel tree, add PAHOLE and ++ # PAHOLE_AWK_PROGRAM assignments to PAHOLE_VARIABLES; otherwise assign the ++ # empty string to PAHOLE_VARIABLES. ++ PAHOLE_VARIABLES=$(if $(wildcard $(KERNEL_SOURCES)/scripts/pahole-flags.sh),,"PAHOLE=$(AWK) '$(PAHOLE_AWK_PROGRAM)'") + + ifndef ARCH + ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \ +@@ -109,7 +134,8 @@ else + + .PHONY: modules module clean clean_conftest modules_install + modules clean modules_install: +- @$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" $(KBUILD_PARAMS) $@ ++ @$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \ ++ $(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@ + @if [ "$@" = "modules" ]; then \ + for module in $(NV_KERNEL_MODULES); do \ + if [ -x split-object-file.sh ]; then \ +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/bashisms.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/bashisms.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/bashisms.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/bashisms.patch 2025-05-08 20:10:43.000000000 +0000 @@ -3,7 +3,7 @@ --- a/conftest.sh +++ b/conftest.sh -@@ -6793,7 +6793,7 @@ case "$5" in +@@ -7023,7 +7023,7 @@ case "$5" in if [ -n "$VGX_BUILD" ]; then if [ -f /proc/xen/capabilities ]; then diff -Nru 
nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/cc_version_check-gcc5.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/cc_version_check-gcc5.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/cc_version_check-gcc5.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/cc_version_check-gcc5.patch 2025-05-08 20:10:43.000000000 +0000 @@ -5,7 +5,7 @@ --- a/conftest.sh +++ b/conftest.sh -@@ -6627,7 +6627,7 @@ case "$5" in +@@ -6857,7 +6857,7 @@ case "$5" in kernel_cc_minor=`echo ${kernel_cc_version} | cut -d '.' -f 2` echo " diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/conftest-verbose.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/conftest-verbose.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/conftest-verbose.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/conftest-verbose.patch 2025-05-08 20:10:43.000000000 +0000 @@ -3,7 +3,7 @@ --- a/Kbuild +++ b/Kbuild -@@ -166,6 +166,17 @@ NV_CONFTEST_HEADERS += $(obj)/conftest/h +@@ -180,6 +180,17 @@ NV_CONFTEST_HEADERS += $(obj)/conftest/h NV_CONFTEST_HEADERS += $(NV_CONFTEST_COMPILE_TEST_HEADERS) @@ -21,7 +21,7 @@ # # Generate a header file for a single conftest compile test. 
Each compile test # header depends on conftest.sh, as well as the generated conftest/headers.h -@@ -190,6 +200,8 @@ define NV_GENERATE_COMPILE_TEST_HEADER +@@ -204,6 +215,8 @@ define NV_GENERATE_COMPILE_TEST_HEADER @mkdir -p $(obj)/conftest @# concatenate /dev/null to prevent cat from hanging when $$^ is empty @cat $$^ /dev/null > $$@ @@ -30,7 +30,7 @@ endef # -@@ -209,9 +221,11 @@ $(eval $(call NV_GENERATE_COMPILE_TEST_H +@@ -223,9 +236,11 @@ $(eval $(call NV_GENERATE_COMPILE_TEST_H $(eval $(call NV_GENERATE_COMPILE_TEST_HEADER,symbols,$(NV_CONFTEST_SYMBOL_COMPILE_TESTS))) $(eval $(call NV_GENERATE_COMPILE_TEST_HEADER,types,$(NV_CONFTEST_TYPE_COMPILE_TESTS))) @@ -43,7 +43,7 @@ # Each of these headers is checked for presence with a test #include; a -@@ -318,6 +332,7 @@ NV_HEADER_PRESENCE_PART = $(addprefix $( +@@ -335,6 +350,7 @@ NV_HEADER_PRESENCE_PART = $(addprefix $( define NV_HEADER_PRESENCE_CHECK $$(call NV_HEADER_PRESENCE_PART,$(1)): $$(NV_CONFTEST_SCRIPT) $(obj)/conftest/uts_release @mkdir -p $$(dir $$@) @@ -51,7 +51,7 @@ @$$(NV_CONFTEST_CMD) test_kernel_header '$$(NV_CONFTEST_CFLAGS)' '$(1)' > $$@ endef -@@ -327,6 +342,8 @@ $(foreach header,$(NV_HEADER_PRESENCE_TE +@@ -344,6 +360,8 @@ $(foreach header,$(NV_HEADER_PRESENCE_TE # Concatenate all of the parts into headers.h. 
$(obj)/conftest/headers.h: $(call NV_HEADER_PRESENCE_PART,$(NV_HEADER_PRESENCE_TESTS)) @cat $^ > $@ @@ -60,7 +60,7 @@ clean-dirs := $(obj)/conftest -@@ -347,7 +364,8 @@ BUILD_SANITY_CHECKS = \ +@@ -364,7 +382,8 @@ BUILD_SANITY_CHECKS = \ .PHONY: $(BUILD_SANITY_CHECKS) @@ -70,7 +70,7 @@ @$(NV_CONFTEST_CMD) $@ full_output # Perform all sanity checks before generating the conftest headers -@@ -356,11 +374,13 @@ $(NV_CONFTEST_HEADERS): | $(BUILD_SANITY +@@ -373,11 +392,13 @@ $(NV_CONFTEST_HEADERS): | $(BUILD_SANITY # Make the conftest headers depend on the kernel version string diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/fragile-ARCH.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/fragile-ARCH.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/fragile-ARCH.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/fragile-ARCH.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -Author: Andreas Beckmann -Description: rename the ARCH variable - the NVIDIA build systems reacts very fragile if this contains confusing - values from different sources - ---- a/Makefile -+++ b/Makefile -@@ -56,8 +56,8 @@ else - LD ?= ld - OBJDUMP ?= objdump - -- ifndef ARCH -- ARCH := $(shell uname -m | sed -e 's/i.86/i386/' \ -+ ifndef nvkARCH -+ nvkARCH := $(shell uname -m | sed -e 's/i.86/i386/' \ - -e 's/armv[0-7]\w\+/arm/' \ - -e 's/aarch64/arm64/' \ - -e 's/ppc64le/powerpc/' \ -@@ -74,7 +74,7 @@ else - KBUILD_PARAMS += V=1 - endif - KBUILD_PARAMS += -C $(KERNEL_SOURCES) M=$(CURDIR) -- KBUILD_PARAMS += ARCH=$(ARCH) -+ KBUILD_PARAMS += ARCH=$(nvkARCH) - KBUILD_PARAMS += NV_KERNEL_SOURCES=$(KERNEL_SOURCES) - KBUILD_PARAMS += NV_KERNEL_OUTPUT=$(KERNEL_OUTPUT) - KBUILD_PARAMS += NV_KERNEL_MODULES="$(NV_KERNEL_MODULES)" -@@ -104,7 +104,7 @@ else - # and hence must be used whenever present. 
- - LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \ -- $(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \ -+ $(KERNEL_SOURCES)/arch/$(nvkARCH)/kernel/module.lds \ - $(KERNEL_OUTPUT)/scripts/module.lds - NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s)) - diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/series nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/series --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/series 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/series 2025-05-08 20:10:43.000000000 +0000 @@ -5,13 +5,17 @@ 0001-bump-minimum-supported-kernel-version-to-3.10.patch 0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch 0034-fix-typos.patch -0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch 0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch -0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch 0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch +0052-backport-uvm-warning-fixes-from-560.28.03.patch +0053-fix-more-warnings.patch +0054-fix-more-uvm-warnings.patch +0058-backport-warning-fixes-from-565.57.01.patch +0059-backport-uvm-warning-fixes-from-550.90.07.patch +0060-backport-module_import_ns_takes_string_literal-chang.patch +0062-Support-BTF-generation-for-non-release-builds.patch # build system updates -fragile-ARCH.patch conftest-verbose.patch use-kbuild-compiler.patch use-kbuild-flags.patch diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-compiler.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/use-kbuild-compiler.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-compiler.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/use-kbuild-compiler.patch 2025-05-08 20:10:43.000000000 +0000 @@ 
-7,20 +7,21 @@ --- a/Makefile +++ b/Makefile -@@ -83,8 +83,8 @@ else +@@ -134,9 +134,9 @@ else .PHONY: modules module clean clean_conftest modules_install modules clean modules_install: -- @$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" $(KBUILD_PARAMS) $@ +- @$(MAKE) "LD=$(LD)" "CC=$(CC)" "OBJDUMP=$(OBJDUMP)" \ ++ $(MAKE) \ + $(PAHOLE_VARIABLES) $(KBUILD_PARAMS) $@ - @if [ "$@" = "modules" ]; then \ -+ $(MAKE) $(KBUILD_PARAMS) $@ + @set -x; if [ "$@" = "modules" ]; then \ for module in $(NV_KERNEL_MODULES); do \ if [ -x split-object-file.sh ]; then \ ./split-object-file.sh $$module.ko; \ --- a/Kbuild +++ b/Kbuild -@@ -147,7 +147,7 @@ NV_CONFTEST_SCRIPT := $(src)/conftest.sh +@@ -161,7 +161,7 @@ NV_CONFTEST_SCRIPT := $(src)/conftest.sh NV_CONFTEST_HEADER := $(obj)/conftest/headers.h NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-flags.patch nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/use-kbuild-flags.patch --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-flags.patch 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/module/use-kbuild-flags.patch 2025-05-08 20:10:43.000000000 +0000 @@ -13,7 +13,7 @@ --- a/Kbuild +++ b/Kbuild -@@ -154,6 +154,16 @@ NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_ +@@ -168,6 +168,16 @@ NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_ NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign) NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,) @@ -32,7 +32,7 @@ NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h --- a/nvidia/nvidia.Kbuild +++ b/nvidia/nvidia.Kbuild -@@ -103,7 +103,7 @@ always += $(NVIDIA_INTERFACE) +@@ -100,7 +100,7 @@ always += $(NVIDIA_INTERFACE) always-y += $(NVIDIA_INTERFACE) $(obj)/$(NVIDIA_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_OBJECTS)) @@ -43,7 +43,7 
@@ # --- a/Makefile +++ b/Makefile -@@ -118,7 +118,7 @@ else +@@ -171,7 +171,7 @@ else # cannot be defined in the *Kbuild files, which are only used during stage 1. %-linux.o: modules @@ -54,7 +54,7 @@ # Kbuild's "clean" rule won't clean up the conftest headers on its own, and --- a/nvidia-modeset/nvidia-modeset.Kbuild +++ b/nvidia-modeset/nvidia-modeset.Kbuild -@@ -77,7 +77,7 @@ always += $(NVIDIA_MODESET_INTERFACE) +@@ -74,7 +74,7 @@ always += $(NVIDIA_MODESET_INTERFACE) always-y += $(NVIDIA_MODESET_INTERFACE) $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS)) diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/series nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/series --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/series 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -hardening.patch -typos.patch -hmm.patch diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/series-manual nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/series-manual --- nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/series-manual 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/patches/series-manual 2025-05-08 20:10:43.000000000 +0000 @@ -0,0 +1,7 @@ +0001-backport-NV_MODULE_IMPORT_NS_TAKES_STRING_LITERAL-ch.patch +hardening.patch +typos.patch +hmm.patch +fix-warnings.patch +implicit-function-declaration.patch +#kernel-flags.patch diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/rules nvidia-open-gpu-kernel-modules-535.247.01/debian/rules --- nvidia-open-gpu-kernel-modules-535.216.01/debian/rules 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/rules 2025-05-08 20:10:43.000000000 +0000 @@ -8,6 +8,9 @@ # The generated object files are linked into kernel modules. 
export DEB_BUILD_MAINT_OPTIONS = hardening=-stackprotector +# Avoid warnings on C++ compilation +export DEB_CFLAGS_MAINT_STRIP = -Werror=implicit-function-declaration + include /usr/share/dpkg/pkg-info.mk NVIDIA_RELEASE = $(DEB_VERSION_UPSTREAM) @@ -20,13 +23,8 @@ version = $(NVIDIA_RELEASE) version_major = $(NVIDIA_MAJOR) -ifeq (yes,$(NVIDIA_TESLA)) -variant = $(if $(NVIDIA_TESLA),tesla) -variant_description = $(if $(NVIDIA_TESLA), (Tesla version)) -else variant = $(if $(NVIDIA_TESLA),tesla-$(NVIDIA_TESLA)) variant_description = $(if $(NVIDIA_TESLA), (Tesla $(NVIDIA_TESLA) version)) -endif -variant = $(if $(variant),-$(variant)) nvidia = nvidia$(-variant) current = $(if $(variant),$(variant),current) @@ -52,7 +50,6 @@ TEMPLATE_DIRS = debian TEMPLATE_DIRS += debian/module/debian -TEMPLATE_DIRS += debian/patches/module TEMPLATE_DIRS += debian/tests TEMPLATES := $(filter-out %.modules.in,$(wildcard $(addsuffix /*.in,$(TEMPLATE_DIRS)))) @@ -73,6 +70,7 @@ dh $@ prepare: $(AUTOGEN) + QUILT_PATCHES=debian/patches QUILT_SERIES=series-manual quilt --quiltrc /dev/null push -a || test $$? = 2 cd kernel-open && QUILT_PATCHES=../debian/patches/module quilt --quiltrc /dev/null push -a || test $$? = 2 override_dh_auto_build: prepare @@ -95,8 +93,10 @@ $(RM) -r test-module-build execute_after_dh_clean: - if [ -f debian/patches/module/series ]; then cd kernel-open && QUILT_PATCHES=../debian/patches/module quilt --quiltrc /dev/null pop -a || test $$? = 2 ; fi + cd kernel-open && QUILT_PATCHES=../debian/patches/module quilt --quiltrc /dev/null pop -a || test $$? = 2 $(RM) -r kernel-open/.pc/ + test ! -s .pc/applied-patches || quilt --quiltrc /dev/null pop -a || test $$? 
= 2 + $(RM) -r .pc/ $(RM) $(filter-out $(AUTOKEEP),$(AUTOGEN)) $(if $(AUTOKEEP),$(MAKE) -f debian/rules $(AUTOKEEP)) @@ -124,18 +124,19 @@ module_source_pkg = debian/$(nvidia-kernel)-source module_source_txz = $(module_source_pkg)/usr/src/$(nvidia-kernel).tar.xz + pack-module-source: find $(module_source_pkg) \( -name '*.binary' -o -name '*.in' -not -name '*.modules.in' \) -print -delete tar c \ -C $(module_source_pkg)/usr/src \ - --owner=root --group=root \ + --numeric-owner --owner=0 --group=0 \ --mode=a=rX,u+w \ --sort=name \ --mtime='@$(SOURCE_DATE_EPOCH)' \ --clamp-mtime \ + --remove-files \ modules | xz -T1 -9 > $(module_source_txz) tar tfv $(module_source_txz) - $(RM) -r $(module_source_pkg)/usr/src/modules ############################################################################ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/debian/rules.defs nvidia-open-gpu-kernel-modules-535.247.01/debian/rules.defs --- nvidia-open-gpu-kernel-modules-535.216.01/debian/rules.defs 2024-11-20 09:22:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/debian/rules.defs 2025-05-08 20:10:43.000000000 +0000 @@ -1,6 +1,6 @@ LINUX_KMOD_VARIANT = -open -LINUX_KMOD_TESTED = 6.11 +LINUX_KMOD_TESTED = 6.14 ARCH_LIST = amd64 ARCH_LIST += arm64 diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Kbuild nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/Kbuild --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Kbuild 2024-09-17 17:05:59.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/Kbuild 2025-03-26 06:23:16.000000000 +0000 @@ -57,6 +57,20 @@ UBSAN_SANITIZE := y endif +# +# Command to create a symbolic link, explicitly resolving the symlink target +# to an absolute path to abstract away the difference between Linux < 6.13, +# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and +# Linux >= 6.13, where the CWD is the external module source tree. 
+# +# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for +# kernel modules which use precompiled binary object files. +# + +quiet_cmd_symlink = SYMLINK $@ + cmd_symlink = ln -sf $(abspath $<) $@ + + $(foreach _module, $(NV_KERNEL_MODULES), \ $(eval include $(src)/$(_module)/$(_module).Kbuild)) @@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src) EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.216.01\" +EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.247.01\" ifneq ($(SYSSRCHOST1X),) EXTRA_CFLAGS += -I$(SYSSRCHOST1X) @@ -242,10 +256,12 @@ drm/drm_device.h \ drm/drm_mode_config.h \ drm/drm_modeset_lock.h \ + drm/drm_client_setup.h \ dt-bindings/interconnect/tegra_icc_id.h \ generated/autoconf.h \ generated/compile.h \ generated/utsrelease.h \ + linux/aperture.h \ linux/efi.h \ linux/kconfig.h \ linux/platform/tegra/mc_utils.h \ @@ -308,7 +324,8 @@ soc/tegra/bpmp-abi.h \ soc/tegra/bpmp.h \ linux/cc_platform.h \ - asm/cpufeature.h + asm/cpufeature.h \ + crypto/sig.h # Filename to store the define for the header in $(1); this is only consumed by # the rule below that concatenates all of these together. diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Makefile nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/Makefile --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Makefile 2024-09-17 17:05:59.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/Makefile 2025-03-26 06:23:16.000000000 +0000 @@ -52,6 +52,22 @@ endif endif + # If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT. + # Look for the compiler specified there, and use it by default, if found. + ifeq ($(origin CC),default) + cc_version_text=$(firstword $(shell . 
$(KERNEL_OUTPUT)/.config; \ + echo "$$CONFIG_CC_VERSION_TEXT")) + + ifneq ($(cc_version_text),) + ifeq ($(shell command -v $(cc_version_text)),) + $(warning WARNING: Unable to locate the compiler $(cc_version_text) \ + from CONFIG_CC_VERSION_TEXT in the kernel configuration.) + else + CC=$(cc_version_text) + endif + endif + endif + CC ?= cc LD ?= ld OBJDUMP ?= objdump @@ -64,6 +80,16 @@ ) endif + KERNEL_ARCH = $(ARCH) + + ifneq ($(filter $(ARCH),i386 x86_64),) + KERNEL_ARCH = x86 + else + ifeq ($(filter $(ARCH),arm64 powerpc),) + $(error Unsupported architecture $(ARCH)) + endif + endif + NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem) NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \ $(NV_KERNEL_MODULES)) @@ -103,8 +129,9 @@ # module symbols on which the Linux kernel's module resolution is dependent # and hence must be used whenever present. - LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \ - $(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \ + LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \ + $(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \ + $(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \ $(KERNEL_OUTPUT)/scripts/module.lds NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s)) diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nvmisc.h nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/common/inc/nvmisc.h --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nvmisc.h 2024-09-17 17:05:35.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/common/inc/nvmisc.h 2025-03-26 06:23:00.000000000 +0000 @@ -695,6 +695,42 @@ } // +// Bug 4851259: Newly added functions must be hidden from certain HS-signed +// ucode compilers to avoid signature mismatch. +// +#ifndef NVDEC_1_0 +/*! + * Returns the position of nth set bit in the given mask. 
+ * + * Returns -1 if mask has fewer than n bits set. + * + * n is 0 indexed and has valid values 0..31 inclusive, so "zeroth" set bit is + * the first set LSB. + * + * Example, if mask = 0x000000F0u and n = 1, the return value will be 5. + * Example, if mask = 0x000000F0u and n = 4, the return value will be -1. + */ +static NV_FORCEINLINE NvS32 +nvGetNthSetBitIndex32(NvU32 mask, NvU32 n) +{ + NvU32 seenSetBitsCount = 0; + NvS32 index; + FOR_EACH_INDEX_IN_MASK(32, index, mask) + { + if (seenSetBitsCount == n) + { + return index; + } + ++seenSetBitsCount; + } + FOR_EACH_INDEX_IN_MASK_END; + + return -1; +} + +#endif // NVDEC_1_0 + +// // Size to use when declaring variable-sized arrays // #define NV_ANYSIZE_ARRAY 1 diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/conftest.sh nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/conftest.sh --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/conftest.sh 2024-09-17 16:26:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/conftest.sh 2025-03-26 05:51:40.000000000 +0000 @@ -2475,6 +2475,22 @@ fi ;; + file_operations_fop_unsigned_offset_present) + # + # Determine if the FOP_UNSIGNED_OFFSET define is present. + # + # Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to + # fop_flags") in v6.12. + # + CODE=" + #include + int conftest_file_operations_fop_unsigned_offset_present(void) { + return FOP_UNSIGNED_OFFSET; + }" + + compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types" + ;; + mm_context_t) # # Determine if the 'mm_context_t' data type is present @@ -5215,6 +5231,45 @@ compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions" ;; + + follow_pte_arg_vma) + # + # Determine if the first argument of follow_pte is + # mm_struct or vm_area_struct. 
+ # + # The first argument was changed from mm_struct to vm_area_struct by + # commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()") + # + CODE=" + #include + + typeof(follow_pte) conftest_follow_pte_has_vma_arg; + int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma, + unsigned long address, + pte_t **ptep, + spinlock_t **ptl) { + return 0; + }" + + compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types" + ;; + + ptep_get) + # + # Determine if ptep_get() is present. + # + # ptep_get() was added by commit 481e980a7c19 + # ("mm: Allow arches to provide ptep_get()") + # + CODE=" + #include + void conftest_ptep_get(void) { + ptep_get(); + }" + + compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions" + ;; + drm_plane_atomic_check_has_atomic_state_arg) # # Determine if drm_plane_helper_funcs::atomic_check takes 'state' @@ -6109,6 +6164,32 @@ compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types" ;; + acpi_video_register_backlight) + # + # Determine if acpi_video_register_backlight() function is present + # + # acpi_video_register_backlight was added by commit 3dbc80a3e4c55c + # (ACPI: video: Make backlight class device registration a separate + # step (v2)) for v6.0 (2022-09-02). + # Note: the include directive for in this conftest is + # necessary in order to support kernels between commit 0b9f7d93ca61 + # ("ACPI / i915: ignore firmware requests backlight change") for + # v3.16 (2014-07-07) and commit 3bd6bce369f5 ("ACPI / video: Port + # to new backlight interface selection API") for v4.2 (2015-07-16). + # Kernels within this range use the 'bool' type and the related + # 'false' value in without first including the + # definitions of that type and value. 
+ # + CODE=" + #include + #include + void conftest_acpi_video_register_backlight(void) { + acpi_video_register_backlight(0); + }" + + compile_check_conftest "$CODE" "NV_ACPI_VIDEO_REGISTER_BACKLIGHT" "" "functions" + ;; + acpi_video_backlight_use_native) # # Determine if acpi_video_backlight_use_native() function is present @@ -6362,6 +6443,25 @@ compile_check_conftest "$CODE" "NV_MEMORY_FAILURE_MF_SW_SIMULATED_DEFINED" "" "types" ;; + drm_client_setup) + # + # Determine whether drm_client_setup is present. + # + # Added by commit d07fdf922592 ("drm/fbdev-ttm: + # Convert to client-setup") in v6.13. + # + CODE=" + #include + #if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT) + #include + #endif + void conftest_drm_client_setup(void) { + drm_client_setup(); + }" + + compile_check_conftest "$CODE" "NV_DRM_CLIENT_SETUP_PRESENT" "" "functions" + ;; + drm_output_poll_changed) # # Determine whether drm_mode_config_funcs.output_poll_changed @@ -6385,6 +6485,38 @@ compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types" ;; + aperture_remove_conflicting_devices) + # + # Determine whether aperture_remove_conflicting_devices is present. + # + # Added by commit 7283f862bd991 ("drm: Implement DRM aperture + # helpers under video/") in v6.0 + CODE=" + #if defined(NV_LINUX_APERTURE_H_PRESENT) + #include + #endif + void conftest_aperture_remove_conflicting_devices(void) { + aperture_remove_conflicting_devices(); + }" + compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT" "" "functions" + ;; + + aperture_remove_conflicting_pci_devices) + # + # Determine whether aperture_remove_conflicting_pci_devices is present. 
+ # + # Added by commit 7283f862bd991 ("drm: Implement DRM aperture + # helpers under video/") in v6.0 + CODE=" + #if defined(NV_LINUX_APERTURE_H_PRESENT) + #include + #endif + void conftest_aperture_remove_conflicting_pci_devices(void) { + aperture_remove_conflicting_pci_devices(); + }" + compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT" "" "functions" + ;; + crypto_tfm_ctx_aligned) # Determine if 'crypto_tfm_ctx_aligned' is defined. # @@ -6406,17 +6538,17 @@ # This test is not complete and may return false positive. # CODE=" - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include void conftest_crypto(void) { struct shash_desc sd; struct crypto_shash cs; @@ -6426,6 +6558,47 @@ compile_check_conftest "$CODE" "NV_CRYPTO_PRESENT" "" "symbols" ;; + crypto_akcipher_verify) + # + # Determine whether the crypto_akcipher_verify API is still present. + # It was removed by commit 6b34562 ('crypto: akcipher - Drop sign/verify operations') + # in v6.13-rc1 (2024-10-04). + # + # This test is dependent on the crypto conftest to determine whether crypto should be + # enabled at all. That means that if the kernel is old enough such that crypto_akcipher_verify + # + # The test merely checks for the presence of the API, as it assumes that if the API + # is no longer present, the new API to replace it (crypto_sig_verify) must be present. + # If the kernel version is too old to have crypto_akcipher_verify, it will fail the crypto + # conftest above and all crypto code will be compiled out. 
+ # + CODE=" + #include + #include + void conftest_crypto_akcipher_verify(void) { + (void)crypto_akcipher_verify; + }" + + compile_check_conftest "$CODE" "NV_CRYPTO_AKCIPHER_VERIFY_PRESENT" "" "symbols" + ;; + + ecc_digits_from_bytes) + # + # Determine whether ecc_digits_from_bytes is present. + # It was added in commit c6ab5c915da4 ('crypto: ecc - Prevent ecc_digits_from_bytes from + # reading too many bytes') in v6.10. + # + # This functionality is needed when crypto_akcipher_verify is not present. + # + CODE=" + #include + void conftest_ecc_digits_from_bytes(void) { + (void)ecc_digits_from_bytes; + }" + + compile_check_conftest "$CODE" "NV_ECC_DIGITS_FROM_BYTES_PRESENT" "" "symbols" + ;; + mempolicy_has_unified_nodes) # # Determine if the 'mempolicy' structure has @@ -6514,6 +6687,63 @@ compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types" ;; + folio_test_swapcache) + # + # Determine if the folio_test_swapcache() function is present. + # + # folio_test_swapcache() was exported by commit d389a4a811551 ("mm: + # Add folio flag manipulation functions") in v5.16. + # + CODE=" + #include + void conftest_folio_test_swapcache(void) { + folio_test_swapcache(); + }" + + compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions" + ;; + + module_import_ns_takes_constant) + # + # Determine if the MODULE_IMPORT_NS macro takes a string literal + # or constant. + # + # Commit cdd30ebb1b9f ("module: Convert symbol namespace to + # string literal") changed MODULE_IMPORT_NS to take a string + # literal in Linux kernel v6.13. + # + CODE=" + #include + + MODULE_IMPORT_NS(DMA_BUF);" + + compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic" + ;; + + drm_driver_has_date) + # + # Determine if the 'drm_driver' structure has a 'date' field. + # + # Removed by commit cb2e1c2136f7 ("drm: remove driver date from + # struct drm_driver and all drivers") in linux-next, expected in + # v6.14. 
+ # + CODE=" + #if defined(NV_DRM_DRMP_H_PRESENT) + #include + #endif + + #if defined(NV_DRM_DRM_DRV_H_PRESENT) + #include + #endif + + int conftest_drm_driver_has_date(void) { + return offsetof(struct drm_driver, date); + }" + + compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DATE" "" "types" + ;; + # When adding a new conftest entry, please use the correct format for # specifying the relevant upstream Linux kernel commit. # diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/internal_crypt_lib.h nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/internal_crypt_lib.h --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/internal_crypt_lib.h 2024-09-17 17:05:32.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/internal_crypt_lib.h 2025-03-26 06:22:58.000000000 +0000 @@ -63,7 +63,9 @@ * old or even just user disabled. If we should use LKCA, include headers, else * define stubs to return errors. */ -#if defined(NV_CRYPTO_PRESENT) && defined (NV_CONFIG_CRYPTO_PRESENT) +#if defined(NV_CRYPTO_PRESENT) && defined (NV_CONFIG_CRYPTO_PRESENT) && \ + (defined(NV_CRYPTO_AKCIPHER_VERIFY_PRESENT) || \ + (defined(NV_CRYPTO_SIG_H_PRESENT) && defined(NV_ECC_DIGITS_FROM_BYTES_PRESENT))) #define USE_LKCA 1 #endif diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/libspdm_ecc.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/libspdm_ecc.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/libspdm_ecc.c 2024-09-17 17:05:32.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/libspdm_ecc.c 2025-03-26 06:22:58.000000000 +0000 @@ -1,5 +1,5 @@ /* -* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -30,14 +30,26 @@ #include #include #include +#ifndef NV_CRYPTO_AKCIPHER_VERIFY_PRESENT +#include + +struct signature +{ + u64 r[ECC_MAX_DIGITS]; + u64 s[ECC_MAX_DIGITS]; +}; +#endif // NV_CRYPTO_AKCIPHER_VERIFY_PRESENT + +#define ECDSA_PUBKEY_HEADER_XY_PRESENT (0x4) struct ecc_ctx { unsigned int curve_id; u64 priv_key[ECC_MAX_DIGITS]; // In big endian struct { - // ecdsa wants byte preceding pub_key to be set to '4' - u64 pub_key_prefix; + // ecdsa pubkey has header indicating length of pubkey + u8 padding[7]; + u8 pub_key_prefix; u64 pub_key[2 * ECC_MAX_DIGITS]; }; @@ -46,7 +58,7 @@ char const *name; int size; }; -#endif +#endif // USE_LKCA void *libspdm_ec_new_by_nid(size_t nid) { @@ -77,7 +89,7 @@ ctx->priv_key_set = false; return ctx; -#endif +#endif // USE_LKCA } void libspdm_ec_free(void *ec_context) @@ -109,7 +121,7 @@ ctx->pub_key_set = true; ctx->priv_key_set = true; return true; -#endif +#endif // USE_LKCA } bool lkca_ec_set_pub_key(void *ec_context, const uint8_t *public_key, @@ -139,7 +151,7 @@ memcpy(ctx->pub_key, public_key, public_key_size); ctx->pub_key_set = true; return true; -#endif +#endif // USE_LKCA } bool lkca_ec_get_pub_key(void *ec_context, uint8_t *public_key, @@ -158,7 +170,7 @@ memcpy(public_key, ctx->pub_key, ctx->size); return true; -#endif +#endif // USE_LKCA } bool lkca_ec_generate_key(void *ec_context, uint8_t *public_data, @@ -185,7 +197,7 @@ ctx->pub_key_set = true; return true; -#endif +#endif // USE_LKCA } bool lkca_ec_compute_key(void *ec_context, const uint8_t *peer_public, @@ -218,28 +230,87 @@ *key_size = ctx->size / 2; return true; -#endif +#endif // USE_LKCA } -bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid, - const uint8_t *message_hash, size_t hash_size, - const uint8_t *signature, size_t sig_size) +#ifndef NV_CRYPTO_AKCIPHER_VERIFY_PRESENT +static bool lkca_ecdsa_verify_crypto_sig(void *ec_context, 
size_t hash_nid, + const uint8_t *message_hash, size_t hash_size, + const uint8_t *signature, size_t sig_size) { #ifndef USE_LKCA return false; -#else +#else // USE_LKCA + struct ecc_ctx *ctx = ec_context; + u8 *pub_key; + int err; + DECLARE_CRYPTO_WAIT(wait); + struct crypto_sig * tfm = NULL; + struct signature sig; + + if (sig_size != ctx->size || !ctx->pub_key_set) + { + return false; + } + + tfm = crypto_alloc_sig(ctx->name, CRYPTO_ALG_TYPE_SIG, 0); + if (IS_ERR(tfm)) { + pr_info("crypto_alloc_sig failed in lkca_ecdsa_verify\n"); + return false; + } + + // modify header of pubkey to indicate size + pub_key = (u8 *) &(ctx->pub_key_prefix); + *pub_key = ECDSA_PUBKEY_HEADER_XY_PRESENT; + err = crypto_sig_set_pubkey(tfm, pub_key, ctx->size + 1); + if (err != 0) + { + pr_info("crypto_sig_set_pubkey failed in lkca_ecdsa_verify: %d", -err); + goto failTfm; + } + + // + // Compared to the way we receive the signature, we need to: + // - swap order of all digits + // - swap endianness for each digit + // + memset(&sig, 0, sizeof(sig)); + ecc_digits_from_bytes(signature, ctx->size/2, sig.r, ECC_MAX_DIGITS); + ecc_digits_from_bytes(signature + ctx->size/2, ctx->size/2, sig.s, ECC_MAX_DIGITS); + + err = crypto_sig_verify(tfm, (void *)&sig, sizeof(sig), message_hash, hash_size); + if (err != 0) + { + pr_info("crypto_sig_verify failed in lkca_ecdsa_verify %d\n", -err); + } + +failTfm: + crypto_free_sig(tfm); + + return err == 0; +#endif // USE_LKCA +} + +#else // NV_CRYPTO_AKCIPHER_VERIFY_PRESENT +static bool lkca_ecdsa_verify_akcipher(void *ec_context, size_t hash_nid, + const uint8_t *message_hash, size_t hash_size, + const uint8_t *signature, size_t sig_size) +{ +#ifndef USE_LKCA + return false; +#else // USE_LKCA struct ecc_ctx *ctx = ec_context; + u8 *pub_key; + int err; + DECLARE_CRYPTO_WAIT(wait); // Roundabout way u64 ber_max_len = 3 + 2 * (4 + (ECC_MAX_BYTES)); u64 ber_len = 0; u8 *ber = NULL; - u8 *pub_key; struct akcipher_request *req = NULL; struct 
crypto_akcipher *tfm = NULL; struct scatterlist sg; - DECLARE_CRYPTO_WAIT(wait); - int err; if (sig_size != ctx->size) { return false; @@ -251,21 +322,21 @@ tfm = crypto_alloc_akcipher(ctx->name, CRYPTO_ALG_TYPE_AKCIPHER, 0); if (IS_ERR(tfm)) { - pr_info("ALLOC FAILED\n"); + pr_info("crypto_alloc_akcipher failed in lkca_ecdsa_verify\n"); return false; } - pub_key = (u8 *) ctx->pub_key; - pub_key--; // Go back into byte of pub_key_prefix - *pub_key = 4; // And set it to 4 to placate kernel + // modify header of pubkey to indicate size + pub_key = (u8 *) &(ctx->pub_key_prefix); + *pub_key = ECDSA_PUBKEY_HEADER_XY_PRESENT; if ((err = crypto_akcipher_set_pub_key(tfm, pub_key, ctx->size + 1)) != 0) { - pr_info("SET PUB KEY FAILED: %d\n", -err); + pr_info("crypto_akcipher_set_pub_key failed in lkca_ecdsa_verify: %d\n", -err); goto failTfm; } req = akcipher_request_alloc(tfm, GFP_KERNEL); if (IS_ERR(req)) { - pr_info("REQUEST ALLOC FAILED\n"); + pr_info("akcipher_request_alloc failed in lkca_ecdsa_verify\n"); goto failTfm; } @@ -310,9 +381,8 @@ CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); akcipher_request_set_crypt(req, &sg, NULL, ber_len, hash_size); err = crypto_wait_req(crypto_akcipher_verify(req), &wait); - if (err != 0){ - pr_info("Verify FAILED %d\n", -err); + pr_info("crypto_akcipher_verify failed in lkca_ecdsa_verify %d\n", -err); } kfree(ber); @@ -322,5 +392,19 @@ crypto_free_akcipher(tfm); return err == 0; -#endif +#endif // USE_LKCA +} +#endif // NV_CRYPTO_AKCIPHER_VERIFY_PRESENT + +bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid, + const uint8_t *message_hash, size_t hash_size, + const uint8_t *signature, size_t sig_size) +{ +#ifndef NV_CRYPTO_AKCIPHER_VERIFY_PRESENT + return lkca_ecdsa_verify_crypto_sig(ec_context, hash_nid, message_hash, hash_size, + signature, sig_size); +#else // NV_CRYPTO_AKCIPHER_VERIFY_PRESENT + return lkca_ecdsa_verify_akcipher(ec_context, hash_nid, message_hash, hash_size, + signature, sig_size); +#endif // 
NV_CRYPTO_AKCIPHER_VERIFY_PRESENT } diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-mmap.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/nv-mmap.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-mmap.c 2024-09-17 17:05:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/nv-mmap.c 2025-03-26 06:22:55.000000000 +0000 @@ -143,6 +143,10 @@ return -EINVAL; } + if (write && !(mmap_context->prot & NV_PROTECT_WRITEABLE)) + { + return -EACCES; + } offset = mmap_context->mmap_start; if (nv->flags & NV_FLAG_CONTROL) diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nvidia.Kbuild nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/nvidia.Kbuild --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nvidia.Kbuild 2024-09-17 17:05:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/nvidia.Kbuild 2025-03-26 06:22:54.000000000 +0000 @@ -40,9 +40,6 @@ NVIDIA_BINARY_OBJECT := $(src)/nvidia/nv-kernel.o_binary NVIDIA_BINARY_OBJECT_O := nvidia/nv-kernel.o -quiet_cmd_symlink = SYMLINK $@ - cmd_symlink = ln -sf $< $@ - targets += $(NVIDIA_BINARY_OBJECT_O) $(obj)/$(NVIDIA_BINARY_OBJECT_O): $(NVIDIA_BINARY_OBJECT) FORCE @@ -162,6 +159,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter NV_CONFTEST_FUNCTION_COMPILE_TESTS += follow_pfn +NV_CONFTEST_FUNCTION_COMPILE_TESTS += ptep_get NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked NV_CONFTEST_FUNCTION_COMPILE_TESTS += add_memory_driver_managed @@ -228,7 +226,11 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_free_gscco_mem NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_bytes NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto +NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto_akcipher_verify 
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pte +NV_CONFTEST_SYMBOL_COMPILE_TESTS += follow_pte_arg_vma +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pfnmap_start +NV_CONFTEST_SYMBOL_COMPILE_TESTS += ecc_digits_from_bytes NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops NV_CONFTEST_TYPE_COMPILE_TESTS += swiotlb_dma_ops @@ -271,3 +273,4 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_init NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_off NV_CONFTEST_GENERIC_COMPILE_TESTS += memory_failure_mf_sw_simulated_defined +NV_CONFTEST_GENERIC_COMPILE_TESTS += module_import_ns_takes_constant diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/os-mlock.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/os-mlock.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/os-mlock.c 2024-09-17 17:05:27.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia/os-mlock.c 2025-03-26 06:22:55.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -32,14 +32,27 @@ #define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000 #endif -static inline int nv_follow_pfn(struct vm_area_struct *vma, - unsigned long address, - unsigned long *pfn) +static inline int nv_follow_flavors(struct vm_area_struct *vma, + unsigned long address, + unsigned long *pfn) { -#if defined(NV_FOLLOW_PFN_PRESENT) - return follow_pfn(vma, address, pfn); -#else -#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte +#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pfnmap_start + struct follow_pfnmap_args args = {}; + int rc; + + args.address = address; + args.vma = vma; + + rc = follow_pfnmap_start(&args); + if (rc) + return rc; + + *pfn = args.pfn; + + follow_pfnmap_end(&args); + + return 0; +#elif NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte int status = 0; spinlock_t *ptl; pte_t *ptep; @@ -47,17 +60,40 @@ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return status; + // + // The first argument of follow_pte() was changed from + // mm_struct to vm_area_struct in kernel 6.10. 
+ // +#if defined(NV_FOLLOW_PTE_ARG1_VMA) status = follow_pte(vma, address, &ptep, &ptl); +#else + status = follow_pte(vma->vm_mm, address, &ptep, &ptl); +#endif if (status) return status; + +#if defined(NV_PTEP_GET_PRESENT) *pfn = pte_pfn(ptep_get(ptep)); +#else + *pfn = pte_pfn(READ_ONCE(*ptep)); +#endif // The lock is acquired inside follow_pte() pte_unmap_unlock(ptep, ptl); return 0; -#else // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte +#else return -1; -#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte +#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pfnmap_start +} + +static inline int nv_follow_pfn(struct vm_area_struct *vma, + unsigned long address, + unsigned long *pfn) +{ +#if defined(NV_FOLLOW_PFN_PRESENT) + return follow_pfn(vma, address, pfn); +#else + return nv_follow_flavors(vma, address, pfn); #endif } diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm-drv.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-drm/nvidia-drm-drv.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm-drv.c 2024-09-17 16:53:17.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-drm/nvidia-drm-drv.c 2025-03-26 06:16:31.000000000 +0000 @@ -1285,6 +1285,10 @@ .read = drm_read, .llseek = noop_llseek, + +#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT) + .fop_flags = FOP_UNSIGNED_OFFSET, +#endif }; static const struct drm_ioctl_desc nv_drm_ioctls[] = { @@ -1435,7 +1439,10 @@ .name = "nvidia-drm", .desc = "NVIDIA DRM driver", + +#if defined(NV_DRM_DRIVER_HAS_DATE) .date = "20160202", +#endif #if defined(NV_DRM_DRIVER_HAS_DEVICE_LIST) .device_list = LIST_HEAD_INIT(nv_drm_driver.device_list), diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild 2024-09-17 
16:28:47.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild 2025-03-26 05:52:36.000000000 +0000 @@ -135,3 +135,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed +NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date +NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c 2024-09-17 16:50:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c 2025-03-26 06:14:50.000000000 +0000 @@ -1007,6 +1007,11 @@ #if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE) if (!acpi_video_backlight_use_native()) { +#if defined(NV_ACPI_VIDEO_REGISTER_BACKLIGHT) + nvkms_log(NVKMS_LOG_LEVEL_INFO, NVKMS_LOG_PREFIX, + "ACPI reported no NVIDIA native backlight available; attempting to use ACPI backlight."); + acpi_video_register_backlight(); +#endif return NULL; } #endif diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild 2024-09-17 16:28:47.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild 2025-03-26 05:52:36.000000000 +0000 @@ -40,9 +40,6 @@ NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o -quiet_cmd_symlink = SYMLINK $@ -cmd_symlink = ln -sf $< $@ - 
targets += $(NVIDIA_MODESET_BINARY_OBJECT_O) $(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE @@ -93,3 +90,4 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64 NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64 NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native +NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_register_backlight diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild 2024-09-17 17:05:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild 2025-03-26 06:22:54.000000000 +0000 @@ -86,6 +86,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno +NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm.c 2024-09-17 17:05:43.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm.c 2025-03-26 06:23:05.000000000 +0000 @@ -682,6 +682,9 @@ // Semaphore pool vmas do not have vma wrappers, but some functions will // assume vm_private_data is a wrapper. vma->vm_private_data = NULL; +#if defined(VM_WIPEONFORK) + nv_vm_flags_set(vma, VM_WIPEONFORK); +#endif if (is_fork) { // If we forked, leave the parent vma alone. 
diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_hmm.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_hmm.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_hmm.c 2024-09-17 17:05:53.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_hmm.c 2025-03-26 06:23:12.000000000 +0000 @@ -71,6 +71,24 @@ #include "uvm_va_policy.h" #include "uvm_tools.h" +// The function nv_PageSwapCache() wraps the check for page swap cache flag in +// order to support a wide variety of kernel versions. +// The function PageSwapCache() is removed after 32f51ead3d77 ("mm: remove +// PageSwapCache") in v6.12-rc1. +// The function folio_test_swapcache() was added in Linux 5.16 (d389a4a811551 +// "mm: Add folio flag manipulation functions") +// Systems with HMM patches backported to 5.14 are possible, but those systems +// do not include folio_test_swapcache() +// TODO: Bug 4050579: Remove this when migration of swap cached pages is updated +static __always_inline bool nv_PageSwapCache(struct page *page) +{ +#if defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT) + return folio_test_swapcache(page_folio(page)); +#else + return PageSwapCache(page); +#endif +} + static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block, uvm_page_index_t page_index, struct page *page); @@ -2554,7 +2572,7 @@ continue; } - if (PageSwapCache(src_page)) { + if (nv_PageSwapCache(src_page)) { // TODO: Bug 4050579: Remove this when swap cached pages can be // migrated. 
if (service_context) { diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_kvmalloc.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_kvmalloc.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_kvmalloc.c 2024-09-17 17:05:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_kvmalloc.c 2025-03-26 06:23:11.000000000 +0000 @@ -36,7 +36,7 @@ typedef struct { size_t alloc_size; - uint8_t ptr[0]; + uint8_t ptr[]; } uvm_vmalloc_hdr_t; typedef struct diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_linux.h nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_linux.h --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_linux.h 2024-09-17 17:05:38.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_linux.h 2025-03-26 06:23:02.000000000 +0000 @@ -226,7 +226,7 @@ #define __GFP_NORETRY 0 #endif -#define NV_UVM_GFP_FLAGS (GFP_KERNEL) +#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NOMEMALLOC) // Develop builds define DEBUG but enable optimization #if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP) diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_mmu.h nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_mmu.h --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_mmu.h 2024-09-17 17:05:47.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_mmu.h 2025-03-26 06:23:08.000000000 +0000 @@ -162,7 +162,7 @@ // pointers to child directories on the host. 
// this array is variable length, so it needs to be last to allow it to // take up extra space - uvm_page_directory_t *entries[0]; + uvm_page_directory_t *entries[]; }; enum diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_pmm_gpu.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_pmm_gpu.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_pmm_gpu.c 2024-09-17 17:05:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_pmm_gpu.c 2025-03-26 06:23:11.000000000 +0000 @@ -221,7 +221,7 @@ // Array of all child subchunks // TODO: Bug 1765461: Can the array be inlined? It could save the parent // pointer. - uvm_gpu_chunk_t *subchunks[0]; + uvm_gpu_chunk_t *subchunks[]; }; typedef enum diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_va_space_mm.c nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_va_space_mm.c --- nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_va_space_mm.c 2024-09-17 17:05:44.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/kernel-open/nvidia-uvm/uvm_va_space_mm.c 2025-03-26 06:23:06.000000000 +0000 @@ -291,8 +291,12 @@ // allocates memory which is attached to the mm_struct and freed // when the mm_struct is freed. ret = __mmu_notifier_register(NULL, current->mm); - if (ret) + if (ret) { + // Inform uvm_va_space_mm_unregister() that it has nothing to do. 
+ uvm_mmdrop(va_space_mm->mm); + va_space_mm->mm = NULL; return errno_to_nv_status(ret); + } #else UVM_ASSERT(0); #endif diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvBldVer.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvBldVer.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvBldVer.h 2024-09-17 16:44:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvBldVer.h 2025-03-26 06:11:42.000000000 +0000 @@ -36,25 +36,25 @@ // and then checked back in. You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r538_95 + #define NV_BUILD_BRANCH r539_28 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r538_95 + #define NV_PUBLIC_BRANCH r539_28 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_95-686" -#define NV_BUILD_CHANGELIST_NUM (34854198) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_28-859" +#define NV_BUILD_CHANGELIST_NUM (35750789) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_95-686" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34854198) +#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_28-859" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35750789) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r538_95-1" -#define NV_BUILD_CHANGELIST_NUM (34853858) +#define NV_BUILD_BRANCH_VERSION "r539_28-1" +#define NV_BUILD_CHANGELIST_NUM (35750715) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "538.96" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34853858) +#define NV_BUILD_NAME "539.29" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35750715) #define NV_BUILD_BRANCH_BASE_VERSION R535 #endif // End buildmeister python edited section diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvUnixVersion.h 
nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvUnixVersion.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvUnixVersion.h 2024-09-17 16:44:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvUnixVersion.h 2025-03-26 06:11:42.000000000 +0000 @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "535.216.01" +#define NV_VERSION_STRING "535.247.01" #else diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvVer.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvVer.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvVer.h 2024-09-17 16:44:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvVer.h 2025-03-26 06:11:42.000000000 +0000 @@ -3,7 +3,7 @@ #define NV_COMPANY_NAME_STRING_SHORT "NVIDIA" #define NV_COMPANY_NAME_STRING_FULL "NVIDIA Corporation" #define NV_COMPANY_NAME_STRING NV_COMPANY_NAME_STRING_FULL -#define NV_COPYRIGHT_YEAR "2024" +#define NV_COPYRIGHT_YEAR "2025" #define NV_COPYRIGHT "(C) " NV_COPYRIGHT_YEAR " NVIDIA Corporation. All rights reserved." // Please do not use the non-ascii copyright symbol for (C). 
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvlog_defs.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvlog_defs.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvlog_defs.h 2024-09-17 16:44:53.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/inc/nvlog_defs.h 2025-03-26 06:11:42.000000000 +0000 @@ -102,10 +102,11 @@ #define NVLOG_MAX_BUFFERS_v11 16 #define NVLOG_MAX_BUFFERS_v12 256 +#define NVLOG_MAX_BUFFERS_v13 3840 #if NVOS_IS_UNIX -#define NVLOG_MAX_BUFFERS NVLOG_MAX_BUFFERS_v12 -#define NVLOG_LOGGER_VERSION 12 // v1.2 +#define NVLOG_MAX_BUFFERS NVLOG_MAX_BUFFERS_v13 +#define NVLOG_LOGGER_VERSION 13 // v1.3 #else #define NVLOG_MAX_BUFFERS NVLOG_MAX_BUFFERS_v11 #define NVLOG_LOGGER_VERSION 11 // v1.1 diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvswitch/kernel/ls10/link_ls10.c nvidia-open-gpu-kernel-modules-535.247.01/src/common/nvswitch/kernel/ls10/link_ls10.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvswitch/kernel/ls10/link_ls10.c 2024-09-17 16:45:39.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/nvswitch/kernel/ls10/link_ls10.c 2025-03-26 06:12:10.000000000 +0000 @@ -408,6 +408,42 @@ switch (mode) { + case NVLINK_LINKSTATE_SAFE: + { + // check if link is in reset + if (nvswitch_is_link_in_reset(device, link)) + { + NVSWITCH_PRINT(device, ERROR, + "%s: link #%d is still in reset, cannot change link state\n", + __FUNCTION__, link->linkNumber); + return NVL_ERR_INVALID_STATE; + } + + NVSWITCH_PRINT(device, INFO, + "%s : Link state request to Safe for (%s):(%s) not needed. 
Skipping.\n", + __FUNCTION__, device->name, link->linkName); + + break; + } + + case NVLINK_LINKSTATE_HS: + { + // check if link is in reset + if (nvswitch_is_link_in_reset(device, link)) + { + NVSWITCH_PRINT(device, ERROR, + "%s: link #%d is still in reset, cannot change link state\n", + __FUNCTION__, link->linkNumber); + return -NVL_ERR_INVALID_STATE; + } + + NVSWITCH_PRINT(device, INFO, + "%s : Link state request to Active for (%s):(%s) not needed. Skipping.\n", + __FUNCTION__, device->name, link->linkName); + + break; + } + case NVLINK_LINKSTATE_INITPHASE1: { // Apply appropriate SIMMODE settings diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvswitch/kernel/smbpbi_nvswitch.c nvidia-open-gpu-kernel-modules-535.247.01/src/common/nvswitch/kernel/smbpbi_nvswitch.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvswitch/kernel/smbpbi_nvswitch.c 2024-09-17 16:45:41.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/nvswitch/kernel/smbpbi_nvswitch.c 2025-03-26 06:12:11.000000000 +0000 @@ -90,10 +90,10 @@ if (status == NVL_SUCCESS) { -#if defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS) +#if defined(DEBUG) || defined(DEVELOP) nvswitch_lib_smbpbi_log_sxid(device, NVSWITCH_ERR_NO_ERROR, "NVSWITCH SMBPBI server is online."); -#endif // defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS) +#endif // defined(DEBUG) || defined(DEVELOP) NVSWITCH_PRINT(device, INFO, "%s: SMBPBI POST INIT completed\n", __FUNCTION__); } diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h 2024-09-17 16:47:59.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h 2025-03-26 06:13:29.000000000 +0000 @@ -256,7 +256,7 @@ #define 
NV0000_CTRL_NVD_SIGNATURE_SIZE (4) /* Maximum number of buffers */ -#define NV0000_CTRL_NVD_MAX_BUFFERS (256) +#define NV0000_CTRL_NVD_MAX_BUFFERS (3840) #define NV0000_CTRL_NVD_GET_NVLOG_INFO_PARAMS_MESSAGE_ID (0x4U) diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080ecc.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080ecc.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080ecc.h 2024-09-17 16:48:07.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080ecc.h 2025-03-26 06:13:34.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -39,36 +39,9 @@ /* * NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS * - * sramLastClearedTimestamp [out] - * dramLastClearedTimestamp [out] - * unix-epoch based timestamp. These fields indicate when the error counters - * were last cleared by the user. 
- * - * sramErrorCounts [out] - * dramErrorCounts [out] - * Aggregate error counts for SRAM and DRAM - */ - -#define NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS_MESSAGE_ID (0x0U) - -typedef struct NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS { - NvU32 sramLastClearedTimestamp; - NvU32 dramLastClearedTimestamp; - - NV_DECLARE_ALIGNED(NvU64 sramCorrectedTotalCounts, 8); - NV_DECLARE_ALIGNED(NvU64 sramUncorrectedTotalCounts, 8); - NV_DECLARE_ALIGNED(NvU64 dramCorrectedTotalCounts, 8); - NV_DECLARE_ALIGNED(NvU64 dramUncorrectedTotalCounts, 8); -} NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS; - -#define NV2080_CTRL_CMD_ECC_GET_ECI_COUNTERS (0x20803401U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_ECC_INTERFACE_ID << 8) | NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS_MESSAGE_ID" */ - -/* - * NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS - * * sramParityUncorrectedUnique [out] * sramSecDedUncorrectedUnique [out] - * sramCorrectedTotal [out] + * sramCorrectedUnique [out] * dramUncorrectedTotal [out] * dramCorrectedTotal [out] * Aggregate error counts for SRAM and DRAM. 
@@ -88,12 +61,12 @@ * Boolean flag which is set if SRAM error threshold was exceeded */ -#define NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS_MESSAGE_ID (0x1U) +#define NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS_MESSAGE_ID (0x0U) -typedef struct NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS { +typedef struct NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS { NV_DECLARE_ALIGNED(NvU64 sramParityUncorrectedUnique, 8); NV_DECLARE_ALIGNED(NvU64 sramSecDedUncorrectedUnique, 8); - NV_DECLARE_ALIGNED(NvU64 sramCorrectedTotal, 8); + NV_DECLARE_ALIGNED(NvU64 sramCorrectedUnique, 8); NV_DECLARE_ALIGNED(NvU64 dramUncorrectedTotal, 8); NV_DECLARE_ALIGNED(NvU64 dramCorrectedTotal, 8); @@ -106,7 +79,7 @@ NV_DECLARE_ALIGNED(NvU64 sramBucketOther, 8); NvBool sramErrorThresholdExceeded; -} NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS; +} NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS; /* * NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS @@ -124,9 +97,9 @@ * dramUncTot [out]: * total uncorrectable DRAM error count */ -#define NV2080_CTRL_CMD_ECC_GET_VOLATILE_COUNTS (0x20803402U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_ECC_INTERFACE_ID << 8) | NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS_MESSAGE_ID" */ +#define NV2080_CTRL_CMD_ECC_GET_VOLATILE_COUNTS (0x20803401U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_ECC_INTERFACE_ID << 8) | NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS_MESSAGE_ID" */ -#define NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS_MESSAGE_ID (0x2U) +#define NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS_MESSAGE_ID (0x1U) typedef struct NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS { NV_DECLARE_ALIGNED(NvU64 sramCorUni, 8); diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h 2024-09-17 16:48:09.000000000 +0000 +++ 
nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h 2025-03-26 06:13:34.000000000 +0000 @@ -4284,4 +4284,29 @@ #define NV2080_CTRL_GPU_GET_NVENC_SW_SESSION_INFO_V2 (0x208001afU) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_GPU_INTERFACE_ID << 8) | NV2080_CTRL_GPU_GET_NVENC_SW_SESSION_INFO_V2_PARAMS_MESSAGE_ID" */ +/* + * NV2080_CTRL_CMD_GPU_GET_TPC_RECONFIG_MASK + * + * This command returns the TPC reconfig mask for a specific GPC + * + * gpc[IN] + * The GPC for which the TPC reconfig mask needs to be queried. + * The GPC should be specified as a logical index. + * + * tpcReconfigMask[OUT] + * Mask of reconfigurable TPCs in the specified GPC + * + * Possible status values returned are: + * NV_OK + * NV_ERR_NOT_SUPPORTED + */ +#define NV2080_CTRL_CMD_GPU_GET_TPC_RECONFIG_MASK (0x208001e7U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_GPU_INTERFACE_ID << 8) | NV2080_CTRL_GPU_GET_TPC_RECONFIG_MASK_PARAMS_MESSAGE_ID" */ + +#define NV2080_CTRL_GPU_GET_TPC_RECONFIG_MASK_PARAMS_MESSAGE_ID (0xe7U) + +typedef struct NV2080_CTRL_GPU_GET_TPC_RECONFIG_MASK_PARAMS { + NvU32 gpc; + NvU32 tpcReconfigMask; +} NV2080_CTRL_GPU_GET_TPC_RECONFIG_MASK_PARAMS; + /* _ctrl2080gpu_h_ */ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h 2024-09-17 16:48:09.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h 2025-03-26 06:13:34.000000000 +0000 @@ -1851,4 +1851,30 @@ NvU32 numGfxTpc; } NV2080_CTRL_GR_GET_GFX_GPC_AND_TPC_INFO_PARAMS; +/* + * NV2080_CTRL_CMD_GR_GET_TPC_RECONFIG_MASK + * + * This command returns the TPC reconfig mask for a specific GPC + * + * gpc[IN] + * The GPC for which the TPC reconfig mask needs to be queried. 
+ * The GPC should be specified as a logical index. + * + * tpcReconfigMask[OUT] + * Mask of reconfigurable TPCs in the specified GPC + * + * grRouteInfo[IN] + * This parameter specifies the routing information used to + * disambiguate the target GR engine. + */ +#define NV2080_CTRL_CMD_GR_GET_TPC_RECONFIG_MASK (0x2080123bU) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_GR_INTERFACE_ID << 8) | NV2080_CTRL_GR_GET_TPC_RECONFIG_MASK_PARAMS_MESSAGE_ID" */ + +#define NV2080_CTRL_GR_GET_TPC_RECONFIG_MASK_PARAMS_MESSAGE_ID (0x3bU) + +typedef struct NV2080_CTRL_GR_GET_TPC_RECONFIG_MASK_PARAMS { + NvU32 gpc; + NvU32 tpcReconfigMask; + NV_DECLARE_ALIGNED(NV2080_CTRL_GR_ROUTE_INFO grRouteInfo, 8); +} NV2080_CTRL_GR_GET_TPC_RECONFIG_MASK_PARAMS; + /* _ctrl2080gr_h_ */ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h 2024-09-17 16:48:13.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h 2025-03-26 06:13:37.000000000 +0000 @@ -106,4 +106,33 @@ NV_DECLARE_ALIGNED(NV2080_CTRL_GR_ROUTE_INFO grRouteInfo, 8); } NV208F_CTRL_GR_ECC_INJECTION_SUPPORTED_PARAMS; +/* + * NV208F_CTRL_CMD_GR_ECC_SET_TRANSIENT_CLEARING_POLICY + * + * Control command to determine whether or not the actions to clear potential transient + * errors in the SM should be taken + * + * Parameters: + * + * policy + * NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_DISABLED + * Don't attempt to clear a transient error in the SM + * NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_ENABLED + * Attempt to clear a transient error in the SM + * + * Possible status values returned are: + * NV_OK + * NV_ERR_INVALID_ARGUMENT + */ +#define NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_DISABLED (0x00000000) +#define 
NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_ENABLED (0x00000001) + +#define NV208F_CTRL_CMD_GR_ECC_SET_TRANSIENT_CLEARING_POLICY (0x208f1205) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_DIAG_GR_INTERFACE_ID << 8) | NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS_MESSAGE_ID" */ + +#define NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS_MESSAGE_ID (0x5U) + +typedef struct NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS { + NvU32 policy; +} NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS; + /* _ctrl208fgr_h_ */ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h 2024-09-17 16:48:17.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h 2025-03-26 06:13:40.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2013-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/nverror.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/nverror.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/nverror.h 2024-09-17 16:48:24.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/nverror.h 2025-03-26 06:13:43.000000000 +0000 @@ -123,7 +123,9 @@ #define ROBUST_CHANNEL_FAST_PATH_ERROR (139) #define UNRECOVERABLE_ECC_ERROR_ESCAPE (140) #define GPU_INIT_ERROR (143) -#define ROBUST_CHANNEL_LAST_ERROR (GPU_INIT_ERROR) +#define RESOURCE_RETIREMENT_EVENT (156) +#define RESOURCE_RETIREMENT_FAILURE (157) +#define ROBUST_CHANNEL_LAST_ERROR (RESOURCE_RETIREMENT_FAILURE) // Indexed CE reference diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/nvmisc.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/nvmisc.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/nvmisc.h 2024-09-17 16:48:24.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/sdk/nvidia/inc/nvmisc.h 2025-03-26 06:13:43.000000000 +0000 @@ -695,6 +695,42 @@ } // +// Bug 4851259: Newly added functions must be hidden from certain HS-signed +// ucode compilers to avoid signature mismatch. +// +#ifndef NVDEC_1_0 +/*! + * Returns the position of nth set bit in the given mask. + * + * Returns -1 if mask has fewer than n bits set. + * + * n is 0 indexed and has valid values 0..31 inclusive, so "zeroth" set bit is + * the first set LSB. + * + * Example, if mask = 0x000000F0u and n = 1, the return value will be 5. + * Example, if mask = 0x000000F0u and n = 4, the return value will be -1. 
+ */ +static NV_FORCEINLINE NvS32 +nvGetNthSetBitIndex32(NvU32 mask, NvU32 n) +{ + NvU32 seenSetBitsCount = 0; + NvS32 index; + FOR_EACH_INDEX_IN_MASK(32, index, mask) + { + if (seenSetBitsCount == n) + { + return index; + } + ++seenSetBitsCount; + } + FOR_EACH_INDEX_IN_MASK_END; + + return -1; +} + +#endif // NVDEC_1_0 + +// // Size to use when declaring variable-sized arrays // #define NV_ANYSIZE_ARRAY 1 diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/common/uproc/os/common/include/liblogdecode.h nvidia-open-gpu-kernel-modules-535.247.01/src/common/uproc/os/common/include/liblogdecode.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/common/uproc/os/common/include/liblogdecode.h 2024-09-17 16:48:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/common/uproc/os/common/include/liblogdecode.h 2025-03-26 06:13:44.000000000 +0000 @@ -42,7 +42,7 @@ # define LIBOS_LOG_DECODE_ENABLE 1 # define LIBOS_LOG_TO_NVLOG 0 -# define LIBOS_LOG_MAX_LOGS 160 // Max logs for all GPUs for offline decoder +# define LIBOS_LOG_MAX_LOGS 3840 // Max logs for all GPUs for offline decoder #endif // NVRM diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h 2024-09-17 16:45:43.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h 2025-03-26 06:12:12.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2017-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrstruct.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrstruct.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrstruct.h 2024-09-17 16:45:43.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/inforom/ifrstruct.h 2025-03-26 06:12:12.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h 2024-09-17 16:45:44.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h 2025-03-26 06:12:13.000000000 +0000 @@ -65,6 +65,7 @@ CHIPSET_SETUP_FUNC(Intel_4381_setupFunc) CHIPSET_SETUP_FUNC(Intel_7A82_setupFunc) CHIPSET_SETUP_FUNC(Intel_7A04_setupFunc) +CHIPSET_SETUP_FUNC(Intel_1B81_setupFunc) CHIPSET_SETUP_FUNC(SiS_656_setupFunc) CHIPSET_SETUP_FUNC(ATI_RS400_setupFunc) CHIPSET_SETUP_FUNC(ATI_RS480_setupFunc) @@ -186,8 +187,8 @@ {PCI_VENDOR_ID_INTEL, 0x4385, CS_INTEL_4381, "Intel-RocketLake", Intel_4381_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x7A82, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x7A84, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc}, - {PCI_VENDOR_ID_INTEL, 0x1B81, CS_INTEL_1B81, 
"Intel-SapphireRapids", NULL}, - {PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", NULL}, + {PCI_VENDOR_ID_INTEL, 0x1B81, CS_INTEL_1B81, "Intel-SapphireRapids", Intel_1B81_setupFunc}, + {PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", Intel_1B81_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x18DC, CS_INTEL_18DC, "Intel-IceLake", NULL}, {PCI_VENDOR_ID_INTEL, 0x7A04, CS_INTEL_7A04, "Intel-RaptorLake", Intel_7A04_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x5795, CS_INTEL_5795, "Intel-GraniteRapids", NULL}, diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/nvpcie.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/nvpcie.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/nvpcie.h 2024-09-17 16:45:44.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/arch/nvalloc/common/inc/nvpcie.h 2025-03-26 06:12:13.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2000-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2000-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_all_dcl_pb.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_all_dcl_pb.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_all_dcl_pb.c 2024-09-17 16:49:02.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_all_dcl_pb.c 2025-03-26 06:13:59.000000000 +0000 @@ -122,6 +122,18 @@ PRB_MAYBE_FIELD_NAME("engine") PRB_MAYBE_FIELD_DEFAULT(0) }, + { + 331, + { + PRB_OPTIONAL, + PRB_MESSAGE, + 0, + }, + RC_RCDIAGRECORD, + 0, + PRB_MAYBE_FIELD_NAME("rc_diag_recs") + PRB_MAYBE_FIELD_DEFAULT(0) + }, }; // 'ErrorBlock' field defaults @@ -150,7 +162,7 @@ PRB_MAYBE_MESSAGE_NAME("Dcl.Engines") }, { - 7, + 8, prb_fields_dcl_dclmsg, PRB_MAYBE_MESSAGE_NAME("Dcl.DclMsg") }, diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_all_dcl_pb.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_all_dcl_pb.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_all_dcl_pb.h 2024-09-17 16:49:02.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_all_dcl_pb.h 2025-03-26 06:13:59.000000000 +0000 @@ -18,8 +18,8 @@ // Message maximum lengths // Does not include repeated fields, strings and byte arrays. 
#define DCL_ENGINES_LEN 130 -#define DCL_DCLMSG_LEN 567 -#define DCL_ERRORBLOCK_LEN 571 +#define DCL_DCLMSG_LEN 610 +#define DCL_ERRORBLOCK_LEN 614 extern const PRB_FIELD_DESC prb_fields_dcl_engines[]; @@ -41,6 +41,7 @@ #define DCL_DCLMSG_JOURNAL_BUGCHECK (&prb_fields_dcl_dclmsg[4]) #define DCL_DCLMSG_RCCOUNTER (&prb_fields_dcl_dclmsg[5]) #define DCL_DCLMSG_ENGINE (&prb_fields_dcl_dclmsg[6]) +#define DCL_DCLMSG_RC_DIAG_RECS (&prb_fields_dcl_dclmsg[7]) // 'DclMsg' field lengths #define DCL_DCLMSG_COMMON_LEN 42 @@ -50,6 +51,7 @@ #define DCL_DCLMSG_JOURNAL_BUGCHECK_LEN 69 #define DCL_DCLMSG_RCCOUNTER_LEN 64 #define DCL_DCLMSG_ENGINE_LEN 133 +#define DCL_DCLMSG_RC_DIAG_RECS_LEN 42 extern const PRB_FIELD_DESC prb_fields_dcl_errorblock[]; @@ -57,7 +59,7 @@ #define DCL_ERRORBLOCK_DATA (&prb_fields_dcl_errorblock[0]) // 'ErrorBlock' field lengths -#define DCL_ERRORBLOCK_DATA_LEN 570 +#define DCL_ERRORBLOCK_DATA_LEN 613 extern const PRB_SERVICE_DESC prb_services_dcl[]; diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_intr_nvoc.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_intr_nvoc.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_intr_nvoc.h 2024-09-17 16:54:55.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_intr_nvoc.h 2025-03-26 06:16:51.000000000 +0000 @@ -7,7 +7,7 @@ #endif /* - * SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2006-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -96,6 +96,11 @@ // Default value for intrStuckThreshold #define INTR_STUCK_THRESHOLD 1000 +// Minimum length of interrupt to log as long-running +#define LONG_INTR_LOG_LENGTH_NS (1000000LLU) // 1ms +// Maximum frequency of long-running interrupt print, per engine +#define LONG_INTR_LOG_RATELIMIT_NS (10000000000LLU) // 10s + #define INTR_TABLE_INIT_KERNEL (1 << 0) #define INTR_TABLE_INIT_PHYSICAL (1 << 1) @@ -194,6 +199,13 @@ #else #define PRIVATE_FIELD(x) NVOC_PRIVATE_FIELD(x) #endif +struct __nvoc_inner_struc_Intr_1__ { + NvU32 intrCount; + NvU64 intrLength; + NvU64 lastPrintTime; +}; + + struct Intr { const struct NVOC_RTTI *__nvoc_rtti; struct OBJENGSTATE __nvoc_base_OBJENGSTATE; @@ -262,6 +274,7 @@ NvU32 intrEn0Orig; NvBool halIntrEnabled; NvU32 saveIntrEn0; + struct __nvoc_inner_struc_Intr_1__ longIntrStats[167]; }; #ifndef __NVOC_CLASS_Intr_TYPEDEF__ diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_nv_name_released.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_nv_name_released.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_nv_name_released.h 2024-09-17 16:55:39.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_nv_name_released.h 2025-03-26 06:17:15.000000000 +0000 @@ -899,6 +899,7 @@ { 0x2324, 0x17a8, 0x10de, "NVIDIA H800" }, { 0x2329, 0x198b, 0x10de, "NVIDIA H20" }, { 0x2329, 0x198c, 0x10de, "NVIDIA H20" }, + { 0x232C, 0x2063, 0x10de, "NVIDIA H20-3e" }, { 0x2330, 0x16c0, 0x10de, "NVIDIA H100 80GB HBM3" }, { 0x2330, 0x16c1, 0x10de, "NVIDIA H100 80GB HBM3" }, { 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" }, diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_nvdebug_pb.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_nvdebug_pb.h --- 
nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_nvdebug_pb.h 2024-09-17 16:49:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_nvdebug_pb.h 2025-03-26 06:14:01.000000000 +0000 @@ -40,7 +40,7 @@ // Does not include repeated fields, strings and byte arrays. #define NVDEBUG_SYSTEMINFO_LEN 275 #define NVDEBUG_GPUINFO_LEN 164 -#define NVDEBUG_NVDUMP_LEN 1308 +#define NVDEBUG_NVDUMP_LEN 1351 #define NVDEBUG_SYSTEMINFO_NORTHBRIDGEINFO_LEN 12 #define NVDEBUG_SYSTEMINFO_SOCINFO_LEN 12 #define NVDEBUG_SYSTEMINFO_CPUINFO_LEN 24 @@ -104,7 +104,7 @@ // 'NvDump' field lengths #define NVDEBUG_NVDUMP_SYSTEM_INFO_LEN 278 -#define NVDEBUG_NVDUMP_DCL_MSG_LEN 570 +#define NVDEBUG_NVDUMP_DCL_MSG_LEN 613 #define NVDEBUG_NVDUMP_GPU_INFO_LEN 167 #define NVDEBUG_NVDUMP_EXCEPTION_ADDRESS_LEN 10 #define NVDEBUG_NVDUMP_SYSTEM_INFO_GSPRM_LEN 278 diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_rs_resource_nvoc.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_rs_resource_nvoc.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_rs_resource_nvoc.h 2024-09-17 16:55:19.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_rs_resource_nvoc.h 2025-03-26 06:17:03.000000000 +0000 @@ -81,15 +81,16 @@ */ struct RS_LOCK_INFO { - struct RsClient *pClient; ///< Pointer to client that was locked (if any) - struct RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking - RsResourceRef *pContextRef; ///< User-defined reference - struct RsSession *pSession; ///< Session object to be locked, if any - NvU32 flags; ///< RS_LOCK_FLAGS_* - NvU32 state; ///< RS_LOCK_STATE_* + struct RsClient *pClient; ///< Pointer to client that was locked (if any) + struct RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking + RsResourceRef *pContextRef; ///< User-defined reference + RsResourceRef *pResRefToBackRef; ///< Resource from 
which to infer indirect GPU dependencies + struct RsSession *pSession; ///< Session object to be locked, if any + NvU32 flags; ///< RS_LOCK_FLAGS_* + NvU32 state; ///< RS_LOCK_STATE_* NvU32 gpuMask; - NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for lock-metering - NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering + NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for lock-metering + NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering }; struct RS_RES_ALLOC_PARAMS_INTERNAL diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_subdevice_nvoc.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.c 2024-09-17 16:55:21.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_subdevice_nvoc.c 2025-03-26 06:17:04.000000000 +0000 @@ -7411,33 +7411,18 @@ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else - /*pFunc=*/ (void (*)(void)) subdeviceCtrlCmdEccGetEciCounters_IMPL, -#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) - /*flags=*/ 0x210u, - /*accessRight=*/0x0u, - /*methodId=*/ 0x20803401u, - /*paramSize=*/ sizeof(NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS), - /*pClassInfo=*/ &(__nvoc_class_def_Subdevice.classInfo), -#if NV_PRINTF_STRINGS_ALLOWED - /*func=*/ "subdeviceCtrlCmdEccGetEciCounters" -#endif - }, - { /* [479] */ -#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) - /*pFunc=*/ (void (*)(void)) NULL, -#else /*pFunc=*/ (void (*)(void)) subdeviceCtrlCmdEccGetVolatileCounts_IMPL, #endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*flags=*/ 0x210u, /*accessRight=*/0x0u, - /*methodId=*/ 0x20803402u, + /*methodId=*/ 0x20803401u, /*paramSize=*/ sizeof(NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS), /*pClassInfo=*/ &(__nvoc_class_def_Subdevice.classInfo), #if NV_PRINTF_STRINGS_ALLOWED /*func=*/ 
"subdeviceCtrlCmdEccGetVolatileCounts" #endif }, - { /* [480] */ + { /* [479] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7452,7 +7437,7 @@ /*func=*/ "subdeviceCtrlCmdFlaRange" #endif }, - { /* [481] */ + { /* [480] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x102204u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7467,7 +7452,7 @@ /*func=*/ "subdeviceCtrlCmdFlaSetupInstanceMemBlock" #endif }, - { /* [482] */ + { /* [481] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x100004u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7482,7 +7467,7 @@ /*func=*/ "subdeviceCtrlCmdFlaGetRange" #endif }, - { /* [483] */ + { /* [482] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x1810u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7497,7 +7482,7 @@ /*func=*/ "subdeviceCtrlCmdFlaGetFabricMemStats" #endif }, - { /* [484] */ + { /* [483] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4211u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7512,7 +7497,7 @@ /*func=*/ "subdeviceCtrlCmdGspGetFeatures" #endif }, - { /* [485] */ + { /* [484] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7527,7 +7512,7 @@ /*func=*/ "subdeviceCtrlCmdGspGetRmHeapStats" #endif }, - { /* [486] */ + { /* [485] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x2210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7542,7 +7527,7 @@ /*func=*/ "subdeviceCtrlCmdGrmgrGetGrFsInfo" #endif }, - { /* [487] */ + { /* [486] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x3u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7557,7 +7542,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixGc6BlockerRefCnt" #endif }, - { /* [488] */ + { /* [487] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x11u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7572,7 +7557,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixAllowDisallowGcoff" #endif }, - { /* [489] */ + { /* [488] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x1u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7587,7 +7572,7 @@ 
/*func=*/ "subdeviceCtrlCmdOsUnixAudioDynamicPower" #endif }, - { /* [490] */ + { /* [489] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x13u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7602,7 +7587,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixVidmemPersistenceStatus" #endif }, - { /* [491] */ + { /* [490] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x7u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7617,7 +7602,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixUpdateTgpStatus" #endif }, - { /* [492] */ + { /* [491] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7632,7 +7617,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalBootloadGspVgpuPluginTask" #endif }, - { /* [493] */ + { /* [492] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7647,7 +7632,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalShutdownGspVgpuPluginTask" #endif }, - { /* [494] */ + { /* [493] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7662,7 +7647,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalPgpuAddVgpuType" #endif }, - { /* [495] */ + { /* [494] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7677,7 +7662,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalEnumerateVgpuPerPgpu" #endif }, - { /* [496] */ + { /* [495] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7692,7 +7677,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalClearGuestVmInfo" #endif }, - { /* [497] */ + { /* [496] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7707,7 +7692,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalGetVgpuFbUsage" #endif }, - { /* [498] */ + { /* [497] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7722,7 +7707,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalSetVgpuEncoderCapacity" #endif }, - 
{ /* [499] */ + { /* [498] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7737,7 +7722,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalCleanupGspVgpuPluginResources" #endif }, - { /* [500] */ + { /* [499] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7752,7 +7737,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalGetPgpuFsEncoding" #endif }, - { /* [501] */ + { /* [500] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7767,7 +7752,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalGetPgpuMigrationSupport" #endif }, - { /* [502] */ + { /* [501] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7782,7 +7767,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalSetVgpuMgrConfig" #endif }, - { /* [503] */ + { /* [502] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0xa50u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7797,7 +7782,7 @@ /*func=*/ "subdeviceCtrlCmdGetAvailableHshubMask" #endif }, - { /* [504] */ + { /* [503] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7812,7 +7797,7 @@ /*func=*/ "subdeviceCtrlCmdPerfGetGpumonPerfmonUtilSamples" #endif }, - { /* [505] */ + { /* [504] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7832,7 +7817,7 @@ const struct NVOC_EXPORT_INFO __nvoc_export_info_Subdevice = { - /*numEntries=*/ 506, + /*numEntries=*/ 505, /*pExportEntries=*/ __nvoc_exported_method_def_Subdevice }; @@ -9296,10 +9281,6 @@ #endif #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) - pThis->__subdeviceCtrlCmdEccGetEciCounters__ = &subdeviceCtrlCmdEccGetEciCounters_IMPL; -#endif - -#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) pThis->__subdeviceCtrlCmdEccGetVolatileCounts__ = &subdeviceCtrlCmdEccGetVolatileCounts_IMPL; #endif @@ -9956,6 +9937,8 @@ pThis->__subdeviceControl__ = 
&__nvoc_thunk_GpuResource_subdeviceControl; pThis->__subdeviceUnmap__ = &__nvoc_thunk_GpuResource_subdeviceUnmap; + + pThis->__subdeviceGetMemInterMapParams__ = &__nvoc_thunk_RmResource_subdeviceGetMemInterMapParams; } static void __nvoc_init_funcTable_Subdevice_3(Subdevice *pThis, RmHalspecOwner *pRmhalspecowner) { @@ -9966,8 +9949,6 @@ PORT_UNREFERENCED_VARIABLE(rmVariantHal); PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx); - pThis->__subdeviceGetMemInterMapParams__ = &__nvoc_thunk_RmResource_subdeviceGetMemInterMapParams; - pThis->__subdeviceGetMemoryMappingDescriptor__ = &__nvoc_thunk_RmResource_subdeviceGetMemoryMappingDescriptor; pThis->__subdeviceUnregisterEvent__ = &__nvoc_thunk_Notifier_subdeviceUnregisterEvent; diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_subdevice_nvoc.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.h 2024-09-17 16:55:21.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/generated/g_subdevice_nvoc.h 2025-03-26 06:17:04.000000000 +0000 @@ -469,7 +469,6 @@ NV_STATUS (*__subdeviceCtrlCmdFlcnGetCtxBufferInfo__)(struct Subdevice *, NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdFlcnGetCtxBufferSize__)(struct Subdevice *, NV2080_CTRL_FLCN_GET_CTX_BUFFER_SIZE_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdEccGetClientExposedCounters__)(struct Subdevice *, NV2080_CTRL_ECC_GET_CLIENT_EXPOSED_COUNTERS_PARAMS *); - NV_STATUS (*__subdeviceCtrlCmdEccGetEciCounters__)(struct Subdevice *, NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdEccGetVolatileCounts__)(struct Subdevice *, NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdGpuQueryEccConfiguration__)(struct Subdevice *, NV2080_CTRL_GPU_QUERY_ECC_CONFIGURATION_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdGpuSetEccConfiguration__)(struct Subdevice *, 
NV2080_CTRL_GPU_SET_ECC_CONFIGURATION_PARAMS *); @@ -1070,7 +1069,6 @@ #define subdeviceCtrlCmdFlcnGetCtxBufferInfo(pSubdevice, pParams) subdeviceCtrlCmdFlcnGetCtxBufferInfo_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdFlcnGetCtxBufferSize(pSubdevice, pParams) subdeviceCtrlCmdFlcnGetCtxBufferSize_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdEccGetClientExposedCounters(pSubdevice, pParams) subdeviceCtrlCmdEccGetClientExposedCounters_DISPATCH(pSubdevice, pParams) -#define subdeviceCtrlCmdEccGetEciCounters(pSubdevice, pParams) subdeviceCtrlCmdEccGetEciCounters_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdEccGetVolatileCounts(pSubdevice, pParams) subdeviceCtrlCmdEccGetVolatileCounts_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdGpuQueryEccConfiguration(pSubdevice, pConfig) subdeviceCtrlCmdGpuQueryEccConfiguration_DISPATCH(pSubdevice, pConfig) #define subdeviceCtrlCmdGpuSetEccConfiguration(pSubdevice, pConfig) subdeviceCtrlCmdGpuSetEccConfiguration_DISPATCH(pSubdevice, pConfig) @@ -3378,12 +3376,6 @@ return pSubdevice->__subdeviceCtrlCmdEccGetClientExposedCounters__(pSubdevice, pParams); } -NV_STATUS subdeviceCtrlCmdEccGetEciCounters_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS *pParams); - -static inline NV_STATUS subdeviceCtrlCmdEccGetEciCounters_DISPATCH(struct Subdevice *pSubdevice, NV2080_CTRL_ECC_GET_ECI_COUNTERS_PARAMS *pParams) { - return pSubdevice->__subdeviceCtrlCmdEccGetEciCounters__(pSubdevice, pParams); -} - NV_STATUS subdeviceCtrlCmdEccGetVolatileCounts_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS *pParams); static inline NV_STATUS subdeviceCtrlCmdEccGetVolatileCounts_DISPATCH(struct Subdevice *pSubdevice, NV2080_CTRL_ECC_GET_VOLATILE_COUNTS_PARAMS *pParams) { diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/libraries/nvport/string.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/inc/libraries/nvport/string.h --- 
nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/libraries/nvport/string.h 2024-09-17 16:46:11.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/inc/libraries/nvport/string.h 2025-03-26 06:12:28.000000000 +0000 @@ -49,10 +49,8 @@ /** * @brief Compare two strings, character by character. * - * Will only compare lengthBytes bytes. Strings are assumed to be at least that - * long. - * - * Strings are allowed to overlap, but in . + * Will compare the first 'length' chars of each string, or until + * the nul-terminator is reached in either string, whichever comes first. * * @returns: * - 0 if all bytes are equal diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/libraries/resserv/rs_resource.h nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/inc/libraries/resserv/rs_resource.h --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/libraries/resserv/rs_resource.h 2024-09-17 16:46:13.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/inc/libraries/resserv/rs_resource.h 2025-03-26 06:12:29.000000000 +0000 @@ -62,15 +62,16 @@ */ struct RS_LOCK_INFO { - RsClient *pClient; ///< Pointer to client that was locked (if any) - RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking - RsResourceRef *pContextRef; ///< User-defined reference - RsSession *pSession; ///< Session object to be locked, if any - NvU32 flags; ///< RS_LOCK_FLAGS_* - NvU32 state; ///< RS_LOCK_STATE_* + RsClient *pClient; ///< Pointer to client that was locked (if any) + RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking + RsResourceRef *pContextRef; ///< User-defined reference + RsResourceRef *pResRefToBackRef; ///< Resource from which to infer indirect GPU dependencies + RsSession *pSession; ///< Session object to be locked, if any + NvU32 flags; ///< RS_LOCK_FLAGS_* + NvU32 state; ///< RS_LOCK_STATE_* NvU32 gpuMask; - NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for 
lock-metering - NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering + NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for lock-metering + NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering }; struct RS_RES_ALLOC_PARAMS_INTERNAL diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/diagnostics/journal.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/diagnostics/journal.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/diagnostics/journal.c 2024-09-17 16:46:20.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/diagnostics/journal.c 2025-03-26 06:12:33.000000000 +0000 @@ -1817,6 +1817,33 @@ } break; } + case RmRcDiagReport: + { + RmRcDiag_RECORD* pRecord = (RmRcDiag_RECORD*) &pDclRecord[1]; + OBJGPU *pGpu = gpumgrGetGpuFromId(pDclRecord->GPUTag); + + // open an RC Diagnostic record in the Proto Bufffer + NV_CHECK_OK(nvStatus, LEVEL_ERROR, + prbEncNestedStart(pPrbEnc, DCL_DCLMSG_RC_DIAG_RECS)); + if (nvStatus == NV_OK) + { + prbEncAddUInt32(pPrbEnc, RC_RCDIAGRECORD_RECORD_ID, pRecord->idx); + prbEncAddUInt32(pPrbEnc, RC_RCDIAGRECORD_RECORD_TYPE, pRecord->type); + if (NULL != pGpu) + { + NvU32 i; + for (i = 0; i < pRecord->count; ++i) + { + if (NV0000_CTRL_CMD_NVD_RCERR_RPT_REG_MAX_PSEDO_REG < pRecord->data[i].tag) + { + prbEncGpuRegImm(pGpu, pRecord->data[i].offset, pRecord->data[i].value, pPrbEnc, RC_RCDIAGRECORD_REGS); + } + } + } + NV_CHECK_OK(nvStatus, LEVEL_ERROR, prbEncNestedEnd(pPrbEnc)); + } + break; + } case RmPrbErrorInfo_V2: case RmPrbFullDump_V2: { diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c 2024-09-17 16:46:33.000000000 +0000 +++ 
nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c 2025-03-26 06:12:40.000000000 +0000 @@ -232,7 +232,9 @@ // vGpu plugin context flag should only be set on host if context is plugin if (gpuIsSriovEnabled(pGpu)) + { pKernelChannelGroup->bIsCallingContextVgpuPlugin = pAllocParams->bIsCallingContextVgpuPlugin; + } if (pKernelChannelGroup->bIsCallingContextVgpuPlugin) gfid = GPU_GFID_PF; diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c 2024-09-17 16:46:33.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c 2025-03-26 06:12:40.000000000 +0000 @@ -930,13 +930,26 @@ if (IS_GFID_VF(gfid)) { - NV_ASSERT_OR_RETURN(pChidMgr->ppVirtualChIDHeap[gfid] != NULL, NV_ERR_INVALID_STATE); - NV_ASSERT_OK(pChidMgr->ppVirtualChIDHeap[gfid]->eheapFree(pChidMgr->ppVirtualChIDHeap[gfid], ChID)); + // + // ppVirtualChIDHeap is freed during hostvgpudeviceapiDestruct in GSP-RM. + // In the case of a GSP-Plugin crash after running the VF doorbell fuzzer, only the hostvgpudeviceapi object is freed in GSP-RM. + // Other resources are cleaned up when shutting down the VM. 
+ // + if (pChidMgr->ppVirtualChIDHeap[gfid] != NULL) + { + NV_ASSERT_OK(pChidMgr->ppVirtualChIDHeap[gfid]->eheapFree(pChidMgr->ppVirtualChIDHeap[gfid], ChID)); + } } else { - NV_ASSERT_OR_RETURN(pChidMgr->pGlobalChIDHeap != NULL, NV_ERR_INVALID_STATE); - NV_ASSERT_OK(pChidMgr->pGlobalChIDHeap->eheapFree(pChidMgr->pGlobalChIDHeap, ChID)); + if (pChidMgr->pGlobalChIDHeap != NULL) + { + NV_ASSERT_OK(pChidMgr->pGlobalChIDHeap->eheapFree(pChidMgr->pGlobalChIDHeap, ChID)); + } + else + { + NV_ASSERT(pChidMgr->pGlobalChIDHeap != NULL); + } } NV_ASSERT_OR_RETURN(pChidMgr->pFifoDataHeap != NULL, NV_ERR_INVALID_STATE); @@ -1322,7 +1335,6 @@ return NV_OK; } - /** * @brief Releases a hardware channel group ID. * diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c 2024-09-17 16:46:39.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c 2025-03-26 06:12:43.000000000 +0000 @@ -514,6 +514,67 @@ NV_ERR_INVALID_CHANNEL); } + // Add the RcDiag records we received from GSP-RM to our system wide journal + { + OBJSYS *pSys = SYS_GET_INSTANCE(); + Journal *pRcDB = SYS_GET_RCDB(pSys); + RmClient *pClient; + + NvU32 recordSize = rcdbGetOcaRecordSizeWithHeader(pRcDB, RmRcDiagReport); + NvU32 rcDiagRecStart = pRcDB->RcErrRptNextIdx; + NvU32 rcDiagRecEnd; + NvU32 processId = 0; + NvU32 owner = RCDB_RCDIAG_DEFAULT_OWNER; + + if (pKernelChannel != NULL) + { + pClient = dynamicCast(RES_GET_CLIENT(pKernelChannel), RmClient); + NV_ASSERT(pClient != NULL); + if (pClient != NULL) + processId = pClient->ProcID; + } + + for (NvU32 i = 0; i < rpc_params->rcJournalBufferSize / recordSize; i++) + { + RmRCCommonJournal_RECORD *pCommonRecord = + (RmRCCommonJournal_RECORD *)((NvU8*)&rpc_params->rcJournalBuffer + i * recordSize); + 
RmRcDiag_RECORD *pRcDiagRecord = + (RmRcDiag_RECORD *)&pCommonRecord[1]; + +#if defined(DEBUG) + NV_PRINTF(LEVEL_INFO, "%d: GPUTag=0x%x CPUTag=0x%llx timestamp=0x%llx stateMask=0x%llx\n", + i, pCommonRecord->GPUTag, pCommonRecord->CPUTag, pCommonRecord->timeStamp, + pCommonRecord->stateMask); + NV_PRINTF(LEVEL_INFO, " idx=%d timeStamp=0x%x type=0x%x flags=0x%x count=%d owner=0x%x processId=0x%x\n", + pRcDiagRecord->idx, pRcDiagRecord->timeStamp, pRcDiagRecord->type, pRcDiagRecord->flags, + pRcDiagRecord->count, pRcDiagRecord->owner, processId); + for (NvU32 j = 0; j < pRcDiagRecord->count; j++) + { + NV_PRINTF(LEVEL_INFO, " %d: offset=0x08%x tag=0x08%x value=0x08%x attribute=0x08%x\n", + j, pRcDiagRecord->data[j].offset, pRcDiagRecord->data[j].tag, + pRcDiagRecord->data[j].value, pRcDiagRecord->data[j].attribute); + } +#endif + if (rcdbAddRcDiagRecFromGsp(pGpu, pRcDB, pCommonRecord, pRcDiagRecord) == NULL) + { + NV_PRINTF(LEVEL_WARNING, "Lost RC diagnostic record coming from GPU%d GSP: type=0x%x stateMask=0x%llx\n", + gpuGetInstance(pGpu), pRcDiagRecord->type, pCommonRecord->stateMask); + } + } + + rcDiagRecEnd = pRcDB->RcErrRptNextIdx - 1; + + // Update records to have the correct PID associated with the channel + if (rcDiagRecStart != rcDiagRecEnd) + { + rcdbUpdateRcDiagRecContext(pRcDB, + rcDiagRecStart, + rcDiagRecEnd, + processId, + owner); + } + } + // With CC enabled, CPU-RM needs to write error notifiers if (gpuIsCCFeatureEnabled(pGpu) && pKernelChannel != NULL) { diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/intr/intr.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/intr/intr.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/intr/intr.c 2024-09-17 16:46:40.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/intr/intr.c 2025-03-26 06:12:45.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -55,6 +55,7 @@ } stuckIntr[MC_ENGINE_IDX_MAX]; static NvBool _intrServiceStallExactList(OBJGPU *pGpu, Intr *pIntr, MC_ENGINE_BITVECTOR *pEngines); +static void _intrLogLongRunningInterrupts(Intr *pIntr); static void _intrInitServiceTable(OBJGPU *pGpu, Intr *pIntr); @@ -141,6 +142,8 @@ intrProcessDPCQueue_HAL(pGpu, pIntr); } + _intrLogLongRunningInterrupts(pIntr); + exit: return; } @@ -1067,6 +1070,7 @@ IntrService *pIntrService = pIntr->intrServiceTable[engineIdx].pInterruptService; NvU32 ret = 0; NvBool bShouldService; + NvU64 intrTiming, intrTiming2; IntrServiceClearInterruptArguments clearParams = {engineIdx}; IntrServiceServiceInterruptArguments serviceParams = {engineIdx}; @@ -1088,7 +1092,18 @@ if (bShouldService) { + osGetPerformanceCounter(&intrTiming); + ret = intrservServiceInterrupt(pGpu, pIntrService, &serviceParams); + + osGetPerformanceCounter(&intrTiming2); + intrTiming = intrTiming2 - intrTiming; + if (intrTiming > LONG_INTR_LOG_LENGTH_NS) + { + pIntr->longIntrStats[engineIdx].intrCount++; + if (intrTiming > pIntr->longIntrStats[engineIdx].intrLength) + pIntr->longIntrStats[engineIdx].intrLength = intrTiming; + } } return ret; } @@ -1401,6 +1416,29 @@ } } +static void +_intrLogLongRunningInterrupts(Intr *pIntr) +{ + NvU64 now; + osGetPerformanceCounter(&now); + + for (NvU32 i = 0; i < MC_ENGINE_IDX_MAX; ++i) + { + if (pIntr->longIntrStats[i].intrCount > 0) + { + if (now - pIntr->longIntrStats[i].lastPrintTime > LONG_INTR_LOG_RATELIMIT_NS) + { + NV_PRINTF(LEVEL_WARNING, "%u long-running interrupts (%llu ns or slower) from engine %u, longest taking %llu ns\n", + pIntr->longIntrStats[i].intrCount, LONG_INTR_LOG_LENGTH_NS, i, pIntr->longIntrStats[i].intrLength); + + pIntr->longIntrStats[i].intrCount = 0; + 
pIntr->longIntrStats[i].intrLength = 0; + pIntr->longIntrStats[i].lastPrintTime = now; + } + } + } +} + static NvBool _intrServiceStallExactList ( @@ -1608,6 +1646,9 @@ // allow the isr to come in. _intrExitCriticalSection(pGpu, pIntr, &intrMaskCtx); + // Delay prints until after exiting critical sections to save perf impact + _intrLogLongRunningInterrupts(pIntr); + NV_ASSERT_OK(resservRestoreTlsCallContext(pOldContext)); } diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_gpu_kernel.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_gpu_kernel.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_gpu_kernel.c 2024-09-17 16:46:57.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_gpu_kernel.c 2025-03-26 06:12:54.000000000 +0000 @@ -1355,6 +1355,47 @@ return NV_OK; } +#if (defined(DEBUG) || defined(DEVELOP)) +// +// subdeviceCtrlCmdGpuGetTpcReconfigMask +// +// Lock Requirements: +// Assert that API lock and GPUs lock held on entry +// +NV_STATUS +subdeviceCtrlCmdGpuGetTpcReconfigMask_IMPL +( + Subdevice *pSubdevice, + NV2080_CTRL_GPU_GET_TPC_RECONFIG_MASK_PARAMS *pParams +) +{ + OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); + RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); + NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice); + NvHandle hSubdevice = RES_GET_HANDLE(pSubdevice); + + NV2080_CTRL_GR_GET_TPC_RECONFIG_MASK_PARAMS tpcReconfigMaskParams; + + NV_ASSERT_OR_RETURN(rmapiLockIsOwner() && rmDeviceGpuLockIsOwner(GPU_RES_GET_GPU(pSubdevice)->gpuInstance), + NV_ERR_INVALID_LOCK_STATE); + + portMemSet(&tpcReconfigMaskParams, 0, sizeof(tpcReconfigMaskParams)); + tpcReconfigMaskParams.gpc = pParams->gpc; + + NV_CHECK_OK_OR_RETURN( + LEVEL_ERROR, + pRmApi->Control(pRmApi, + hClient, + hSubdevice, + NV2080_CTRL_CMD_GR_GET_TPC_RECONFIG_MASK, + &tpcReconfigMaskParams, + 
sizeof(tpcReconfigMaskParams))); + + pParams->tpcReconfigMask = tpcReconfigMaskParams.tpcReconfigMask; + return NV_OK; +} +#endif // defined(DEBUG) || defined(DEVELOP) + // // subdeviceCtrlCmdGpuGetFermiZcullInfo // diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/mem_mgr/mem.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/mem_mgr/mem.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/mem_mgr/mem.c 2024-09-17 16:47:01.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/mem_mgr/mem.c 2025-03-26 06:12:57.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2018-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/platform/chipset/chipset_info.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/platform/chipset/chipset_info.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/platform/chipset/chipset_info.c 2024-09-17 16:47:04.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/platform/chipset/chipset_info.c 2025-03-26 06:12:59.000000000 +0000 @@ -904,6 +904,17 @@ } static NV_STATUS +Intel_1B81_setupFunc +( + OBJCL *pCl +) +{ + pCl->setProperty(pCl, PDB_PROP_CL_RELAXED_ORDERING_NOT_CAPABLE, NV_TRUE); + + return NV_OK; +} + +static NV_STATUS Nvidia_T210_setupFunc ( OBJCL *pCl diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c 2024-09-17 16:47:04.000000000 
+0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c 2025-03-26 06:12:59.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2000-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2000-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/rmapi/alloc_free.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/rmapi/alloc_free.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/rmapi/alloc_free.c 2024-09-17 16:47:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/rmapi/alloc_free.c 2025-03-26 06:13:00.000000000 +0000 @@ -328,6 +328,36 @@ } } +static NvU32 +_resGetBackRefGpusMask(RsResourceRef *pResourceRef) +{ + NvU32 gpuMask = 0x0; + RS_INTER_MAPPING_BACK_REF *pBackRefItem; + + if (pResourceRef == NULL) + { + return 0x0; + } + + pBackRefItem = listHead(&pResourceRef->interBackRefs); + while (pBackRefItem != NULL) + { + RsInterMapping *pMapping = pBackRefItem->pMapping; + RsResourceRef *pDeviceRef = pMapping->pContextRef; + GpuResource *pGpuResource = dynamicCast(pDeviceRef->pResource, GpuResource); + + if (pGpuResource != NULL) + { + OBJGPU *pGpu = GPU_RES_GET_GPU(pGpuResource); + gpuMask |= gpumgrGetGpuMask(pGpu); + } + + pBackRefItem = listNext(&pResourceRef->interBackRefs, pBackRefItem); + } + + return gpuMask; +} + NV_STATUS serverResLock_Prologue ( @@ -445,8 +475,15 @@ } else { - status = rmGpuGroupLockAcquire(pParentGpu->gpuInstance, - GPU_LOCK_GRP_DEVICE, + // + // Lock the parent GPU and if specified any GPUs that resource + // may backreference via mappings. 
+ // + pLockInfo->gpuMask = gpumgrGetGpuMask(pParentGpu) | + _resGetBackRefGpusMask(pLockInfo->pResRefToBackRef); + + status = rmGpuGroupLockAcquire(0, + GPU_LOCK_GRP_MASK, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_CLIENT, &pLockInfo->gpuMask); diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/virtualization/kernel_hostvgpudeviceapi.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/virtualization/kernel_hostvgpudeviceapi.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/virtualization/kernel_hostvgpudeviceapi.c 2024-09-17 16:47:10.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/kernel/virtualization/kernel_hostvgpudeviceapi.c 2025-03-26 06:13:03.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/libraries/nvport/string/string_generic.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/libraries/nvport/string/string_generic.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/libraries/nvport/string/string_generic.c 2024-09-17 16:47:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/libraries/nvport/string/string_generic.c 2025-03-26 06:13:11.000000000 +0000 @@ -29,6 +29,7 @@ #include "nvport/nvport.h" #include "nvmisc.h" + #ifndef NVPORT_STRING_DONT_DEFINE_portStringLength NvLength portStringLength @@ -75,18 +76,34 @@ NvLength maxLength ) { - NvLength length; + NvLength i; PORT_ASSERT_CHECKED(str1 != NULL); PORT_ASSERT_CHECKED(str2 != NULL); - length = portStringLengthSafe(str1, maxLength); - - // Add 1 for the null terminator. 
- if (length < maxLength) - length++; + for (i = 0; i < maxLength; i++) + { + if (str1[i] != str2[i]) + { + // + // Cast to unsigned before assigning to NvS32, to avoid sign + // extension. E.g., if str1[i] is 0xff, we want s1 to contain + // 0xff, not -1. In practice, this shouldn't matter for printable + // characters, but still... + // + NvS32 s1 = (unsigned char)str1[i]; + NvS32 s2 = (unsigned char)str2[i]; + return s1 - s2; + } + + if ((str1[i] == '\0') && + (str2[i] == '\0')) + { + break; + } + } - return portMemCmp(str1, str2, length); + return 0; } #endif diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/libraries/resserv/src/rs_server.c nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/libraries/resserv/src/rs_server.c --- nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/libraries/resserv/src/rs_server.c 2024-09-17 16:47:28.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/src/nvidia/src/libraries/resserv/src/rs_server.c 2025-03-26 06:13:13.000000000 +0000 @@ -145,6 +145,7 @@ return status; pLockInfo->flags |= RS_LOCK_FLAGS_FREE_SESSION_LOCK; + pLockInfo->pResRefToBackRef = pResourceRef; pLockInfo->traceOp = RS_LOCK_TRACE_FREE; pLockInfo->traceClassId = pResourceRef->externalClassId; status = serverResLock_Prologue(pServer, LOCK_ACCESS_WRITE, pLockInfo, &releaseFlags); diff -Nru nvidia-open-gpu-kernel-modules-535.216.01/version.mk nvidia-open-gpu-kernel-modules-535.247.01/version.mk --- nvidia-open-gpu-kernel-modules-535.216.01/version.mk 2024-09-17 18:01:14.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.247.01/version.mk 2025-03-26 13:46:48.000000000 +0000 @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 535.216.01 +NVIDIA_VERSION = 535.247.01 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))