Version in base suite: 535.183.01-1~deb12u1 Base version: nvidia-open-gpu-kernel-modules_535.183.01-1~deb12u1 Target version: nvidia-open-gpu-kernel-modules_535.216.01-1~deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/contrib/n/nvidia-open-gpu-kernel-modules/nvidia-open-gpu-kernel-modules_535.183.01-1~deb12u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/contrib/n/nvidia-open-gpu-kernel-modules/nvidia-open-gpu-kernel-modules_535.216.01-1~deb12u1.dsc CHANGELOG.md | 12 README.md | 8 debian/.gitignore | 1 debian/bug-script | 4 debian/changelog | 30 debian/control | 5 debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch | 8 debian/patches/module/0034-fix-typos.patch | 10 debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch | 91 + debian/patches/module/0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch | 29 debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch | 37 debian/patches/module/0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch | 25 debian/patches/module/bashisms.patch | 2 debian/patches/module/cc_version_check-gcc5.patch | 2 debian/patches/module/conftest-prefer-arch-headers.patch | 2 debian/patches/module/conftest-verbose.patch | 3 debian/patches/module/fragile-ARCH.patch | 6 debian/patches/module/series | 18 debian/patches/module/series.in | 14 debian/patches/module/use-kbuild-compiler.patch | 2 debian/patches/module/use-kbuild-flags.patch | 2 debian/rules.defs | 3 debian/sync.sh | 2 kernel-open/Kbuild | 2 kernel-open/Makefile | 8 kernel-open/common/inc/nv-hypervisor.h | 24 kernel-open/common/inc/nv-linux.h | 16 kernel-open/common/inc/nv.h | 9 kernel-open/conftest.sh | 69 kernel-open/nvidia-drm/nvidia-drm-drv.c | 4 kernel-open/nvidia-drm/nvidia-drm.Kbuild | 1 kernel-open/nvidia-modeset/nv-kthread-q.c | 4 kernel-open/nvidia-modeset/nvidia-modeset-linux.c | 6 kernel-open/nvidia-uvm/nv-kthread-q-selftest.c | 4 kernel-open/nvidia-uvm/nv-kthread-q.c | 4 kernel-open/nvidia-uvm/uvm_channel.c | 24 kernel-open/nvidia-uvm/uvm_common.h | 6 kernel-open/nvidia-uvm/uvm_gpu_access_counters.c | 16 kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c | 14 kernel-open/nvidia/linux_nvswitch.c | 3 kernel-open/nvidia/nv-caps.c | 2 kernel-open/nvidia/nv-dma.c | 4 kernel-open/nvidia/nv-ibmnpu.c | 4 kernel-open/nvidia/nv-kthread-q.c | 4 kernel-open/nvidia/nv-mmap.c | 4 kernel-open/nvidia/nv-pci.c | 27 kernel-open/nvidia/nv-procfs.c | 4 kernel-open/nvidia/nv.c | 6 kernel-open/nvidia/nvidia.Kbuild | 3 kernel-open/nvidia/nvlink_linux.c | 1 kernel-open/nvidia/os-interface.c | 2 kernel-open/nvidia/os-mlock.c | 24 src/common/displayport/inc/dp_configcaps.h | 2 src/common/displayport/inc/dp_connector.h | 3 src/common/displayport/inc/dp_connectorimpl.h | 8 src/common/displayport/inc/dp_deviceimpl.h | 2 src/common/displayport/src/dp_configcaps.cpp | 4 src/common/displayport/src/dp_connectorimpl.cpp | 2 src/common/displayport/src/dp_deviceimpl.cpp | 9 src/common/displayport/src/dp_wardatabase.cpp | 12 src/common/inc/nvBldVer.h | 20 src/common/inc/nvUnixVersion.h | 2 src/common/inc/swref/published/ampere/ga102/dev_falcon_v4.h | 2 src/common/nvlink/interface/nvlink.h | 3 src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c | 5 src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c | 2 src/common/nvlink/kernel/nvlink/interface/nvlink_kern_discovery_entry.c | 6 src/common/nvlink/kernel/nvlink/nvlink_helper.h | 3 src/common/nvswitch/kernel/lr10/lr10.c | 4 
src/common/sdk/nvidia/inc/class/cl0000_notification.h | 2 src/common/sdk/nvidia/inc/class/cla084.h | 1 src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000vgpu.h | 149 +- src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h | 89 + src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h | 27 src/common/sdk/nvidia/inc/ctrl/ctrla081.h | 118 + src/common/sdk/nvidia/inc/nv-hypervisor.h | 24 src/common/shared/inc/nvdevid.h | 3 src/common/uproc/os/common/include/liblogdecode.h | 3 src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c | 28 src/nvidia-modeset/src/nvkms-headsurface-ioctl.c | 4 src/nvidia/arch/nvalloc/common/inc/nvcst.h | 6 src/nvidia/arch/nvalloc/unix/include/nv.h | 9 src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c | 696 +++------- src/nvidia/arch/nvalloc/unix/src/osapi.c | 11 src/nvidia/arch/nvalloc/unix/src/osinit.c | 8 src/nvidia/exports_link_command.txt | 4 src/nvidia/generated/g_client_resource_nvoc.c | 22 src/nvidia/generated/g_client_resource_nvoc.h | 16 src/nvidia/generated/g_event_nvoc.h | 2 src/nvidia/generated/g_kernel_gsp_nvoc.c | 10 src/nvidia/generated/g_kernel_gsp_nvoc.h | 33 src/nvidia/generated/g_kernel_nvlink_nvoc.h | 1 src/nvidia/generated/g_kernel_vgpu_mgr_nvoc.h | 20 src/nvidia/generated/g_mem_nvoc.h | 1 src/nvidia/generated/g_os_nvoc.h | 7 src/nvidia/generated/g_rpc-structures.h | 8 src/nvidia/generated/g_subdevice_diag_nvoc.h | 2 src/nvidia/generated/g_subdevice_nvoc.c | 138 + src/nvidia/generated/g_subdevice_nvoc.h | 16 src/nvidia/generated/g_vgpuconfigapi_nvoc.c | 60 src/nvidia/generated/g_vgpuconfigapi_nvoc.h | 16 src/nvidia/inc/kernel/gpu/falcon/falcon_common.h | 10 src/nvidia/inc/libraries/utils/nvprintf.h | 2 src/nvidia/interface/nvrm_registry.h | 14 src/nvidia/kernel/inc/nvpcf.h | 3 src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c | 11 src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c | 12 src/nvidia/src/kernel/gpu/gpu_suspend.c | 15 src/nvidia/src/kernel/gpu/gr/arch/turing/kgraphics_tu102.c | 5 src/nvidia/src/kernel/gpu/gr/kernel_graphics.c | 2 src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c | 93 + src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c | 2 src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c | 271 ++- src/nvidia/src/kernel/gpu/nvlink/arch/volta/kernel_nvlink_gv100.c | 3 src/nvidia/src/kernel/gpu/nvlink/common_nvlinkapi.c | 4 src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c | 3 src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c | 46 src/nvidia/src/kernel/gpu/rc/kernel_rc_notification.c | 12 src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_event_kernel.c | 1 src/nvidia/src/kernel/rmapi/client_resource.c | 38 src/nvidia/src/kernel/rmapi/event.c | 8 src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c | 19 src/nvidia/src/kernel/virtualization/vgpuconfigapi.c | 21 version.mk | 2 124 files changed, 1715 insertions(+), 1120 deletions(-) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/CHANGELOG.md nvidia-open-gpu-kernel-modules-535.216.01/CHANGELOG.md --- nvidia-open-gpu-kernel-modules-535.183.01/CHANGELOG.md 2024-05-12 20:29:36.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/CHANGELOG.md 2024-09-17 18:01:14.000000000 +0000 @@ -2,6 +2,18 @@ ## Release 535 Entries +### [535.179] 2024-05-09 + +### [535.171.04] 2024-03-21 + +### [535.161.08] 2024-03-18 + +### [535.161.07] 2024-02-22 + +### [535.154.05] 2024-01-16 + +### [535.146.02] 2023-12-07 + ### [535.129.03] 2023-10-31 ### [535.113.01] 2023-09-21 diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/README.md 
nvidia-open-gpu-kernel-modules-535.216.01/README.md --- nvidia-open-gpu-kernel-modules-535.183.01/README.md 2024-05-12 20:29:37.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/README.md 2024-09-17 18:01:16.000000000 +0000 @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 535.183.01. +version 535.216.01. ## How to Build @@ -17,7 +17,7 @@ Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -535.183.01 driver release. This can be achieved by installing +535.216.01 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -180,7 +180,7 @@ ## Compatible GPUs The open-gpu-kernel-modules can be used on any Turing or later GPU -(see the table below). However, in the 535.183.01 release, +(see the table below). However, in the 535.216.01 release, GeForce and Workstation support is still considered alpha-quality. To enable use of the open kernel modules on GeForce and Workstation GPUs, @@ -188,7 +188,7 @@ parameter to 1. For more details, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/535.183.01/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/535.216.01/README/kernel_open.html In the below table, if three IDs are listed, the first is the PCI Device ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/.gitignore nvidia-open-gpu-kernel-modules-535.216.01/debian/.gitignore --- nvidia-open-gpu-kernel-modules-535.183.01/debian/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/.gitignore 2024-11-20 09:22:33.000000000 +0000 @@ -0,0 +1 @@ +/patches/*~ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/bug-script nvidia-open-gpu-kernel-modules-535.216.01/debian/bug-script --- nvidia-open-gpu-kernel-modules-535.183.01/debian/bug-script 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/bug-script 2024-11-20 09:22:33.000000000 +0000 @@ -155,4 +155,8 @@ grep -H . /etc/OpenCL/vendors/* 2>/dev/null echo +echo "APT sources:" +apt-cache policy +echo + exit 0 diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/changelog nvidia-open-gpu-kernel-modules-535.216.01/debian/changelog --- nvidia-open-gpu-kernel-modules-535.183.01/debian/changelog 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/changelog 2024-11-20 09:22:33.000000000 +0000 @@ -1,3 +1,33 @@ +nvidia-open-gpu-kernel-modules (535.216.01-1~deb12u1) bookworm; urgency=medium + + * Rebuild for bookworm. + + -- Andreas Beckmann Wed, 20 Nov 2024 10:22:33 +0100 + +nvidia-open-gpu-kernel-modules (535.216.01-1) unstable; urgency=medium + + * New upstream LTS and Tesla branch release 535.216.01 (2024-10-22). + * Fixed CVE-2024-0126. (Closes: #1085976) + https://nvidia.custhelp.com/app/answers/detail/a_id/5586 + * Sync with src:nvidia-graphics-drivers. + + -- Andreas Beckmann Wed, 13 Nov 2024 20:39:29 +0100 + +nvidia-open-gpu-kernel-modules (535.183.06-2) unstable; urgency=medium + + * Sync with src:nvidia-graphics-drivers. 
+ + -- Andreas Beckmann Fri, 01 Nov 2024 21:03:38 +0100 + +nvidia-open-gpu-kernel-modules (535.183.06-1) unstable; urgency=medium + + * New upstream Tesla branch release 535.183.06 (2024-07-09). + * Sync with src:nvidia-graphics-drivers. + * Refresh patches. + * Bump Standards-Version to 4.7.0. No changes needed. + + -- Andreas Beckmann Mon, 12 Aug 2024 10:55:57 +0200 + nvidia-open-gpu-kernel-modules (535.183.01-1~deb12u1) bookworm; urgency=medium * Rebuild for bookworm. diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/control nvidia-open-gpu-kernel-modules-535.216.01/debian/control --- nvidia-open-gpu-kernel-modules-535.183.01/debian/control 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/control 2024-11-20 09:22:33.000000000 +0000 @@ -6,14 +6,13 @@ Andreas Beckmann , Build-Depends: debhelper-compat (= 13), + quilt, Build-Depends-Arch: dh-sequence-dkms, - quilt, linux-headers-amd64 [amd64] , linux-headers-arm64 [arm64] , - linux-headers-powerpc64le [ppc64el] , Rules-Requires-Root: no -Standards-Version: 4.6.2 +Standards-Version: 4.7.0 Homepage: https://github.com/NVIDIA/open-gpu-kernel-modules Vcs-Browser: https://salsa.debian.org/nvidia-team/nvidia-open-gpu-kernel-modules Vcs-Git: https://salsa.debian.org/nvidia-team/nvidia-open-gpu-kernel-modules.git diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch 2024-11-20 09:22:33.000000000 +0000 @@ -1,4 +1,4 @@ -From 172858a78f62259bcb6d4436c2051c3645fb2f7e Mon Sep 17 00:00:00 2001 +From 3a03c9a15522c69286f9a94d5395430af8d3f628 Mon Sep 17 00:00:00 2001 From: Andreas Beckmann Date: Mon, 31 Oct 2022 14:40:42 +0100 Subject: [PATCH] conftest.sh: remove empty lines from uts_release output @@ -8,10 +8,10 @@ 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conftest.sh b/conftest.sh -index fcfa9d5..376b227 100755 +index 7f0478ea..33ec54b3 100755 --- a/conftest.sh +++ b/conftest.sh -@@ -5912,7 +5912,7 @@ compile_test() { +@@ -5930,7 +5930,7 @@ compile_test() { echo "#include UTS_RELEASE" > conftest$$.c @@ -21,5 +21,5 @@ ;; -- -2.20.1 +2.39.5 diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0034-fix-typos.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0034-fix-typos.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0034-fix-typos.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0034-fix-typos.patch 2024-11-20 09:22:33.000000000 +0000 @@ -1,4 +1,4 @@ -From 7c6f49a47ddff074f6926997f2d37f57396fd2fb Mon Sep 17 00:00:00 2001 +From f4fdb2cdb13d4b50f19565ba91682af77cf846a9 Mon Sep 17 00:00:00 2001 From: Andreas Beckmann Date: Wed, 31 Jan 2024 03:06:19 +0100 Subject: [PATCH] fix typos @@ -9,7 +9,7 @@ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nvidia-uvm/uvm_perf_thrashing.c b/nvidia-uvm/uvm_perf_thrashing.c -index ee5eef4..3032615 100644 +index ee5eef43..30326157 100644 --- a/nvidia-uvm/uvm_perf_thrashing.c +++ b/nvidia-uvm/uvm_perf_thrashing.c @@ 
-318,7 +318,7 @@ module_param(uvm_perf_thrashing_max_resets, uint, S_IRUGO); @@ -31,10 +31,10 @@ (g_uvm_perf_thrashing_lapse_usec == UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT)) { va_space_thrashing->params.lapse_ns = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION * 1000; diff --git a/nvidia/nv.c b/nvidia/nv.c -index 76d1ef3..4fc7966 100644 +index f49e8dd7..499a55e3 100644 --- a/nvidia/nv.c +++ b/nvidia/nv.c -@@ -4489,7 +4489,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address( +@@ -4483,7 +4483,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address( * Otherwise, the DMA start address only needs to be set once, and it * won't change afterward. Just return the cached value if asked again, * to avoid the kernel printing redundant messages to the kernel @@ -44,5 +44,5 @@ if ((nv_tce_bypass_mode == NV_TCE_BYPASS_MODE_DISABLE) || (nvl->tce_bypass_enabled)) -- -2.20.1 +2.39.5 diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch 2024-11-20 09:22:33.000000000 +0000 @@ -0,0 +1,91 @@ +From b28cea9c8f2fcb649e2930cc02ac8cfc5e8c7806 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Thu, 1 Feb 2024 14:58:45 -0500 +Subject: [PATCH] import pfn_valid() w/o GPL rcu_read_lock/unlock from v6.8-rc3 + +linux-6.1.76, 6.6.15, and 6.7.3 have modified the non-ARCH-specific +pfn_valid() to use __rcu_read_lock/unlock[1] that is marked GPL and +cannot be used here[2][3][4] unless using the open source variant. + +pfn_valid() has been changed to use GPL rcu_read_lock/unlock by +"mm/sparsemem: fix race in accessing memory_section->usage" in Linux + v6.8-rc1 (5ec8e8ea8b7783fab150cf86404fc38cb4db8800) +which has been backported to Linux + v6.7.3 (3a01daace71b521563c38bbbf874e14c3e58adb7) + v6.6.15 (70064241f2229f7ba7b9599a98f68d9142e81a97) + v6.1.76 (68ed9e33324021e9d6b798e9db00ca3093d2012a) + v5.10.210 (90ad17575d26874287271127d43ef3c2af876cea) + +pfn_valid() has been further changed to use non-GPL +rcu_read_lock_sched()/rcu_read_unlock_sched() by +"mm, kmsan: fix infinite recursion due to RCU critical section" in Linux + v6.8-rc3 (f6564fce256a3944aa1bc76cb3c40e792d97c1eb) +which has been backported to Linux + v6.7.4 (5a33420599fa0288792537e6872fd19cc8607ea6) + v6.6.16 (6335c0cdb2ea0ea02c999e04d34fd84f69fb27ff) + v6.1.77 (dc904345e3771aa01d0b8358b550802fdc6fe00b) +but not (yet) to Linux v5.10.x + +As a workaround, use the v6.8-rc3 implementation for all kernels +having only the first patch until NVIDIA makes a fixed release +(that no longer will be using pfn_valid[5]). 
+ +[1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/include/linux/mmzone.h?h=v6.7.3&id=3a01daace71b521563c38bbbf874e14c3e58adb7 +[2] https://bugs.gentoo.org/923456 +[3] https://forums.developer.nvidia.com/t/280908 +[4] https://github.com/NVIDIA/open-gpu-kernel-modules/issues/594 +[5] https://github.com/NVIDIA/open-gpu-kernel-modules/issues/594#issuecomment-1916197641 + +Bug-Debian: https://bugs.debian.org/1062932 +Origin: gentoo, https://github.com/gentoo/gentoo/blob/c64caf53/x11-drivers/nvidia-drivers/files/nvidia-drivers-470.223.02-gpl-pfn_valid.patch +--- + common/inc/nv-linux.h | 33 +++++++++++++++++++++++++++++++++ + 1 file changed, 33 insertions(+) + +diff --git a/common/inc/nv-linux.h b/common/inc/nv-linux.h +index 94106b3e..18901239 100644 +--- a/common/inc/nv-linux.h ++++ b/common/inc/nv-linux.h +@@ -1947,6 +1947,39 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv) + #define NV_GET_UNUSED_FD_FLAGS(flags) (-1) + #endif + ++#if (!defined(CONFIG_HAVE_ARCH_PFN_VALID)) && \ ++ ((LINUX_VERSION_CODE == KERNEL_VERSION(6,7,3)) || \ ++ (LINUX_VERSION_CODE == KERNEL_VERSION(6,6,15)) || \ ++ (LINUX_VERSION_CODE == KERNEL_VERSION(6,1,76)) || \ ++ ((LINUX_VERSION_CODE >= KERNEL_VERSION(5,10,210)) && (LINUX_VERSION_CODE < KERNEL_VERSION(5,11,0)))) ++ ++/* Linux v6.8-rc3 pfn_valid version without GPL rcu_read_lock/unlock() */ ++static inline int nv_pfn_valid(unsigned long pfn) ++{ ++ struct mem_section *ms; ++ int ret; ++ ++ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn) ++ return 0; ++ ++ if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) ++ return 0; ++ ms = __pfn_to_section(pfn); ++ rcu_read_lock_sched(); ++ if (!valid_section(ms)) { ++ rcu_read_unlock_sched(); ++ return 0; ++ } ++ ret = early_section(ms) || pfn_section_valid(ms, pfn); ++ rcu_read_unlock_sched(); ++ ++ return ret; ++} ++ ++#else ++# define nv_pfn_valid pfn_valid ++#endif ++ + #define MODULE_BASE_NAME "nvidia" + #define MODULE_INSTANCE_NUMBER 0 + #define MODULE_INSTANCE_STRING "" +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch 2024-11-20 09:22:33.000000000 +0000 @@ -0,0 +1,29 @@ +From a49b85be06506c0f915c71da427b361dc9a11479 Mon Sep 17 00:00:00 2001 +From: "Patrick J. LoPresti" +Date: Wed, 24 Apr 2024 16:28:11 -0700 +Subject: [PATCH] Log an error message when nv_mem_client_init() fails due to + missing IB peer memory symbols. 
+ +Fixes https://forums.developer.nvidia.com/t/290774 + +Bug: https://forums.developer.nvidia.com/t/290774 +Origin: other, https://github.com/NVIDIA/open-gpu-kernel-modules/pull/630 +--- + nvidia-peermem/nvidia-peermem.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/nvidia-peermem/nvidia-peermem.c b/nvidia-peermem/nvidia-peermem.c +index 66c4de8..75a0f39 100644 +--- a/nvidia-peermem/nvidia-peermem.c ++++ b/nvidia-peermem/nvidia-peermem.c +@@ -548,6 +548,7 @@ out: + + return status; + #else ++ peer_err("module compiled without IB peer memory symbols present\n"); + return -EINVAL; + #endif + } +-- +2.20.1 + diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch 2024-11-20 09:22:33.000000000 +0000 @@ -0,0 +1,37 @@ +From 9e266458d8dc9e07123cbc46a887a5f3a9744cf2 Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Sun, 13 Oct 2024 08:52:51 +0200 +Subject: [PATCH] let the virt_addr_valid() macro use nv_pfn_valid() on ppc64el + +--- + common/inc/nv-linux.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/common/inc/nv-linux.h b/common/inc/nv-linux.h +index 18901239..433e5587 100644 +--- a/common/inc/nv-linux.h ++++ b/common/inc/nv-linux.h +@@ -1947,6 +1947,7 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv) + #define NV_GET_UNUSED_FD_FLAGS(flags) (-1) + #endif + ++#if defined(NVCPU_PPC64LE) + #if (!defined(CONFIG_HAVE_ARCH_PFN_VALID)) && \ + ((LINUX_VERSION_CODE == KERNEL_VERSION(6,7,3)) || \ + (LINUX_VERSION_CODE == KERNEL_VERSION(6,6,15)) || \ +@@ -1976,8 +1977,10 @@ static inline int nv_pfn_valid(unsigned long pfn) + return ret; + } + +-#else +-# define nv_pfn_valid pfn_valid ++// let the virt_addr_valid() macro use nv_pfn_valid() ++#define pfn_valid nv_pfn_valid ++ ++#endif + #endif + + #define MODULE_BASE_NAME "nvidia" +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch 2024-11-20 09:22:33.000000000 +0000 @@ -0,0 +1,25 @@ +From f93c98b0ce4185c640ac60829b06a609913bba3d Mon Sep 17 00:00:00 2001 +From: Andreas Beckmann +Date: Fri, 18 Oct 2024 00:04:11 +0200 +Subject: [PATCH] backport nv_get_kern_phys_address() changes from 555.42.02 + +--- + nvidia/nv-vtophys.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/nvidia/nv-vtophys.c b/nvidia/nv-vtophys.c +index df2a01e..fcae701 100644 +--- a/nvidia/nv-vtophys.c ++++ b/nvidia/nv-vtophys.c +@@ -29,7 +29,7 @@ + NvU64 NV_API_CALL nv_get_kern_phys_address(NvU64 address) + { + /* direct-mapped kernel address */ +- if (virt_addr_valid(address)) ++ if 
(virt_addr_valid((void *)address)) + return __pa(address); + + nv_printf(NV_DBG_ERRORS, +-- +2.39.5 + diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/bashisms.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/bashisms.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/bashisms.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/bashisms.patch 2024-11-20 09:22:33.000000000 +0000 @@ -3,7 +3,7 @@ --- a/conftest.sh +++ b/conftest.sh -@@ -6752,7 +6752,7 @@ case "$5" in +@@ -6793,7 +6793,7 @@ case "$5" in if [ -n "$VGX_BUILD" ]; then if [ -f /proc/xen/capabilities ]; then diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/cc_version_check-gcc5.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/cc_version_check-gcc5.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/cc_version_check-gcc5.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/cc_version_check-gcc5.patch 2024-11-20 09:22:33.000000000 +0000 @@ -5,7 +5,7 @@ --- a/conftest.sh +++ b/conftest.sh -@@ -6586,7 +6586,7 @@ case "$5" in +@@ -6627,7 +6627,7 @@ case "$5" in kernel_cc_minor=`echo ${kernel_cc_version} | cut -d '.' -f 2` echo " diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/conftest-prefer-arch-headers.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/conftest-prefer-arch-headers.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/conftest-prefer-arch-headers.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/conftest-prefer-arch-headers.patch 2024-11-20 09:22:33.000000000 +0000 @@ -1,7 +1,7 @@ Author: Paul Szabo Author: Andreas Beckmann Description: prefer arch headers over common headers -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=933309 +Bug-Debian: https://bugs.debian.org/933309 --- a/conftest.sh +++ b/conftest.sh diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/conftest-verbose.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/conftest-verbose.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/conftest-verbose.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/conftest-verbose.patch 2024-11-20 09:22:33.000000000 +0000 @@ -3,12 +3,13 @@ --- a/Kbuild +++ b/Kbuild -@@ -166,6 +166,16 @@ NV_CONFTEST_HEADERS += $(obj)/conftest/h +@@ -166,6 +166,17 @@ NV_CONFTEST_HEADERS += $(obj)/conftest/h NV_CONFTEST_HEADERS += $(NV_CONFTEST_COMPILE_TEST_HEADERS) +.PHONY: conftest-verbose +conftest-verbose: ++ uname -a + @echo 'NV_CONFTEST_CMD=$(NV_CONFTEST_CMD)' + @echo 'NV_CONFTEST_CFLAGS=$(NV_CONFTEST_CFLAGS)' + @echo 'KBUILD_CFLAGS=$(KBUILD_CFLAGS)' diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/fragile-ARCH.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/fragile-ARCH.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/fragile-ARCH.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/fragile-ARCH.patch 2024-11-20 09:22:33.000000000 +0000 @@ -5,7 +5,7 @@ --- a/Makefile +++ b/Makefile -@@ -52,8 +52,8 @@ else +@@ -56,8 +56,8 @@ else LD ?= ld OBJDUMP ?= objdump @@ -16,7 +16,7 @@ -e 's/armv[0-7]\w\+/arm/' \ -e 
's/aarch64/arm64/' \ -e 's/ppc64le/powerpc/' \ -@@ -70,7 +70,7 @@ else +@@ -74,7 +74,7 @@ else KBUILD_PARAMS += V=1 endif KBUILD_PARAMS += -C $(KERNEL_SOURCES) M=$(CURDIR) @@ -25,7 +25,7 @@ KBUILD_PARAMS += NV_KERNEL_SOURCES=$(KERNEL_SOURCES) KBUILD_PARAMS += NV_KERNEL_OUTPUT=$(KERNEL_OUTPUT) KBUILD_PARAMS += NV_KERNEL_MODULES="$(NV_KERNEL_MODULES)" -@@ -100,7 +100,7 @@ else +@@ -104,7 +104,7 @@ else # and hence must be used whenever present. LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/series nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/series --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/series 1970-01-01 00:00:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/series 2024-11-20 09:22:33.000000000 +0000 @@ -0,0 +1,18 @@ +cc_version_check-gcc5.patch +bashisms.patch + +# kernel support +0001-bump-minimum-supported-kernel-version-to-3.10.patch +0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch +0034-fix-typos.patch +0037-import-pfn_valid-w-o-GPL-rcu_read_lock-unlock-from-v.patch +0042-Log-an-error-message-when-nv_mem_client_init-fails-d.patch +0045-let-the-virt_addr_valid-macro-use-nv_pfn_valid-on-pp.patch +0046-backport-nv_get_kern_phys_address-changes-from-555.4.patch + +# build system updates +fragile-ARCH.patch +conftest-verbose.patch +use-kbuild-compiler.patch +use-kbuild-flags.patch +conftest-prefer-arch-headers.patch diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/series.in nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/series.in --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/series.in 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/series.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -cc_version_check-gcc5.patch -bashisms.patch - -# kernel support -0001-bump-minimum-supported-kernel-version-to-3.10.patch -0002-conftest.sh-remove-empty-lines-from-uts_release-outp.patch -0034-fix-typos.patch - -# build system updates -fragile-ARCH.patch -conftest-verbose.patch -use-kbuild-compiler.patch -use-kbuild-flags.patch -conftest-prefer-arch-headers.patch diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/use-kbuild-compiler.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-compiler.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/use-kbuild-compiler.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-compiler.patch 2024-11-20 09:22:33.000000000 +0000 @@ -7,7 +7,7 @@ --- a/Makefile +++ b/Makefile -@@ -79,8 +79,8 @@ else +@@ -83,8 +83,8 @@ else .PHONY: modules module clean clean_conftest modules_install modules clean modules_install: diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/use-kbuild-flags.patch nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-flags.patch --- nvidia-open-gpu-kernel-modules-535.183.01/debian/patches/module/use-kbuild-flags.patch 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/patches/module/use-kbuild-flags.patch 2024-11-20 09:22:33.000000000 +0000 @@ -43,7 +43,7 @@ # --- a/Makefile +++ b/Makefile -@@ -114,7 +114,7 @@ else +@@ -118,7 +118,7 @@ else # cannot be defined in the *Kbuild files, which are only used during stage 1. 
%-linux.o: modules diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/rules.defs nvidia-open-gpu-kernel-modules-535.216.01/debian/rules.defs --- nvidia-open-gpu-kernel-modules-535.183.01/debian/rules.defs 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/rules.defs 2024-11-20 09:22:33.000000000 +0000 @@ -1,7 +1,6 @@ LINUX_KMOD_VARIANT = -open -LINUX_KMOD_TESTED = 6.7 +LINUX_KMOD_TESTED = 6.11 ARCH_LIST = amd64 ARCH_LIST += arm64 -#ARCH_LIST += ppc64el diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/debian/sync.sh nvidia-open-gpu-kernel-modules-535.216.01/debian/sync.sh --- nvidia-open-gpu-kernel-modules-535.183.01/debian/sync.sh 2024-06-19 16:17:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/debian/sync.sh 2024-11-20 09:22:33.000000000 +0000 @@ -37,7 +37,7 @@ fi done -for psrc in "$ngd/debian/patches/module"/*.patch +for psrc in "$ngd/debian/patches/module"/*.patch "$ngd/debian/patches/module"/series* do pdst=${psrc#$ngd/} if ! cmp -s "$psrc" "$pdst" ; then diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/Kbuild nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Kbuild --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/Kbuild 2024-05-12 19:45:20.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Kbuild 2024-09-17 17:05:59.000000000 +0000 @@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src) EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.183.01\" +EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.216.01\" ifneq ($(SYSSRCHOST1X),) EXTRA_CFLAGS += -I$(SYSSRCHOST1X) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/Makefile nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Makefile --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/Makefile 2024-05-12 19:45:20.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/Makefile 2024-09-17 17:05:59.000000000 +0000 @@ -28,7 +28,7 @@ else KERNEL_UNAME ?= $(shell uname -r) KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME) - KERNEL_SOURCES := $(shell test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source || echo $(KERNEL_MODLIB)/build) + KERNEL_SOURCES := $(shell ((test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source) || (test -d $(KERNEL_MODLIB)/build/source && echo $(KERNEL_MODLIB)/build/source)) || echo $(KERNEL_MODLIB)/build) endif KERNEL_OUTPUT := $(KERNEL_SOURCES) @@ -42,7 +42,11 @@ else KERNEL_UNAME ?= $(shell uname -r) KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME) - ifeq ($(KERNEL_SOURCES), $(KERNEL_MODLIB)/source) + # $(filter patter...,text) - Returns all whitespace-separated words in text that + # do match any of the pattern words, removing any words that do not match. + # Set the KERNEL_OUTPUT only if either $(KERNEL_MODLIB)/source or + # $(KERNEL_MODLIB)/build/source path matches the KERNEL_SOURCES. 
+ ifneq ($(filter $(KERNEL_SOURCES),$(KERNEL_MODLIB)/source $(KERNEL_MODLIB)/build/source),) KERNEL_OUTPUT := $(KERNEL_MODLIB)/build KBUILD_PARAMS := KBUILD_OUTPUT=$(KERNEL_OUTPUT) endif diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/common/inc/nv-hypervisor.h nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nv-hypervisor.h --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/common/inc/nv-hypervisor.h 2024-05-12 19:44:54.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nv-hypervisor.h 2024-09-17 17:05:31.000000000 +0000 @@ -37,13 +37,11 @@ OS_HYPERVISOR_UNKNOWN } HYPERVISOR_TYPE; -#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0 -#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1 -#define CMD_VGPU_VFIO_REGISTER_MDEV 2 -#define CMD_VGPU_VFIO_PRESENT 3 -#define CMD_VFIO_PCI_CORE_PRESENT 4 +#define CMD_VFIO_WAKE_REMOVE_GPU 1 +#define CMD_VGPU_VFIO_PRESENT 2 +#define CMD_VFIO_PCI_CORE_PRESENT 3 -#define MAX_VF_COUNT_PER_GPU 64 +#define MAX_VF_COUNT_PER_GPU 64 typedef enum _VGPU_TYPE_INFO { @@ -54,17 +52,11 @@ typedef struct { - void *vgpuVfioRef; - void *waitQueue; void *nv; - NvU32 *vgpuTypeIds; - NvU8 **vgpuNames; - NvU32 numVgpuTypes; - NvU32 domain; - NvU8 bus; - NvU8 slot; - NvU8 function; - NvBool is_virtfn; + NvU32 domain; + NvU32 bus; + NvU32 device; + NvU32 return_status; } vgpu_vfio_info; typedef struct diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/common/inc/nv-linux.h nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nv-linux.h --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/common/inc/nv-linux.h 2024-05-12 19:44:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nv-linux.h 2024-09-17 17:05:29.000000000 +0000 @@ -499,7 +499,9 @@ void *ptr = __vmalloc(size, GFP_KERNEL); #endif if (ptr) + { NV_MEMDBG_ADD(ptr, size); + } return ptr; } @@ -517,7 +519,9 @@ void *ptr = ioremap(phys, size); #endif if (ptr) + { NV_MEMDBG_ADD(ptr, size); + } return ptr; } @@ -553,8 +557,9 @@ #endif if (ptr) + { NV_MEMDBG_ADD(ptr, size); - + } return ptr; } @@ -570,8 +575,9 @@ #endif if (ptr) + { NV_MEMDBG_ADD(ptr, size); - + } return ptr; } @@ -700,7 +706,9 @@ /* All memory cached in PPC64LE; can't honor 'cached' input. 
*/ ptr = vmap(pages, page_count, VM_MAP, prot); if (ptr) + { NV_MEMDBG_ADD(ptr, page_count * PAGE_SIZE); + } return (NvUPtr)ptr; } @@ -1603,6 +1611,10 @@ struct nv_dma_device dma_dev; struct nv_dma_device niso_dma_dev; +#if defined(NV_VGPU_KVM_BUILD) + wait_queue_head_t wait; + NvS32 return_status; +#endif } nv_linux_state_t; extern nv_linux_state_t *nv_linux_devices; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/common/inc/nv.h nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nv.h --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/common/inc/nv.h 2024-05-12 19:44:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/common/inc/nv.h 2024-09-17 17:05:28.000000000 +0000 @@ -1034,12 +1034,11 @@ NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16); NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool); NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8); -NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *); -NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32); -NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *); +NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, + NvU64 *, NvU64 *, NvU32 *, NvU8 *); NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *); -NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *); -NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *); +NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *); +NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *); NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*); nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/conftest.sh nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/conftest.sh --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/conftest.sh 2024-05-12 19:11:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/conftest.sh 2024-09-17 16:26:52.000000000 +0000 @@ -1208,6 +1208,23 @@ compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_BIND_IOMMUFD" "" "types" ;; + vfio_device_ops_has_detach_ioas) + # + # Determine if 'vfio_device_ops' struct has 'detach_ioas' field. + # + # Added by commit 9048c7341c4df9cae04c154a8b0f556dbe913358 ("vfio-iommufd: Add detach_ioas + # support for physical VFIO devices + # + CODE=" + #include + #include + int conftest_vfio_device_ops_has_detach_ioas(void) { + return offsetof(struct vfio_device_ops, detach_ioas); + }" + + compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_DETACH_IOAS" "" "types" + ;; + pci_irq_vector_helpers) # # Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors() @@ -5181,22 +5198,23 @@ compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic" ;; - unsafe_follow_pfn) + follow_pfn) # - # Determine if unsafe_follow_pfn() is present. + # Determine if follow_pfn() is present. # - # unsafe_follow_pfn() was added by commit 69bacee7f9ad - # ("mm: Add unsafe_follow_pfn") in v5.13-rc1. 
+ # follow_pfn() was added by commit 3b6748e2dd69 + # ("mm: introduce follow_pfn()") in v2.6.31-rc1, and removed + # by commit 233eb0bf3b94 ("mm: remove follow_pfn") + # from linux-next 233eb0bf3b94. # CODE=" #include - void conftest_unsafe_follow_pfn(void) { - unsafe_follow_pfn(); + void conftest_follow_pfn(void) { + follow_pfn(); }" - compile_check_conftest "$CODE" "NV_UNSAFE_FOLLOW_PFN_PRESENT" "" "functions" + compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions" ;; - drm_plane_atomic_check_has_atomic_state_arg) # # Determine if drm_plane_helper_funcs::atomic_check takes 'state' @@ -6344,6 +6362,29 @@ compile_check_conftest "$CODE" "NV_MEMORY_FAILURE_MF_SW_SIMULATED_DEFINED" "" "types" ;; + drm_output_poll_changed) + # + # Determine whether drm_mode_config_funcs.output_poll_changed + # callback is present + # + # Removed by commit 446d0f4849b1 ("drm: Remove struct + # drm_mode_config_funcs.output_poll_changed") in v6.12. Hotplug + # event support is handled through the fbdev emulation interface + # going forward. + # + CODE=" + #if defined(NV_DRM_DRM_MODE_CONFIG_H_PRESENT) + #include + #else + #include + #endif + int conftest_drm_output_poll_changed_available(void) { + return offsetof(struct drm_mode_config_funcs, output_poll_changed); + }" + + compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types" + ;; + crypto_tfm_ctx_aligned) # Determine if 'crypto_tfm_ctx_aligned' is defined. # @@ -6772,10 +6813,12 @@ # VERBOSE=$6 iommu=CONFIG_VFIO_IOMMU_TYPE1 + iommufd_vfio_container=CONFIG_IOMMUFD_VFIO_CONTAINER mdev=CONFIG_VFIO_MDEV kvm=CONFIG_KVM_VFIO vfio_pci_core=CONFIG_VFIO_PCI_CORE VFIO_IOMMU_PRESENT=0 + VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=0 VFIO_MDEV_PRESENT=0 KVM_PRESENT=0 VFIO_PCI_CORE_PRESENT=0 @@ -6785,6 +6828,10 @@ VFIO_IOMMU_PRESENT=1 fi + if (test_configuration_option ${iommufd_vfio_container} || test_configuration_option ${iommufd_vfio_container}_MODULE); then + VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=1 + fi + if (test_configuration_option ${mdev} || test_configuration_option ${mdev}_MODULE); then VFIO_MDEV_PRESENT=1 fi @@ -6797,7 +6844,7 @@ VFIO_PCI_CORE_PRESENT=1 fi - if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then + if ([ "$VFIO_IOMMU_PRESENT" != "0" ] || [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" != "0" ])&& [ "$KVM_PRESENT" != "0" ] ; then # vGPU requires either MDEV or vfio-pci-core framework to be present. 
if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then exit 0 @@ -6806,8 +6853,8 @@ echo "Below CONFIG options are missing on the kernel for installing"; echo "NVIDIA vGPU driver on KVM host"; - if [ "$VFIO_IOMMU_PRESENT" = "0" ]; then - echo "CONFIG_VFIO_IOMMU_TYPE1"; + if [ "$VFIO_IOMMU_PRESENT" = "0" ] && [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" = "0" ]; then + echo "either CONFIG_VFIO_IOMMU_TYPE1 or CONFIG_IOMMUFD_VFIO_CONTAINER"; fi if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/linux_nvswitch.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/linux_nvswitch.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/linux_nvswitch.c 2024-05-12 19:45:00.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/linux_nvswitch.c 2024-09-17 17:05:36.000000000 +0000 @@ -31,6 +31,7 @@ #include "nvCpuUuid.h" #include "nv-time.h" #include "nvlink_caps.h" +#include "nvlink_proto.h" #include #include @@ -49,7 +50,7 @@ #include "ioctl_nvswitch.h" -const static struct +static const struct { NvlStatus status; int err; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-caps.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-caps.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-caps.c 2024-05-12 19:44:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-caps.c 2024-09-17 17:05:28.000000000 +0000 @@ -266,7 +266,7 @@ nv_cap_procfs_dir = NULL; } -int nv_cap_procfs_init(void) +static int nv_cap_procfs_init(void) { static struct proc_dir_entry *file_entry; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-dma.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-dma.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-dma.c 2024-05-12 19:44:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-dma.c 2024-09-17 17:05:26.000000000 +0000 @@ -290,7 +290,7 @@ os_free_mem(dma_map->mapping.discontig.submaps); } -void nv_load_dma_map_scatterlist( +static void nv_load_dma_map_scatterlist( nv_dma_map_t *dma_map, NvU64 *va_array ) @@ -486,7 +486,7 @@ return status; } -NV_STATUS NV_API_CALL nv_dma_unmap_sgt( +static NV_STATUS NV_API_CALL nv_dma_unmap_sgt( nv_dma_device_t *dma_dev, void **priv ) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-ibmnpu.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-ibmnpu.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-ibmnpu.c 2024-05-12 19:44:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-ibmnpu.c 2024-09-17 17:05:28.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2017-2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,9 +25,9 @@ * nv-ibmnpu.c - interface with the ibmnpu (IBM NVLink Processing Unit) "module" */ #include "nv-linux.h" +#include "nv-ibmnpu.h" #if defined(NVCPU_PPC64LE) -#include "nv-ibmnpu.h" #include "nv-rsync.h" /* diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-kthread-q.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-kthread-q.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-kthread-q.c 2024-05-12 19:44:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-kthread-q.c 2024-09-17 17:05:28.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -176,7 +176,7 @@ { unsigned i, j; - const static unsigned attempts = 3; + static const unsigned attempts = 3; struct task_struct *thread[3]; for (i = 0;; i++) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-mmap.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-mmap.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-mmap.c 2024-05-12 19:44:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-mmap.c 2024-09-17 17:05:26.000000000 +0000 @@ -368,7 +368,7 @@ return 0; } -int static nvidia_mmap_peer_io( +static int nvidia_mmap_peer_io( struct vm_area_struct *vma, nv_alloc_t *at, NvU64 page_index, @@ -389,7 +389,7 @@ return ret; } -int static nvidia_mmap_sysmem( +static int nvidia_mmap_sysmem( struct vm_area_struct *vma, nv_alloc_t *at, NvU64 page_index, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-pci.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-pci.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-pci.c 2024-05-12 19:44:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-pci.c 2024-09-17 17:05:26.000000000 +0000 @@ -370,15 +370,6 @@ if (pci_dev->is_virtfn) { #if defined(NV_VGPU_KVM_BUILD) - nvl = pci_get_drvdata(pci_dev->physfn); - if (!nvl) - { - nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x " - "since PF is not bound to nvidia driver.\n", - NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), - NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); - goto failed; - } #if defined(NV_BUS_TYPE_HAS_IOMMU_OPS) if (pci_dev->dev.bus->iommu_ops == NULL) @@ -387,20 +378,10 @@ (pci_dev->dev.iommu->iommu_dev->ops == NULL)) #endif { - nv = NV_STATE_PTR(nvl); - if (rm_is_iommu_needed_for_sriov(sp, nv)) - { - nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x " - "since IOMMU is not present on the system.\n", - NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), - NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); - goto failed; - } - } - - if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK) - { - nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module"); + nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x " + "since IOMMU is not present on the system.\n", + NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), + NV_PCI_SLOT_NUMBER(pci_dev), 
PCI_FUNC(pci_dev->devfn)); goto failed; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-procfs.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-procfs.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv-procfs.c 2024-05-12 19:44:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv-procfs.c 2024-09-17 17:05:27.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -694,7 +694,7 @@ /* * Forwards error to nv_log_error which exposes data to vendor callback */ -void +static void exercise_error_forwarding_va( nv_state_t *nv, NvU32 err, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nv.c 2024-05-12 19:44:49.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nv.c 2024-09-17 17:05:26.000000000 +0000 @@ -1138,12 +1138,6 @@ return rc; } -NV_STATUS NV_API_CALL nv_get_num_dpaux_instances(nv_state_t *nv, NvU32 *num_instances) -{ - *num_instances = nv->num_dpaux_instance; - return NV_OK; -} - void NV_API_CALL nv_schedule_uvm_isr(nv_state_t *nv) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nvidia.Kbuild nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nvidia.Kbuild --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nvidia.Kbuild 2024-05-12 19:44:49.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nvidia.Kbuild 2024-09-17 17:05:26.000000000 +0000 @@ -161,7 +161,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += vga_tryget NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter -NV_CONFTEST_FUNCTION_COMPILE_TESTS += unsafe_follow_pfn +NV_CONFTEST_FUNCTION_COMPILE_TESTS += follow_pfn NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked NV_CONFTEST_FUNCTION_COMPILE_TESTS += add_memory_driver_managed @@ -228,6 +228,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_free_gscco_mem NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_bytes NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto +NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pte NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops NV_CONFTEST_TYPE_COMPILE_TESTS += swiotlb_dma_ops diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nvlink_linux.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nvlink_linux.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/nvlink_linux.c 2024-05-12 19:44:59.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/nvlink_linux.c 2024-09-17 17:05:35.000000000 +0000 @@ -27,6 +27,7 @@ #include "nvlink_linux.h" #include "nvlink_errors.h" #include "nvlink_export.h" +#include "nvlink_proto.h" #include "nv-linux.h" #include "nv-procfs.h" #include "nv-time.h" diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/os-interface.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/os-interface.c --- 
nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/os-interface.c 2024-05-12 19:44:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/os-interface.c 2024-09-17 17:05:27.000000000 +0000 @@ -373,7 +373,7 @@ return strcmp(str1, str2); } -void *os_mem_copy_custom( +static void *os_mem_copy_custom( void *dstPtr, const void *srcPtr, NvU32 length diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/os-mlock.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/os-mlock.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia/os-mlock.c 2024-05-12 19:44:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia/os-mlock.c 2024-09-17 17:05:27.000000000 +0000 @@ -36,10 +36,28 @@ unsigned long address, unsigned long *pfn) { -#if defined(NV_UNSAFE_FOLLOW_PFN_PRESENT) - return unsafe_follow_pfn(vma, address, pfn); -#else +#if defined(NV_FOLLOW_PFN_PRESENT) return follow_pfn(vma, address, pfn); +#else +#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte + int status = 0; + spinlock_t *ptl; + pte_t *ptep; + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + return status; + + status = follow_pte(vma, address, &ptep, &ptl); + if (status) + return status; + *pfn = pte_pfn(ptep_get(ptep)); + + // The lock is acquired inside follow_pte() + pte_unmap_unlock(ptep, ptl); + return 0; +#else // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte + return -1; +#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte #endif } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-drm/nvidia-drm-drv.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm-drv.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-drm/nvidia-drm-drv.c 2024-05-12 19:36:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm-drv.c 2024-09-17 16:53:17.000000000 +0000 @@ -105,6 +105,7 @@ #if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) +#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT) static void nv_drm_output_poll_changed(struct drm_device *dev) { struct drm_connector *connector = NULL; @@ -148,6 +149,7 @@ nv_drm_connector_list_iter_end(&conn_iter); #endif } +#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */ static struct drm_framebuffer *nv_drm_framebuffer_create( struct drm_device *dev, @@ -185,7 +187,9 @@ .atomic_check = nv_drm_atomic_check, .atomic_commit = nv_drm_atomic_commit, + #if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT) .output_poll_changed = nv_drm_output_poll_changed, + #endif }; static void nv_drm_event_callback(const struct NvKmsKapiEvent *event) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild 2024-05-12 19:12:42.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-drm/nvidia-drm.Kbuild 2024-09-17 16:28:47.000000000 +0000 @@ -134,3 +134,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present +NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-modeset/nv-kthread-q.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nv-kthread-q.c --- 
nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-modeset/nv-kthread-q.c 2024-05-12 19:34:28.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nv-kthread-q.c 2024-09-17 16:50:50.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -176,7 +176,7 @@ { unsigned i, j; - const static unsigned attempts = 3; + static const unsigned attempts = 3; struct task_struct *thread[3]; for (i = 0;; i++) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c 2024-05-12 19:34:28.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-modeset/nvidia-modeset-linux.c 2024-09-17 16:50:50.000000000 +0000 @@ -1081,7 +1081,7 @@ nvKmsKapiHandleEventQueueChange(device); } -struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type, +static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type, struct NvKmsKapiDevice *device, int *status) { @@ -1133,7 +1133,7 @@ return NULL; } -void nvkms_close_pm_locked(struct nvkms_per_open *popen) +static void nvkms_close_pm_locked(struct nvkms_per_open *popen) { /* * Don't use down_interruptible(): we need to free resources @@ -1196,7 +1196,7 @@ } } -int nvkms_ioctl_common +static int nvkms_ioctl_common ( struct nvkms_per_open *popen, NvU32 cmd, NvU64 address, const size_t size diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c 2024-05-12 19:45:01.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c 2024-09-17 17:05:38.000000000 +0000 @@ -1,5 +1,5 @@ /******************************************************************************* - Copyright (c) 2016 NVIDIA Corporation + Copyright (c) 2016-2024 NVIDIA Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to @@ -81,7 +81,7 @@ #define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS) // This exists in order to have a function to place a breakpoint on: -void on_nvq_assert(void) +static void on_nvq_assert(void) { (void)NULL; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/nv-kthread-q.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/nv-kthread-q.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/nv-kthread-q.c 2024-05-12 19:45:01.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/nv-kthread-q.c 2024-09-17 17:05:38.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -176,7 +176,7 @@ { unsigned i, j; - const static unsigned attempts = 3; + static const unsigned attempts = 3; struct task_struct *thread[3]; for (i = 0;; i++) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_channel.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_channel.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_channel.c 2024-05-12 19:45:07.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_channel.c 2024-09-17 17:05:44.000000000 +0000 @@ -722,7 +722,17 @@ // Wait for the WLC/LCIC to be primed. This means that PUT == GET + 2 // and a WLC doorbell ring is enough to start work. - UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&lcic_channel->tracking_sem), &spin); + UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&lcic_channel->tracking_sem), &spin) { + NV_STATUS status = uvm_channel_check_errors(lcic_channel); + if (status != NV_OK) { + UVM_ASSERT(uvm_global_get_status() != NV_OK); + + // If there's a global fatal error we can't communicate with the GPU + // and the below launch sequence doesn't work. + UVM_ERR_PRINT_NV_STATUS("Failed to wait for LCIC channel (%s) completion.", status, lcic_channel->name); + return; + } + } // Executing WLC adds an extra job to LCIC ++lcic_channel->tracking_sem.queued_value; @@ -3250,7 +3260,17 @@ // Wait for the WLC/LCIC to be primed. This means that PUT == GET + 2 // and a WLC doorbell ring is enough to start work. - UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&channel->tracking_sem), &spin); + UVM_SPIN_WHILE(!uvm_gpu_tracking_semaphore_is_completed(&channel->tracking_sem), &spin) { + status = uvm_channel_check_errors(channel); + if (status != NV_OK) { + UVM_ERR_PRINT_NV_STATUS("Failed to wait for LCIC channel (%s) completion", status, channel->name); + break; + } + } + + // Continue on error and attempt to stop WLC below. This can lead to + // channel destruction with mismatched GET and PUT pointers. RM will + // print errors if that's the case, but channel destruction succeeeds. } status = uvm_push_begin(manager, UVM_CHANNEL_TYPE_SEC2, &push, "Stop WLC channels"); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_common.h nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_common.h --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_common.h 2024-05-12 19:45:01.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_common.h 2024-09-17 17:05:38.000000000 +0000 @@ -1,5 +1,5 @@ /******************************************************************************* - Copyright (c) 2013-2021 NVIDIA Corporation + Copyright (c) 2013-2024 NVIDIA Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to @@ -434,7 +434,9 @@ UVM_ASSERT(first); UVM_ASSERT(outer); - if (uvm_platform_uses_canonical_form_address()) { + // Maxwell GPUs (num_va_bits == 40b) do not support canonical form address + // even when plugged into platforms using it. 
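The uvm_channel.c hunks above give the UVM_SPIN_WHILE() waits a body that polls for channel or global errors, so a fatally wedged GPU can no longer hang the wait forever. The same idea, restated as a generic standalone helper (a sketch only; the real code uses UVM's own spin-loop macro and error-query functions):

    #include <asm/processor.h>   /* cpu_relax() */

    /* Spin until *completed becomes nonzero, but bail out as soon as the
     * supplied error check reports a fatal condition. */
    static int spin_until_done_or_fatal(volatile const int *completed,
                                        int (*check_fatal)(void))
    {
        while (!*completed) {
            int err = check_fatal();

            if (err)
                return err;   /* give up instead of spinning on a dead GPU */
            cpu_relax();
        }
        return 0;
    }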
+ if (uvm_platform_uses_canonical_form_address() && num_va_bits > 40) { *first = 1ULL << (num_va_bits - 1); *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits)); } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c 2024-05-12 19:45:09.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c 2024-09-17 17:05:47.000000000 +0000 @@ -680,7 +680,10 @@ while (get != put) { // Wait until valid bit is set - UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin); + UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) { + if (uvm_global_get_status() != NV_OK) + goto done; + } gpu->parent->access_counter_buffer_hal->entry_clear_valid(gpu->parent, get); ++get; @@ -688,6 +691,7 @@ get = 0; } +done: write_get(gpu->parent, get); } @@ -813,12 +817,18 @@ (fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) { uvm_access_counter_buffer_entry_t *current_entry = ¬ification_cache[notification_index]; - // We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it - // individually since entries can be written out of order + // We cannot just wait for the last entry (the one pointed by put) to + // become valid, we have to do it individually since entries can be + // written out of order UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) { // We have some entry to work on. Let's do the rest later. if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0) goto done; + + // There's no entry to work on and something has gone wrong. Ignore + // the rest. + if (uvm_global_get_status() != NV_OK) + goto done; } // Prevent later accesses being moved above the read of the valid bit diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c --- nvidia-open-gpu-kernel-modules-535.183.01/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c 2024-05-12 19:45:09.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c 2024-09-17 17:05:47.000000000 +0000 @@ -624,7 +624,15 @@ while (get != put) { // Wait until valid bit is set - UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin); + UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin) { + // Channels might be idle (e.g. in teardown) so check for errors + // actively. In that case the gpu pointer is valid. + NV_STATUS status = gpu ? uvm_channel_manager_check_errors(gpu->channel_manager) : uvm_global_get_status(); + if (status != NV_OK) { + write_get(parent_gpu, get); + return status; + } + } fault_buffer_skip_replayable_entry(parent_gpu, get); ++get; @@ -857,6 +865,10 @@ // We have some entry to work on. Let's do the rest later. 
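The uvm_common.h change above stops treating Maxwell's 40-bit VA space as canonical-form even on CPUs that otherwise use it; the two expressions bracket the non-canonical hole. A small user-space check of the arithmetic, assuming 48 VA bits as on x86-64/arm64:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned num_va_bits = 48;

        /* Lowest non-canonical address ... */
        uint64_t first = 1ULL << (num_va_bits - 1);
        /* ... and the sign-extended start of the upper canonical half. */
        uint64_t outer = (uint64_t)((int64_t)(1ULL << 63) >> (64 - num_va_bits));

        /* Prints: first=0x0000800000000000 outer=0xffff800000000000 */
        printf("first=0x%016" PRIx64 " outer=0x%016" PRIx64 "\n", first, outer);
        return 0;
    }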
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0) goto done; + + status = uvm_global_get_status(); + if (status != NV_OK) + goto done; } // Prevent later accesses being moved above the read of the valid bit diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_configcaps.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_configcaps.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_configcaps.h 2024-05-12 19:32:03.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_configcaps.h 2024-09-17 16:47:44.000000000 +0000 @@ -246,7 +246,7 @@ virtual bool getOuiSupported() = 0; virtual AuxRetry::status setOuiSource(unsigned ouiId, const char * model, size_t modelNameLength, NvU8 chipRevision) = 0; virtual bool getOuiSource(unsigned &ouiId, char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) = 0; - virtual bool getOuiSink(unsigned &ouiId, char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) = 0; + virtual bool getOuiSink(unsigned &ouiId, unsigned char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) = 0; }; class HDCP diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_connector.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_connector.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_connector.h 2024-05-12 19:32:03.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_connector.h 2024-09-17 16:47:44.000000000 +0000 @@ -229,6 +229,7 @@ virtual void markDeviceForDeletion() = 0; virtual bool getRawDscCaps(NvU8 *buffer, NvU32 bufferSize) = 0; + virtual bool setRawDscCaps(NvU8 *buffer, NvU32 bufferSize) = 0; // This interface is still nascent. Please don't use it. Read size limit is 16 bytes. 
virtual AuxBus::status getDpcdData(unsigned offset, NvU8 * buffer, @@ -628,7 +629,7 @@ virtual bool getHDCPAbortCodesDP12(NvU32 &hdcpAbortCodesDP12) = 0; - virtual bool getOuiSink(unsigned &ouiId, char * modelName, + virtual bool getOuiSink(unsigned &ouiId, unsigned char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) = 0; virtual bool getIgnoreSourceOuiHandshake() = 0; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_connectorimpl.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_connectorimpl.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_connectorimpl.h 2024-05-12 19:32:03.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_connectorimpl.h 2024-09-17 16:47:44.000000000 +0000 @@ -94,9 +94,9 @@ char cachedSourceModelName[NV_DPCD_SOURCE_DEV_ID_STRING__SIZE + 1]; NvU8 cachedSourceChipRevision; - unsigned ouiId; // Sink ouiId - char modelName[NV_DPCD_SOURCE_DEV_ID_STRING__SIZE + 1]; // Device Model-name - bool bIgnoreSrcOuiHandshake; // Skip writing source OUI + unsigned ouiId; // Sink ouiId + unsigned char modelName[NV_DPCD_SOURCE_DEV_ID_STRING__SIZE + 1]; // Device Model-name + bool bIgnoreSrcOuiHandshake; // Skip writing source OUI LinkPolicy linkPolicy; bool linkGuessed; // True when link was "guessed" during HPD in TMDS mode @@ -551,7 +551,7 @@ void freeTimeslice(GroupImpl * targetGroup); void flushTimeslotsToHardware(); bool getHDCPAbortCodesDP12(NvU32 &hdcpAbortCodesDP12); - bool getOuiSink(unsigned &ouiId, char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision); + bool getOuiSink(unsigned &ouiId, unsigned char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision); bool hdcpValidateKsv(const NvU8 *ksv, NvU32 Size); void cancelHdcpCallbacks(); bool handleCPIRQ(); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_deviceimpl.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_deviceimpl.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/inc/dp_deviceimpl.h 2024-05-12 19:32:03.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/inc/dp_deviceimpl.h 2024-09-17 16:47:44.000000000 +0000 @@ -44,6 +44,7 @@ #define HDCP_BCAPS_DDC_EN_BIT 0x80 #define HDCP_BCAPS_DP_EN_BIT 0x01 #define HDCP_I2C_CLIENT_ADDR 0x74 + #define DSC_CAPS_SIZE 16 struct GroupImpl; struct ConnectorImpl; @@ -420,6 +421,7 @@ virtual void markDeviceForDeletion() {bisMarkedForDeletion = true;}; virtual bool isMarkedForDeletion() {return bisMarkedForDeletion;}; virtual bool getRawDscCaps(NvU8 *buffer, NvU32 bufferSize); + virtual bool setRawDscCaps(NvU8 *buffer, NvU32 bufferSize); virtual AuxBus::status dscCrcControl(NvBool bEnable, gpuDscCrc *dataGpu, sinkDscCrc *dataSink); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_configcaps.cpp nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_configcaps.cpp --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_configcaps.cpp 2024-05-12 19:32:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_configcaps.cpp 2024-09-17 16:47:46.000000000 +0000 @@ -827,7 +827,7 @@ return true; } - virtual bool getOuiSink(unsigned &ouiId, char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) + virtual bool getOuiSink(unsigned &ouiId, unsigned char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) { 
NvU8 ouiBuffer[16]; int address = NV_DPCD_SINK_IEEE_OUI; @@ -865,7 +865,7 @@ // Next 6 bytes are Device Identification String, copy as much as we can (limited buffer case). unsigned int i; for (i = 0; i < modelNameBufferSize; i++) - modelName[i] = ouiBuffer[3+i]; + modelName[i] = (unsigned char)ouiBuffer[3+i]; chipRevision = ouiBuffer[9]; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_connectorimpl.cpp nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_connectorimpl.cpp --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_connectorimpl.cpp 2024-05-12 19:32:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_connectorimpl.cpp 2024-09-17 16:47:47.000000000 +0000 @@ -3288,7 +3288,7 @@ return true; } -bool ConnectorImpl::getOuiSink(unsigned &ouiId, char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) +bool ConnectorImpl::getOuiSink(unsigned &ouiId, unsigned char * modelName, size_t modelNameBufferSize, NvU8 & chipRevision) { if (!previousPlugged || !hal->getOuiSupported()) return false; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_deviceimpl.cpp nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_deviceimpl.cpp --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_deviceimpl.cpp 2024-05-12 19:32:07.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_deviceimpl.cpp 2024-09-17 16:47:47.000000000 +0000 @@ -459,6 +459,15 @@ return true; } +bool DeviceImpl::setRawDscCaps(NvU8 *buffer, NvU32 bufferSize) +{ + if (bufferSize < sizeof(rawDscCaps)) + return false; + + dpMemCopy(&rawDscCaps, buffer, sizeof(rawDscCaps)); + return parseDscCaps(&rawDscCaps[0], sizeof(rawDscCaps)); +} + AuxBus::status DeviceImpl::transaction(Action action, Type type, int address, NvU8 * buffer, unsigned sizeRequested, unsigned * sizeCompleted, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_wardatabase.cpp nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_wardatabase.cpp --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/displayport/src/dp_wardatabase.cpp 2024-05-12 19:32:08.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/displayport/src/dp_wardatabase.cpp 2024-09-17 16:47:48.000000000 +0000 @@ -66,21 +66,15 @@ // Synaptics case 0x24CC90: if ((modelName[0] == 'S') && (modelName[1] == 'Y') && (modelName[2] == 'N') && - (modelName[3] == 'A') && (modelName[4] == 'S') && + (modelName[3] == 'A') && (((modelName[4] == 'S') && ((modelName[5] == '1') || (modelName[5] == '2') || (modelName[5] == '3') || (modelName[5] == '#') || - (modelName[5] == '\"'))) + (modelName[5] == '\"')))||((modelName[4] == 0x84) && + (modelName[5] == '0')))) { // // Extended latency from link-train end to FEC enable pattern // to avoid link lost or blank screen with Synaptics branch. 
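The char to unsigned char change threaded through getOuiSink() and modelName in the DisplayPort hunks above is what makes the new 0x84 comparison in dp_wardatabase.cpp dependable: with plain char on a signed-char platform the test could never match. A tiny illustration of the pitfall (not from the tree):

    #include <stdio.h>

    int main(void)
    {
        char          s = (char)0x84;   /* -124 where char is signed, e.g. x86-64 */
        unsigned char u = 0x84;         /* always 132                             */

        /* Typically prints "0 1": the signed comparison silently fails. */
        printf("%d %d\n", s == 0x84, u == 0x84);
        return 0;
    }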
- // (Bug 2561206) - // - // Dock SKU ID: - // Dell Salomon-WD19TB SYNAS1 - // HP Hook SYNAS3 - // HP Adira-A SYNAS# - // Lenovo SYNAS" / SYNAS2 // LT2FecLatencyMs = 57; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/inc/nvBldVer.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvBldVer.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/inc/nvBldVer.h 2024-05-12 19:29:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvBldVer.h 2024-09-17 16:44:51.000000000 +0000 @@ -36,25 +36,25 @@ // and then checked back in. You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r538_67 + #define NV_BUILD_BRANCH r538_95 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r538_67 + #define NV_PUBLIC_BRANCH r538_95 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_67-552" -#define NV_BUILD_CHANGELIST_NUM (34280977) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_95-686" +#define NV_BUILD_CHANGELIST_NUM (34854198) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_67-552" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34280977) +#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_95-686" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34854198) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r538_67-1" -#define NV_BUILD_CHANGELIST_NUM (34280977) +#define NV_BUILD_BRANCH_VERSION "r538_95-1" +#define NV_BUILD_CHANGELIST_NUM (34853858) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "538.69" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34280977) +#define NV_BUILD_NAME "538.96" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34853858) #define NV_BUILD_BRANCH_BASE_VERSION R535 #endif // End buildmeister python edited section diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/inc/nvUnixVersion.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvUnixVersion.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/inc/nvUnixVersion.h 2024-05-12 19:29:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/nvUnixVersion.h 2024-09-17 16:44:52.000000000 +0000 @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "535.183.01" +#define NV_VERSION_STRING "535.216.01" #else diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/inc/swref/published/ampere/ga102/dev_falcon_v4.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/swref/published/ampere/ga102/dev_falcon_v4.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/inc/swref/published/ampere/ga102/dev_falcon_v4.h 2024-05-12 19:29:12.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/inc/swref/published/ampere/ga102/dev_falcon_v4.h 2024-09-17 16:44:57.000000000 +0000 @@ -57,7 +57,9 @@ #define NV_PFALCON_FALCON_DMATRFCMD 0x00000118 /* RW-4R */ #define NV_PFALCON_FALCON_DMATRFCMD_FULL 0:0 /* R-XVF */ #define NV_PFALCON_FALCON_DMATRFCMD_FULL_TRUE 0x00000001 /* R---V */ +#define NV_PFALCON_FALCON_DMATRFCMD_FULL_FALSE 0x00000000 /* R---V */ #define NV_PFALCON_FALCON_DMATRFCMD_IDLE 1:1 /* R-XVF */ +#define NV_PFALCON_FALCON_DMATRFCMD_IDLE_TRUE 0x00000001 /* R---V */ #define NV_PFALCON_FALCON_DMATRFCMD_IDLE_FALSE 0x00000000 /* 
R---V */ #define NV_PFALCON_FALCON_DMATRFCMD_SEC 3:2 /* RWXVF */ #define NV_PFALCON_FALCON_DMATRFCMD_IMEM 4:4 /* RWXVF */ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/interface/nvlink.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/interface/nvlink.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/interface/nvlink.h 2024-05-12 19:29:42.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/interface/nvlink.h 2024-09-17 16:45:24.000000000 +0000 @@ -500,7 +500,8 @@ */ NvlStatus nvlink_lib_discover_and_get_remote_conn_info(nvlink_link *end, nvlink_conn_info *conn_info, - NvU32 flags); + NvU32 flags, + NvBool bForceDiscovery); /************************************************************************************************/ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c 2024-05-12 19:29:42.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c 2024-09-17 16:45:25.000000000 +0000 @@ -47,7 +47,8 @@ ( nvlink_link *end, nvlink_link **remote_end, - NvU32 flags + NvU32 flags, + NvBool bForceDiscovery ) { nvlink_intranode_conn *conn = NULL; @@ -67,7 +68,7 @@ return; } - if (nvlinkLibCtx.bNewEndpoints) + if (nvlinkLibCtx.bNewEndpoints || bForceDiscovery) { if (!_nvlink_core_all_links_initialized()) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c 2024-05-12 19:29:43.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/interface/nvlink_ioctl_entry.c 2024-09-17 16:45:25.000000000 +0000 @@ -1481,7 +1481,7 @@ (linkMode == NVLINK_LINKSTATE_SLEEP)) { nvlink_link *remoteLink = NULL; - nvlink_core_discover_and_get_remote_end(link, &remoteLink, 0); + nvlink_core_discover_and_get_remote_end(link, &remoteLink, 0, NV_FALSE); if (remoteLink == NULL) { NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/interface/nvlink_kern_discovery_entry.c nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/interface/nvlink_kern_discovery_entry.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/interface/nvlink_kern_discovery_entry.c 2024-05-12 19:29:43.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/interface/nvlink_kern_discovery_entry.c 2024-09-17 16:45:25.000000000 +0000 @@ -182,7 +182,8 @@ ( nvlink_link *end, nvlink_conn_info *conn_info, - NvU32 flags + NvU32 flags, + NvBool bForceDiscovery ) { NvlStatus status = NVL_SUCCESS; @@ -257,7 +258,8 @@ conn_info->bConnected = NV_FALSE; // Get the remote_end of the link - nvlink_core_discover_and_get_remote_end(end, &remote_end, flags); + nvlink_core_discover_and_get_remote_end(end, &remote_end, flags, + bForceDiscovery); if (remote_end) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/nvlink_helper.h 
nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/nvlink_helper.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvlink/kernel/nvlink/nvlink_helper.h 2024-05-12 19:29:44.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvlink/kernel/nvlink/nvlink_helper.h 2024-09-17 16:45:26.000000000 +0000 @@ -174,7 +174,8 @@ */ void nvlink_core_discover_and_get_remote_end(nvlink_link *end, nvlink_link **remote_end, - NvU32 flags); + NvU32 flags, + NvBool bForceDiscovery); /************************************************************************************************/ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvswitch/kernel/lr10/lr10.c nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvswitch/kernel/lr10/lr10.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/nvswitch/kernel/lr10/lr10.c 2024-05-12 19:29:57.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/nvswitch/kernel/lr10/lr10.c 2024-09-17 16:45:38.000000000 +0000 @@ -4071,7 +4071,9 @@ } else { - nvlink_lib_discover_and_get_remote_conn_info(link, &conn_info, NVLINK_STATE_CHANGE_SYNC); + nvlink_lib_discover_and_get_remote_conn_info(link, &conn_info, + NVLINK_STATE_CHANGE_SYNC, + NV_FALSE); } // Set NVLINK per-link caps diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/class/cl0000_notification.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/class/cl0000_notification.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/class/cl0000_notification.h 2024-05-12 19:31:43.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/class/cl0000_notification.h 2024-09-17 16:47:29.000000000 +0000 @@ -31,7 +31,7 @@ /*event values*/ #define NV0000_NOTIFIERS_DISPLAY_CHANGE (0) #define NV0000_NOTIFIERS_EVENT_NONE_PENDING (1) -#define NV0000_NOTIFIERS_VM_START (2) +#define NV0000_NOTIFIERS_GPU_UNBIND_EVENT (2) #define NV0000_NOTIFIERS_GPU_BIND_EVENT (3) #define NV0000_NOTIFIERS_NVTELEMETRY_REPORT_EVENT (4) #define NV0000_NOTIFIERS_MAXCOUNT (5) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/class/cla084.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/class/cla084.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/class/cla084.h 2024-05-12 19:31:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/class/cla084.h 2024-09-17 16:47:38.000000000 +0000 @@ -73,6 +73,7 @@ NvHandle hPluginClient; NvU32 numGuestFbHandles; NvHandle guestFbHandleList[NVA084_MAX_VMMU_SEGMENTS]; + NvU8 vgpuDevName[VM_UUID_SIZE]; NvHandle hPluginHeapMemory; NV_DECLARE_ALIGNED(NvU64 ctrlBuffOffset, 8); NV_DECLARE_ALIGNED(NvU64 initTaskLogBuffOffset, 8); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000vgpu.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000vgpu.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000vgpu.h 2024-05-12 19:32:13.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000vgpu.h 2024-09-17 16:48:00.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -36,24 +36,36 @@ #include "ctrl/ctrla081.h" #include "class/cl0000.h" #include "nv_vgpu_types.h" + +/* DRF macros for OBJGPU::gpuId */ +#define NV0000_BUSDEVICE_DOMAIN 31:16 +#define NV0000_BUSDEVICE_BUS 15:8 +#define NV0000_BUSDEVICE_DEVICE 7:0 + +#define GPU_32_BIT_ID_DECODE_DOMAIN(gpuId) (NvU16)DRF_VAL(0000, _BUSDEVICE, _DOMAIN, gpuId); +#define GPU_32_BIT_ID_DECODE_BUS(gpuId) (NvU8) DRF_VAL(0000, _BUSDEVICE, _BUS, gpuId); +#define GPU_32_BIT_ID_DECODE_DEVICE(gpuId) (NvU8) DRF_VAL(0000, _BUSDEVICE, _DEVICE, gpuId); + /* - * NV0000_CTRL_CMD_VGPU_GET_START_DATA + * NV0000_CTRL_CMD_VGPU_CREATE_DEVICE * - * This command gets data associated with NV0000_NOTIFIERS_VGPU_MGR_START to - * start VGPU process. + * This command informs RM to create a vGPU device on KVM. * - * mdevUuid - * This parameter gives mdev device UUID for which nvidia-vgpu-mgr should - * init process. + * vgpuName [IN] + * This parameter provides the MDEV UUID or VF BDF depending on whether MDEV + * or vfio-pci-core framework is used. * - * qemuPid - * This parameter specifies the QEMU process ID of the VM. - * - * gpuPciId + * gpuPciId [IN] * This parameter provides gpuId of GPU on which vgpu device is created. * - * configParams - * This parameter specifies the configuration parameters for vGPU + * gpuPciBdf + * This parameter specifies the BDF of the VF. (Same as PF for non-sriov) + * + * vgpuTypeId [IN] + * This parameter specifies the vGPU type ID for the device to be created. + * + * vgpuId [OUT] + * This parameter returns the vgpu id allocated by RM for the device * * Possible status values returned are: * NV_OK @@ -62,17 +74,114 @@ * NV_ERR_INVALID_CLIENT * */ -#define NV0000_CTRL_CMD_VGPU_GET_START_DATA (0xc01) /* finn: Evaluated from "(FINN_NV01_ROOT_VGPU_INTERFACE_ID << 8) | NV0000_CTRL_VGPU_GET_START_DATA_PARAMS_MESSAGE_ID" */ -#define NV0000_CTRL_VGPU_GET_START_DATA_PARAMS_MESSAGE_ID (0x1U) +#define NV0000_CTRL_CMD_VGPU_CREATE_DEVICE (0xc02) /* finn: Evaluated from "(FINN_NV01_ROOT_VGPU_INTERFACE_ID << 8) | NV0000_CTRL_VGPU_CREATE_DEVICE_PARAMS_MESSAGE_ID" */ + +#define NV0000_CTRL_VGPU_CREATE_DEVICE_PARAMS_MESSAGE_ID (0x2U) -typedef struct NV0000_CTRL_VGPU_GET_START_DATA_PARAMS { - NvU8 mdevUuid[VM_UUID_SIZE]; - NvU8 configParams[1024]; - NvU32 qemuPid; +typedef struct NV0000_CTRL_VGPU_CREATE_DEVICE_PARAMS { + NvU8 vgpuName[VM_UUID_SIZE]; NvU32 gpuPciId; + NvU32 gpuPciBdf; + NvU32 vgpuTypeId; NvU16 vgpuId; +} NV0000_CTRL_VGPU_CREATE_DEVICE_PARAMS; + +/* + * NV0000_CTRL_CMD_VGPU_GET_INSTANCES + * + * This command queries RM for available instances for a particular vGPU type ID + * on KVM. + * + * gpuPciId [IN] + * This parameter specifies gpuId of GPU on which vGPU instances are being + * queried. + * + * gpuPciBdf [IN] + * This parameter specifies the BDF of the VF. (Same as PF for non-sriov) + * + * numVgpuTypes [IN] + * This parameter specifies the count of vgpuTypeIds supplied and the + * count of availableInstances values to be returned. + * + * vgpuTypeIds [IN] + * This parameter specifies a total of numVgpuTypes vGPU type IDs for which + * the available instances are to be queried. + * + * availableInstances [OUT] + * This parameter returns a total of numVgpuTypes available instances for + * the respective vGPU type IDs supplied in vgpuTypeIds input parameter. 
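The new NV0000_BUSDEVICE_* DRF fields near the top of ctrl0000vgpu.h pack a PCI location into the 32-bit gpuId as domain[31:16], bus[15:8], device[7:0]. A plain-C equivalent of the decode macros (the gpuId value is made up):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t gpuId  = 0x00013A00u;            /* hypothetical 0001:3a:00 */
        uint16_t domain = (uint16_t)(gpuId >> 16);
        uint8_t  bus    = (uint8_t)(gpuId >> 8);
        uint8_t  device = (uint8_t)gpuId;

        printf("%04x:%02x:%02x\n", domain, bus, device);   /* 0001:3a:00 */
        return 0;
    }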
+ * + * Possible status values returned are: + * NV_OK + * NV_ERR_INVALID_EVENT + * NV_ERR_OBJECT_NOT_FOUND + * NV_ERR_INVALID_CLIENT + * NV_ERR_INVALID_STATE + * + */ + +#define NV0000_CTRL_CMD_VGPU_GET_INSTANCES (0xc03) /* finn: Evaluated from "(FINN_NV01_ROOT_VGPU_INTERFACE_ID << 8) | NV0000_CTRL_VGPU_GET_INSTANCES_PARAMS_MESSAGE_ID" */ + +#define NV0000_CTRL_VGPU_GET_INSTANCES_PARAMS_MESSAGE_ID (0x3U) + +typedef struct NV0000_CTRL_VGPU_GET_INSTANCES_PARAMS { + NvU32 gpuPciId; NvU32 gpuPciBdf; -} NV0000_CTRL_VGPU_GET_START_DATA_PARAMS; + NvU32 numVgpuTypes; + NvU32 vgpuTypeIds[NVA081_MAX_VGPU_TYPES_PER_PGPU]; + NvU32 availableInstances[NVA081_MAX_VGPU_TYPES_PER_PGPU]; +} NV0000_CTRL_VGPU_GET_INSTANCES_PARAMS; + +/* + * NV0000_CTRL_CMD_VGPU_DELETE_DEVICE + * + * This command informs RM to delete a vGPU device on KVM. + * + * vgpuName [IN] + * This parameter provides the MDEV UUID or VF BDF depending on whether MDEV + * or vfio-pci-core framework is used. + * + * vgpuId [IN] + * This parameter provides the vgpu id allocated by RM for the device to be + * deleted. + * + * Possible status values returned are: + * NV_OK + * NV_ERR_INVALID_EVENT + * NV_ERR_OBJECT_NOT_FOUND + * NV_ERR_INVALID_CLIENT + * + */ + +#define NV0000_CTRL_CMD_VGPU_DELETE_DEVICE (0xc04) /* finn: Evaluated from "(FINN_NV01_ROOT_VGPU_INTERFACE_ID << 8) | NV0000_CTRL_VGPU_DELETE_DEVICE_PARAMS_MESSAGE_ID" */ + +#define NV0000_CTRL_VGPU_DELETE_DEVICE_PARAMS_MESSAGE_ID (0x4U) + +typedef struct NV0000_CTRL_VGPU_DELETE_DEVICE_PARAMS { + NvU8 vgpuName[VM_UUID_SIZE]; + NvU16 vgpuId; +} NV0000_CTRL_VGPU_DELETE_DEVICE_PARAMS; + +/* + * NV0000_CTRL_CMD_VGPU_VFIO_NOTIFY_RM_STATUS + * + * This command informs RM the status of vgpu-vfio GPU operations such as probe and unregister. + * + * returnStatus [IN] + * This parameter provides the status of vgpu-vfio GPU operation. 
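A hedged sketch of filling the NV0000_CTRL_VGPU_GET_INSTANCES_PARAMS request defined above. How the control actually reaches RM is outside this header, so only the parameter block is shown; the IDs and the portMemSet() helper are assumptions.

    static void fill_get_instances_request(NV0000_CTRL_VGPU_GET_INSTANCES_PARAMS *p,
                                           NvU32 gpuPciId, NvU32 gpuPciBdf)
    {
        portMemSet(p, 0, sizeof(*p));

        p->gpuPciId       = gpuPciId;
        p->gpuPciBdf      = gpuPciBdf;   /* same as the PF BDF when not SR-IOV */
        p->numVgpuTypes   = 2;
        p->vgpuTypeIds[0] = 63;          /* hypothetical vGPU type IDs         */
        p->vgpuTypeIds[1] = 64;

        /* On return from NV0000_CTRL_CMD_VGPU_GET_INSTANCES,
         * availableInstances[i] holds the remaining capacity for
         * vgpuTypeIds[i]. */
    }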
+ * + * gpuPciId [IN] + * This parameter provides the gpu id of the GPU + */ + +#define NV0000_CTRL_CMD_VGPU_VFIO_NOTIFY_RM_STATUS (0xc05) /* finn: Evaluated from "(FINN_NV01_ROOT_VGPU_INTERFACE_ID << 8) | NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS_MESSAGE_ID" */ + +#define NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS_MESSAGE_ID (0x5U) + +typedef struct NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS { + NvU32 returnStatus; + NvU32 gpuId; +} NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS; /* _ctrl0000vgpu_h_ */ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h 2024-05-12 19:32:23.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h 2024-09-17 16:48:10.000000000 +0000 @@ -2971,8 +2971,91 @@ NvU32 linkId; } NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS; -#define NV2080_CTRL_CMD_NVLINK_POST_FAULT_UP (0x20803043U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS_MESSAGE_ID" */ +#define NV2080_CTRL_CMD_NVLINK_POST_FAULT_UP (0x20803043U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS_MESSAGE_ID" */ +#define NV2080_CTRL_NVLINK_PORT_EVENT_COUNT_SIZE 64U + +/* +* Structure to store port event information +* +* portEventType +* Type of port even that occurred: NVLINK_PORT_EVENT_TYPE* +* +* gpuId +* Gpu that port event occurred on +* +* linkId +* Link id that port event occurred on +* +* time +* Platform time (nsec) when event occurred +*/ +typedef struct NV2080_CTRL_NVLINK_PORT_EVENT { + NvU32 portEventType; + NvU32 gpuId; + NvU32 linkId; + NV_DECLARE_ALIGNED(NvU64 time, 8); +} NV2080_CTRL_NVLINK_PORT_EVENT; + +/* +* NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS +* +* This command returns the port up and port down events that have occurred +* +* Parameters: +* +* portEventIndex [IN/OUT] +* On input: The index of the first port event at which to start reading out of the driver. +* +* On output: The index of the first port event that wasn't reported through the 'port event' array +* in this call to NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS. +* +* nextPortEventIndex[OUT] +* The index that will be assigned to the next port event that occurs. +* Users of the GET_PORT_EVENTS control call may set 'portEventIndex' to this field on initialization +* to bypass port events that have already occurred without making multiple control calls. +* +* portEventCount [OUT] +* Number of port events returned by the call. Currently, portEventCount is limited +* by NV2080_CTRL_NVLINK_PORT_EVENT_COUNT_SIZE. In order to query all the port events, a +* client needs to keep calling the control till portEventCount is zero. +* +* bOverflow [OUT] +* True when the port event log is overflowed and no longer contains all the port +* events that have occurred, false otherwise. +* +* portEvent [OUT] +* The port event entires. 
+*/ +#define NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS_MESSAGE_ID (0x44U) + +typedef struct NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS { + NV_DECLARE_ALIGNED(NvU64 portEventIndex, 8); + NV_DECLARE_ALIGNED(NvU64 nextPortEventIndex, 8); + NvU32 portEventCount; + NvBool bOverflow; + NV_DECLARE_ALIGNED(NV2080_CTRL_NVLINK_PORT_EVENT portEvent[NV2080_CTRL_NVLINK_PORT_EVENT_COUNT_SIZE], 8); +} NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS; + +#define NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS (0x20803044U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS_MESSAGE_ID" */ + +/* +* NV2080_CTRL_CMD_NVLINK_CYCLE_LINK +* +* This command cycles a link by faulting it and then retraining the link +* +* Parameters: +* +* linkId [IN] +* The link id of the link to be cycled +*/ +#define NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS_MESSAGE_ID (0x45U) + +typedef struct NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS { + NvU32 linkId; +} NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS; + +#define NV2080_CTRL_CMD_NVLINK_CYCLE_LINK (0x20803045U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS_MESSAGE_ID" */ /* * NV2080_CTRL_CMD_NVLINK_IS_REDUCED_CONFIG @@ -2982,13 +3065,13 @@ * [out] bReducedNvlinkConfig * Link number which the sequence should be triggered */ -#define NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID (0x44U) +#define NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID (0x46U) typedef struct NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS { NvBool bReducedNvlinkConfig; } NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS; -#define NV2080_CTRL_CMD_NVLINK_IS_REDUCED_CONFIG (0x20803044U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID" */ +#define NV2080_CTRL_CMD_NVLINK_IS_REDUCED_CONFIG (0x20803046U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID" */ /* _ctrl2080nvlink_h_ */ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h 2024-05-12 19:32:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h 2024-09-17 16:48:13.000000000 +0000 @@ -460,11 +460,20 @@ NV_DECLARE_ALIGNED(NvU64 address, 8); } NV208F_CTRL_FB_ECC_SET_WRITE_KILL_PARAMS; +typedef struct NV208F_CTRL_FB_REMAPPING_RBC_ADDRESS_INFO { + NvU32 bank; + NvU32 stackId; + NvU32 row; + NvU32 partition; + NvU32 sublocation; +} NV208F_CTRL_FB_REMAPPING_RBC_ADDRESS_INFO; + +#define NV208F_CTRL_FB_REMAP_ROW_ADDRESS_TYPE_PHYSICAL 0x0 +#define NV208F_CTRL_FB_REMAP_ROW_ADDRESS_TYPE_RBC 0x1 + /* * NV208F_CTRL_FB_REMAPPING_ADDRESS_INFO * - * physicalAddress - * Physical address to be remapped * source * The reason for retirement. Valid values for this parameter are * from NV2080_CTRL_FB_REMAPPED_ROW_SOURCE_* @@ -480,11 +489,23 @@ * Attempting to remap a reserved row * NV208F_CTRL_FB_REMAP_ROW_STATUS_INTERNAL_ERROR * Some other RM failure + * addressType + * Type of address passed. Valid values are: + * NV208F_CTRL_FB_REMAP_ROW_ADDRESS_TYPE_PHYSICAL + * The specified address is physical address. + * NV208F_CTRL_FB_REMAP_ROW_ADDRESS_TYPE_RBC + * The specified address is DRAM Row Bank Column address. 
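The GET_PORT_EVENTS description above defines an index-based drain protocol: pass portEventIndex in, consume portEventCount entries, and call again until a call returns zero entries. A hedged client-side sketch; issue_ctrl(), handle_port_event() and portMemSet() are stand-ins for whatever dispatch, consumer and memset helper the caller uses.

    /* Stand-ins (assumptions), declared only so the sketch is self-contained. */
    extern NV_STATUS issue_ctrl(NvU32 cmd, void *params, NvU32 paramsSize);
    extern void handle_port_event(const NV2080_CTRL_NVLINK_PORT_EVENT *ev);

    static NV_STATUS drain_port_events(NvU64 startIndex)
    {
        NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS p;
        NV_STATUS status;
        NvU32 i;

        do {
            portMemSet(&p, 0, sizeof(p));
            p.portEventIndex = startIndex;

            status = issue_ctrl(NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS,
                                &p, sizeof(p));
            if (status != NV_OK)
                return status;

            if (p.bOverflow)
            {
                /* The log wrapped: events older than the returned
                 * portEventIndex were lost. */
            }

            for (i = 0; i < p.portEventCount; i++)
                handle_port_event(&p.portEvent[i]);

            /* Resume after the last event that was reported. */
            startIndex = p.portEventIndex;
        } while (p.portEventCount != 0);

        return NV_OK;
    }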
+ * address + * Union of physicalAddress and rbcAddress. Set the appropriate one based on the address type. */ typedef struct NV208F_CTRL_FB_REMAPPING_ADDRESS_INFO { - NV_DECLARE_ALIGNED(NvU64 physicalAddress, 8); NvU8 source; NvU32 status; + NvU8 addressType; + union { + NV_DECLARE_ALIGNED(NvU64 physicalAddress, 8); + NV208F_CTRL_FB_REMAPPING_RBC_ADDRESS_INFO rbcAddress; + } address; } NV208F_CTRL_FB_REMAPPING_ADDRESS_INFO; /* valid values for status */ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrla081.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrla081.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/ctrl/ctrla081.h 2024-05-12 19:32:32.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/ctrl/ctrla081.h 2024-09-17 16:48:18.000000000 +0000 @@ -51,6 +51,10 @@ #define NVA081_PCI_CONFIG_SPACE_SIZE 0x100 #define NVA081_PGPU_METADATA_STRING_SIZE 256 #define NVA081_EXTRA_PARAMETERS_SIZE 1024 +#define NVA081_CONFIG_PARAMS_MAX_LENGTH 1024 + +#define NVA081_MAX_BAR_REGION_COUNT 4 +#define NVA081_MAX_SPARSE_REGION_COUNT 5 /* * NVA081_CTRL_CMD_VGPU_CONFIG_SET_INFO @@ -422,45 +426,9 @@ /* valid event action values */ -#define NVA081_CTRL_EVENT_SET_NOTIFICATION_ACTION_DISABLE (0x00000000) -#define NVA081_CTRL_EVENT_SET_NOTIFICATION_ACTION_SINGLE (0x00000001) -#define NVA081_CTRL_EVENT_SET_NOTIFICATION_ACTION_REPEAT (0x00000002) - -/* - * NVA081_CTRL_CMD_VGPU_CONFIG_NOTIFY_START - * - * This command notifies the nvidia-vgpu-vfio module with start status. - * It notifies whether start has been successful or not. - * - * mdevUuid - * This parameter specifies the uuid of the mdev device for which start has - * been called. - * vmUuid - * The UUID of VM for which vGPU has been created. - * vmName - * The name of VM for which vGPU has been created. - * returnStatus - * This parameter species whether the vGPU plugin is initialized or not. - * it specifies the error code in case plugin initialization has failed - * - * Possible status values returned are: - * NV_OK - * NV_ERR_OBJECT_NOT_FOUND - */ -#define NVA081_CTRL_CMD_VGPU_CONFIG_NOTIFY_START (0xa0810107) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS_MESSAGE_ID" */ - -/* - * NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS - * This structure represents information of plugin init status. - */ -#define NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS_MESSAGE_ID (0x7U) - -typedef struct NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS { - NvU8 mdevUuid[VM_UUID_SIZE]; - NvU8 vmUuid[VM_UUID_SIZE]; - NvU8 vmName[NVA081_VM_NAME_SIZE]; - NvU32 returnStatus; -} NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS; +#define NVA081_CTRL_EVENT_SET_NOTIFICATION_ACTION_DISABLE (0x00000000) +#define NVA081_CTRL_EVENT_SET_NOTIFICATION_ACTION_SINGLE (0x00000001) +#define NVA081_CTRL_EVENT_SET_NOTIFICATION_ACTION_REPEAT (0x00000002) /* * NVA081_CTRL_CMD_VGPU_CONFIG_MDEV_REGISTER @@ -747,4 +715,76 @@ NvU32 swizzId; } NVA081_CTRL_VGPU_CONFIG_VALIDATE_SWIZZID_PARAMS; +/* + * NVA081_CTRL_CMD_VGPU_SET_VM_NAME + * + * This command is to set VM name for KVM. + * + * vgpuName [IN] + * This param provides the vGPU device name to RM. + * + * vmName [IN] + * This param provides the VM name of the vGPU device attached. 
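The reworked NV208F_CTRL_FB_REMAPPING_ADDRESS_INFO above replaces the single physicalAddress field with an addressType selector and a union, so a row can be named either by physical address or by its DRAM row/bank/column location. A hedged sketch of populating both variants (all values invented):

    static void fill_remap_request_examples(void)
    {
        NV208F_CTRL_FB_REMAPPING_ADDRESS_INFO byPa  = { 0 };
        NV208F_CTRL_FB_REMAPPING_ADDRESS_INFO byRbc = { 0 };

        /* Variant 1: name the row by physical address. */
        byPa.addressType             = NV208F_CTRL_FB_REMAP_ROW_ADDRESS_TYPE_PHYSICAL;
        byPa.address.physicalAddress = 0x1234560000ULL;

        /* Variant 2: name the row by DRAM row/bank/column location. */
        byRbc.addressType                    = NV208F_CTRL_FB_REMAP_ROW_ADDRESS_TYPE_RBC;
        byRbc.address.rbcAddress.partition   = 2;
        byRbc.address.rbcAddress.sublocation = 1;
        byRbc.address.rbcAddress.bank        = 3;
        byRbc.address.rbcAddress.stackId     = 0;
        byRbc.address.rbcAddress.row         = 0x1a2;

        /* 'source' still takes an NV2080_CTRL_FB_REMAPPED_ROW_SOURCE_* value;
         * 'status' is filled in by RM on completion. */
        (void)byPa;
        (void)byRbc;
    }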
+ * + * Possible status values returned are: + * NV_OK + * NV_ERR_OBJECT_NOT_FOUND + * NV_ERR_INVALID_ARGUMENT + */ + +#define NVA081_CTRL_CMD_VGPU_SET_VM_NAME (0xa0810120) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS_MESSAGE_ID" */ + +#define NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS_MESSAGE_ID (0x20U) + +typedef struct NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS { + NvU8 vgpuName[VM_UUID_SIZE]; + NvU8 vmName[NVA081_VM_NAME_SIZE]; +} NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS; + +/* + * NVA081_CTRL_CMD_VGPU_GET_BAR_INFO + * + * This command is to get the bar info for a vGPU. + * + * gpuPciId [IN] + * This param specifies the PCI device ID of VF on which VM is running + * + * vgpuName [IN] + * This param provides the vGPU device name to RM. + * + * configParams [IN] + * This param provides the vGPU config params to RM + * + * barSizes [OUT] + * This param provides the BAR size for each region index of the device + * + * sparseOffsets [OUT] + * This param provides the offset of each sparse mmap region in BAR0 + * + * sparseSizes [OUT] + * This param provides the size of each sparse mmap region in BAR0 + * + * sparseCount [OUT] + * This param provides the number of sparse mmap regions in BAR0 + * + * Possible status values returned are: + * NV_OK + * NV_ERR_OBJECT_NOT_FOUND + * NV_ERR_INVALID_ARGUMENT + */ + +#define NVA081_CTRL_CMD_VGPU_GET_BAR_INFO (0xa0810121) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_GET_BAR_INFO_PARAMS_MESSAGE_ID" */ + +#define NVA081_CTRL_VGPU_GET_BAR_INFO_PARAMS_MESSAGE_ID (0x21U) + +typedef struct NVA081_CTRL_VGPU_GET_BAR_INFO_PARAMS { + NvU32 gpuPciId; + NvU8 vgpuName[VM_UUID_SIZE]; + NvU8 configParams[NVA081_CONFIG_PARAMS_MAX_LENGTH]; + NV_DECLARE_ALIGNED(NvU64 barSizes[NVA081_MAX_BAR_REGION_COUNT], 8); + NV_DECLARE_ALIGNED(NvU64 sparseOffsets[NVA081_MAX_SPARSE_REGION_COUNT], 8); + NV_DECLARE_ALIGNED(NvU64 sparseSizes[NVA081_MAX_SPARSE_REGION_COUNT], 8); + NvU32 sparseCount; +} NVA081_CTRL_VGPU_GET_BAR_INFO_PARAMS; + /* _ctrlA081vgpuconfig_h_ */ diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/nv-hypervisor.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/nv-hypervisor.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/sdk/nvidia/inc/nv-hypervisor.h 2024-05-12 19:32:36.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/sdk/nvidia/inc/nv-hypervisor.h 2024-09-17 16:48:23.000000000 +0000 @@ -37,13 +37,11 @@ OS_HYPERVISOR_UNKNOWN } HYPERVISOR_TYPE; -#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0 -#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1 -#define CMD_VGPU_VFIO_REGISTER_MDEV 2 -#define CMD_VGPU_VFIO_PRESENT 3 -#define CMD_VFIO_PCI_CORE_PRESENT 4 +#define CMD_VFIO_WAKE_REMOVE_GPU 1 +#define CMD_VGPU_VFIO_PRESENT 2 +#define CMD_VFIO_PCI_CORE_PRESENT 3 -#define MAX_VF_COUNT_PER_GPU 64 +#define MAX_VF_COUNT_PER_GPU 64 typedef enum _VGPU_TYPE_INFO { @@ -54,17 +52,11 @@ typedef struct { - void *vgpuVfioRef; - void *waitQueue; void *nv; - NvU32 *vgpuTypeIds; - NvU8 **vgpuNames; - NvU32 numVgpuTypes; - NvU32 domain; - NvU8 bus; - NvU8 slot; - NvU8 function; - NvBool is_virtfn; + NvU32 domain; + NvU32 bus; + NvU32 device; + NvU32 return_status; } vgpu_vfio_info; typedef struct diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/shared/inc/nvdevid.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/shared/inc/nvdevid.h --- 
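The NVA081_CTRL_VGPU_GET_BAR_INFO_PARAMS structure introduced above returns, besides the per-region BAR sizes, up to NVA081_MAX_SPARSE_REGION_COUNT sparse-mmap windows inside BAR0 (presumably the ranges a VFIO client may map directly). A hedged sketch of walking the reply; how the control is issued is not shown in this header.

    static NvU64 total_sparse_bytes(const NVA081_CTRL_VGPU_GET_BAR_INFO_PARAMS *p)
    {
        NvU64 total = 0;
        NvU32 i;

        /* barSizes[0..NVA081_MAX_BAR_REGION_COUNT-1] give the per-BAR sizes;
         * only the first sparseCount sparse entries are meaningful. */
        for (i = 0; i < p->sparseCount && i < NVA081_MAX_SPARSE_REGION_COUNT; i++)
        {
            /* [sparseOffsets[i], sparseOffsets[i] + sparseSizes[i]) lies in BAR0. */
            total += p->sparseSizes[i];
        }

        return total;
    }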
nvidia-open-gpu-kernel-modules-535.183.01/src/common/shared/inc/nvdevid.h 2024-05-12 19:29:41.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/shared/inc/nvdevid.h 2024-09-17 16:45:23.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 200-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -630,6 +630,7 @@ , CS_INTEL_1B81 , CS_INTEL_18DC , CS_INTEL_7A04 +, CS_INTEL_5795 , CS_AMPERE_AMPEREONE , CS_MAX_PCIE }; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/uproc/os/common/include/liblogdecode.h nvidia-open-gpu-kernel-modules-535.216.01/src/common/uproc/os/common/include/liblogdecode.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/uproc/os/common/include/liblogdecode.h 2024-05-12 19:32:39.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/uproc/os/common/include/liblogdecode.h 2024-09-17 16:48:26.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -189,6 +189,7 @@ void libosExtractLogs(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLog); void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode); +NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInstance); #ifdef __cplusplus } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c nvidia-open-gpu-kernel-modules-535.216.01/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c 2024-05-12 19:32:41.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c 2024-09-17 16:48:27.000000000 +0000 @@ -1249,6 +1249,34 @@ } } +NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInstance) +{ + NvU64 i = (NvU32)(pLogDecode->numLogBuffers); + NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(pLogDecode->sourceName, i * 2); + NVLOG_BUFFER_HANDLE handle = 0; + NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle); + + if (status != NV_OK) + { + return NV_FALSE; + } + + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle]; + if (pNvLogBuffer == NULL) + { + return NV_FALSE; + } + + if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && + DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance && + (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + { + return NV_TRUE; + } + + return NV_FALSE; +} + static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle) { NVLOG_BUFFER_HANDLE handle = 0; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h 2024-05-12 19:30:02.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/common/inc/nvcst.h 
2024-09-17 16:45:44.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2004-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -190,6 +190,7 @@ {PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", NULL}, {PCI_VENDOR_ID_INTEL, 0x18DC, CS_INTEL_18DC, "Intel-IceLake", NULL}, {PCI_VENDOR_ID_INTEL, 0x7A04, CS_INTEL_7A04, "Intel-RaptorLake", Intel_7A04_setupFunc}, + {PCI_VENDOR_ID_INTEL, 0x5795, CS_INTEL_5795, "Intel-GraniteRapids", NULL}, {PCI_VENDOR_ID_NVIDIA, 0x0FAE, CS_NVIDIA_T210, "T210", Nvidia_T210_setupFunc}, {PCI_VENDOR_ID_NVIDIA, 0x0FAF, CS_NVIDIA_T210, "T210", Nvidia_T210_setupFunc}, @@ -354,7 +355,8 @@ {PCI_VENDOR_ID_MELLANOX, 0xA2D0, CS_MELLANOX_BLUEFIELD}, // Mellanox BlueField {PCI_VENDOR_ID_MELLANOX, 0xA2D4, CS_MELLANOX_BLUEFIELD2},// Mellanox BlueField 2 {PCI_VENDOR_ID_MELLANOX, 0xA2D5, CS_MELLANOX_BLUEFIELD2},// Mellanox BlueField 2 Crypto disabled - {PCI_VENDOR_ID_MELLANOX, 0xA2DB, CS_MELLANOX_BLUEFIELD3},// Mellanox BlueField 3 + {PCI_VENDOR_ID_MELLANOX, 0xA2DB, CS_MELLANOX_BLUEFIELD3},// Mellanox BlueField 3 Crypto disabled + {PCI_VENDOR_ID_MELLANOX, 0xA2DA, CS_MELLANOX_BLUEFIELD3},// Mellanox BlueField 3 Crypto enabled {PCI_VENDOR_ID_AMAZON, 0x0200, CS_AMAZON_GRAVITRON2}, // Amazon Gravitron2 {PCI_VENDOR_ID_FUJITSU, 0x1952, CS_FUJITSU_A64FX}, // Fujitsu A64FX {PCI_VENDOR_ID_CADENCE, 0xDC01, CS_PHYTIUM_S2500}, // Phytium S2500 diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/include/nv.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/include/nv.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/include/nv.h 2024-05-12 19:30:05.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/include/nv.h 2024-09-17 16:45:47.000000000 +0000 @@ -1034,12 +1034,11 @@ NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16); NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool); NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8); -NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *); -NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32); -NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *); +NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, + NvU64 *, NvU64 *, NvU32 *, NvU8 *); NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *); -NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *); -NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *); +NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *); +NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *); NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*); nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*); diff -Nru 
nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c 2024-05-12 19:30:07.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/src/os-hypervisor.c 2024-09-17 16:45:49.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -45,6 +45,7 @@ #include "gpu/bus/kern_bus.h" #include // NV_PMC_BOOT_1_VGPU #include "nvdevid.h" +#include "ctrl/ctrl0000/ctrl0000vgpu.h" #define NV_VFIO_PCI_BAR1_REGION_INDEX 1 #define NV_VFIO_PCI_BAR2_REGION_INDEX 2 @@ -80,8 +81,8 @@ { NV_STATUS status = NV_ERR_NOT_SUPPORTED; - if (pVgpuNsIntr->pVgpuVfioRef) - status = osVgpuInjectInterrupt(pVgpuNsIntr->pVgpuVfioRef); + if (osIsVgpuVfioPresent() == NV_TRUE) + return NV_ERR_NOT_SUPPORTED; else { if (pVgpuNsIntr->guestMSIAddr && pVgpuNsIntr->guestMSIData) @@ -102,127 +103,6 @@ return hypervisorGetHypervisorType(pHypervisor); } -static NV_STATUS get_available_instances( - NvU32 *avail_instances, - nv_state_t *pNv, - VGPU_TYPE *vgpuTypeInfo, - NvU32 pgpuIndex, - NvU8 devfn -) -{ - NV_STATUS rmStatus = NV_OK; - OBJGPU *pGpu = NULL; - OBJSYS *pSys = SYS_GET_INSTANCE(); - KernelVgpuMgr *pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); - OBJHYPERVISOR *pHypervisor = SYS_GET_HYPERVISOR(pSys); - - *avail_instances = 0; - - pGpu = NV_GET_NV_PRIV_PGPU(pNv); - if (pGpu == NULL) - { - NV_PRINTF(LEVEL_ERROR, "%s GPU handle is not valid \n", __FUNCTION__); - rmStatus = NV_ERR_INVALID_STATE; - goto exit; - } - - /* TODO: Needs to have a proper fix this for DriverVM config */ - if (gpuIsSriovEnabled(pGpu) && - !(pHypervisor->getProperty(pHypervisor, PDB_PROP_HYPERVISOR_DRIVERVM_ENABLED))) - { - NvU8 fnId = devfn - pGpu->sriovState.firstVFOffset; - - if (fnId > 63) - { - NV_ASSERT(0); - rmStatus = NV_ERR_INVALID_ARGUMENT; - goto exit; - } - - if (IS_MIG_ENABLED(pGpu)) - { - if (IS_MIG_IN_USE(pGpu)) { - NvU64 swizzIdInUseMask = 0; - NvU32 partitionFlag = PARTITIONID_INVALID; - KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); - NvU32 id; - - swizzIdInUseMask = kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager); - - rmStatus = kvgpumgrGetPartitionFlag(vgpuTypeInfo->vgpuTypeId, - &partitionFlag); - if (rmStatus != NV_OK) - { - // Query for a non MIG vgpuType - NV_PRINTF(LEVEL_ERROR, "%s Query for a non MIG vGPU type \n", - __FUNCTION__); - rmStatus = NV_OK; - goto exit; - } - - // Determine valid swizzids not assigned to any vGPU device. 
- FOR_EACH_INDEX_IN_MASK(64, id, swizzIdInUseMask) - { - KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance; - NvU64 mask = 0; - - rmStatus = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, - id, &pKernelMIGGpuInstance); - if (rmStatus != NV_OK) - { - // Didn't find requested GPU instance - NV_PRINTF(LEVEL_ERROR, - "No valid GPU instance with SwizzId - %d found\n", id); - goto exit; - } - - mask = NVBIT64(id); - - if (pKernelMIGGpuInstance->partitionFlag == partitionFlag) - { - // Validate that same ID is not already set and VF is available - if (!(mask & pKernelVgpuMgr->pgpuInfo[pgpuIndex].assignedSwizzIdMask) && - !(pKernelVgpuMgr->pgpuInfo[pgpuIndex].createdVfMask & NVBIT64(fnId))) - { - *avail_instances = 1; - break; - } - } - } - FOR_EACH_INDEX_IN_MASK_END; - } - } - else - { - if (pKernelVgpuMgr->pgpuInfo[pgpuIndex].numCreatedVgpu < vgpuTypeInfo->maxInstance) - { - if (vgpuTypeInfo->gpuInstanceSize) - { - // Query for a MIG vgpuType - NV_PRINTF(LEVEL_ERROR, "%s Query for a MIG vGPU type \n", - __FUNCTION__); - rmStatus = NV_OK; - goto exit; - } - - if (!(pKernelVgpuMgr->pgpuInfo[pgpuIndex].createdVfMask & NVBIT64(fnId))) - { - if (kvgpumgrCheckVgpuTypeCreatable(&pKernelVgpuMgr->pgpuInfo[pgpuIndex], vgpuTypeInfo) == NV_OK) - *avail_instances = 1; - } - } - } - } - else - { - if (kvgpumgrCheckVgpuTypeCreatable(&pKernelVgpuMgr->pgpuInfo[pgpuIndex], vgpuTypeInfo) == NV_OK) - *avail_instances = vgpuTypeInfo->maxInstance - pKernelVgpuMgr->pgpuInfo[pgpuIndex].numCreatedVgpu; - } - -exit: - return rmStatus; -} - #define MAX_STR_LEN 256 NV_STATUS NV_API_CALL nv_vgpu_get_type_info( nvidia_stack_t *sp, @@ -239,6 +119,7 @@ NV_STATUS rmStatus = NV_OK; VGPU_TYPE *vgpuTypeInfo; NvU32 pgpuIndex, i, avail_instances = 0; + OBJGPU *pGpu = NULL; void *fp; NV_ENTER_RM_RUNTIME(sp,fp); @@ -261,24 +142,19 @@ switch (type_info) { - case VGPU_TYPE_NAME: - os_snprintf(buffer, VGPU_STRING_BUFFER_SIZE, "%s\n", - vgpuTypeInfo->vgpuName); - break; - case VGPU_TYPE_DESCRIPTION: - os_snprintf(buffer, MAX_STR_LEN, - "num_heads=%d, frl_config=%d, " - "framebuffer=%dM, max_resolution=%dx%d, max_instance=%d\n", - vgpuTypeInfo->numHeads, vgpuTypeInfo->frlConfig, - vgpuTypeInfo->profileSize >> 20, - vgpuTypeInfo->maxResolutionX, - vgpuTypeInfo->maxResolutionY, - vgpuTypeInfo->maxInstance); - break; case VGPU_TYPE_INSTANCES: - rmStatus = get_available_instances(&avail_instances, pNv, - vgpuTypeInfo, - pgpuIndex, devfn); + pGpu = NV_GET_NV_PRIV_PGPU(pNv); + if (pGpu == NULL) + { + NV_PRINTF(LEVEL_ERROR, "%s GPU handle is not valid \n", + __FUNCTION__); + rmStatus = NV_ERR_INVALID_STATE; + goto exit; + } + + rmStatus = kvgpumgrGetAvailableInstances(&avail_instances, pGpu, + vgpuTypeInfo, + pgpuIndex, devfn); if (rmStatus != NV_OK) goto exit; @@ -314,6 +190,7 @@ { THREAD_STATE_NODE threadState; OBJSYS *pSys = SYS_GET_INSTANCE(); + OBJGPU *pGpu = NULL; KernelVgpuMgr *pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); NV_STATUS rmStatus = NV_OK; NvU32 pgpuIndex, i, avail_instances = 0; @@ -354,9 +231,17 @@ continue; } - rmStatus = get_available_instances(&avail_instances, pNv, - vgpuTypeInfo, pgpuIndex, - devfn); + pGpu = NV_GET_NV_PRIV_PGPU(pNv); + if (pGpu == NULL) + { + NV_PRINTF(LEVEL_ERROR, "%s GPU handle is not valid \n", + __FUNCTION__); + goto exit; + } + + rmStatus = kvgpumgrGetAvailableInstances(&avail_instances, pGpu, + vgpuTypeInfo, pgpuIndex, + devfn); if (rmStatus != NV_OK) { NV_PRINTF(LEVEL_ERROR, "Failed to get available instances for vGPU ID: %d, status: 0x%x\n", @@ -373,6 +258,7 @@ } } +exit: // UNLOCK: release API 
lock rmapiLockRelease(); } @@ -474,56 +360,19 @@ return rmStatus; } -NV_STATUS NV_API_CALL nv_vgpu_get_bar_info( - nvidia_stack_t *sp, - nv_state_t *pNv, - const NvU8 *pMdevUuid, - NvU64 *size, - NvU32 regionIndex, - void *pVgpuVfioRef -) +static NV_STATUS +_nv_vgpu_get_bar_size(OBJGPU *pGpu, KERNEL_HOST_VGPU_DEVICE *pKernelHostVgpuDevice, + NvU32 regionIndex, NvU64 *size, NvU8 *configParams) { - REQUEST_VGPU_INFO_NODE *pRequestVgpu = NULL; - THREAD_STATE_NODE threadState; - NV_STATUS rmStatus = NV_OK, status; - OBJGPU *pGpu = NULL; - KernelBus *pKernelBus; - KERNEL_HOST_VGPU_DEVICE *pKernelHostVgpuDevice; - void *fp = NULL; - NvU32 value = 0; - OBJSYS *pSys = SYS_GET_INSTANCE(); - KernelVgpuMgr * pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); - - NV_ENTER_RM_RUNTIME(sp,fp); - threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); - - // LOCK: acquire API lock - NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HYPERVISOR), exit); + OBJSYS *pSys = SYS_GET_INSTANCE(); + KernelVgpuMgr *pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); + NV_STATUS status; + KernelBus *pKernelBus; + NvU32 value = 0; - pGpu = NV_GET_NV_PRIV_PGPU(pNv); - if (pGpu == NULL) - { - NV_PRINTF(LEVEL_ERROR, "%s GPU handle is not valid \n", __FUNCTION__); - rmStatus = NV_ERR_INVALID_STATE; - goto release_lock; - } pKernelBus = GPU_GET_KERNEL_BUS(pGpu); *size = kbusGetPciBarSize(pKernelBus, regionIndex); - NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, - kvgpumgrGetHostVgpuDeviceFromMdevUuid(pNv->gpu_id, - pMdevUuid, - &pKernelHostVgpuDevice), release_lock); - - pRequestVgpu = pKernelHostVgpuDevice->pRequestVgpuInfoNode; - if (pRequestVgpu == NULL) - { - rmStatus = NV_ERR_INVALID_POINTER; - goto release_lock; - } - - pKernelHostVgpuDevice->pVgpuVfioRef = pVgpuVfioRef; - if (regionIndex == NV_VFIO_PCI_BAR1_REGION_INDEX) { VGPU_TYPE *vgpuTypeInfo; @@ -531,34 +380,36 @@ NvBool bOverrideBar1Size = NV_FALSE; // Read BAR1 length from vgpuTypeInfo - NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, - kvgpumgrGetVgpuTypeInfo(pKernelHostVgpuDevice->vgpuType, &vgpuTypeInfo), release_lock); - - *size = vgpuTypeInfo->bar1Length << 20; + NV_ASSERT_OK_OR_RETURN(kvgpumgrGetVgpuTypeInfo(pKernelHostVgpuDevice->vgpuType, + &vgpuTypeInfo)); - NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, - kvgpumgrGetPgpuIndex(pKernelVgpuMgr, pNv->gpu_id, &pgpuIndex), release_lock); + *size = vgpuTypeInfo->bar1Length << 20; + NV_ASSERT_OK_OR_RETURN(kvgpumgrGetPgpuIndex(pKernelVgpuMgr, pGpu->gpuId, &pgpuIndex)); - /* + /* * check for 'override_bar1_size' param in vgpuExtraParams list first, * if param is missing there then check it in vgpu_params list */ status = nv_parse_config_params((const char*)vgpuTypeInfo->vgpuExtraParams, "override_bar1_size", ';', &value); - - if (status == NV_OK && value) { + if (status == NV_OK && value) + { bOverrideBar1Size = NV_TRUE; - } else if (status == NV_ERR_OBJECT_NOT_FOUND) { - status = nv_parse_config_params(pRequestVgpu->configParams, + } + else if (status == NV_ERR_OBJECT_NOT_FOUND) + { + status = nv_parse_config_params((const char *)configParams, "override_bar1_size", ',', &value); if (status == NV_OK && value) bOverrideBar1Size = NV_TRUE; } - if (bOverrideBar1Size) { + + if (bOverrideBar1Size) + { NvU64 bar1SizeInBytes, guestBar1; NvU64 gpuBar1LowerLimit = 256 * 1024 * 1024; // bar1 lower limit for override_bar1_length parameter - bar1SizeInBytes = kbusGetPciBarSize(pKernelBus, NV_VFIO_PCI_BAR1_REGION_INDEX); + if (pKernelVgpuMgr->pgpuInfo[pgpuIndex].sriovEnabled) { *size = 
pGpu->sriovState.vfBarSize[1]; @@ -573,7 +424,7 @@ else if (regionIndex == NV_VFIO_PCI_BAR2_REGION_INDEX || regionIndex == NV_VFIO_PCI_BAR3_REGION_INDEX) { - status = nv_parse_config_params(pRequestVgpu->configParams, + status = nv_parse_config_params((const char *)configParams, "address64", ',', &value); if ((status != NV_OK) || ((status == NV_OK) && (value != 0))) @@ -585,53 +436,51 @@ } } -release_lock: - // UNLOCK: release API lock - rmapiLockRelease(); - -exit: - threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); - NV_EXIT_RM_RUNTIME(sp,fp); - - return rmStatus; -} - -NV_STATUS osVgpuVfioWake( - void *waitQueue -) -{ - vgpu_vfio_info vgpu_info; - - vgpu_info.waitQueue = waitQueue; - - return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VGPU_VFIO_WAKE_WAIT_QUEUE); + return NV_OK; } -NV_STATUS NV_API_CALL nv_vgpu_start( +NV_STATUS NV_API_CALL nv_vgpu_get_bar_info +( nvidia_stack_t *sp, + nv_state_t *pNv, const NvU8 *pMdevUuid, - void *waitQueue, - NvS32 *returnStatus, - NvU8 *vmName, - NvU32 qemuPid + NvU64 *barSizes, + NvU64 *sparseOffsets, + NvU64 *sparseSizes, + NvU32 *sparseCount, + NvU8 *configParams ) { THREAD_STATE_NODE threadState; NV_STATUS rmStatus = NV_OK; + OBJGPU *pGpu = NULL; void *fp = NULL; NV_ENTER_RM_RUNTIME(sp,fp); threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); // LOCK: acquire API lock - if ((rmStatus = rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HYPERVISOR)) == NV_OK) - { - rmStatus = kvgpumgrStart(pMdevUuid, waitQueue, returnStatus, - vmName, qemuPid); + NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, + rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HYPERVISOR), exit); - // UNLOCK: release API lock - rmapiLockRelease(); + pGpu = NV_GET_NV_PRIV_PGPU(pNv); + if (pGpu == NULL) + { + NV_PRINTF(LEVEL_ERROR, "%s GPU handle is not valid \n", __FUNCTION__); + rmStatus = NV_ERR_INVALID_STATE; + goto release_lock; } + + NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, + nv_vgpu_rm_get_bar_info(pGpu, pMdevUuid, barSizes, + sparseOffsets, sparseSizes, + sparseCount, configParams), + release_lock); +release_lock: + // UNLOCK: release API lock + rmapiLockRelease(); + +exit: threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); NV_EXIT_RM_RUNTIME(sp,fp); @@ -674,194 +523,153 @@ return rmStatus; } -NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap( - nvidia_stack_t *sp , - nv_state_t *pNv, - const NvU8 *pMdevUuid, - NvU64 **offsets, - NvU64 **sizes, - NvU32 *numAreas +static NV_STATUS _nv_vgpu_get_sparse_mmap( + OBJGPU *pGpu, + KERNEL_HOST_VGPU_DEVICE *pKernelHostVgpuDevice, + NvU64 *offsets, + NvU64 *sizes, + NvU32 *numAreas, + NvU8 *configParams ) { - THREAD_STATE_NODE threadState; - NV_STATUS rmStatus = NV_ERR_INVALID_STATE, status; - OBJGPU *pGpu = NULL; - POBJTMR pTmr = NULL; - KernelFifo *pKernelFifo = NULL; - void *fp = NULL; - REQUEST_VGPU_INFO_NODE *pRequestVgpu = NULL; - KERNEL_HOST_VGPU_DEVICE *pKernelHostVgpuDevice; - NvU32 bar0TmrMapSize = 0, bar0FifoMapSize = 0, value = 0; - NvU64 bar0TmrMapOffset = 0, bar0FifoMapOffset = 0; - NvU64 *vfRegionSizes = NULL; - NvU64 *vfRegionOffsets = NULL; - KernelBif *pKernelBif = NULL; - + NV_STATUS rmStatus = NV_OK, status; + POBJTMR pTmr = GPU_GET_TIMER(pGpu);; + KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);; + KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); + NvU32 value = 0; - NV_ENTER_RM_RUNTIME(sp,fp); - threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); - - // LOCK: acquire API lock - if ((rmStatus = rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HYPERVISOR)) == NV_OK) + *numAreas = 0; + 
if (pKernelHostVgpuDevice->gfid != 0) { - pGpu = NV_GET_NV_PRIV_PGPU(pNv); - - if (pGpu == NULL) - { - rmStatus = NV_ERR_INVALID_STATE; - goto cleanup; - } - pTmr = GPU_GET_TIMER(pGpu); - pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); - pKernelBif = GPU_GET_KERNEL_BIF(pGpu); - *numAreas = 0; - rmStatus = kvgpumgrGetHostVgpuDeviceFromMdevUuid(pNv->gpu_id, pMdevUuid, - &pKernelHostVgpuDevice); + rmStatus = kbifGetVFSparseMmapRegions_HAL(pGpu, pKernelBif, pKernelHostVgpuDevice, + os_page_size, numAreas, NULL, NULL); if (rmStatus == NV_OK) { - if (pKernelHostVgpuDevice->gfid != 0) + if (*numAreas > NVA081_MAX_SPARSE_REGION_COUNT) { - rmStatus = kbifGetVFSparseMmapRegions_HAL(pGpu, pKernelBif, pKernelHostVgpuDevice, os_page_size, - numAreas, NULL, NULL); - if (rmStatus == NV_OK) - { - os_alloc_mem((void **)&vfRegionOffsets, sizeof(NvU64) * (*numAreas)); - os_alloc_mem((void **)&vfRegionSizes, sizeof (NvU64) * (*numAreas)); - if (vfRegionOffsets && vfRegionSizes) - { - rmStatus = kbifGetVFSparseMmapRegions_HAL(pGpu, pKernelBif, pKernelHostVgpuDevice, os_page_size, - numAreas, vfRegionOffsets, vfRegionSizes); - if (rmStatus == NV_OK) - { - *offsets = vfRegionOffsets; - *sizes = vfRegionSizes; - } - } - else - { - if (vfRegionOffsets != NULL) - os_free_mem(vfRegionOffsets); + NV_PRINTF(LEVEL_ERROR, "Not enough space for sparse mmap region info\n"); + return NV_ERR_INSUFFICIENT_RESOURCES; + } - if (vfRegionSizes != NULL) - os_free_mem(vfRegionSizes); - rmStatus = NV_ERR_INSUFFICIENT_RESOURCES; - } - } - } - else + rmStatus = kbifGetVFSparseMmapRegions_HAL(pGpu, pKernelBif, pKernelHostVgpuDevice, os_page_size, + numAreas, offsets, sizes); + if (rmStatus != NV_OK) + return rmStatus; + } + } + else + { + status = nv_parse_config_params((const char *)configParams, + "direct_gpu_timer_access", ',', &value); + if ((status == NV_OK) && (value != 0)) + { + NvU64 offset = 0; + NvU32 size = 0; + + rmStatus = tmrGetTimerBar0MapInfo_HAL(pGpu, pTmr, &offset, &size); + if (rmStatus == NV_OK) { - pRequestVgpu = pKernelHostVgpuDevice->pRequestVgpuInfoNode; - if (pRequestVgpu == NULL) - { - rmStatus = NV_ERR_INVALID_POINTER; - goto cleanup; - } + offsets[*numAreas] = offset; + sizes[*numAreas] = size; + (*numAreas)++; + } + } - status = nv_parse_config_params(pRequestVgpu->configParams, "direct_gpu_timer_access", ',', &value); - if ((status == NV_OK) && (value != 0)) - { - rmStatus = tmrGetTimerBar0MapInfo_HAL(pGpu, pTmr, - &bar0TmrMapOffset, - &bar0TmrMapSize); - if (rmStatus == NV_OK) - (*numAreas)++; - else - NV_PRINTF(LEVEL_ERROR, - "%s Failed to get NV_PTIMER region \n", - __FUNCTION__); - } + value = 0; + { + NvU64 offset = 0; + NvU32 size = 0; - value = 0; - { - status = kfifoGetUsermodeMapInfo_HAL(pGpu, pKernelFifo, - &bar0FifoMapOffset, - &bar0FifoMapSize); - if (status == NV_OK) - (*numAreas)++; - } + status = kfifoGetUsermodeMapInfo_HAL(pGpu, pKernelFifo, &offset, &size); - if (*numAreas != 0) - { - NvU32 i = 0; - NvU64 *tmpOffset, *tmpSize; - os_alloc_mem((void **)offsets, sizeof(NvU64) * (*numAreas)); - os_alloc_mem((void **)sizes, sizeof (NvU64) * (*numAreas)); + if (status == NV_OK) + { + offsets[*numAreas] = offset; + sizes[*numAreas] = size; + (*numAreas)++; + } + } + } - tmpOffset = *offsets; - tmpSize = *sizes; + return rmStatus; +} - if (bar0TmrMapSize != 0) - { - tmpOffset[i] = bar0TmrMapOffset; - tmpSize[i] = bar0TmrMapSize; - i++; - } +NV_STATUS nv_vgpu_rm_get_bar_info +( + OBJGPU *pGpu, + const NvU8 *pMdevUuid, + NvU64 *barSizes, + NvU64 *sparseOffsets, + NvU64 *sparseSizes, + NvU32 *sparseCount, 
+ NvU8 *configParams +) +{ + KERNEL_HOST_VGPU_DEVICE *pKernelHostVgpuDevice; + NV_STATUS rmStatus; + NvU32 i = 0; - if (bar0FifoMapSize != 0) - { - tmpOffset[i] = bar0FifoMapOffset; - tmpSize[i] = bar0FifoMapSize; - } - } - } + NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, + kvgpumgrGetHostVgpuDeviceFromMdevUuid(pGpu->gpuId, + pMdevUuid, + &pKernelHostVgpuDevice), + exit); + + for (i = 0; i < NVA081_MAX_BAR_REGION_COUNT; i++) + { + /* + * For SRIOV, only VF BAR1 is queried via RM, others BARs are directly + * queried via VF config space in vgpu-vfio + */ + if (gpuIsSriovEnabled(pGpu) && (i != NV_VFIO_PCI_BAR1_REGION_INDEX)) + { + barSizes[i] = 0; + continue; } -cleanup: - // UNLOCK: release API lock - rmapiLockRelease(); + rmStatus = _nv_vgpu_get_bar_size(pGpu, pKernelHostVgpuDevice, i, + &barSizes[i], configParams); + if (rmStatus != NV_OK) + { + NV_PRINTF(LEVEL_ERROR, "Failed to query BAR size for index %u 0x%x\n", + i, rmStatus); + goto exit; + } } - threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); - NV_EXIT_RM_RUNTIME(sp,fp); + NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, + _nv_vgpu_get_sparse_mmap(pGpu, pKernelHostVgpuDevice, + sparseOffsets, sparseSizes, + sparseCount, configParams), + exit); +exit: return rmStatus; } -NV_STATUS NV_API_CALL nv_vgpu_update_request( - nvidia_stack_t *sp , - const NvU8 *pMdevUuid, - VGPU_DEVICE_STATE deviceState, - NvU64 *offsets, - NvU64 *sizes, - const char *configParams +NV_STATUS NV_API_CALL nv_gpu_unbind_event +( + nvidia_stack_t *sp, + NvU32 gpuId, + NvBool *isEventNotified ) { THREAD_STATE_NODE threadState; - NV_STATUS rmStatus = NV_ERR_OBJECT_NOT_FOUND; + NV_STATUS rmStatus = NV_OK; void *fp = NULL; - REQUEST_VGPU_INFO_NODE *pRequestVgpu = NULL; - OBJSYS *pSys = SYS_GET_INSTANCE(); - KernelVgpuMgr *pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); NV_ENTER_RM_RUNTIME(sp,fp); threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); - if (offsets != NULL) - os_free_mem(offsets); - - if (sizes != NULL) - os_free_mem(sizes); - // LOCK: acquire API lock if ((rmStatus = rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HYPERVISOR)) == NV_OK) { - for (pRequestVgpu = listHead(&pKernelVgpuMgr->listRequestVgpuHead); - pRequestVgpu != NULL; - pRequestVgpu = listNext(&pKernelVgpuMgr->listRequestVgpuHead, pRequestVgpu)) - { - if (portMemCmp(pRequestVgpu->mdevUuid, pMdevUuid, VGPU_UUID_SIZE) == 0) - { - - if (configParams != NULL) - portStringCopy(pRequestVgpu->configParams, - sizeof(pRequestVgpu->configParams), - configParams, (portStringLength(configParams) + 1)); - - pRequestVgpu->deviceState = deviceState; - rmStatus = NV_OK; - } - } + /* + * Send gpu_id in "status" field of the event so that nvidia-vgpu-mgr + * daemon knows which GPU is being unbound + */ + CliAddSystemEvent(NV0000_NOTIFIERS_GPU_UNBIND_EVENT, gpuId, isEventNotified); // UNLOCK: release API lock rmapiLockRelease(); @@ -874,7 +682,9 @@ } NV_STATUS NV_API_CALL nv_gpu_bind_event( - nvidia_stack_t *sp + nvidia_stack_t *sp, + NvU32 gpuId, + NvBool *isEventNotified ) { THREAD_STATE_NODE threadState; @@ -887,7 +697,7 @@ // LOCK: acquire API lock if ((rmStatus = rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HYPERVISOR)) == NV_OK) { - CliAddSystemEvent(NV0000_NOTIFIERS_GPU_BIND_EVENT, 0); + CliAddSystemEvent(NV0000_NOTIFIERS_GPU_BIND_EVENT, gpuId, isEventNotified); // UNLOCK: release API lock rmapiLockRelease(); @@ -899,118 +709,34 @@ return rmStatus; } -NV_STATUS osVgpuInjectInterrupt(void *vgpuVfioRef) +NV_STATUS osIsVgpuVfioPresent(void) { vgpu_vfio_info vgpu_info; - 
vgpu_info.vgpuVfioRef = vgpuVfioRef; - - return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VGPU_VFIO_INJECT_INTERRUPT); -} - -NV_STATUS osVgpuRegisterMdev -( - OS_GPU_INFO *pOsGpuInfo -) -{ - NV_STATUS status = NV_OK; - vgpu_vfio_info vgpu_info = {0}; - OBJSYS *pSys = SYS_GET_INSTANCE(); - KernelVgpuMgr *pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); - KERNEL_PHYS_GPU_INFO *pPhysGpuInfo; - NvU32 pgpuIndex, i; - OBJHYPERVISOR *pHypervisor = SYS_GET_HYPERVISOR(pSys); - - status = kvgpumgrGetPgpuIndex(pKernelVgpuMgr, pOsGpuInfo->gpu_id, &pgpuIndex); - if (status != NV_OK) - return status; - - pPhysGpuInfo = &(pKernelVgpuMgr->pgpuInfo[pgpuIndex]); - - vgpu_info.numVgpuTypes = pKernelVgpuMgr->pgpuInfo[pgpuIndex].numVgpuTypes; - - status = os_alloc_mem((void **)&vgpu_info.vgpuTypeIds, - ((vgpu_info.numVgpuTypes) * sizeof(NvU32))); - if (status != NV_OK) - goto free_mem; - - status = os_alloc_mem((void **)&vgpu_info.vgpuNames, - ((vgpu_info.numVgpuTypes) * sizeof(char *))); - if (status != NV_OK) - goto free_mem; - - vgpu_info.nv = pOsGpuInfo; - for (i = 0; i < pPhysGpuInfo->numVgpuTypes; i++) - { - status = os_alloc_mem((void *)&vgpu_info.vgpuNames[i], (VGPU_STRING_BUFFER_SIZE * sizeof(char))); - if (status != NV_OK) - goto free_mem; - - vgpu_info.vgpuTypeIds[i] = pPhysGpuInfo->vgpuTypes[i]->vgpuTypeId; - os_snprintf((char *) vgpu_info.vgpuNames[i], VGPU_STRING_BUFFER_SIZE, "%s\n", pPhysGpuInfo->vgpuTypes[i]->vgpuName); - } - - if ((!pPhysGpuInfo->sriovEnabled) || - (pHypervisor->getProperty(pHypervisor, PDB_PROP_HYPERVISOR_DRIVERVM_ENABLED))) - { - vgpu_info.is_virtfn = NV_FALSE; - status = os_call_vgpu_vfio((void *)&vgpu_info, CMD_VGPU_VFIO_REGISTER_MDEV); - } - else - { - for (i = 0; i < MAX_VF_COUNT_PER_GPU; i++) - { - if (pPhysGpuInfo->vfPciInfo[i].isNvidiaAttached) - { - vgpu_info.is_virtfn = NV_TRUE; - vgpu_info.domain = pPhysGpuInfo->vfPciInfo[i].domain; - vgpu_info.bus = pPhysGpuInfo->vfPciInfo[i].bus; - vgpu_info.slot = pPhysGpuInfo->vfPciInfo[i].slot; - vgpu_info.function = pPhysGpuInfo->vfPciInfo[i].function; - - status = os_call_vgpu_vfio((void *)&vgpu_info, CMD_VGPU_VFIO_REGISTER_MDEV); - if (status == NV_OK) - { - pPhysGpuInfo->vfPciInfo[i].isMdevAttached = NV_TRUE; - } - } - } - } - -free_mem: - if (vgpu_info.vgpuTypeIds) - os_free_mem(vgpu_info.vgpuTypeIds); - - if (vgpu_info.vgpuNames) - { - for (i = 0; i < pPhysGpuInfo->numVgpuTypes; i++) - { - if (vgpu_info.vgpuNames[i]) - { - os_free_mem(vgpu_info.vgpuNames[i]); - } - } - os_free_mem(vgpu_info.vgpuNames); - } - - return status; + return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VGPU_VFIO_PRESENT); } -NV_STATUS osIsVgpuVfioPresent(void) +NV_STATUS osIsVfioPciCorePresent(void) { vgpu_vfio_info vgpu_info; - return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VGPU_VFIO_PRESENT); + return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VFIO_PCI_CORE_PRESENT); } -NV_STATUS osIsVfioPciCorePresent(void) +void osWakeRemoveVgpu(NvU32 gpuId, NvU32 returnStatus) { vgpu_vfio_info vgpu_info; - return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VFIO_PCI_CORE_PRESENT); + vgpu_info.return_status = returnStatus; + vgpu_info.domain = GPU_32_BIT_ID_DECODE_DOMAIN(gpuId); + vgpu_info.bus = GPU_32_BIT_ID_DECODE_BUS(gpuId); + vgpu_info.device = GPU_32_BIT_ID_DECODE_DEVICE(gpuId); + + os_call_vgpu_vfio((void *)&vgpu_info, CMD_VFIO_WAKE_REMOVE_GPU); } + void initVGXSpecificRegistry(OBJGPU *pGpu) { NvU32 data32; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/src/osapi.c 
nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/src/osapi.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/src/osapi.c 2024-05-12 19:30:08.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/src/osapi.c 2024-09-17 16:45:50.000000000 +0000 @@ -481,6 +481,11 @@ status = NV_ERR_NO_MEMORY; goto done; } + new_event->hParent = hParent; + new_event->nvfp = nvfp; + new_event->fd = fd; + new_event->active = NV_TRUE; + new_event->refcount = 0; portSyncSpinlockAcquire(nv->event_spinlock); for (event = nv->event_list; event; event = event->next) @@ -501,12 +506,6 @@ done: if (status == NV_OK) { - new_event->hParent = hParent; - new_event->nvfp = nvfp; - new_event->fd = fd; - new_event->active = NV_TRUE; - new_event->refcount = 0; - nvfp->bCleanupRmapi = NV_TRUE; NV_PRINTF(LEVEL_INFO, "allocated OS event:\n"); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/src/osinit.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/src/osinit.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/arch/nvalloc/unix/src/osinit.c 2024-05-12 19:30:08.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/arch/nvalloc/unix/src/osinit.c 2024-09-17 16:45:50.000000000 +0000 @@ -59,6 +59,7 @@ #include #include "liblogdecode.h" #include +#include #include #include @@ -385,6 +386,13 @@ gpuSetDisconnectedProperties(pGpu); + if (IS_GSP_CLIENT(pGpu)) + { + // Notify all channels of the error so that UVM can fail gracefully + KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu); + kgspRcAndNotifyAllChannels(pGpu, pKernelGsp, ROBUST_CHANNEL_GPU_HAS_FALLEN_OFF_THE_BUS, NV_FALSE); + } + // Trigger the OS's PCI recovery mechanism if (nv_pci_trigger_recovery(nv) != NV_OK) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/exports_link_command.txt nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/exports_link_command.txt --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/exports_link_command.txt 2024-05-12 19:33:14.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/exports_link_command.txt 2024-09-17 16:50:40.000000000 +0000 @@ -181,13 +181,11 @@ --undefined=nv_vgpu_create_request --undefined=nv_vgpu_delete --undefined=nv_vgpu_get_bar_info ---undefined=nv_vgpu_start --undefined=nv_vgpu_get_type_ids --undefined=nv_vgpu_get_type_info ---undefined=nv_vgpu_get_sparse_mmap ---undefined=nv_vgpu_update_request --undefined=nv_vgpu_process_vf_info --undefined=nv_gpu_bind_event +--undefined=nv_gpu_unbind_event --undefined=rm_check_for_gpu_surprise_removal --undefined=rm_set_external_kernel_client_count --undefined=rm_schedule_gpu_wakeup diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_client_resource_nvoc.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_client_resource_nvoc.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_client_resource_nvoc.c 2024-05-12 19:36:30.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_client_resource_nvoc.c 2024-09-17 16:54:38.000000000 +0000 @@ -1461,18 +1461,18 @@ #endif }, { /* [84] */ -#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) +#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u) /*pFunc=*/ (void (*)(void)) NULL, #else - /*pFunc=*/ (void (*)(void)) cliresCtrlCmdVgpuGetStartData_IMPL, -#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) - /*flags=*/ 0x10u, + /*pFunc=*/ (void (*)(void)) 
cliresCtrlCmdVgpuVfioNotifyRMStatus_IMPL, +#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u) + /*flags=*/ 0x4u, /*accessRight=*/0x0u, - /*methodId=*/ 0xc01u, - /*paramSize=*/ sizeof(NV0000_CTRL_VGPU_GET_START_DATA_PARAMS), + /*methodId=*/ 0xc05u, + /*paramSize=*/ sizeof(NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS), /*pClassInfo=*/ &(__nvoc_class_def_RmClientResource.classInfo), #if NV_PRINTF_STRINGS_ALLOWED - /*func=*/ "cliresCtrlCmdVgpuGetStartData" + /*func=*/ "cliresCtrlCmdVgpuVfioNotifyRMStatus" #endif }, { /* [85] */ @@ -2094,10 +2094,6 @@ #endif #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) - pThis->__cliresCtrlCmdVgpuGetStartData__ = &cliresCtrlCmdVgpuGetStartData_IMPL; -#endif - -#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) pThis->__cliresCtrlCmdVgpuGetVgpuVersion__ = &cliresCtrlCmdVgpuGetVgpuVersion_IMPL; #endif @@ -2105,6 +2101,10 @@ pThis->__cliresCtrlCmdVgpuSetVgpuVersion__ = &cliresCtrlCmdVgpuSetVgpuVersion_IMPL; #endif +#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u) + pThis->__cliresCtrlCmdVgpuVfioNotifyRMStatus__ = &cliresCtrlCmdVgpuVfioNotifyRMStatus_IMPL; +#endif + #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) pThis->__cliresCtrlCmdSystemNVPCFGetPowerModeInfo__ = &cliresCtrlCmdSystemNVPCFGetPowerModeInfo_IMPL; #endif diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_client_resource_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_client_resource_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_client_resource_nvoc.h 2024-05-12 19:36:30.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_client_resource_nvoc.h 2024-09-17 16:54:38.000000000 +0000 @@ -164,9 +164,9 @@ NV_STATUS (*__cliresCtrlCmdSyncGpuBoostGroupCreate__)(struct RmClientResource *, NV0000_SYNC_GPU_BOOST_GROUP_CREATE_PARAMS *); NV_STATUS (*__cliresCtrlCmdSyncGpuBoostGroupDestroy__)(struct RmClientResource *, NV0000_SYNC_GPU_BOOST_GROUP_DESTROY_PARAMS *); NV_STATUS (*__cliresCtrlCmdSyncGpuBoostGroupInfo__)(struct RmClientResource *, NV0000_SYNC_GPU_BOOST_GROUP_INFO_PARAMS *); - NV_STATUS (*__cliresCtrlCmdVgpuGetStartData__)(struct RmClientResource *, NV0000_CTRL_VGPU_GET_START_DATA_PARAMS *); NV_STATUS (*__cliresCtrlCmdVgpuGetVgpuVersion__)(struct RmClientResource *, NV0000_CTRL_VGPU_GET_VGPU_VERSION_PARAMS *); NV_STATUS (*__cliresCtrlCmdVgpuSetVgpuVersion__)(struct RmClientResource *, NV0000_CTRL_VGPU_SET_VGPU_VERSION_PARAMS *); + NV_STATUS (*__cliresCtrlCmdVgpuVfioNotifyRMStatus__)(struct RmClientResource *, NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS *); NV_STATUS (*__cliresCtrlCmdSystemNVPCFGetPowerModeInfo__)(struct RmClientResource *, NV0000_CTRL_CMD_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS *); NV_STATUS (*__cliresCtrlCmdSystemSyncExternalFabricMgmt__)(struct RmClientResource *, NV0000_CTRL_CMD_SYSTEM_SYNC_EXTERNAL_FABRIC_MGMT_PARAMS *); NV_STATUS (*__cliresCtrlCmdSystemPfmreqhndlrCtrl__)(struct RmClientResource *, NV0000_CTRL_SYSTEM_PFM_REQ_HNDLR_CTRL_PARAMS *); @@ -316,9 +316,9 @@ #define cliresCtrlCmdSyncGpuBoostGroupCreate(pRmCliRes, pParams) cliresCtrlCmdSyncGpuBoostGroupCreate_DISPATCH(pRmCliRes, pParams) #define cliresCtrlCmdSyncGpuBoostGroupDestroy(pRmCliRes, pParams) cliresCtrlCmdSyncGpuBoostGroupDestroy_DISPATCH(pRmCliRes, pParams) #define cliresCtrlCmdSyncGpuBoostGroupInfo(pRmCliRes, pParams) cliresCtrlCmdSyncGpuBoostGroupInfo_DISPATCH(pRmCliRes, pParams) -#define cliresCtrlCmdVgpuGetStartData(pRmCliRes, pVgpuStartParams) 
cliresCtrlCmdVgpuGetStartData_DISPATCH(pRmCliRes, pVgpuStartParams) #define cliresCtrlCmdVgpuGetVgpuVersion(pRmCliRes, vgpuVersionInfo) cliresCtrlCmdVgpuGetVgpuVersion_DISPATCH(pRmCliRes, vgpuVersionInfo) #define cliresCtrlCmdVgpuSetVgpuVersion(pRmCliRes, vgpuVersionInfo) cliresCtrlCmdVgpuSetVgpuVersion_DISPATCH(pRmCliRes, vgpuVersionInfo) +#define cliresCtrlCmdVgpuVfioNotifyRMStatus(pRmCliRes, pVgpuDeleteParams) cliresCtrlCmdVgpuVfioNotifyRMStatus_DISPATCH(pRmCliRes, pVgpuDeleteParams) #define cliresCtrlCmdSystemNVPCFGetPowerModeInfo(pRmCliRes, pParams) cliresCtrlCmdSystemNVPCFGetPowerModeInfo_DISPATCH(pRmCliRes, pParams) #define cliresCtrlCmdSystemSyncExternalFabricMgmt(pRmCliRes, pExtFabricMgmtParams) cliresCtrlCmdSystemSyncExternalFabricMgmt_DISPATCH(pRmCliRes, pExtFabricMgmtParams) #define cliresCtrlCmdSystemPfmreqhndlrCtrl(pRmCliRes, pParams) cliresCtrlCmdSystemPfmreqhndlrCtrl_DISPATCH(pRmCliRes, pParams) @@ -888,12 +888,6 @@ return pRmCliRes->__cliresCtrlCmdSyncGpuBoostGroupInfo__(pRmCliRes, pParams); } -NV_STATUS cliresCtrlCmdVgpuGetStartData_IMPL(struct RmClientResource *pRmCliRes, NV0000_CTRL_VGPU_GET_START_DATA_PARAMS *pVgpuStartParams); - -static inline NV_STATUS cliresCtrlCmdVgpuGetStartData_DISPATCH(struct RmClientResource *pRmCliRes, NV0000_CTRL_VGPU_GET_START_DATA_PARAMS *pVgpuStartParams) { - return pRmCliRes->__cliresCtrlCmdVgpuGetStartData__(pRmCliRes, pVgpuStartParams); -} - NV_STATUS cliresCtrlCmdVgpuGetVgpuVersion_IMPL(struct RmClientResource *pRmCliRes, NV0000_CTRL_VGPU_GET_VGPU_VERSION_PARAMS *vgpuVersionInfo); static inline NV_STATUS cliresCtrlCmdVgpuGetVgpuVersion_DISPATCH(struct RmClientResource *pRmCliRes, NV0000_CTRL_VGPU_GET_VGPU_VERSION_PARAMS *vgpuVersionInfo) { @@ -906,6 +900,12 @@ return pRmCliRes->__cliresCtrlCmdVgpuSetVgpuVersion__(pRmCliRes, vgpuVersionInfo); } +NV_STATUS cliresCtrlCmdVgpuVfioNotifyRMStatus_IMPL(struct RmClientResource *pRmCliRes, NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS *pVgpuDeleteParams); + +static inline NV_STATUS cliresCtrlCmdVgpuVfioNotifyRMStatus_DISPATCH(struct RmClientResource *pRmCliRes, NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS *pVgpuDeleteParams) { + return pRmCliRes->__cliresCtrlCmdVgpuVfioNotifyRMStatus__(pRmCliRes, pVgpuDeleteParams); +} + NV_STATUS cliresCtrlCmdSystemNVPCFGetPowerModeInfo_IMPL(struct RmClientResource *pRmCliRes, NV0000_CTRL_CMD_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS *pParams); static inline NV_STATUS cliresCtrlCmdSystemNVPCFGetPowerModeInfo_DISPATCH(struct RmClientResource *pRmCliRes, NV0000_CTRL_CMD_SYSTEM_NVPCF_GET_POWER_MODE_INFO_PARAMS *pParams) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_event_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_event_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_event_nvoc.h 2024-05-12 19:36:35.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_event_nvoc.h 2024-09-17 16:54:46.000000000 +0000 @@ -535,7 +535,7 @@ #undef PRIVATE_FIELD -void CliAddSystemEvent(NvU32, NvU32); +void CliAddSystemEvent(NvU32, NvU32, NvBool *); NvBool CliDelObjectEvents(NvHandle hClient, NvHandle hObject); NvBool CliGetEventInfo(NvHandle hClient, NvHandle hEvent, struct Event **ppEvent); NV_STATUS CliGetEventNotificationList(NvHandle hClient, NvHandle hObject, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_gsp_nvoc.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_gsp_nvoc.c --- 
nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_gsp_nvoc.c 2024-05-12 19:36:49.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_gsp_nvoc.c 2024-09-17 16:55:03.000000000 +0000 @@ -762,6 +762,16 @@ pThis->__kgspInitVgpuPartitionLogging__ = &kgspInitVgpuPartitionLogging_IMPL; } + // Hal function -- kgspPreserveVgpuPartitionLogging + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kgspPreserveVgpuPartitionLogging__ = &kgspPreserveVgpuPartitionLogging_395e98; + } + else + { + pThis->__kgspPreserveVgpuPartitionLogging__ = &kgspPreserveVgpuPartitionLogging_IMPL; + } + // Hal function -- kgspFreeVgpuPartitionLogging if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_gsp_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_gsp_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_gsp_nvoc.h 2024-05-12 19:36:49.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_gsp_nvoc.h 2024-09-17 16:55:04.000000000 +0000 @@ -292,7 +292,8 @@ const BINDATA_ARCHIVE *(*__kgspGetBinArchiveBooterUnloadUcode__)(struct KernelGsp *); NvU64 (*__kgspGetMinWprHeapSizeMB__)(struct OBJGPU *, struct KernelGsp *); NvU64 (*__kgspGetMaxWprHeapSizeMB__)(struct OBJGPU *, struct KernelGsp *); - NV_STATUS (*__kgspInitVgpuPartitionLogging__)(struct OBJGPU *, struct KernelGsp *, NvU32, NvU64, NvU64, NvU64, NvU64); + NV_STATUS (*__kgspInitVgpuPartitionLogging__)(struct OBJGPU *, struct KernelGsp *, NvU32, NvU64, NvU64, NvU64, NvU64, NvBool *); + NV_STATUS (*__kgspPreserveVgpuPartitionLogging__)(struct OBJGPU *, struct KernelGsp *, NvU32); NV_STATUS (*__kgspFreeVgpuPartitionLogging__)(struct OBJGPU *, struct KernelGsp *, NvU32); const char *(*__kgspGetSignatureSectionNamePrefix__)(struct OBJGPU *, struct KernelGsp *); NV_STATUS (*__kgspSetupGspFmcArgs__)(struct OBJGPU *, struct KernelGsp *, GSP_FIRMWARE *); @@ -482,8 +483,10 @@ #define kgspGetMinWprHeapSizeMB_HAL(pGpu, pKernelGsp) kgspGetMinWprHeapSizeMB_DISPATCH(pGpu, pKernelGsp) #define kgspGetMaxWprHeapSizeMB(pGpu, pKernelGsp) kgspGetMaxWprHeapSizeMB_DISPATCH(pGpu, pKernelGsp) #define kgspGetMaxWprHeapSizeMB_HAL(pGpu, pKernelGsp) kgspGetMaxWprHeapSizeMB_DISPATCH(pGpu, pKernelGsp) -#define kgspInitVgpuPartitionLogging(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize) kgspInitVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize) -#define kgspInitVgpuPartitionLogging_HAL(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize) kgspInitVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize) +#define kgspInitVgpuPartitionLogging(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize, pPreserveLogBufferFull) kgspInitVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize, 
pPreserveLogBufferFull) +#define kgspInitVgpuPartitionLogging_HAL(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize, pPreserveLogBufferFull) kgspInitVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize, pPreserveLogBufferFull) +#define kgspPreserveVgpuPartitionLogging(pGpu, pKernelGsp, gfid) kgspPreserveVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid) +#define kgspPreserveVgpuPartitionLogging_HAL(pGpu, pKernelGsp, gfid) kgspPreserveVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid) #define kgspFreeVgpuPartitionLogging(pGpu, pKernelGsp, gfid) kgspFreeVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid) #define kgspFreeVgpuPartitionLogging_HAL(pGpu, pKernelGsp, gfid) kgspFreeVgpuPartitionLogging_DISPATCH(pGpu, pKernelGsp, gfid) #define kgspGetSignatureSectionNamePrefix(pGpu, pKernelGsp) kgspGetSignatureSectionNamePrefix_DISPATCH(pGpu, pKernelGsp) @@ -981,14 +984,24 @@ return pKernelGsp->__kgspGetMaxWprHeapSizeMB__(pGpu, pKernelGsp); } -static inline NV_STATUS kgspInitVgpuPartitionLogging_395e98(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid, NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, NvU64 vgpuTaskLogBuffSize) { +static inline NV_STATUS kgspInitVgpuPartitionLogging_395e98(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid, NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, NvU64 vgpuTaskLogBuffSize, NvBool *pPreserveLogBufferFull) { return NV_ERR_NOT_SUPPORTED; } -NV_STATUS kgspInitVgpuPartitionLogging_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid, NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, NvU64 vgpuTaskLogBuffSize); +NV_STATUS kgspInitVgpuPartitionLogging_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid, NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, NvU64 vgpuTaskLogBuffSize, NvBool *pPreserveLogBufferFull); -static inline NV_STATUS kgspInitVgpuPartitionLogging_DISPATCH(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid, NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, NvU64 vgpuTaskLogBuffSize) { - return pKernelGsp->__kgspInitVgpuPartitionLogging__(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize); +static inline NV_STATUS kgspInitVgpuPartitionLogging_DISPATCH(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid, NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, NvU64 vgpuTaskLogBuffSize, NvBool *pPreserveLogBufferFull) { + return pKernelGsp->__kgspInitVgpuPartitionLogging__(pGpu, pKernelGsp, gfid, initTaskLogBUffOffset, initTaskLogBUffSize, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize, pPreserveLogBufferFull); +} + +static inline NV_STATUS kgspPreserveVgpuPartitionLogging_395e98(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kgspPreserveVgpuPartitionLogging_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid); + +static inline NV_STATUS kgspPreserveVgpuPartitionLogging_DISPATCH(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid) { + return pKernelGsp->__kgspPreserveVgpuPartitionLogging__(pGpu, pKernelGsp, gfid); } static inline NV_STATUS 
kgspFreeVgpuPartitionLogging_395e98(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 gfid) { @@ -1339,14 +1352,14 @@ #define kgspAllocateBooterUnloadUcodeImage(pGpu, pKernelGsp, ppBooterUnloadUcode) kgspAllocateBooterUnloadUcodeImage_IMPL(pGpu, pKernelGsp, ppBooterUnloadUcode) #endif //__nvoc_kernel_gsp_h_disabled -void kgspRcAndNotifyAllUserChannels_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 exceptType); +void kgspRcAndNotifyAllChannels_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 exceptType, NvBool bSkipKernelChannels); #ifdef __nvoc_kernel_gsp_h_disabled -static inline void kgspRcAndNotifyAllUserChannels(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 exceptType) { +static inline void kgspRcAndNotifyAllChannels(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 exceptType, NvBool bSkipKernelChannels) { NV_ASSERT_FAILED_PRECOMP("KernelGsp was disabled!"); } #else //__nvoc_kernel_gsp_h_disabled -#define kgspRcAndNotifyAllUserChannels(pGpu, pKernelGsp, exceptType) kgspRcAndNotifyAllUserChannels_IMPL(pGpu, pKernelGsp, exceptType) +#define kgspRcAndNotifyAllChannels(pGpu, pKernelGsp, exceptType, bSkipKernelChannels) kgspRcAndNotifyAllChannels_IMPL(pGpu, pKernelGsp, exceptType, bSkipKernelChannels) #endif //__nvoc_kernel_gsp_h_disabled #undef PRIVATE_FIELD diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_nvlink_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_nvlink_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_nvlink_nvoc.h 2024-05-12 19:36:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_nvlink_nvoc.h 2024-09-17 16:55:07.000000000 +0000 @@ -316,6 +316,7 @@ NvBool PRIVATE_FIELD(bEnableSafeModeAtLoad); NvBool PRIVATE_FIELD(bEnableAli); NvBool PRIVATE_FIELD(bFloorSwept); + NvU32 PRIVATE_FIELD(numPortEvents); NvBool PRIVATE_FIELD(bLinkTrainingDebugSpew); NvBool PRIVATE_FIELD(bDisableL2Mode); NvU32 PRIVATE_FIELD(nvlinkLinkSpeed); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_vgpu_mgr_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_vgpu_mgr_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_kernel_vgpu_mgr_nvoc.h 2024-05-12 19:36:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_kernel_vgpu_mgr_nvoc.h 2024-09-17 16:55:08.000000000 +0000 @@ -7,7 +7,7 @@ #endif /* - * SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -116,7 +116,6 @@ NvU32 chidOffset[RM_ENGINE_TYPE_LAST]; NvU32 channelCount[RM_ENGINE_TYPE_LAST]; /*Number of channels available to the VF*/ NvU8 vgpuUuid[RM_SHA1_GID_SIZE]; - void *pVgpuVfioRef; struct REQUEST_VGPU_INFO_NODE *pRequestVgpuInfoNode; struct PhysMemSubAlloc *pPhysMemSubAlloc; struct HOST_VGPU_DEVICE *pHostVgpuDevice; @@ -175,17 +174,11 @@ /* vGPU info received from mdev kernel module for KVM */ typedef struct REQUEST_VGPU_INFO_NODE { - char configParams[VGPU_CONFIG_PARAMS_MAX_LENGTH]; NvU8 mdevUuid[VGPU_UUID_SIZE]; - void *waitQueue; - NvU8 *vmName; - NvS32 *returnStatus; NvU32 gpuPciId; - NvU32 qemuPid; - NvU16 vgpuId; - VGPU_DEVICE_STATE deviceState; NvU32 gpuPciBdf; NvU32 swizzId; + NvU16 vgpuId; KERNEL_HOST_VGPU_DEVICE *pKernelHostVgpuDevice; } REQUEST_VGPU_INFO_NODE; @@ -281,6 +274,7 @@ NvU32 swizzId, NvU32 vgpuDeviceInstanceId, NvBool bDisableDefaultSmcExecPartRestore, + NvU8 *pVgpuDevName, KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice); NV_STATUS @@ -316,10 +310,6 @@ kvgpumgrSetVgpuEncoderCapacity(struct OBJGPU *pGpu, NvU8 *vgpuUuid, NvU32 encoderCapacity); NV_STATUS -kvgpumgrStart(const NvU8 *pMdevUuid, void *waitQueue, NvS32 *returnStatus, - NvU8 *vmName, NvU32 qemuPid); - -NV_STATUS kvgpumgrCreateRequestVgpu(NvU32 gpuPciId, const NvU8 *pMdevUuid, NvU32 vgpuTypeId, NvU16 *vgpuId, NvU32 gpuPciBdf); @@ -327,6 +317,10 @@ kvgpumgrDeleteRequestVgpu(const NvU8 *pMdevUuid, NvU16 vgpuId); NV_STATUS +kvgpumgrGetAvailableInstances(NvU32 *avail_instances, struct OBJGPU *pGpu, VGPU_TYPE *vgpuTypeInfo, + NvU32 pgpuIndex, NvU8 devfn); + +NV_STATUS kvgpumgrGetHostVgpuDeviceFromMdevUuid(NvU32 gpuPciId, const NvU8 *pMdevUuid, KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_mem_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_mem_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_mem_nvoc.h 2024-05-12 19:36:54.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_mem_nvoc.h 2024-09-17 16:55:10.000000000 +0000 @@ -119,7 +119,6 @@ NvU64 guestDomainId; // guest ID that we need to use to inject interrupt NvU64 guestMSIAddr; // MSI address allocated by guest OS NvU32 guestMSIData; // MSI data value set by guest OS - void *pVgpuVfioRef; // Reference to vgpu device in nvidia-vgpu-vfio module void *pEventDpc; // DPC event to pass the interrupt } VGPU_NS_INTR; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_os_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_os_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_os_nvoc.h 2024-05-12 19:36:57.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_os_nvoc.h 2024-09-17 16:55:14.000000000 +0000 @@ -825,11 +825,12 @@ void initVGXSpecificRegistry(OBJGPU *); -NV_STATUS osVgpuVfioWake(void *waitQueue); -NV_STATUS osVgpuInjectInterrupt(void *pArg1); -NV_STATUS osVgpuRegisterMdev(OS_GPU_INFO *pArg1); +NV_STATUS nv_vgpu_rm_get_bar_info(OBJGPU *pGpu, const NvU8 *pMdevUuid, NvU64 *barSizes, + NvU64 *sparseOffsets, NvU64 *sparseSizes, + NvU32 *sparseCount, NvU8 *configParams); NV_STATUS osIsVgpuVfioPresent(void); NV_STATUS osIsVfioPciCorePresent(void); +void osWakeRemoveVgpu(NvU32, NvU32); NV_STATUS rm_is_vgpu_supported_device(OS_GPU_INFO *pNv, NvU32 pmc_boot_1); NV_STATUS 
osLockPageableDataSection(RM_PAGEABLE_SECTION *pSection); NV_STATUS osUnlockPageableDataSection(RM_PAGEABLE_SECTION *pSection); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_rpc-structures.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_rpc-structures.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_rpc-structures.h 2024-05-12 19:33:14.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_rpc-structures.h 2024-09-17 16:49:14.000000000 +0000 @@ -295,6 +295,7 @@ { NvU32 nv2080EngineType; NvU32 chid; + NvU32 gfid; NvU32 exceptType; NvU32 scope; NvU16 partitionAttributionId; @@ -1793,6 +1794,13 @@ #endif }, { + .vtype = vtype_NvU32, + .offset = NV_OFFSETOF(rpc_rc_triggered_v17_02, gfid), + #if (defined(DEBUG) || defined(DEVELOP)) + .name = "gfid" + #endif + }, + { .vtype = vtype_NvU32, .offset = NV_OFFSETOF(rpc_rc_triggered_v17_02, exceptType), #if (defined(DEBUG) || defined(DEVELOP)) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_subdevice_diag_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_diag_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_subdevice_diag_nvoc.h 2024-05-12 19:37:01.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_diag_nvoc.h 2024-09-17 16:55:20.000000000 +0000 @@ -7,7 +7,7 @@ #endif /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_subdevice_nvoc.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_subdevice_nvoc.c 2024-05-12 19:37:01.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.c 2024-09-17 16:55:21.000000000 +0000 @@ -7243,6 +7243,36 @@ #endif }, { /* [467] */ +#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) + /*pFunc=*/ (void (*)(void)) NULL, +#else + /*pFunc=*/ (void (*)(void)) subdeviceCtrlCmdNvlinkGetPortEvents_IMPL, +#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) + /*flags=*/ 0x204u, + /*accessRight=*/0x0u, + /*methodId=*/ 0x20803044u, + /*paramSize=*/ sizeof(NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS), + /*pClassInfo=*/ &(__nvoc_class_def_Subdevice.classInfo), +#if NV_PRINTF_STRINGS_ALLOWED + /*func=*/ "subdeviceCtrlCmdNvlinkGetPortEvents" +#endif + }, + { /* [468] */ +#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) + /*pFunc=*/ (void (*)(void)) NULL, +#else + /*pFunc=*/ (void (*)(void)) subdeviceCtrlCmdNvlinkCycleLink_IMPL, +#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) + /*flags=*/ 0x204u, + /*accessRight=*/0x0u, + /*methodId=*/ 0x20803045u, + /*paramSize=*/ sizeof(NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS), + /*pClassInfo=*/ &(__nvoc_class_def_Subdevice.classInfo), +#if NV_PRINTF_STRINGS_ALLOWED + /*func=*/ "subdeviceCtrlCmdNvlinkCycleLink" +#endif + }, + { /* [469] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x200u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7250,14 +7280,14 @@ #endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x200u) /*flags=*/ 0x200u, /*accessRight=*/0x0u, - /*methodId=*/ 0x20803044u, + /*methodId=*/ 
0x20803046u, /*paramSize=*/ sizeof(NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS), /*pClassInfo=*/ &(__nvoc_class_def_Subdevice.classInfo), #if NV_PRINTF_STRINGS_ALLOWED /*func=*/ "subdeviceCtrlCmdIsNvlinkReducedConfig" #endif }, - { /* [468] */ + { /* [470] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7272,7 +7302,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnGetDmemUsage" #endif }, - { /* [469] */ + { /* [471] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7287,7 +7317,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnGetEngineArch" #endif }, - { /* [470] */ + { /* [472] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7302,7 +7332,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnUstreamerQueueInfo" #endif }, - { /* [471] */ + { /* [473] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7317,7 +7347,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnUstreamerControlGet" #endif }, - { /* [472] */ + { /* [474] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7332,7 +7362,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnUstreamerControlSet" #endif }, - { /* [473] */ + { /* [475] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x0u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7347,7 +7377,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnGetCtxBufferInfo" #endif }, - { /* [474] */ + { /* [476] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7362,7 +7392,7 @@ /*func=*/ "subdeviceCtrlCmdFlcnGetCtxBufferSize" #endif }, - { /* [475] */ + { /* [477] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7377,7 +7407,7 @@ /*func=*/ "subdeviceCtrlCmdEccGetClientExposedCounters" #endif }, - { /* [476] */ + { /* [478] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7392,7 +7422,7 @@ /*func=*/ "subdeviceCtrlCmdEccGetEciCounters" #endif }, - { /* [477] */ + { /* [479] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7407,7 +7437,7 @@ /*func=*/ "subdeviceCtrlCmdEccGetVolatileCounts" #endif }, - { /* [478] */ + { /* [480] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7422,7 +7452,7 @@ /*func=*/ "subdeviceCtrlCmdFlaRange" #endif }, - { /* [479] */ + { /* [481] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x102204u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7437,7 +7467,7 @@ /*func=*/ "subdeviceCtrlCmdFlaSetupInstanceMemBlock" #endif }, - { /* [480] */ + { /* [482] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x100004u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7452,7 +7482,7 @@ /*func=*/ "subdeviceCtrlCmdFlaGetRange" #endif }, - { /* [481] */ + { /* [483] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x1810u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7467,7 +7497,7 @@ /*func=*/ "subdeviceCtrlCmdFlaGetFabricMemStats" #endif }, - { /* [482] */ + { /* [484] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4211u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7482,7 +7512,7 @@ /*func=*/ "subdeviceCtrlCmdGspGetFeatures" #endif }, - { /* [483] */ + { /* [485] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7497,7 +7527,7 @@ /*func=*/ "subdeviceCtrlCmdGspGetRmHeapStats" #endif }, - { /* [484] */ + { /* [486] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x2210u) /*pFunc=*/ 
(void (*)(void)) NULL, #else @@ -7512,7 +7542,7 @@ /*func=*/ "subdeviceCtrlCmdGrmgrGetGrFsInfo" #endif }, - { /* [485] */ + { /* [487] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x3u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7527,7 +7557,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixGc6BlockerRefCnt" #endif }, - { /* [486] */ + { /* [488] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x11u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7542,7 +7572,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixAllowDisallowGcoff" #endif }, - { /* [487] */ + { /* [489] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x1u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7557,7 +7587,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixAudioDynamicPower" #endif }, - { /* [488] */ + { /* [490] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x13u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7572,7 +7602,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixVidmemPersistenceStatus" #endif }, - { /* [489] */ + { /* [491] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x7u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7587,7 +7617,7 @@ /*func=*/ "subdeviceCtrlCmdOsUnixUpdateTgpStatus" #endif }, - { /* [490] */ + { /* [492] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7602,7 +7632,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalBootloadGspVgpuPluginTask" #endif }, - { /* [491] */ + { /* [493] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7617,7 +7647,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalShutdownGspVgpuPluginTask" #endif }, - { /* [492] */ + { /* [494] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7632,7 +7662,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalPgpuAddVgpuType" #endif }, - { /* [493] */ + { /* [495] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7647,7 +7677,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalEnumerateVgpuPerPgpu" #endif }, - { /* [494] */ + { /* [496] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7662,7 +7692,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalClearGuestVmInfo" #endif }, - { /* [495] */ + { /* [497] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7677,7 +7707,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalGetVgpuFbUsage" #endif }, - { /* [496] */ + { /* [498] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7692,7 +7722,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalSetVgpuEncoderCapacity" #endif }, - { /* [497] */ + { /* [499] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7707,7 +7737,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalCleanupGspVgpuPluginResources" #endif }, - { /* [498] */ + { /* [500] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7722,7 +7752,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalGetPgpuFsEncoding" #endif }, - { /* [499] */ + { /* [501] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7737,7 +7767,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalGetPgpuMigrationSupport" #endif }, - { /* [500] */ + { /* [502] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7752,7 +7782,7 @@ /*func=*/ "subdeviceCtrlCmdVgpuMgrInternalSetVgpuMgrConfig" #endif }, - { /* [501] */ + { /* 
[503] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0xa50u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7767,7 +7797,7 @@ /*func=*/ "subdeviceCtrlCmdGetAvailableHshubMask" #endif }, - { /* [502] */ + { /* [504] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7782,7 +7812,7 @@ /*func=*/ "subdeviceCtrlCmdPerfGetGpumonPerfmonUtilSamples" #endif }, - { /* [503] */ + { /* [505] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x600u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -7802,7 +7832,7 @@ const struct NVOC_EXPORT_INFO __nvoc_export_info_Subdevice = { - /*numEntries=*/ 504, + /*numEntries=*/ 506, /*pExportEntries=*/ __nvoc_exported_method_def_Subdevice }; @@ -8257,6 +8287,14 @@ pThis->__subdeviceCtrlCmdNvlinkGetL1Threshold__ = &subdeviceCtrlCmdNvlinkGetL1Threshold_IMPL; #endif +#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) + pThis->__subdeviceCtrlCmdNvlinkGetPortEvents__ = &subdeviceCtrlCmdNvlinkGetPortEvents_IMPL; +#endif + +#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x204u) + pThis->__subdeviceCtrlCmdNvlinkCycleLink__ = &subdeviceCtrlCmdNvlinkCycleLink_IMPL; +#endif + #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x200u) pThis->__subdeviceCtrlCmdIsNvlinkReducedConfig__ = &subdeviceCtrlCmdIsNvlinkReducedConfig_IMPL; #endif @@ -8891,14 +8929,6 @@ #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) pThis->__subdeviceCtrlCmdGpuQueryMode__ = &subdeviceCtrlCmdGpuQueryMode_IMPL; #endif - -#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4210u) - pThis->__subdeviceCtrlCmdGpuGetInforomImageVersion__ = &subdeviceCtrlCmdGpuGetInforomImageVersion_IMPL; -#endif - -#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) - pThis->__subdeviceCtrlCmdGpuGetInforomObjectVersion__ = &subdeviceCtrlCmdGpuGetInforomObjectVersion_IMPL; -#endif } static void __nvoc_init_funcTable_Subdevice_2(Subdevice *pThis, RmHalspecOwner *pRmhalspecowner) { @@ -8909,6 +8939,14 @@ PORT_UNREFERENCED_VARIABLE(rmVariantHal); PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx); +#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4210u) + pThis->__subdeviceCtrlCmdGpuGetInforomImageVersion__ = &subdeviceCtrlCmdGpuGetInforomImageVersion_IMPL; +#endif + +#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) + pThis->__subdeviceCtrlCmdGpuGetInforomObjectVersion__ = &subdeviceCtrlCmdGpuGetInforomObjectVersion_IMPL; +#endif + #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x210u) pThis->__subdeviceCtrlCmdGpuQueryInforomEccSupport__ = &subdeviceCtrlCmdGpuQueryInforomEccSupport_IMPL; #endif @@ -9918,10 +9956,6 @@ pThis->__subdeviceControl__ = &__nvoc_thunk_GpuResource_subdeviceControl; pThis->__subdeviceUnmap__ = &__nvoc_thunk_GpuResource_subdeviceUnmap; - - pThis->__subdeviceGetMemInterMapParams__ = &__nvoc_thunk_RmResource_subdeviceGetMemInterMapParams; - - pThis->__subdeviceGetMemoryMappingDescriptor__ = &__nvoc_thunk_RmResource_subdeviceGetMemoryMappingDescriptor; } static void __nvoc_init_funcTable_Subdevice_3(Subdevice *pThis, RmHalspecOwner *pRmhalspecowner) { @@ -9932,6 +9966,10 @@ PORT_UNREFERENCED_VARIABLE(rmVariantHal); PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx); + pThis->__subdeviceGetMemInterMapParams__ = &__nvoc_thunk_RmResource_subdeviceGetMemInterMapParams; + + pThis->__subdeviceGetMemoryMappingDescriptor__ = &__nvoc_thunk_RmResource_subdeviceGetMemoryMappingDescriptor; + pThis->__subdeviceUnregisterEvent__ = &__nvoc_thunk_Notifier_subdeviceUnregisterEvent; pThis->__subdeviceControlSerialization_Prologue__ = &__nvoc_thunk_RmResource_subdeviceControlSerialization_Prologue; diff -Nru 
nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_subdevice_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_subdevice_nvoc.h 2024-05-12 19:37:02.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_subdevice_nvoc.h 2024-09-17 16:55:21.000000000 +0000 @@ -218,6 +218,8 @@ NV_STATUS (*__subdeviceCtrlCmdNvlinkSetL1Threshold__)(struct Subdevice *, NV2080_CTRL_NVLINK_SET_L1_THRESHOLD_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdNvlinkDirectConnectCheck__)(struct Subdevice *, NV2080_CTRL_NVLINK_DIRECT_CONNECT_CHECK_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdNvlinkGetL1Threshold__)(struct Subdevice *, NV2080_CTRL_NVLINK_GET_L1_THRESHOLD_PARAMS *); + NV_STATUS (*__subdeviceCtrlCmdNvlinkGetPortEvents__)(struct Subdevice *, NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS *); + NV_STATUS (*__subdeviceCtrlCmdNvlinkCycleLink__)(struct Subdevice *, NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdIsNvlinkReducedConfig__)(struct Subdevice *, NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdI2cReadBuffer__)(struct Subdevice *, NV2080_CTRL_I2C_READ_BUFFER_PARAMS *); NV_STATUS (*__subdeviceCtrlCmdI2cWriteBuffer__)(struct Subdevice *, NV2080_CTRL_I2C_WRITE_BUFFER_PARAMS *); @@ -812,6 +814,8 @@ #define subdeviceCtrlCmdNvlinkSetL1Threshold(pSubdevice, pParams) subdeviceCtrlCmdNvlinkSetL1Threshold_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdNvlinkDirectConnectCheck(pSubdevice, pParams) subdeviceCtrlCmdNvlinkDirectConnectCheck_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdNvlinkGetL1Threshold(pSubdevice, pParams) subdeviceCtrlCmdNvlinkGetL1Threshold_DISPATCH(pSubdevice, pParams) +#define subdeviceCtrlCmdNvlinkGetPortEvents(pSubdevice, pParams) subdeviceCtrlCmdNvlinkGetPortEvents_DISPATCH(pSubdevice, pParams) +#define subdeviceCtrlCmdNvlinkCycleLink(pSubdevice, pParams) subdeviceCtrlCmdNvlinkCycleLink_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdIsNvlinkReducedConfig(pSubdevice, pParams) subdeviceCtrlCmdIsNvlinkReducedConfig_DISPATCH(pSubdevice, pParams) #define subdeviceCtrlCmdI2cReadBuffer(pSubdevice, pI2cParams) subdeviceCtrlCmdI2cReadBuffer_DISPATCH(pSubdevice, pI2cParams) #define subdeviceCtrlCmdI2cWriteBuffer(pSubdevice, pI2cParams) subdeviceCtrlCmdI2cWriteBuffer_DISPATCH(pSubdevice, pI2cParams) @@ -1864,6 +1868,18 @@ return pSubdevice->__subdeviceCtrlCmdNvlinkGetL1Threshold__(pSubdevice, pParams); } +NV_STATUS subdeviceCtrlCmdNvlinkGetPortEvents_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS *pParams); + +static inline NV_STATUS subdeviceCtrlCmdNvlinkGetPortEvents_DISPATCH(struct Subdevice *pSubdevice, NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS *pParams) { + return pSubdevice->__subdeviceCtrlCmdNvlinkGetPortEvents__(pSubdevice, pParams); +} + +NV_STATUS subdeviceCtrlCmdNvlinkCycleLink_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS *pParams); + +static inline NV_STATUS subdeviceCtrlCmdNvlinkCycleLink_DISPATCH(struct Subdevice *pSubdevice, NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS *pParams) { + return pSubdevice->__subdeviceCtrlCmdNvlinkCycleLink__(pSubdevice, pParams); +} + NV_STATUS subdeviceCtrlCmdIsNvlinkReducedConfig_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS *pParams); static inline NV_STATUS subdeviceCtrlCmdIsNvlinkReducedConfig_DISPATCH(struct Subdevice *pSubdevice, 
NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS *pParams) { diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.c 2024-05-12 19:37:05.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.c 2024-09-17 16:55:26.000000000 +0000 @@ -331,21 +331,6 @@ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else - /*pFunc=*/ (void (*)(void)) vgpuconfigapiCtrlCmdVgpuConfigNotifyStart_IMPL, -#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) - /*flags=*/ 0x10u, - /*accessRight=*/0x0u, - /*methodId=*/ 0xa0810107u, - /*paramSize=*/ sizeof(NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS), - /*pClassInfo=*/ &(__nvoc_class_def_VgpuConfigApi.classInfo), -#if NV_PRINTF_STRINGS_ALLOWED - /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigNotifyStart" -#endif - }, - { /* [7] */ -#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) - /*pFunc=*/ (void (*)(void)) NULL, -#else /*pFunc=*/ (void (*)(void)) vgpuconfigapiCtrlCmdVgpuConfigMdevRegister_IMPL, #endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*flags=*/ 0x10u, @@ -357,7 +342,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigMdevRegister" #endif }, - { /* [8] */ + { /* [7] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -372,7 +357,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigSetVgpuInstanceEncoderCapacity" #endif }, - { /* [9] */ + { /* [8] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -387,7 +372,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigGetVgpuFbUsage" #endif }, - { /* [10] */ + { /* [9] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -402,7 +387,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigGetMigrationCap" #endif }, - { /* [11] */ + { /* [10] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -417,7 +402,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigGetHostFbReservation" #endif }, - { /* [12] */ + { /* [11] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -432,7 +417,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigGetPgpuMetadataString" #endif }, - { /* [13] */ + { /* [12] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -447,7 +432,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigGetDoorbellEmulationSupport" #endif }, - { /* [14] */ + { /* [13] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -462,7 +447,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigGetFreeSwizzId" #endif }, - { /* [15] */ + { /* [14] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -477,7 +462,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdPgpuGetMultiVgpuSupportInfo" #endif }, - { /* [16] */ + { /* [15] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -492,7 +477,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdGetVgpuDriversCaps" #endif }, - { /* [17] */ + { /* [16] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u) /*pFunc=*/ (void (*)(void)) NULL, #else @@ -507,7 +492,7 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigSetPgpuInfo" #endif }, - { /* [18] */ + { /* [17] */ #if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u) 
/*pFunc=*/ (void (*)(void)) NULL, #else @@ -522,6 +507,21 @@ /*func=*/ "vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId" #endif }, + { /* [18] */ +#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) + /*pFunc=*/ (void (*)(void)) NULL, +#else + /*pFunc=*/ (void (*)(void)) vgpuconfigapiCtrlCmdVgpuSetVmName_IMPL, +#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) + /*flags=*/ 0x10u, + /*accessRight=*/0x0u, + /*methodId=*/ 0xa0810120u, + /*paramSize=*/ sizeof(NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS), + /*pClassInfo=*/ &(__nvoc_class_def_VgpuConfigApi.classInfo), +#if NV_PRINTF_STRINGS_ALLOWED + /*func=*/ "vgpuconfigapiCtrlCmdVgpuSetVmName" +#endif + }, }; @@ -596,10 +596,6 @@ #endif #if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) - pThis->__vgpuconfigapiCtrlCmdVgpuConfigNotifyStart__ = &vgpuconfigapiCtrlCmdVgpuConfigNotifyStart_IMPL; -#endif - -#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) pThis->__vgpuconfigapiCtrlCmdVgpuConfigMdevRegister__ = &vgpuconfigapiCtrlCmdVgpuConfigMdevRegister_IMPL; #endif @@ -647,6 +643,10 @@ pThis->__vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId__ = &vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId_IMPL; #endif +#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u) + pThis->__vgpuconfigapiCtrlCmdVgpuSetVmName__ = &vgpuconfigapiCtrlCmdVgpuSetVmName_IMPL; +#endif + pThis->__vgpuconfigapiShareCallback__ = &__nvoc_thunk_GpuResource_vgpuconfigapiShareCallback; pThis->__vgpuconfigapiCheckMemInterUnmap__ = &__nvoc_thunk_RmResource_vgpuconfigapiCheckMemInterUnmap; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.h 2024-05-12 19:37:05.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/generated/g_vgpuconfigapi_nvoc.h 2024-09-17 16:55:26.000000000 +0000 @@ -68,7 +68,6 @@ NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigGetSupportedVgpuTypes__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_GET_VGPU_TYPES_PARAMS *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigGetCreatableVgpuTypes__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_GET_VGPU_TYPES_PARAMS *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigEventSetNotification__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_EVENT_SET_NOTIFICATION_PARAMS *); - NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigNotifyStart__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigMdevRegister__)(struct VgpuConfigApi *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigSetVgpuInstanceEncoderCapacity__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_VGPU_INSTANCE_ENCODER_CAPACITY_PARAMS *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigGetVgpuFbUsage__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_GET_VGPU_FB_USAGE_PARAMS *); @@ -81,6 +80,7 @@ NV_STATUS (*__vgpuconfigapiCtrlCmdGetVgpuDriversCaps__)(struct VgpuConfigApi *, NVA081_CTRL_GET_VGPU_DRIVER_CAPS_PARAMS *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigSetPgpuInfo__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_SET_PGPU_INFO_PARAMS *); NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_CONFIG_VALIDATE_SWIZZID_PARAMS *); + NV_STATUS (*__vgpuconfigapiCtrlCmdVgpuSetVmName__)(struct VgpuConfigApi *, NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS *); NvBool (*__vgpuconfigapiShareCallback__)(struct VgpuConfigApi *, struct RsClient *, struct RsResourceRef *, 
RS_SHARE_POLICY *); NV_STATUS (*__vgpuconfigapiCheckMemInterUnmap__)(struct VgpuConfigApi *, NvBool); NV_STATUS (*__vgpuconfigapiGetOrAllocNotifShare__)(struct VgpuConfigApi *, NvHandle, NvHandle, struct NotifShare **); @@ -148,7 +148,6 @@ #define vgpuconfigapiCtrlCmdVgpuConfigGetSupportedVgpuTypes(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdVgpuConfigGetSupportedVgpuTypes_DISPATCH(pVgpuConfigApi, pParams) #define vgpuconfigapiCtrlCmdVgpuConfigGetCreatableVgpuTypes(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdVgpuConfigGetCreatableVgpuTypes_DISPATCH(pVgpuConfigApi, pParams) #define vgpuconfigapiCtrlCmdVgpuConfigEventSetNotification(pVgpuConfigApi, pSetEventParams) vgpuconfigapiCtrlCmdVgpuConfigEventSetNotification_DISPATCH(pVgpuConfigApi, pSetEventParams) -#define vgpuconfigapiCtrlCmdVgpuConfigNotifyStart(pVgpuConfigApi, pNotifyParams) vgpuconfigapiCtrlCmdVgpuConfigNotifyStart_DISPATCH(pVgpuConfigApi, pNotifyParams) #define vgpuconfigapiCtrlCmdVgpuConfigMdevRegister(pVgpuConfigApi) vgpuconfigapiCtrlCmdVgpuConfigMdevRegister_DISPATCH(pVgpuConfigApi) #define vgpuconfigapiCtrlCmdVgpuConfigSetVgpuInstanceEncoderCapacity(pVgpuConfigApi, pEncoderParams) vgpuconfigapiCtrlCmdVgpuConfigSetVgpuInstanceEncoderCapacity_DISPATCH(pVgpuConfigApi, pEncoderParams) #define vgpuconfigapiCtrlCmdVgpuConfigGetVgpuFbUsage(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdVgpuConfigGetVgpuFbUsage_DISPATCH(pVgpuConfigApi, pParams) @@ -161,6 +160,7 @@ #define vgpuconfigapiCtrlCmdGetVgpuDriversCaps(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdGetVgpuDriversCaps_DISPATCH(pVgpuConfigApi, pParams) #define vgpuconfigapiCtrlCmdVgpuConfigSetPgpuInfo(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdVgpuConfigSetPgpuInfo_DISPATCH(pVgpuConfigApi, pParams) #define vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId_DISPATCH(pVgpuConfigApi, pParams) +#define vgpuconfigapiCtrlCmdVgpuSetVmName(pVgpuConfigApi, pParams) vgpuconfigapiCtrlCmdVgpuSetVmName_DISPATCH(pVgpuConfigApi, pParams) #define vgpuconfigapiShareCallback(pGpuResource, pInvokingClient, pParentRef, pSharePolicy) vgpuconfigapiShareCallback_DISPATCH(pGpuResource, pInvokingClient, pParentRef, pSharePolicy) #define vgpuconfigapiCheckMemInterUnmap(pRmResource, bSubdeviceHandleProvided) vgpuconfigapiCheckMemInterUnmap_DISPATCH(pRmResource, bSubdeviceHandleProvided) #define vgpuconfigapiGetOrAllocNotifShare(pNotifier, hNotifierClient, hNotifierResource, ppNotifShare) vgpuconfigapiGetOrAllocNotifShare_DISPATCH(pNotifier, hNotifierClient, hNotifierResource, ppNotifShare) @@ -227,12 +227,6 @@ return pVgpuConfigApi->__vgpuconfigapiCtrlCmdVgpuConfigEventSetNotification__(pVgpuConfigApi, pSetEventParams); } -NV_STATUS vgpuconfigapiCtrlCmdVgpuConfigNotifyStart_IMPL(struct VgpuConfigApi *pVgpuConfigApi, NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS *pNotifyParams); - -static inline NV_STATUS vgpuconfigapiCtrlCmdVgpuConfigNotifyStart_DISPATCH(struct VgpuConfigApi *pVgpuConfigApi, NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS *pNotifyParams) { - return pVgpuConfigApi->__vgpuconfigapiCtrlCmdVgpuConfigNotifyStart__(pVgpuConfigApi, pNotifyParams); -} - NV_STATUS vgpuconfigapiCtrlCmdVgpuConfigMdevRegister_IMPL(struct VgpuConfigApi *pVgpuConfigApi); static inline NV_STATUS vgpuconfigapiCtrlCmdVgpuConfigMdevRegister_DISPATCH(struct VgpuConfigApi *pVgpuConfigApi) { @@ -305,6 +299,12 @@ return pVgpuConfigApi->__vgpuconfigapiCtrlCmdVgpuConfigValidateSwizzId__(pVgpuConfigApi, pParams); } +NV_STATUS 
vgpuconfigapiCtrlCmdVgpuSetVmName_IMPL(struct VgpuConfigApi *pVgpuConfigApi, NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS *pParams); + +static inline NV_STATUS vgpuconfigapiCtrlCmdVgpuSetVmName_DISPATCH(struct VgpuConfigApi *pVgpuConfigApi, NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS *pParams) { + return pVgpuConfigApi->__vgpuconfigapiCtrlCmdVgpuSetVmName__(pVgpuConfigApi, pParams); +} + static inline NvBool vgpuconfigapiShareCallback_DISPATCH(struct VgpuConfigApi *pGpuResource, struct RsClient *pInvokingClient, struct RsResourceRef *pParentRef, RS_SHARE_POLICY *pSharePolicy) { return pGpuResource->__vgpuconfigapiShareCallback__(pGpuResource, pInvokingClient, pParentRef, pSharePolicy); } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/inc/kernel/gpu/falcon/falcon_common.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/kernel/gpu/falcon/falcon_common.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/inc/kernel/gpu/falcon/falcon_common.h 2024-05-12 19:30:13.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/kernel/gpu/falcon/falcon_common.h 2024-09-17 16:45:55.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -63,4 +63,12 @@ */ #define FLCN_RESET_PROPAGATION_DELAY_COUNT 10 +/*! + * Used by FALCON_DMATRFCMD polling functions to wait for _FULL==FALSE or _IDLE==TRUE + */ +typedef enum { + FLCN_DMA_POLL_QUEUE_NOT_FULL = 0, + FLCN_DMA_POLL_ENGINE_IDLE = 1 +} FlcnDmaPollMode; + #endif // FALCON_COMMON_H diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/inc/libraries/utils/nvprintf.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/libraries/utils/nvprintf.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/inc/libraries/utils/nvprintf.h 2024-05-12 19:30:28.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/inc/libraries/utils/nvprintf.h 2024-09-17 16:46:14.000000000 +0000 @@ -405,7 +405,7 @@ // In MODS builds, we allow all printfs, but don't automatically include the // __FILE__ or __FUNCTION__ references. 
// -#if NV_PRINTF_STRINGS_ALLOWED && (!defined(NV_MODS) || defined(SIM_BUILD) || defined(DEBUG) || defined(NV_MODS_INTERNAL)) +#if NV_PRINTF_STRINGS_ALLOWED && (!defined(NV_MODS) || defined(SIM_BUILD) || defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS_INTERNAL)) #define NV_FILE_STR __FILE__ #define NV_FILE __FILE__ #define NV_FILE_FMT "%s" diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/interface/nvrm_registry.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/interface/nvrm_registry.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/interface/nvrm_registry.h 2024-05-12 19:30:30.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/interface/nvrm_registry.h 2024-09-17 16:46:16.000000000 +0000 @@ -1213,6 +1213,19 @@ #define NV_REG_STR_RM_WATCHDOG_INTERVAL_HI 0x0000000C #define NV_REG_STR_RM_WATCHDOG_INTERVAL_DEFAULT NV_REG_STR_RM_WATCHDOG_INTERVAL_LOW +// Enable/Disable watchcat in GSP-Plugin for Guest RPC +// Default is Enabled +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT "RmEnableGspPluginWatchcat" +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT_ENABLE 0x00000001 +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT_DISABLE 0x00000000 +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT_DEFAULT NV_REG_STR_RM_GSP_VGPU_WATCHCAT_ENABLE + +// Set watchcat timeout value in GSP-Plugin for Guest RPC +// Default is 10 seconds +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT_TIMEOUT "RmGspPluginWatchcatTimeOut" +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT_TIMEOUT_MIN 0x0000000A +#define NV_REG_STR_RM_GSP_VGPU_WATCHCAT_TIMEOUT_DEFAULT NV_REG_STR_RM_GSP_VGPU_WATCHCAT_TIMEOUT_MIN + #define NV_REG_STR_RM_DO_LOG_RC_EVENTS "RmLogonRC" // Type Dword // Encoding : 0 --> Skip Logging @@ -1966,4 +1979,3 @@ #define NV_REG_STR_RM_FORCE_GR_SCRUBBER_CHANNEL_DISABLE 0x00000000 #define NV_REG_STR_RM_FORCE_GR_SCRUBBER_CHANNEL_ENABLE 0x00000001 #endif // NVRM_REGISTRY_H - diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/kernel/inc/nvpcf.h nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/kernel/inc/nvpcf.h --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/kernel/inc/nvpcf.h 2024-05-12 19:30:30.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/kernel/inc/nvpcf.h 2024-09-17 16:46:17.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -82,6 +82,7 @@ #define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_21 (0x21) #define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22 (0x22) #define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23 (0x23) +#define NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24 (0x24) // format for 2.0 and 2.1 #define NVPCF_CONTROLLER_STATIC_TABLE_HEADER_V20_SIZE_05 (0x05U) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c 2024-05-12 19:30:46.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c 2024-09-17 16:46:33.000000000 +0000 @@ -548,11 +548,18 @@ if (bLockAcquired) rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); - if (bReserveMem) + if ((rmStatus == NV_OK) && bReserveMem) { // GPU lock should not be held when reserving memory for ctxBufPool - NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, + NV_CHECK_OK(rmStatus, LEVEL_ERROR, ctxBufPoolReserve(pGpu, pKernelChannelGroup->pCtxBufPool, bufInfoList, bufCount)); + if (rmStatus != NV_OK) + { + // Acquire the lock again for the cleanup path + NV_ASSERT_OK_OR_RETURN(rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_FIFO)); + bLockAcquired = NV_TRUE; + goto failed; + } } portMemFree(bufInfoList); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c 2024-05-12 19:30:46.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c 2024-09-17 16:46:33.000000000 +0000 @@ -1675,14 +1675,10 @@ ) { portMemSet(pIt, 0, sizeof(*pIt)); - pIt->physicalChannelID = 0; - pIt->pFifoDataBlock = NULL; - pIt->runlistId = 0; - pIt->numRunlists = 1; - if (kfifoIsPerRunlistChramEnabled(pKernelFifo)) - { - pIt->numRunlists = kfifoGetMaxNumRunlists_HAL(pGpu, pKernelFifo); - } + pIt->runlistId = 0; + + // Resulting iterator will iterate over constructed CHID_MGRs only + pIt->numRunlists = pKernelFifo->numChidMgrs; } /** diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gpu_suspend.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gpu_suspend.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gpu_suspend.c 2024-05-12 19:30:49.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gpu_suspend.c 2024-09-17 16:46:36.000000000 +0000 @@ -31,6 +31,7 @@ #include "platform/platform.h" #include "platform/chipset/chipset.h" +#include "kernel/gpu/gr/kernel_graphics.h" #include "gpu/mem_mgr/mem_mgr.h" #include "gpu/mem_mgr/fbsr.h" #include "gpu/gsp/gsp_init_args.h" @@ -350,6 +351,13 @@ NV_PRINTF(LEVEL_NOTICE, "Ending resume from %s\n", IS_GPU_GC6_STATE_EXITING(pGpu) ? 
"GC6" : "APM Suspend"); } + if (resumeStatus == NV_OK) + { + if (kgraphicsIsBug4208224WARNeeded_HAL(pGpu, GPU_GET_KERNEL_GRAPHICS(pGpu, 0))) + { + return kgraphicsInitializeBug4208224WAR_HAL(pGpu, GPU_GET_KERNEL_GRAPHICS(pGpu, 0)); + } + } return resumeStatus; } @@ -413,6 +421,13 @@ { NV_PRINTF(LEVEL_NOTICE, "End resuming from APM Suspend\n"); } + if (resumeStatus == NV_OK) + { + if (kgraphicsIsBug4208224WARNeeded_HAL(pGpu, GPU_GET_KERNEL_GRAPHICS(pGpu, 0))) + { + return kgraphicsInitializeBug4208224WAR_HAL(pGpu, GPU_GET_KERNEL_GRAPHICS(pGpu, 0)); + } + } return resumeStatus; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gr/arch/turing/kgraphics_tu102.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gr/arch/turing/kgraphics_tu102.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gr/arch/turing/kgraphics_tu102.c 2024-05-12 19:30:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gr/arch/turing/kgraphics_tu102.c 2024-09-17 16:46:37.000000000 +0000 @@ -196,6 +196,11 @@ RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); NV2080_CTRL_INTERNAL_KGR_INIT_BUG4208224_WAR_PARAMS params = {0}; + if (pKernelGraphics->bug4208224Info.bConstructed) + { + return NV_OK; + } + NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kgraphicsCreateBug4208224Channel_HAL(pGpu, pKernelGraphics)); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c 2024-05-12 19:30:50.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c 2024-09-17 16:46:37.000000000 +0000 @@ -506,7 +506,7 @@ } NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics)); - if (kgraphicsIsBug4208224WARNeeded_HAL(pGpu, pKernelGraphics)) + if (kgraphicsIsBug4208224WARNeeded_HAL(pGpu, pKernelGraphics) && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH)) { return kgraphicsInitializeBug4208224WAR_HAL(pGpu, pKernelGraphics); } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c 2024-05-12 19:30:51.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c 2024-09-17 16:46:38.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -35,6 +35,67 @@ #include "published/ampere/ga102/dev_falcon_second_pri.h" #include "published/ampere/ga102/dev_fbif_v4.h" +static GpuWaitConditionFunc s_dmaPollCondFunc; + +typedef struct { + KernelFalcon *pKernelFlcn; + NvU32 pollMask; + NvU32 pollValue; +} DmaPollCondData; + +static NvBool +s_dmaPollCondFunc +( + OBJGPU *pGpu, + void *pVoid +) +{ + DmaPollCondData *pData = (DmaPollCondData *)pVoid; + return ((kflcnRegRead_HAL(pGpu, pData->pKernelFlcn, NV_PFALCON_FALCON_DMATRFCMD) & pData->pollMask) == pData->pollValue); +} + +/*! + * Poll on either _FULL or _IDLE field of NV_PFALCON_FALCON_DMATRFCMD + * + * @param[in] pGpu GPU object pointer + * @param[in] pKernelFlcn pKernelFlcn object pointer + * @param[in] mode FLCN_DMA_POLL_QUEUE_NOT_FULL for poll on _FULL; return when _FULL is false + * FLCN_DMA_POLL_ENGINE_IDLE for poll on _IDLE; return when _IDLE is true + */ +static NV_STATUS +s_dmaPoll_GA102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + FlcnDmaPollMode mode +) +{ + NV_STATUS status; + DmaPollCondData data; + + data.pKernelFlcn = pKernelFlcn; + if (mode == FLCN_DMA_POLL_QUEUE_NOT_FULL) + { + data.pollMask = DRF_SHIFTMASK(NV_PFALCON_FALCON_DMATRFCMD_FULL); + data.pollValue = DRF_DEF(_PFALCON, _FALCON_DMATRFCMD, _FULL, _FALSE); + } + else + { + data.pollMask = DRF_SHIFTMASK(NV_PFALCON_FALCON_DMATRFCMD_IDLE); + data.pollValue = DRF_DEF(_PFALCON, _FALCON_DMATRFCMD, _IDLE, _TRUE); + } + + status = gpuTimeoutCondWait(pGpu, s_dmaPollCondFunc, &data, NULL); + if (status != NV_OK) + { + NV_PRINTF(LEVEL_ERROR, "Error while waiting for Falcon DMA; mode: %d, status: 0x%08x\n", mode, status); + DBG_BREAKPOINT(); + return status; + } + + return NV_OK; +} + static NV_STATUS s_dmaTransfer_GA102 ( @@ -48,15 +109,20 @@ ) { NV_STATUS status = NV_OK; - RMTIMEOUT timeout; NvU32 data; NvU32 bytesXfered = 0; + // Ensure request queue initially has space or writing base registers will corrupt DMA transfer. + NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, s_dmaPoll_GA102(pGpu, pKernelFlcn, FLCN_DMA_POLL_QUEUE_NOT_FULL)); + kflcnRegWrite_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_DMATRFBASE, NvU64_LO32(srcPhysAddr >> 8)); kflcnRegWrite_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_DMATRFBASE1, NvU64_HI32(srcPhysAddr >> 8) & 0x1FF); while (bytesXfered < sizeInBytes) { + // Poll for non-full request queue as writing control registers when full will corrupt DMA transfer. + NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, s_dmaPoll_GA102(pGpu, pKernelFlcn, FLCN_DMA_POLL_QUEUE_NOT_FULL)); + data = FLD_SET_DRF_NUM(_PFALCON, _FALCON_DMATRFMOFFS, _OFFS, dest, 0); kflcnRegWrite_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_DMATRFMOFFS, data); @@ -66,28 +132,17 @@ // Write the command kflcnRegWrite_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_DMATRFCMD, dmaCmd); - // Poll for completion - data = kflcnRegRead_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_DMATRFCMD); - - gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0); - while(FLD_TEST_DRF(_PFALCON_FALCON, _DMATRFCMD, _IDLE, _FALSE, data)) - { - status = gpuCheckTimeout(pGpu, &timeout); - if (status == NV_ERR_TIMEOUT) - { - NV_PRINTF(LEVEL_ERROR, "Timeout waiting for Falcon DMA to finish\n"); - DBG_BREAKPOINT(); - return status; - } - osSpinLoop(); - data = kflcnRegRead_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_DMATRFCMD); - } - bytesXfered += FLCN_BLK_ALIGNMENT; dest += FLCN_BLK_ALIGNMENT; memOff += FLCN_BLK_ALIGNMENT; } + // + // Poll for completion. 
GA10x+ does not have TCM tagging so DMA operations to/from TCM should + // wait for DMA to complete before launching another operation to avoid memory ordering problems. + // + NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, s_dmaPoll_GA102(pGpu, pKernelFlcn, FLCN_DMA_POLL_ENGINE_IDLE)); + return status; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c 2024-05-12 19:30:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c 2024-09-17 16:46:38.000000000 +0000 @@ -867,7 +867,7 @@ if (bFirstFatal) { - kgspRcAndNotifyAllUserChannels(pGpu, pKernelGsp, GSP_ERROR); + kgspRcAndNotifyAllChannels(pGpu, pKernelGsp, GSP_ERROR, NV_TRUE); } gpuCheckEccCounts_HAL(pGpu); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c 2024-05-12 19:30:52.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c 2024-09-17 16:46:39.000000000 +0000 @@ -295,13 +295,17 @@ NvU32 historyIndex; NvU32 historyEntry; + // Complete the current entry (it should be active) + // TODO: assert that ts_end == 0 here when continuation record timestamps are fixed + NV_ASSERT_OR_RETURN_VOID(pHistory[current].ts_start != 0); + pHistory[current].ts_end = osGetTimestamp(); // // Complete any previous entries that aren't marked complete yet, using the same timestamp // (we may not have explicitly waited for them) // - for (historyIndex = 0; historyIndex < RPC_HISTORY_DEPTH; historyIndex++) + for (historyIndex = 1; historyIndex < RPC_HISTORY_DEPTH; historyIndex++) { historyEntry = (current + RPC_HISTORY_DEPTH - historyIndex) % RPC_HISTORY_DEPTH; if (pHistory[historyEntry].ts_start != 0 && @@ -309,8 +313,8 @@ { pHistory[historyEntry].ts_end = pHistory[current].ts_end; } + } } -} /*! 
* GSP client RM RPC send routine @@ -472,7 +476,7 @@ RPC_PARAMS(rc_triggered, _v17_02); KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); - KernelChannel *pKernelChannel; + KernelChannel *pKernelChannel = NULL; KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); CHID_MGR *pChidMgr; NvU32 status = NV_OK; @@ -500,73 +504,18 @@ if (status != NV_OK) return status; - pKernelChannel = kfifoChidMgrGetKernelChannel(pGpu, pKernelFifo, - pChidMgr, - rpc_params->chid); - NV_CHECK_OR_RETURN(LEVEL_ERROR, - pKernelChannel != NULL, - NV_ERR_INVALID_CHANNEL); - - // Add the RcDiag records we received from GSP-RM to our system wide journal - { - OBJSYS *pSys = SYS_GET_INSTANCE(); - Journal *pRcDB = SYS_GET_RCDB(pSys); - RmClient *pClient; - - NvU32 recordSize = rcdbGetOcaRecordSizeWithHeader(pRcDB, RmRcDiagReport); - NvU32 rcDiagRecStart = pRcDB->RcErrRptNextIdx; - NvU32 rcDiagRecEnd; - NvU32 processId = 0; - NvU32 owner = RCDB_RCDIAG_DEFAULT_OWNER; - - pClient = dynamicCast(RES_GET_CLIENT(pKernelChannel), RmClient); - NV_ASSERT(pClient != NULL); - if (pClient != NULL) - processId = pClient->ProcID; - - for (NvU32 i = 0; i < rpc_params->rcJournalBufferSize / recordSize; i++) - { - RmRCCommonJournal_RECORD *pCommonRecord = - (RmRCCommonJournal_RECORD *)((NvU8*)&rpc_params->rcJournalBuffer + i * recordSize); - RmRcDiag_RECORD *pRcDiagRecord = - (RmRcDiag_RECORD *)&pCommonRecord[1]; - -#if defined(DEBUG) - NV_PRINTF(LEVEL_INFO, "%d: GPUTag=0x%x CPUTag=0x%llx timestamp=0x%llx stateMask=0x%llx\n", - i, pCommonRecord->GPUTag, pCommonRecord->CPUTag, pCommonRecord->timeStamp, - pCommonRecord->stateMask); - NV_PRINTF(LEVEL_INFO, " idx=%d timeStamp=0x%x type=0x%x flags=0x%x count=%d owner=0x%x processId=0x%x\n", - pRcDiagRecord->idx, pRcDiagRecord->timeStamp, pRcDiagRecord->type, pRcDiagRecord->flags, - pRcDiagRecord->count, pRcDiagRecord->owner, processId); - for (NvU32 j = 0; j < pRcDiagRecord->count; j++) - { - NV_PRINTF(LEVEL_INFO, " %d: offset=0x08%x tag=0x08%x value=0x08%x attribute=0x08%x\n", - j, pRcDiagRecord->data[j].offset, pRcDiagRecord->data[j].tag, - pRcDiagRecord->data[j].value, pRcDiagRecord->data[j].attribute); - } -#endif - if (rcdbAddRcDiagRecFromGsp(pGpu, pRcDB, pCommonRecord, pRcDiagRecord) == NULL) - { - NV_PRINTF(LEVEL_WARNING, "Lost RC diagnostic record coming from GPU%d GSP: type=0x%x stateMask=0x%llx\n", - gpuGetInstance(pGpu), pRcDiagRecord->type, pCommonRecord->stateMask); - } - } - - rcDiagRecEnd = pRcDB->RcErrRptNextIdx - 1; - - // Update records to have the correct PID associated with the channel - if (rcDiagRecStart != rcDiagRecEnd) - { - rcdbUpdateRcDiagRecContext(pRcDB, - rcDiagRecStart, - rcDiagRecEnd, - processId, - owner); - } + if (IS_GFID_PF(rpc_params->gfid)) + { + pKernelChannel = kfifoChidMgrGetKernelChannel(pGpu, pKernelFifo, + pChidMgr, + rpc_params->chid); + NV_CHECK_OR_RETURN(LEVEL_ERROR, + pKernelChannel != NULL, + NV_ERR_INVALID_CHANNEL); } // With CC enabled, CPU-RM needs to write error notifiers - if (gpuIsCCFeatureEnabled(pGpu)) + if (gpuIsCCFeatureEnabled(pGpu) && pKernelChannel != NULL) { NV_ASSERT_OK_OR_RETURN(krcErrorSetNotifier(pGpu, pKernelRc, pKernelChannel, @@ -577,7 +526,7 @@ return krcErrorSendEventNotifications_HAL(pGpu, pKernelRc, pKernelChannel, - rmEngineType, // unused on kernel side + rmEngineType, // unused on kernel side rpc_params->exceptType, rpc_params->scope, rpc_params->partitionAttributionId); @@ -590,34 +539,39 @@ * @param[in] pGpu GPU object pointer * @param[in] pKernelGsp KernelGsp object pointer * @param[in] exceptType Error code to send to 
the RC notifiers + * @param[in] bSkipKernelChannels Don't RC and notify kernel channels * */ void -kgspRcAndNotifyAllUserChannels +kgspRcAndNotifyAllChannels_IMPL ( OBJGPU *pGpu, KernelGsp *pKernelGsp, - NvU32 exceptType + NvU32 exceptType, + NvBool bSkipKernelChannels ) { + // + // Note Bug 4503046: UVM currently attributes all errors as global and fails + // operations on all GPUs, in addition to the current failing GPU. Right now, the only + // case where we shouldn't skip kernel channels is when the GPU has fallen off the bus. + // + KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); KernelChannel *pKernelChannel; KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); CHANNEL_ITERATOR chanIt; RMTIMEOUT timeout; - NV_PRINTF(LEVEL_ERROR, "RC all user channels for critical error %d.\n", exceptType); + NV_PRINTF(LEVEL_ERROR, "RC all %schannels for critical error %d.\n", + bSkipKernelChannels ? MAKE_NV_PRINTF_STR("user ") : MAKE_NV_PRINTF_STR(""), + exceptType); - // Pass 1: halt all user channels. + // Pass 1: halt all channels. kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt); while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK) { - // - // Kernel (uvm) channels are skipped to workaround nvbug 4503046, where - // uvm attributes all errors as global and fails operations on all GPUs, - // in addition to the current failing GPU. - // - if (kchannelCheckIsKernel(pKernelChannel)) + if (kchannelCheckIsKernel(pKernelChannel) && bSkipKernelChannels) { continue; } @@ -626,7 +580,7 @@ } // - // Pass 2: Wait for the halts to complete, and RC notify the user channels. + // Pass 2: Wait for the halts to complete, and RC notify the channels. // The channel halts require a preemption, which may not be able to complete // since the GSP is no longer servicing interrupts. Wait for up to the // default GPU timeout value for the preemptions to complete. @@ -635,21 +589,22 @@ kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt); while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK) { - // Skip kernel (uvm) channels as only user channel halts are initiated above. 
- if (kchannelCheckIsKernel(pKernelChannel)) + if (kchannelCheckIsKernel(pKernelChannel) && bSkipKernelChannels) { continue; } kfifoCompleteChannelHalt(pGpu, pKernelFifo, pKernelChannel, &timeout); - NV_ASSERT_OK(krcErrorSetNotifier(pGpu, pKernelRc, + NV_ASSERT_OK( + krcErrorSetNotifier(pGpu, pKernelRc, pKernelChannel, exceptType, kchannelGetEngineType(pKernelChannel), RC_NOTIFIER_SCOPE_CHANNEL)); - NV_ASSERT_OK(krcErrorSendEventNotifications_HAL(pGpu, pKernelRc, + NV_ASSERT_OK( + krcErrorSendEventNotifications_HAL(pGpu, pKernelRc, pKernelChannel, kchannelGetEngineType(pKernelChannel), exceptType, @@ -1162,8 +1117,8 @@ pParams->gfid = rpc_params->gfid; pParams->bDelete = rpc_params->bDelete; status = pOS->osQueueWorkItemWithFlags(pGpu, - _kgspRpcMigCiConfigUpdateCallback, - (void *)pParams, + _kgspRpcMigCiConfigUpdateCallback, + (void *)pParams, OS_QUEUE_WORKITEM_FLAGS_LOCK_API_RW | OS_QUEUE_WORKITEM_FLAGS_LOCK_GPUS_RW); if (status != NV_OK) { @@ -1548,13 +1503,13 @@ { duration /= 1000; *pDurationUnitsChar = 'm'; - } - // 9999ms then 10s - if (duration >= 10000) - { - duration /= 1000; - *pDurationUnitsChar = ' '; // so caller can always just append 's' + // 9999ms then 10s + if (duration >= 10000) + { + duration /= 1000; + *pDurationUnitsChar = ' '; // so caller can always just append 's' + } } return duration; @@ -1717,7 +1672,7 @@ duration = _tsDiffToDuration(ts_end - pHistoryEntry->ts_start, &durationUnitsChar); NV_ERROR_LOG(pGpu, GSP_RPC_TIMEOUT, - "Timeout after %llus of waiting for RPC response from GPU%d GSP! Expected function %d (%s) (0x%x 0x%x).", + "Timeout after %llus of waiting for RPC response from GPU%d GSP! Expected function %d (%s) (0x%llx 0x%llx).", (durationUnitsChar == 'm' ? duration / 1000 : duration), gpuGetInstance(pGpu), expectedFunc, @@ -1728,7 +1683,6 @@ if (pRpc->timeoutCount == 1) { kgspLogRpcDebugInfo(pGpu, pRpc, GSP_RPC_TIMEOUT, NV_TRUE/*bPollingForRpcResponse*/); - osAssertFailed(); NV_PRINTF(LEVEL_ERROR, @@ -1737,6 +1691,32 @@ } static void +_kgspLogRpcSanityCheckFailure +( + OBJGPU *pGpu, + OBJRPC *pRpc, + NvU32 rpcStatus, + NvU32 expectedFunc +) +{ + RpcHistoryEntry *pHistoryEntry = &pRpc->rpcHistory[pRpc->rpcHistoryCurrent]; + + NV_ASSERT(expectedFunc == pHistoryEntry->function); + + NV_PRINTF(LEVEL_ERROR, + "GPU%d sanity check failed 0x%x waiting for RPC response from GSP. 
Expected function %d (%s) (0x%llx 0x%llx).\n", + gpuGetInstance(pGpu), + rpcStatus, + expectedFunc, + _getRpcName(expectedFunc), + pHistoryEntry->data[0], + pHistoryEntry->data[1]); + + kgspLogRpcDebugInfo(pGpu, pRpc, GSP_RPC_TIMEOUT, NV_TRUE/*bPollingForRpcResponse*/); + osAssertFailed(); +} + +static void _kgspRpcIncrementTimeoutCountAndRateLimitPrints ( OBJGPU *pGpu, @@ -1866,7 +1846,16 @@ goto done; } - NV_CHECK_OK_OR_GOTO(rpcStatus, LEVEL_SILENT, _kgspRpcSanityCheck(pGpu, pKernelGsp, pRpc), done); + rpcStatus = _kgspRpcSanityCheck(pGpu, pKernelGsp, pRpc); + if (rpcStatus != NV_OK) + { + if (!pRpc->bQuietPrints) + { + _kgspLogRpcSanityCheckFailure(pGpu, pRpc, rpcStatus, expectedFunc); + pRpc->bQuietPrints = NV_TRUE; + } + goto done; + } if (timeoutStatus == NV_ERR_TIMEOUT) { @@ -2135,19 +2124,20 @@ NvU64 initTaskLogBUffOffset, NvU64 initTaskLogBUffSize, NvU64 vgpuTaskLogBUffOffset, - NvU64 vgpuTaskLogBuffSize + NvU64 vgpuTaskLogBuffSize, + NvBool *pPreserveLogBufferFull ) { NV_STATUS nvStatus = NV_OK; RM_LIBOS_LOG_MEM *pGspPluginVgpuTaskLog = NULL; RM_LIBOS_LOG_MEM *pGspPluginInitTaskLog = NULL; char vm_string[8], sourceName[SOURCE_NAME_MAX_LENGTH]; + NvBool bPreserveLogBufferFull = NV_FALSE; if (gfid > MAX_PARTITIONS_WITH_GFID) { return NV_ERR_INVALID_ARGUMENT; } - portSyncMutexAcquire(pKernelGsp->pNvlogFlushMtx); // Source name is used to generate a tag that is a unique identifier for nvlog buffers. @@ -2155,6 +2145,11 @@ nvDbgSnprintf(sourceName, SOURCE_NAME_MAX_LENGTH, "V%02d", gfid); libosLogCreateEx(&pKernelGsp->logDecodeVgpuPartition[gfid - 1], sourceName); + if (!bPreserveLogBufferFull) + { + bPreserveLogBufferFull = isLibosPreserveLogBufferFull(&pKernelGsp->logDecodeVgpuPartition[gfid - 1], pGpu->gpuInstance); + } + // Setup logging for vgpu task in vgpu partition { pGspPluginVgpuTaskLog = &pKernelGsp->gspPluginVgpuTaskLogMem[gfid - 1]; @@ -2162,14 +2157,13 @@ NV_ASSERT_OK_OR_GOTO(nvStatus, - memdescCreate(&pGspPluginVgpuTaskLog->pTaskLogDescriptor, - pGpu, - vgpuTaskLogBuffSize, - RM_PAGE_SIZE, - NV_TRUE, ADDR_FBMEM, NV_MEMORY_CACHED, - MEMDESC_FLAGS_NONE), - error_cleanup); - + memdescCreate(&pGspPluginVgpuTaskLog->pTaskLogDescriptor, + pGpu, + vgpuTaskLogBuffSize, + RM_PAGE_SIZE, + NV_TRUE, ADDR_FBMEM, NV_MEMORY_CACHED, + MEMDESC_FLAGS_NONE), + error_cleanup); memdescDescribe(pGspPluginVgpuTaskLog->pTaskLogDescriptor, ADDR_FBMEM, vgpuTaskLogBUffOffset, vgpuTaskLogBuffSize); @@ -2185,12 +2179,12 @@ nvDbgSnprintf(vm_string, sizeof(vm_string), "VGPU%d", gfid); libosLogAddLogEx(&pKernelGsp->logDecodeVgpuPartition[gfid - 1], - pGspPluginVgpuTaskLog->pTaskLogBuffer, - memdescGetSize(pGspPluginVgpuTaskLog->pTaskLogDescriptor), - pGpu->gpuInstance, - (gpuGetChipArch(pGpu) >> GPU_ARCH_SHIFT), - gpuGetChipImpl(pGpu), - vm_string, + pGspPluginVgpuTaskLog->pTaskLogBuffer, + memdescGetSize(pGspPluginVgpuTaskLog->pTaskLogDescriptor), + pGpu->gpuInstance, + (gpuGetChipArch(pGpu) >> GPU_ARCH_SHIFT), + gpuGetChipImpl(pGpu), + vm_string, ".fwlogging_vgpu"); } else @@ -2201,6 +2195,11 @@ } } + if (!bPreserveLogBufferFull) + { + bPreserveLogBufferFull = isLibosPreserveLogBufferFull(&pKernelGsp->logDecodeVgpuPartition[gfid - 1], pGpu->gpuInstance); + } + // Setup logging for init task in vgpu partition { pGspPluginInitTaskLog = &pKernelGsp->gspPluginInitTaskLogMem[gfid - 1]; @@ -2254,6 +2253,7 @@ "GSP", SOURCE_NAME_MAX_LENGTH); } + *pPreserveLogBufferFull = bPreserveLogBufferFull; pKernelGsp->bHasVgpuLogs = NV_TRUE; error_cleanup: @@ -2265,6 +2265,31 @@ return nvStatus; } +/*! 
+ * Preserve vGPU Partition log buffers between VM reboots + */ +NV_STATUS +kgspPreserveVgpuPartitionLogging_IMPL +( + OBJGPU *pGpu, + KernelGsp *pKernelGsp, + NvU32 gfid +) +{ + if ((gfid == 0) || (gfid > MAX_PARTITIONS_WITH_GFID)) + { + return NV_ERR_INVALID_ARGUMENT; + } + + // Make sure this this NvLog buffer is pushed + kgspDumpGspLogsUnlocked(pKernelGsp, NV_FALSE); + + // Preserve any captured vGPU Partition logs + libosPreserveLogs(&pKernelGsp->logDecodeVgpuPartition[gfid - 1]); + + return NV_OK; +} + void kgspNvlogFlushCb(void *pKernelGsp) { if (pKernelGsp != NULL) @@ -2343,7 +2368,7 @@ const char *elfSectionName; } logInitValues[] = { - {"LOGINIT", "INIT", 0x10000, ".fwlogging_init"}, // 64KB for stack traces + {"LOGINIT", "INIT", 0x10000, ".fwlogging_init"}, // 64KB for stack traces #if defined(DEVELOP) || defined(DEBUG) // The interrupt task is in the rm elf, so they share the same logging elf too {"LOGINTR", "INTR", 0x40000, ".fwlogging_rm"}, // 256KB ISR debug log on develop/debug builds @@ -2361,12 +2386,12 @@ NvU8 idx; NvU64 flags = MEMDESC_FLAGS_NONE; - pKernelGsp->pNvlogFlushMtx = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged()); - if (pKernelGsp->pNvlogFlushMtx == NULL) - { - nvStatus = NV_ERR_INSUFFICIENT_RESOURCES; - goto error_cleanup; - } + pKernelGsp->pNvlogFlushMtx = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged()); + if (pKernelGsp->pNvlogFlushMtx == NULL) + { + nvStatus = NV_ERR_INSUFFICIENT_RESOURCES; + goto error_cleanup; + } libosLogCreate(&pKernelGsp->logDecode); @@ -2381,8 +2406,8 @@ // Setup logging memory for each task. NV_ASSERT_OK_OR_GOTO(nvStatus, memdescCreate(&pLog->pTaskLogDescriptor, - pGpu, - logInitValues[idx].size, + pGpu, + logInitValues[idx].size, RM_PAGE_SIZE, NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED, flags), @@ -3033,7 +3058,9 @@ NvBool bSyncNvLog ) { - if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog) + if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog + || pKernelGsp->bHasVgpuLogs + ) { libosExtractLogs(&pKernelGsp->logDecode, bSyncNvLog); @@ -3063,7 +3090,9 @@ NvBool bSyncNvLog ) { - if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog) + if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog + || pKernelGsp->bHasVgpuLogs + ) { if (pKernelGsp->pNvlogFlushMtx != NULL) portSyncMutexAcquire(pKernelGsp->pNvlogFlushMtx); diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/arch/volta/kernel_nvlink_gv100.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/arch/volta/kernel_nvlink_gv100.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/arch/volta/kernel_nvlink_gv100.c 2024-05-12 19:31:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/arch/volta/kernel_nvlink_gv100.c 2024-09-17 16:46:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -341,6 +341,7 @@ portMemSet(pParams, 0, sizeof(*pParams)); pParams->linkMask = pKernelNvlink->enabledLinks; + pParams->bSublinkStateInst = NV_TRUE; status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, NV2080_CTRL_CMD_NVLINK_GET_LINK_AND_CLOCK_INFO, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/common_nvlinkapi.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/common_nvlinkapi.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/common_nvlinkapi.c 2024-05-12 19:31:06.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/common_nvlinkapi.c 2024-09-17 16:46:53.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -722,7 +722,7 @@ pParams->enabledLinkMask = (bIsNvlinkReady) ? pKernelNvlink->enabledLinks : 0x0; pTmpData->nvlinkLinkAndClockInfoParams.linkMask = pParams->enabledLinkMask; - pTmpData->nvlinkLinkAndClockInfoParams.bSublinkStateInst = pParams->bSublinkStateInst; + pTmpData->nvlinkLinkAndClockInfoParams.bSublinkStateInst = NV_TRUE; status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, NV2080_CTRL_CMD_NVLINK_GET_LINK_AND_CLOCK_INFO, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c 2024-05-12 19:31:07.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c 2024-09-17 16:46:53.000000000 +0000 @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -945,6 +945,7 @@ portMemSet(¶ms, 0, sizeof(params)); params.linkMask = pKernelNvlink->enabledLinks; + params.bSublinkStateInst = NV_TRUE; status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, NV2080_CTRL_CMD_NVLINK_GET_LINK_AND_CLOCK_INFO, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c 2024-05-12 19:31:07.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlinkcorelibtrain.c 2024-09-17 16:46:53.000000000 +0000 @@ -53,6 +53,7 @@ static NvBool _knvlinkUpdateSwitchLinkMasksGpuDegraded(OBJGPU *, KernelNvlink *); static void _knvlinkUpdatePeerConfigs(OBJGPU *, KernelNvlink *); static void _knvlinkPrintTopologySummary(OBJGPU *, KernelNvlink *); +static NvU32 _knvlinkGetNumPortEvents(OBJGPU *pGpu, KernelNvlink *pKernelNvlink); #endif @@ -82,6 +83,7 @@ NvBool bNvswitchProxyPresent = NV_FALSE; NvBool bUpdateConnStatus = NV_FALSE; NvBool bCheckDegradedMode = NV_FALSE; + NvBool bForceDiscovery = NV_FALSE; nvlink_conn_info conn_info = {0}; NvU32 linkId; NvU32 numActiveLinksPerIoctrl = 0; @@ -151,6 +153,12 @@ { if (gpuFabricProbeIsSupported(pGpu)) { + NvU32 numPortEvents = _knvlinkGetNumPortEvents(pGpu, pKernelNvlink); + if (pKernelNvlink->numPortEvents < numPortEvents) + { + bForceDiscovery = NV_TRUE; + } + // // If FM doesn't talk to NVLink driver using control calls // (i.e. uses NVLink inband comm instread) such as @@ -158,7 +166,13 @@ // discover remote information explicitly. 
// nvlink_lib_discover_and_get_remote_conn_info( - pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags); + pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, + flags, bForceDiscovery); + + if (bForceDiscovery) + { + pKernelNvlink->numPortEvents = numPortEvents; + } } else { @@ -205,7 +219,7 @@ } nvlink_lib_discover_and_get_remote_conn_info( - pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags); + pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags, NV_FALSE); } // RPC into GSP-RM to update the link connected status only if its required @@ -1344,7 +1358,7 @@ FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks) { nvlink_lib_discover_and_get_remote_conn_info( - pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0); + pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0, NV_FALSE); } FOR_EACH_INDEX_IN_MASK_END; @@ -2485,4 +2499,30 @@ #endif } +static NvU32 +_knvlinkGetNumPortEvents +( + OBJGPU *pGpu, + KernelNvlink *pKernelNvlink +) +{ + NV_STATUS status; + RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); + NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS params = {0}; + + status = pRmApi->Control(pRmApi, + pGpu->hInternalClient, + pGpu->hInternalSubdevice, + NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS, + ¶ms, + sizeof(NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS)); + if (status != NV_OK) + { + // If this call fails, force discovery in knvlinkCoreGetRemoteDeviceInfo + return 0; + } + + return params.portEventCount; +} + #endif diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/rc/kernel_rc_notification.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/rc/kernel_rc_notification.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/rc/kernel_rc_notification.c 2024-05-12 19:31:09.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/rc/kernel_rc_notification.c 2024-09-17 16:46:55.000000000 +0000 @@ -445,12 +445,14 @@ ) { NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE); - NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL); - NV_ASSERT_OK_OR_RETURN( - krcErrorSendEventNotificationsCtxDma_HAL(pGpu, pKernelRc, - pKernelChannel, - scope)); + if (pKernelChannel != NULL) + { + NV_ASSERT_OK_OR_RETURN( + krcErrorSendEventNotificationsCtxDma_HAL(pGpu, pKernelRc, + pKernelChannel, + scope)); + } gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_RC_ERROR, diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_event_kernel.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_event_kernel.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_event_kernel.c 2024-05-12 19:31:11.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/gpu/subdevice/subdevice_ctrl_event_kernel.c 2024-09-17 16:46:57.000000000 +0000 @@ -240,7 +240,6 @@ pMemory->vgpuNsIntr.guestMSIAddr = 0; pMemory->vgpuNsIntr.guestMSIData = 0; pMemory->vgpuNsIntr.guestDomainId = 0; - pMemory->vgpuNsIntr.pVgpuVfioRef = NULL; pMemory->vgpuNsIntr.isSemaMemValidationEnabled = NV_TRUE; return NV_OK; diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/rmapi/client_resource.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/rmapi/client_resource.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/rmapi/client_resource.c 2024-05-12 19:31:20.000000000 
+0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/rmapi/client_resource.c 2024-09-17 16:47:06.000000000 +0000 @@ -2369,6 +2369,7 @@ switch (header.version) { + case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_24: case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_23: case NVPCF_CONTROLLER_STATIC_TABLE_VERSION_22: { @@ -3885,45 +3886,22 @@ } NV_STATUS -cliresCtrlCmdVgpuGetStartData_IMPL +cliresCtrlCmdVgpuVfioNotifyRMStatus_IMPL ( RmClientResource *pRmCliRes, - NV0000_CTRL_VGPU_GET_START_DATA_PARAMS *pVgpuStartParams + NV0000_CTRL_VGPU_VFIO_NOTIFY_RM_STATUS_PARAMS *pVgpuStatusParams ) { - NV_STATUS status = NV_OK; - NvHandle hClient = RES_GET_CLIENT_HANDLE(pRmCliRes); - NvU32 event, eventStatus; - OBJSYS *pSys = SYS_GET_INSTANCE(); - KernelVgpuMgr *pKernelVgpuMgr = SYS_GET_KERNEL_VGPUMGR(pSys); - REQUEST_VGPU_INFO_NODE *pRequestVgpu = NULL; - status = CliGetSystemEventStatus(hClient, &event, &eventStatus); - if (status != NV_OK) - return status; - - if (event != NV0000_NOTIFIERS_VM_START) - return NV_ERR_INVALID_EVENT; + if (osIsVgpuVfioPresent() != NV_OK) + return NV_ERR_NOT_SUPPORTED; - for (pRequestVgpu = listHead(&pKernelVgpuMgr->listRequestVgpuHead); - pRequestVgpu != NULL; - pRequestVgpu = listNext(&pKernelVgpuMgr->listRequestVgpuHead, pRequestVgpu)) - { - if (pRequestVgpu->deviceState == NV_VGPU_DEV_OPENED) - { - portMemCopy(pVgpuStartParams->mdevUuid, VGPU_UUID_SIZE, pRequestVgpu->mdevUuid, VGPU_UUID_SIZE); - portMemCopy(pVgpuStartParams->configParams, VGPU_CONFIG_PARAMS_MAX_LENGTH, pRequestVgpu->configParams, VGPU_CONFIG_PARAMS_MAX_LENGTH); - pVgpuStartParams->gpuPciId = pRequestVgpu->gpuPciId; - pVgpuStartParams->qemuPid = pRequestVgpu->qemuPid; - pVgpuStartParams->vgpuId = pRequestVgpu->vgpuId; - pVgpuStartParams->gpuPciBdf = pRequestVgpu->gpuPciBdf; - return NV_OK; - } - } + osWakeRemoveVgpu(pVgpuStatusParams->gpuId, pVgpuStatusParams->returnStatus); - return NV_ERR_OBJECT_NOT_FOUND; + return NV_OK; } + NV_STATUS cliresCtrlCmdVgpuGetVgpuVersion_IMPL ( diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/rmapi/event.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/rmapi/event.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/rmapi/event.c 2024-05-12 19:31:21.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/rmapi/event.c 2024-09-17 16:47:07.000000000 +0000 @@ -569,7 +569,8 @@ void CliAddSystemEvent( NvU32 event, - NvU32 status + NvU32 status, + NvBool *isEventNotified ) { NvU32 temp; @@ -581,6 +582,9 @@ NV_STATUS rmStatus = NV_OK; Notifier *pNotifier; + if (isEventNotified != NULL) + *isEventNotified = NV_FALSE; + for (ppClient = serverutilGetFirstClientUnderLock(); ppClient; ppClient = serverutilGetNextClientUnderLock(ppClient)) @@ -629,6 +633,8 @@ NV_PRINTF(LEVEL_ERROR, "failed to deliver event 0x%x", event); } + if (isEventNotified != NULL) + *isEventNotified = NV_TRUE; } pEventNotification = pEventNotification->Next; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c 2024-05-12 19:31:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c 2024-09-17 16:47:11.000000000 +0000 @@ -137,6 +137,7 @@ NvU32 swizzId, NvU32 vgpuDeviceInstanceId, NvBool 
bDisableDefaultSmcExecPartRestore, + NvU8 *pVgpuDevName, KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice) { return NV_ERR_NOT_SUPPORTED; @@ -271,13 +272,6 @@ return NV_ERR_NOT_SUPPORTED; } -NV_STATUS -kvgpumgrStart(const NvU8 *pMdevUuid, void *waitQueue, NvS32 *returnStatus, - NvU8 *vmName, NvU32 qemuPid) -{ - return NV_ERR_OBJECT_NOT_FOUND; -} - // // Add vGPU info received on mdev_create sysfs call to REQUEST_VGPU_INFO_NODE // list. REQUEST_VGPU_INFO_NODE is currently used only for vGPU on KVM. @@ -303,6 +297,17 @@ return NV_ERR_OBJECT_NOT_FOUND; } +NV_STATUS kvgpumgrGetAvailableInstances( + NvU32 *availInstances, + OBJGPU *pGpu, + VGPU_TYPE *vgpuTypeInfo, + NvU32 pgpuIndex, + NvU8 devfn +) +{ + return NV_ERR_NOT_SUPPORTED; +} + NV_STATUS kvgpumgrGetHostVgpuDeviceFromMdevUuid(NvU32 gpuPciId, const NvU8 *pMdevUuid, KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice) diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c 2024-05-12 19:31:26.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c 2024-09-17 16:47:11.000000000 +0000 @@ -122,16 +122,6 @@ } NV_STATUS -vgpuconfigapiCtrlCmdVgpuConfigNotifyStart_IMPL -( - VgpuConfigApi *pVgpuConfigApi, - NVA081_CTRL_VGPU_CONFIG_NOTIFY_START_PARAMS *pNotifyParams -) -{ - return NV_ERR_OBJECT_NOT_FOUND; -} - -NV_STATUS vgpuconfigapiCtrlCmdVgpuConfigMdevRegister_IMPL ( VgpuConfigApi *pVgpuConfigApi @@ -240,5 +230,16 @@ ) { return NV_ERR_NOT_SUPPORTED; +} + + +NV_STATUS +vgpuconfigapiCtrlCmdVgpuSetVmName_IMPL +( + VgpuConfigApi *pVgpuConfigApi, + NVA081_CTRL_VGPU_SET_VM_NAME_PARAMS *pParams +) +{ + return NV_ERR_NOT_SUPPORTED; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia-modeset/src/nvkms-headsurface-ioctl.c nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia-modeset/src/nvkms-headsurface-ioctl.c --- nvidia-open-gpu-kernel-modules-535.183.01/src/nvidia-modeset/src/nvkms-headsurface-ioctl.c 2024-05-12 19:32:24.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/src/nvidia-modeset/src/nvkms-headsurface-ioctl.c 2024-09-17 16:48:05.000000000 +0000 @@ -110,7 +110,7 @@ { NVHsChannelEvoRec *pHsChannel; - if (apiHead > ARRAY_LEN(pDispEvo->pHsChannel)) { + if (apiHead >= ARRAY_LEN(pDispEvo->pHsChannel)) { return FALSE; } @@ -206,7 +206,7 @@ NVHsChannelEvoRec *pHsChannel; NVSurfaceEvoRec *pSurfaceEvo = NULL; - if (apiHead > ARRAY_LEN(pDispEvo->pHsChannel)) { + if (apiHead >= ARRAY_LEN(pDispEvo->pHsChannel)) { return FALSE; } diff -Nru nvidia-open-gpu-kernel-modules-535.183.01/version.mk nvidia-open-gpu-kernel-modules-535.216.01/version.mk --- nvidia-open-gpu-kernel-modules-535.183.01/version.mk 2024-05-12 20:29:36.000000000 +0000 +++ nvidia-open-gpu-kernel-modules-535.216.01/version.mk 2024-09-17 18:01:14.000000000 +0000 @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 535.183.01 +NVIDIA_VERSION = 535.216.01 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))