Version in base suite: 4.17.5+23-ga4e5191dc0-1+deb12u1
Base version: xen_4.17.5+23-ga4e5191dc0-1+deb12u1
Target version: xen_4.17.5+72-g01140da4e8-1
Base file: /srv/ftp-master.debian.org/ftp/pool/main/x/xen/xen_4.17.5+23-ga4e5191dc0-1+deb12u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/x/xen/xen_4.17.5+72-g01140da4e8-1.dsc

 .cirrus.yml | 4
 debian/changelog | 36
 debian/patches/0023-xen-arch-x86-make-objdump-output-user-locale-agnosti.patch | 4
 debian/patches/0025-x86-intel-Fix-PERF_GLOBAL-fixup-when-virtualised.patch | 112 +++
 debian/patches/series | 1
 debian/salsa-ci.yml | 44 -
 docs/misc/xen-command-line.pandoc | 11
 tools/libs/light/libxl_cpuid.c | 1
 tools/libs/light/libxl_pci.c | 9
 tools/libs/light/libxl_utils.c | 4
 tools/misc/mktarball | 2
 tools/misc/xen-cpuid.c | 13
 tools/tests/x86_emulator/x86-emulate.h | 6
 xen/arch/arm/p2m.c | 32
 xen/arch/x86/Kconfig | 5
 xen/arch/x86/Makefile | 7
 xen/arch/x86/acpi/cpu_idle.c | 134 +--
 xen/arch/x86/acpi/power.c | 2
 xen/arch/x86/acpi/wakeup_prot.S | 2
 xen/arch/x86/alternative.c | 55 +
 xen/arch/x86/apic.c | 3
 xen/arch/x86/arch.mk | 3
 xen/arch/x86/bhb-thunk.S | 19
 xen/arch/x86/clear_page.S | 3
 xen/arch/x86/copy_page.S | 3
 xen/arch/x86/cpu-policy.c | 122 +--
 xen/arch/x86/cpu/common.c | 4
 xen/arch/x86/cpu/intel.c | 39 +
 xen/arch/x86/cpu/microcode/amd-patch-digests.c | 370 ++++++++++
 xen/arch/x86/cpu/microcode/amd.c | 191 +++++
 xen/arch/x86/cpu/microcode/core.c | 21
 xen/arch/x86/cpu/microcode/private.h | 4
 xen/arch/x86/cpu/mwait-idle.c | 20
 xen/arch/x86/efi/check.c | 3
 xen/arch/x86/extable.c | 11
 xen/arch/x86/guest/xen/Makefile | 2
 xen/arch/x86/guest/xen/hypercall.S | 52 +
 xen/arch/x86/guest/xen/hypercall_page.S | 78 --
 xen/arch/x86/guest/xen/xen.c | 48 +
 xen/arch/x86/hpet.c | 2
 xen/arch/x86/hvm/svm/entry.S | 3
 xen/arch/x86/hvm/viridian/synic.c | 4
 xen/arch/x86/hvm/viridian/time.c | 8
 xen/arch/x86/hvm/viridian/viridian.c | 27
 xen/arch/x86/include/asm/alternative.h | 11
 xen/arch/x86/include/asm/asm-defns.h | 6
 xen/arch/x86/include/asm/cpufeature.h | 12
 xen/arch/x86/include/asm/cpufeatures.h | 5
 xen/arch/x86/include/asm/guest/xen-hcall.h | 24
 xen/arch/x86/include/asm/hardirq.h | 21
 xen/arch/x86/include/asm/hvm/viridian.h | 1
 xen/arch/x86/include/asm/processor.h | 17
 xen/arch/x86/include/asm/softirq.h | 48 +
 xen/arch/x86/include/asm/spec_ctrl.h | 39 -
 xen/arch/x86/indirect-thunk.S | 37 +
 xen/arch/x86/pv/emul-priv-op.c | 5
 xen/arch/x86/pv/gpr_switch.S | 4
 xen/arch/x86/spec_ctrl.c | 203 +++++
 xen/arch/x86/x86_64/compat/entry.S | 6
 xen/arch/x86/x86_64/entry.S | 2
 xen/arch/x86/x86_emulate/x86_emulate.c | 89 +-
 xen/arch/x86/xen.lds.S | 1
 xen/common/Kconfig | 11
 xen/common/softirq.c | 8
 xen/drivers/passthrough/pci.c | 20
 xen/include/public/arch-x86/cpufeatureset.h | 8
 xen/include/xen/cpuidle.h | 2
 xen/include/xen/irq_cpustat.h | 1
 xen/include/xen/lib/x86/cpu-policy.h | 10
 xen/include/xen/sha2.h | 15
 xen/include/xen/softirq.h | 16
 xen/lib/Makefile | 1
 xen/lib/sha2-256.c | 216 +++++
 xen/lib/x86/cpuid.c | 2
 xen/tools/gen-cpuid.py | 2
 75 files changed, 1918 insertions(+), 449 deletions(-)

gpgv: Signature made Tue Dec 2 12:01:35 2025 UTC
gpgv: using RSA key B6E62F3D12AC38495C0DA90510C293B6C37C4E36
gpgv: Note: signatures using the SHA1 algorithm are rejected
gpgv: Can't check signature: Bad public key
dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmplzmb8js5/xen_4.17.5+72-g01140da4e8-1.dsc: no acceptable signature found

diff -Nru xen-4.17.5+23-ga4e5191dc0/.cirrus.yml xen-4.17.5+72-g01140da4e8/.cirrus.yml
--- xen-4.17.5+23-ga4e5191dc0/.cirrus.yml	2024-11-12 13:09:34.000000000 +0000
+++ xen-4.17.5+72-g01140da4e8/.cirrus.yml	2025-11-13 16:58:23.000000000 +0000
@@ -17,11 +17,11 @@
 task:
   name: 'FreeBSD 13'
   freebsd_instance:
-    image_family: freebsd-13-3
+    image_family: freebsd-13-5
   << : *FREEBSD_TEMPLATE
 
 task:
   name: 'FreeBSD 14'
   freebsd_instance:
-    image_family: freebsd-14-1
+    image_family: freebsd-14-2
   << : *FREEBSD_TEMPLATE
diff -Nru xen-4.17.5+23-ga4e5191dc0/debian/changelog xen-4.17.5+72-g01140da4e8/debian/changelog
--- xen-4.17.5+23-ga4e5191dc0/debian/changelog	2025-02-18 23:00:27.000000000 +0000
+++ xen-4.17.5+72-g01140da4e8/debian/changelog	2025-11-29 13:52:25.000000000 +0000
@@ -1,3 +1,39 @@
+xen (4.17.5+72-g01140da4e8-1) bookworm-security; urgency=medium
+
+  Significant changes:
+  * Update to new upstream version 4.17.5+72-g01140da4e8, which also contains
+    security fixes for the following issues:
+    (Closes: #1105193) (Closes: #1120075)
+    - deadlock potential with VT-d and legacy PCI device pass-through
+      XSA-467 CVE-2025-1713
+    - x86: Indirect Target Selection
+      XSA-469 CVE-2024-28956
+    - x86: Incorrect stubs exception handling for flags recovery
+      XSA-470 CVE-2025-27465
+    - x86: Transitive Scheduler Attacks
+      XSA-471 CVE-2024-36350 CVE-2024-36357
+    - Multiple vulnerabilities in the Viridian interface
+      XSA-472 CVE-2025-27466 CVE-2025-58142 CVE-2025-58143
+    - Arm issues with page refcounting
+      XSA-473 CVE-2025-58144 CVE-2025-58145
+    - x86: Incorrect input sanitisation in Viridian hypercalls
+      XSA-475 CVE-2025-58147 CVE-2025-58148
+    - Incorrect removal of permissions on PCI device unplug
+      XSA-476 CVE-2025-58149
+  * Note that the following XSAs are not listed, because...
+    - XSA-468 applies to Windows PV drivers
+    - XSA-474 applies to XAPI, which is not included in Debian
+
+  Packaging minor fixes and improvements:
+  * debian/salsa-ci.yml: adjust for new salsa-ci pipeline
+
+  Additional changes for 4.17 that were not backported upstream:
+  * Cherry-pick dd05d265b8 ("x86/intel: Fix PERF_GLOBAL fixup when
+    virtualised") to fix a boot loop when using Xen under nested
+    virtualization (Closes: #1105222)
+
+ -- Hans van Kranenburg  Sat, 29 Nov 2025 14:52:25 +0100
+
 xen (4.17.5+23-ga4e5191dc0-1+deb12u1) bookworm; urgency=medium
 
   * Ignore lintian error not relevant for bookworm in salsa-ci.
diff -Nru xen-4.17.5+23-ga4e5191dc0/debian/patches/0023-xen-arch-x86-make-objdump-output-user-locale-agnosti.patch xen-4.17.5+72-g01140da4e8/debian/patches/0023-xen-arch-x86-make-objdump-output-user-locale-agnosti.patch --- xen-4.17.5+23-ga4e5191dc0/debian/patches/0023-xen-arch-x86-make-objdump-output-user-locale-agnosti.patch 2025-02-18 23:00:27.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/debian/patches/0023-xen-arch-x86-make-objdump-output-user-locale-agnosti.patch 2025-11-29 13:52:25.000000000 +0000 @@ -13,10 +13,10 @@ 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk -index 227d439..3b10eb3 100644 +index 379317c..f50c201 100644 --- a/xen/arch/x86/arch.mk +++ b/xen/arch/x86/arch.mk -@@ -108,7 +108,7 @@ endif +@@ -111,7 +111,7 @@ endif ifeq ($(XEN_BUILD_PE),y) # Check if the linker produces fixups in PE by default diff -Nru xen-4.17.5+23-ga4e5191dc0/debian/patches/0025-x86-intel-Fix-PERF_GLOBAL-fixup-when-virtualised.patch xen-4.17.5+72-g01140da4e8/debian/patches/0025-x86-intel-Fix-PERF_GLOBAL-fixup-when-virtualised.patch --- xen-4.17.5+23-ga4e5191dc0/debian/patches/0025-x86-intel-Fix-PERF_GLOBAL-fixup-when-virtualised.patch 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/debian/patches/0025-x86-intel-Fix-PERF_GLOBAL-fixup-when-virtualised.patch 2025-11-29 13:52:25.000000000 +0000 @@ -0,0 +1,112 @@ +From: Andrew Cooper +Date: Tue, 21 Jan 2025 16:56:26 +0000 +Subject: x86/intel: Fix PERF_GLOBAL fixup when virtualised +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +Logic using performance counters needs to look at +MSR_MISC_ENABLE.PERF_AVAILABLE before touching any other resources. + +When virtualised under ESX, Xen dies with a #GP fault trying to read +MSR_CORE_PERF_GLOBAL_CTRL. + +Factor this logic out into a separate function (it's already too squashed to +the RHS), and insert a check of MSR_MISC_ENABLE.PERF_AVAILABLE. + +This also avoids setting X86_FEATURE_ARCH_PERFMON if MSR_MISC_ENABLE says that +PERF is unavailable, although oprofile (the only consumer of this flag) +cross-checks too. + +Fixes: 6bdb965178bb ("x86/intel: ensure Global Performance Counter Control is setup correctly") +Reported-by: Jonathan Katz +Link: https://xcp-ng.org/forum/topic/10286/nesting-xcp-ng-on-esx-8 +Signed-off-by: Andrew Cooper +Reviewed-by: Roger Pau Monné +Tested-by: Jonathan Katz +(cherry picked from commit dd05d265b8abda4cc7206b29cd71b77fb46658bf) +--- + xen/arch/x86/cpu/intel.c | 64 ++++++++++++++++++++++++++++-------------------- + 1 file changed, 37 insertions(+), 27 deletions(-) + +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index af4a52e..0f678a4 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -565,39 +565,49 @@ static void intel_log_freq(const struct cpuinfo_x86 *c) + printk("%u MHz\n", (factor * max_ratio + 50) / 100); + } + ++static void init_intel_perf(struct cpuinfo_x86 *c) ++{ ++ uint64_t val; ++ unsigned int eax, ver, nr_cnt; ++ ++ if ( c->cpuid_level <= 9 || ++ ({ rdmsrl(MSR_IA32_MISC_ENABLE, val); ++ !(val & MSR_IA32_MISC_ENABLE_PERF_AVAIL); }) ) ++ return; ++ ++ eax = cpuid_eax(10); ++ ver = eax & 0xff; ++ nr_cnt = (eax >> 8) & 0xff; ++ ++ if ( ver && nr_cnt > 1 && nr_cnt <= 32 ) ++ { ++ unsigned int cnt_mask = (1UL << nr_cnt) - 1; ++ ++ /* ++ * On (some?) Sapphire/Emerald Rapids platforms each package-BSP ++ * starts with all the enable bits for the general-purpose PMCs ++ * cleared. 
Adjust so counters can be enabled from EVNTSEL. ++ */ ++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, val); ++ ++ if ( (val & cnt_mask) != cnt_mask ) ++ { ++ printk("FIRMWARE BUG: CPU%u invalid PERF_GLOBAL_CTRL: %#"PRIx64" adjusting to %#"PRIx64"\n", ++ smp_processor_id(), val, val | cnt_mask); ++ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, val | cnt_mask); ++ } ++ ++ __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); ++ } ++} ++ + static void cf_check init_intel(struct cpuinfo_x86 *c) + { + /* Detect the extended topology information if available */ + detect_extended_topology(c); + + init_intel_cacheinfo(c); +- if (c->cpuid_level > 9) { +- unsigned eax = cpuid_eax(10); +- unsigned int cnt = (eax >> 8) & 0xff; +- +- /* Check for version and the number of counters */ +- if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) { +- uint64_t global_ctrl; +- unsigned int cnt_mask = (1UL << cnt) - 1; +- +- /* +- * On (some?) Sapphire/Emerald Rapids platforms each +- * package-BSP starts with all the enable bits for the +- * general-purpose PMCs cleared. Adjust so counters +- * can be enabled from EVNTSEL. +- */ +- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); +- if ((global_ctrl & cnt_mask) != cnt_mask) { +- printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#" +- PRIx64 " adjusting to %#" PRIx64 "\n", +- smp_processor_id(), global_ctrl, +- global_ctrl | cnt_mask); +- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, +- global_ctrl | cnt_mask); +- } +- __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); +- } +- } ++ init_intel_perf(c); + + if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) + { diff -Nru xen-4.17.5+23-ga4e5191dc0/debian/patches/series xen-4.17.5+72-g01140da4e8/debian/patches/series --- xen-4.17.5+23-ga4e5191dc0/debian/patches/series 2025-02-18 23:00:27.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/debian/patches/series 2025-11-29 13:52:25.000000000 +0000 @@ -22,3 +22,4 @@ 0022-give-meaningful-error-message-if-qemu-device-model-i.patch 0023-xen-arch-x86-make-objdump-output-user-locale-agnosti.patch 0024-tools-xg-increase-LZMA_BLOCK_SIZE-for-uncompressing-.patch +0025-x86-intel-Fix-PERF_GLOBAL-fixup-when-virtualised.patch diff -Nru xen-4.17.5+23-ga4e5191dc0/debian/salsa-ci.yml xen-4.17.5+72-g01140da4e8/debian/salsa-ci.yml --- xen-4.17.5+23-ga4e5191dc0/debian/salsa-ci.yml 2025-02-18 23:00:27.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/debian/salsa-ci.yml 2025-11-29 13:52:25.000000000 +0000 @@ -2,19 +2,26 @@ include: - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/recipes/debian.yml -extract-source: - before_script: - # convert into a gbp patches-unapplied branch using git-debrebase - - apt-get update - - apt-get --yes install git-debrebase - # git-debrebase needs git user setup - - git config user.email "salsa-ci@invalid.invalid" - - git config user.name "salsa-ci" - - git debrebase --force - - git debrebase convert-to-gbp - # work around gbp export-orig replacing variables in .gitarchive-info - - test -d .git/info || mkdir .git/info - - echo ".gitarchive-info -export-subst" >> .git/info/attributes +# for documentation about the templating magic (yaml anchors) see here: +# https://docs.gitlab.com/ci/yaml/yaml_optimization/ + +# run git-debrebase make-patches +.git-debrebase-prepare: &git-debrebase-prepare + - apt-get update + - apt-get --yes install git-debrebase + # git-debrebase needs git user setup + - git config user.email "salsa-ci@invalid.invalid" + - git config user.name "salsa-ci" + - git debrebase --force + - git debrebase make-patches + +.build-definition: &build-definition + extends: .build-definition-common + 
before_script: *git-debrebase-prepare + +build source: + extends: .build-source-only + before_script: *git-debrebase-prepare variables: # disable shallow cloning of git repository. This is needed for git-debrebase @@ -30,16 +37,5 @@ # disable blhc. SALSA_CI_DISABLE_BLHC: 1 - SALSA_CI_REPROTEST_ENABLE_DIFFOSCOPE: 1 - # We do not provide packages for i386 SALSA_CI_DISABLE_BUILD_PACKAGE_I386: 1 - - # cross building xen currently fails for the following reason: - # The following packages have unmet dependencies: - # ocaml:arm64 : Depends: gcc:arm64 but it is not installable - # Depends: binutils:arm64 but it is not installable - # - # It can be tried again when there are better chances of it building - # successfully. - SALSA_CI_DISABLE_CROSSBUILD_ARM64: 1 diff -Nru xen-4.17.5+23-ga4e5191dc0/docs/misc/xen-command-line.pandoc xen-4.17.5+72-g01140da4e8/docs/misc/xen-command-line.pandoc --- xen-4.17.5+23-ga4e5191dc0/docs/misc/xen-command-line.pandoc 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/docs/misc/xen-command-line.pandoc 2025-11-13 16:58:23.000000000 +0000 @@ -2588,10 +2588,10 @@ Alternatively, selecting `tsx=1` will re-enable TSX at the users own risk. ### ucode -> `= List of [ | scan=, nmi=, allow-same= ]` +> `= List of [ | scan=, nmi=, allow-same=, digest-check= ]` Applicability: x86 - Default: `nmi` + Default: `nmi,digest-check` Controls for CPU microcode loading. For early loading, this parameter can specify how and where to find the microcode update blob. For late loading, @@ -2625,6 +2625,13 @@ of the same version, and this allows for easy testing of the late microcode loading path. +The `digest-check=` option is active by default and controls whether to +perform additional authenticity checks. The Entrysign vulnerability (AMD +SB-7033, CVE-2024-36347) on Zen1-5 processors allows forging the signature on +arbitrary microcode such that it is accepted by the CPU. Xen contains a table +of digests of microcode patches with known-good provenance, and will block +loading of patches that do not match. 
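The shape of such an allow-list check is simple enough to sketch in a few
lines of C (a minimal sketch only; `sha2_256()`, the table name and its
layout stand in for the `xen/sha2.h` helpers and the generated
`amd-patch-digests.c` table that the hunks further below add):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    #define DIGEST_SIZE 32                 /* SHA2-256 */

    struct patch_digest {
        uint32_t patch_id;
        uint8_t digest[DIGEST_SIZE];
    };

    /* Table of known-good patches, sorted by patch_id for bsearch(). */
    extern const struct patch_digest patch_digests[];
    extern const size_t nr_patch_digests;

    /* Assumed helper: SHA2-256 of buf[0..len) into out[]. */
    extern void sha2_256(uint8_t out[DIGEST_SIZE], const void *buf, size_t len);

    static int cmp_patch_id(const void *key, const void *elem)
    {
        uint32_t id = *(const uint32_t *)key;
        const struct patch_digest *pd = elem;

        return id < pd->patch_id ? -1 : id > pd->patch_id;
    }

    /* True iff the blob's digest matches a known-good entry. */
    bool digest_ok(uint32_t patch_id, const void *patch, size_t len)
    {
        uint8_t digest[DIGEST_SIZE];
        const struct patch_digest *pd =
            bsearch(&patch_id, patch_digests, nr_patch_digests,
                    sizeof(*pd), cmp_patch_id);

        if ( !pd )
            return false;          /* Unknown provenance: refuse to load. */

        sha2_256(digest, patch, len);
        return memcmp(digest, pd->digest, DIGEST_SIZE) == 0;
    }

An unknown patch_id is treated as a failure rather than a pass; that
fail-closed choice is what makes the `digest-check=0` escape hatch necessary
for microcode newer than the built-in table.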
+ ### unrestricted_guest (Intel) > `= ` diff -Nru xen-4.17.5+23-ga4e5191dc0/tools/libs/light/libxl_cpuid.c xen-4.17.5+72-g01140da4e8/tools/libs/light/libxl_cpuid.c --- xen-4.17.5+23-ga4e5191dc0/tools/libs/light/libxl_cpuid.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/tools/libs/light/libxl_cpuid.c 2025-11-13 16:58:23.000000000 +0000 @@ -344,6 +344,7 @@ CPUID_ENTRY(0x00000007, 1, CPUID_REG_EDX), MSR_ENTRY(0x10a, CPUID_REG_EAX), MSR_ENTRY(0x10a, CPUID_REG_EDX), + CPUID_ENTRY(0x80000021, NA, CPUID_REG_ECX), #undef MSR_ENTRY #undef CPUID_ENTRY }; diff -Nru xen-4.17.5+23-ga4e5191dc0/tools/libs/light/libxl_pci.c xen-4.17.5+72-g01140da4e8/tools/libs/light/libxl_pci.c --- xen-4.17.5+23-ga4e5191dc0/tools/libs/light/libxl_pci.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/tools/libs/light/libxl_pci.c 2025-11-13 16:58:23.000000000 +0000 @@ -1995,7 +1995,7 @@ char *sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pci->domain, pci->bus, pci->dev, pci->func); FILE *f = fopen(sysfs_path, "r"); - unsigned int start = 0, end = 0, flags = 0, size = 0; + uint64_t start = 0, end = 0, flags = 0, size = 0; int irq = 0; int i; @@ -2004,7 +2004,8 @@ goto skip1; } for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) { - if (fscanf(f, "0x%x 0x%x 0x%x\n", &start, &end, &flags) != 3) + if (fscanf(f, "0x%"SCNx64" 0x%"SCNx64" 0x%"SCNx64"\n", + &start, &end, &flags) != 3) continue; size = end - start + 1; if (start) { @@ -2012,7 +2013,7 @@ rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0); if (rc < 0) LOGED(ERROR, domainid, - "xc_domain_ioport_permission error 0x%x/0x%x", + "xc_domain_ioport_permission error %#"PRIx64"/%#"PRIx64, start, size); } else { @@ -2020,7 +2021,7 @@ (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0); if (rc < 0) LOGED(ERROR, domainid, - "xc_domain_iomem_permission error 0x%x/0x%x", + "xc_domain_iomem_permission error %#"PRIx64"/%#"PRIx64, start, size); } diff -Nru xen-4.17.5+23-ga4e5191dc0/tools/libs/light/libxl_utils.c xen-4.17.5+72-g01140da4e8/tools/libs/light/libxl_utils.c --- xen-4.17.5+23-ga4e5191dc0/tools/libs/light/libxl_utils.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/tools/libs/light/libxl_utils.c 2025-11-13 16:58:23.000000000 +0000 @@ -45,7 +45,7 @@ char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid) { unsigned int len; - char path[strlen("/local/domain") + 12]; + char path[sizeof("/local/domain") + 11]; char *s; snprintf(path, sizeof(path), "/local/domain/%d/name", domid); @@ -141,7 +141,7 @@ char *libxl_cpupoolid_to_name(libxl_ctx *ctx, uint32_t poolid) { unsigned int len; - char path[strlen("/local/pool") + 12]; + char path[sizeof("/local/pool") + 11]; char *s; snprintf(path, sizeof(path), "/local/pool/%d/name", poolid); diff -Nru xen-4.17.5+23-ga4e5191dc0/tools/misc/mktarball xen-4.17.5+72-g01140da4e8/tools/misc/mktarball --- xen-4.17.5+23-ga4e5191dc0/tools/misc/mktarball 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/tools/misc/mktarball 2025-11-13 16:58:23.000000000 +0000 @@ -49,6 +49,6 @@ git_archive_into $xen_root/extras/mini-os-remote $tdir/xen-$desc/extras/mini-os -GZIP=-9v tar cz -f $xen_root/dist/xen-$desc.tar.gz -C $tdir xen-$desc +GZIP=-9v tar cz -f $xen_root/dist/xen-$desc.tar.gz --owner 0 --group 0 -C $tdir xen-$desc echo "Source tarball in $xen_root/dist/xen-$desc.tar.gz" diff -Nru xen-4.17.5+23-ga4e5191dc0/tools/misc/xen-cpuid.c xen-4.17.5+72-g01140da4e8/tools/misc/xen-cpuid.c --- xen-4.17.5+23-ga4e5191dc0/tools/misc/xen-cpuid.c 2024-11-12 13:09:34.000000000 +0000 
+++ xen-4.17.5+72-g01140da4e8/tools/misc/xen-cpuid.c 2025-11-13 16:58:23.000000000 +0000 @@ -194,11 +194,13 @@ static const char *const str_e21a[32] = { [ 2] = "lfence+", + /* 4 */ [ 5] = "verw-clear", [ 6] = "nscb", [ 8] = "auto-ibrs", /* 26 */ [27] = "sbpb", [28] = "ibpb-brtype", [29] = "srso-no", + [30] = "srso-us-no", [31] = "srso-msr-fix", }; static const char *const str_7b1[32] = @@ -222,7 +224,7 @@ [ 4] = "bhi-ctrl", [ 5] = "mcdt-no", }; -static const char *const str_m10Al[32] = +static const char *const str_m10Al[64] = { [ 0] = "rdcl-no", [ 1] = "eibrs", [ 2] = "rsba", [ 3] = "skip-l1dfl", @@ -239,10 +241,14 @@ [24] = "pbrsb-no", [25] = "gds-ctrl", [26] = "gds-no", [27] = "rfds-no", [28] = "rfds-clear", + + [62] = "its-no", }; -static const char *const str_m10Ah[32] = +static const char *const str_e21c[32] = { + /* 0 */ [ 1] = "tsa-sq-no", + [ 2] = "tsa-l1-no", }; static const struct { @@ -268,7 +274,8 @@ { "CPUID 0x00000007:1.ecx", "7c1", str_7c1 }, { "CPUID 0x00000007:1.edx", "7d1", str_7d1 }, { "MSR_ARCH_CAPS.lo", "m10Al", str_m10Al }, - { "MSR_ARCH_CAPS.hi", "m10Ah", str_m10Ah }, + { "MSR_ARCH_CAPS.hi", "m10Ah", str_m10Al + 32 }, + { "CPUID 0x80000021.ecx", "e21c", str_e21c }, }; #define COL_ALIGN "24" diff -Nru xen-4.17.5+23-ga4e5191dc0/tools/tests/x86_emulator/x86-emulate.h xen-4.17.5+72-g01140da4e8/tools/tests/x86_emulator/x86-emulate.h --- xen-4.17.5+23-ga4e5191dc0/tools/tests/x86_emulator/x86-emulate.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/tools/tests/x86_emulator/x86-emulate.h 2025-11-13 16:58:23.000000000 +0000 @@ -67,6 +67,12 @@ #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63)) +static inline void *place_ret(void *ptr) +{ + *(uint8_t *)ptr = 0xc3; + return ptr + 1; +} + extern uint32_t mxcsr_mask; extern struct cpu_policy cp; diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/arm/p2m.c xen-4.17.5+72-g01140da4e8/xen/arch/arm/p2m.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/arm/p2m.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/arm/p2m.c 2025-11-13 16:58:23.000000000 +0000 @@ -597,18 +597,22 @@ struct page_info *p2m_get_page_from_gfn(struct domain *d, gfn_t gfn, p2m_type_t *t) { + struct p2m_domain *p2m = p2m_get_hostp2m(d); struct page_info *page; p2m_type_t p2mt; - mfn_t mfn = p2m_lookup(d, gfn, &p2mt); + mfn_t mfn; + + p2m_read_lock(p2m); + mfn = p2m_get_entry(p2m, gfn, &p2mt, NULL, NULL, NULL); if ( t ) *t = p2mt; - if ( !p2m_is_any_ram(p2mt) ) - return NULL; - - if ( !mfn_valid(mfn) ) + if ( !p2m_is_any_ram(p2mt) || !mfn_valid(mfn) ) + { + p2m_read_unlock(p2m); return NULL; + } page = mfn_to_page(mfn); @@ -618,12 +622,22 @@ */ if ( p2m_is_foreign(p2mt) ) { - struct domain *fdom = page_get_owner_and_reference(page); - ASSERT(fdom != NULL); - ASSERT(fdom != d); - return page; + const struct domain *fdom = page_get_owner_and_reference(page); + + p2m_read_unlock(p2m); + + if ( fdom ) + { + if ( fdom != d ) + return page; + ASSERT_UNREACHABLE(); + put_page(page); + } + return NULL; } + p2m_read_unlock(p2m); + return get_page(page, d) ? 
page : NULL;
 }
diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/Kconfig xen-4.17.5+72-g01140da4e8/xen/arch/x86/Kconfig
--- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/Kconfig	2024-11-12 13:09:34.000000000 +0000
+++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/Kconfig	2025-11-13 16:58:23.000000000 +0000
@@ -35,9 +35,14 @@
 	default "arch/x86/configs/x86_64_defconfig"
 
 config CC_HAS_INDIRECT_THUNK
+	# GCC >= 8 or Clang >= 6
 	def_bool $(cc-option,-mindirect-branch-register) || \
 	         $(cc-option,-mretpoline-external-thunk)
 
+config CC_HAS_RETURN_THUNK
+	# GCC >= 8 or Clang >= 15
+	def_bool $(cc-option,-mfunction-return=thunk-extern)
+
 config HAS_AS_CET_SS
 	# binutils >= 2.29 or LLVM >= 6
 	def_bool $(as-instr,wrssq %rax$(comma)0;setssbsy)
diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/Makefile xen-4.17.5+72-g01140da4e8/xen/arch/x86/Makefile
--- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/Makefile	2024-11-12 13:09:34.000000000 +0000
+++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/Makefile	2025-11-13 16:58:23.000000000 +0000
@@ -10,9 +10,7 @@
 obj-$(CONFIG_PV) += pv/
 obj-y += x86_64/
 
-alternative-y := alternative.init.o
-alternative-$(CONFIG_LIVEPATCH) :=
-obj-bin-y += $(alternative-y)
+obj-y += alternative.o
 obj-y += apic.o
 obj-y += bhb-thunk.o
 obj-y += bitops.o
@@ -40,10 +38,11 @@
 obj-y += i387.o
 obj-y += i8259.o
 obj-y += io_apic.o
-obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o
+obj-$(CONFIG_LIVEPATCH) += livepatch.o
 obj-y += msi.o
 obj-y += msr.o
 obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o
+obj-$(CONFIG_RETURN_THUNK) += indirect-thunk.o
 obj-$(CONFIG_PV) += ioport_emulate.o
 obj-y += irq.o
 obj-$(CONFIG_KEXEC) += machine_kexec.o
diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/acpi/cpu_idle.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/acpi/cpu_idle.c
--- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/acpi/cpu_idle.c	2024-11-12 13:09:34.000000000 +0000
+++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/acpi/cpu_idle.c	2025-11-13 16:58:23.000000000 +0000
@@ -59,6 +59,33 @@
 
 /*#define DEBUG_PM_CX*/
 
+static always_inline void monitor(
+    const void *addr, unsigned int ecx, unsigned int edx)
+{
+    alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR,
+                      [addr] "a" (addr));
+
+    /*
+     * The memory clobber is a compiler barrier.  Subsequent reads from the
+     * monitored cacheline must not be reordered over MONITOR.
+     */
+    asm volatile ( ".byte 0x0f, 0x01, 0xc8" /* monitor */
+                   :: "a" (addr), "c" (ecx), "d" (edx) : "memory" );
+}
+
+static always_inline void mwait(unsigned int eax, unsigned int ecx)
+{
+    asm volatile ( ".byte 0x0f, 0x01, 0xc9" /* mwait */
+                   :: "a" (eax), "c" (ecx) );
+}
+
+static always_inline void sti_mwait_cli(unsigned int eax, unsigned int ecx)
+{
+    /* STI shadow covers MWAIT. */
+    asm volatile ( "sti; .byte 0x0f, 0x01, 0xc9;" /* mwait */ " cli"
+                   :: "a" (eax), "c" (ecx) );
+}
+
 #define GET_HW_RES_IN_NS(msr, val) \
     do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
 #define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val)
@@ -432,72 +459,59 @@
 }
 __initcall(cpu_idle_key_init);
 
-/*
- * The bit is set iff cpu use monitor/mwait to enter C state
- * with this flag set, CPU can be waken up from C state
- * by writing to specific memory address, instead of sending an IPI.
- */
-static cpumask_t cpuidle_mwait_flags;
-
-void cpuidle_wakeup_mwait(cpumask_t *mask)
+void mwait_idle_with_hints(unsigned int eax, unsigned int ecx)
 {
-    cpumask_t target;
-    unsigned int cpu;
-
-    cpumask_and(&target, mask, &cpuidle_mwait_flags);
-
-    /* CPU is MWAITing on the cpuidle_mwait_wakeup flag.
*/ - for_each_cpu(cpu, &target) - mwait_wakeup(cpu) = 0; + unsigned int cpu = smp_processor_id(); + struct cpu_info *info = get_cpu_info(); + irq_cpustat_t *stat = &irq_stat[cpu]; + const unsigned int *this_softirq_pending = &stat->__softirq_pending; - cpumask_andnot(mask, mask, &target); -} + /* + * Heuristic: if we're definitely not going to idle, bail early as the + * speculative safety can be expensive. This is a performance + * consideration not a correctness issue. + */ + if ( *this_softirq_pending ) + return; -bool arch_skip_send_event_check(unsigned int cpu) -{ /* - * This relies on softirq_pending() and mwait_wakeup() to access data - * on the same cache line. + * By setting in_mwait, we promise to other CPUs that we'll notice changes + * to __softirq_pending without being sent an IPI. We achieve this by + * either not going to sleep, or by having hardware notice on our behalf. + * + * Some errata exist where MONITOR doesn't work properly, and the + * workaround is to force the use of an IPI. Cause this to happen by + * simply not advertising ourselves as being in_mwait. */ - smp_mb(); - return !!cpumask_test_cpu(cpu, &cpuidle_mwait_flags); -} + alternative_io("movb $1, %[in_mwait]", + "", X86_BUG_MONITOR, + [in_mwait] "=m" (stat->in_mwait)); -void mwait_idle_with_hints(unsigned int eax, unsigned int ecx) -{ - unsigned int cpu = smp_processor_id(); - s_time_t expires = per_cpu(timer_deadline, cpu); - const void *monitor_addr = &mwait_wakeup(cpu); + /* + * On AMD systems, side effects from VERW cancel MONITOR, causing MWAIT to + * wake up immediately. Therefore, VERW must come ahead of MONITOR. + */ + __spec_ctrl_enter_idle_verw(info); - if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) ) - { - mb(); - clflush(monitor_addr); - mb(); - } + monitor(this_softirq_pending, 0, 0); - __monitor(monitor_addr, 0, 0); - smp_mb(); + ASSERT(!local_irq_is_enabled()); - /* - * Timer deadline passing is the event on which we will be woken via - * cpuidle_mwait_wakeup. So check it now that the location is armed. - */ - if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) ) + if ( !*this_softirq_pending ) { - struct cpu_info *info = get_cpu_info(); + __spec_ctrl_enter_idle(info, false /* VERW handled above */); - cpumask_set_cpu(cpu, &cpuidle_mwait_flags); + if ( ecx & MWAIT_ECX_INTERRUPT_BREAK ) + mwait(eax, ecx); + else + sti_mwait_cli(eax, ecx); - spec_ctrl_enter_idle(info); - __mwait(eax, ecx); spec_ctrl_exit_idle(info); - - cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); } - if ( expires <= NOW() && expires > 0 ) - raise_softirq(TIMER_SOFTIRQ); + alternative_io("movb $0, %[in_mwait]", + "", X86_BUG_MONITOR, + [in_mwait] "=m" (stat->in_mwait)); } static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) @@ -898,7 +912,7 @@ if ( cx->entry_method == ACPI_CSTATE_EM_FFH ) { - void *mwait_ptr = &mwait_wakeup(smp_processor_id()); + void *mwait_ptr = &softirq_pending(smp_processor_id()); /* * Cache must be flushed as the last operation before sleeping. @@ -910,20 +924,8 @@ while ( 1 ) { - /* - * 1. The CLFLUSH is a workaround for erratum AAI65 for - * the Xeon 7400 series. - * 2. The WBINVD is insufficient due to the spurious-wakeup - * case where we return around the loop. - * 3. Unlike wbinvd, clflush is a light weight but not serializing - * instruction, hence memory fence is necessary to make sure all - * load/store visible before flush cache line. 
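/*
 * (Illustrative sketch, not part of the patch: the wakeup contract that
 * replaces cpuidle_mwait_flags above.  A CPU advertising in_mwait promises
 * to notice writes to its softirq_pending word via MONITOR, so the sender
 * may skip the IPI.  send_ipi()/monitor()/mwait() are assumed primitives;
 * the real code additionally handles X86_BUG_MONITOR via alternatives and
 * deals with interrupts via the ECX break hint or the STI shadow.
 *
 * struct idle_state {
 *     unsigned int softirq_pending;
 *     bool in_mwait;
 * };
 *
 * void raise_softirq_remote(struct idle_state *s, unsigned int nr)
 * {
 *     s->softirq_pending |= 1u << nr;     // the monitored write
 *     smp_mb();
 *     if ( !s->in_mwait )
 *         send_ipi(s);                    // fallback wakeup path
 * }
 *
 * void idle(struct idle_state *s, unsigned int eax, unsigned int ecx)
 * {
 *     s->in_mwait = true;                 // promise made
 *     monitor(&s->softirq_pending, 0, 0); // arm; also the read barrier
 *     if ( !s->softirq_pending )          // re-check after arming
 *         mwait(eax, ecx);
 *     s->in_mwait = false;                // promise withdrawn
 * }
 *
 * A write that lands between the re-check and MWAIT is caught by the armed
 * monitor, so MWAIT falls through immediately and no wakeup is lost.)
 */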
- */ - mb(); - clflush(mwait_ptr); - __monitor(mwait_ptr, 0, 0); - mb(); - __mwait(cx->address, 0); + monitor(mwait_ptr, 0, 0); + mwait(cx->address, 0); } } else if ( (current_cpu_data.x86_vendor & diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/acpi/power.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/acpi/power.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/acpi/power.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/acpi/power.c 2025-11-13 16:58:23.000000000 +0000 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -320,6 +321,7 @@ thaw_domains(); system_state = SYS_STATE_active; spin_unlock(&pm_lock); + raise_softirq(TIMER_SOFTIRQ); return error; } diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/acpi/wakeup_prot.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/acpi/wakeup_prot.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/acpi/wakeup_prot.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/acpi/wakeup_prot.S 2025-11-13 16:58:23.000000000 +0000 @@ -131,7 +131,7 @@ pop %r12 pop %rbx pop %rbp - ret + RET .data .align 16 diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/alternative.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/alternative.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/alternative.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/alternative.c 2025-11-13 16:58:23.000000000 +0000 @@ -149,6 +149,49 @@ } } +void nocall __x86_return_thunk(void); + +/* + * Place a return at @ptr. @ptr must be in the writable alias of a stub. + * + * When CONFIG_RETURN_THUNK is active, this may be a JMP __x86_return_thunk + * instead, depending on the safety of @ptr with respect to Indirect Target + * Selection. + * + * Returns the next position to write into the stub. + */ +void *place_ret(void *ptr) +{ + unsigned long addr = (unsigned long)ptr; + uint8_t *p = ptr; + + /* + * When Return Thunks are used, if a RET would be unsafe at this location + * with respect to Indirect Target Selection (i.e. if addr is in the first + * half of a cacheline), insert a JMP __x86_return_thunk instead. + * + * The displacement needs to be relative to the executable alias of the + * stub, not to @ptr which is the writeable alias. + */ + if ( IS_ENABLED(CONFIG_RETURN_THUNK) && !(addr & 0x20) ) + { + long stub_va = (this_cpu(stubs.addr) & PAGE_MASK) + (addr & ~PAGE_MASK); + long disp = (long)__x86_return_thunk - (stub_va + 5); + + BUG_ON((int32_t)disp != disp); + + *p++ = 0xe9; + *(int32_t *)p = disp; + p += 4; + } + else + { + *p++ = 0xc3; + } + + return p; +} + /* * text_poke - Update instructions on a live kernel or non-executed code. * @addr: address to modify @@ -209,10 +252,12 @@ uint8_t *repl = ALT_REPL_PTR(a); uint8_t buf[MAX_PATCH_LEN]; unsigned int total_len = a->orig_len + a->pad_len; + unsigned int feat = a->cpuid & ~ALT_FLAG_NOT; + bool inv = a->cpuid & ALT_FLAG_NOT, replace; BUG_ON(a->repl_len > total_len); BUG_ON(total_len > sizeof(buf)); - BUG_ON(a->cpuid >= NCAPINTS * 32); + BUG_ON(feat >= NCAPINTS * 32); /* * Detect sequences of alt_instr's patching the same origin site, and @@ -235,8 +280,14 @@ continue; } + /* + * Should a replacement be performed? Most replacements have positive + * polarity, but we support negative polarity too. + */ + replace = boot_cpu_has(feat) ^ inv; + /* If there is no replacement to make, see about optimising the nops. */ - if ( !boot_cpu_has(a->cpuid) ) + if ( !replace ) { /* Origin site site already touched? Don't nop anything. 
*/ if ( base->priv ) diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/apic.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/apic.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/apic.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/apic.c 2025-11-13 16:58:23.000000000 +0000 @@ -63,7 +63,6 @@ unsigned int apic_lvt0; unsigned int apic_lvt1; unsigned int apic_lvterr; - unsigned int apic_tmict; unsigned int apic_tdcr; unsigned int apic_thmr; } apic_pm_state; @@ -695,7 +694,6 @@ apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); @@ -755,7 +753,6 @@ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/arch.mk xen-4.17.5+72-g01140da4e8/xen/arch/x86/arch.mk --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/arch.mk 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/arch.mk 2025-11-13 16:58:23.000000000 +0000 @@ -50,6 +50,9 @@ CFLAGS-$(CONFIG_CC_IS_CLANG) += -mretpoline-external-thunk endif +# Compile with return thunk support if selected. +CFLAGS-$(CONFIG_RETURN_THUNK) += -mfunction-return=thunk-extern + ifdef CONFIG_XEN_IBT # Force -fno-jump-tables to work around # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/bhb-thunk.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/bhb-thunk.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/bhb-thunk.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/bhb-thunk.S 2025-11-13 16:58:23.000000000 +0000 @@ -23,7 +23,7 @@ 0: .byte 0xc6, 0xf8, 0 /* xabort $0 */ int3 1: - ret + RET .size clear_bhb_tsx, . - clear_bhb_tsx .type clear_bhb_tsx, @function @@ -52,7 +52,12 @@ * ret * * The CALL/RETs are necessary to prevent the Loop Stream Detector from - * interfering. The alignment is for performance and not safety. + * interfering. + * + * The .balign's are for performance, but they cause the RETs to be in unsafe + * positions with respect to Indirect Target Selection. The .skips are to + * move the RETs into ITS-safe positions, rather than using the slowpath + * through __x86_return_thunk. * * The "short" sequence (5 and 5) is for CPUs prior to Alder Lake / Sapphire * Rapids (i.e. Cores prior to Golden Cove and/or Gracemont). @@ -68,12 +73,14 @@ jmp 5f int3 - .align 64 + .balign 64 + .skip 32 - (.Lr1 - 1f), 0xcc 1: call 2f - ret +.Lr1: ret int3 - .align 64 + .balign 64 + .skip 32 - 18 /* (.Lr2 - 2f) but Clang IAS doesn't like this */, 0xcc 2: ALTERNATIVE "mov $5, %eax", "mov $7, %eax", X86_SPEC_BHB_LOOPS_LONG 3: jmp 4f @@ -85,7 +92,7 @@ sub $1, %ecx jnz 1b - ret +.Lr2: ret 5: /* * The Intel sequence has an LFENCE here. 
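/*
 * (Illustrative aside, not part of the patch: the ITS rule that both the
 * .skip padding above and place_ret() in alternative.c implement.  Indirect
 * Target Selection affects branches in the first half, bytes 0-31, of an
 * aligned 64-byte cacheline, so the test reduces to one bit of the address:
 *
 *     static bool its_unsafe_ret(uintptr_t addr)
 *     {
 *         return !(addr & 0x20);    // bit 5 clear: first half of the line
 *     }
 *
 * RETs for which this is true are routed via JMP __x86_return_thunk
 * instead of being emitted in place.)
 */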
The purpose is to ensure diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/clear_page.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/clear_page.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/clear_page.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/clear_page.S 2025-11-13 16:58:23.000000000 +0000 @@ -1,5 +1,6 @@ .file __FILE__ +#include #include ENTRY(clear_page_sse2) @@ -15,4 +16,4 @@ jnz 0b sfence - ret + RET diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/copy_page.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/copy_page.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/copy_page.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/copy_page.S 2025-11-13 16:58:23.000000000 +0000 @@ -1,5 +1,6 @@ .file __FILE__ +#include #include #define src_reg %rsi @@ -40,4 +41,4 @@ movnti tmp4_reg, 3*WORD_SIZE(dst_reg) sfence - ret + RET diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/common.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/common.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/common.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/common.c 2025-11-13 16:58:23.000000000 +0000 @@ -469,7 +469,9 @@ if (c->extended_cpuid_level >= 0x80000008) c->x86_capability[FEATURESET_e8b] = cpuid_ebx(0x80000008); if (c->extended_cpuid_level >= 0x80000021) - c->x86_capability[FEATURESET_e21a] = cpuid_eax(0x80000021); + cpuid(0x80000021, + &c->x86_capability[FEATURESET_e21a], &tmp, + &c->x86_capability[FEATURESET_e21c], &tmp); /* Intel-defined flags: level 0x00000007 */ if (c->cpuid_level >= 7) { diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/intel.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/intel.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/intel.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/intel.c 2025-11-13 16:58:23.000000000 +0000 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -363,7 +364,6 @@ INTEL_FAM6_MODEL(0x25), { } }; -#undef INTEL_FAM6_MODEL /* Serialized by the AP bringup code. */ if ( max_cstate > 1 && (c->apicid & (c->x86_num_siblings - 1)) && @@ -376,11 +376,44 @@ } /* + * APL30: One use of the MONITOR/MWAIT instruction pair is to allow a logical + * processor to wait in a sleep state until a store to the armed address range + * occurs. Due to this erratum, stores to the armed address range may not + * trigger MWAIT to resume execution. + * + * ICX143: Under complex microarchitectural conditions, a monitor that is armed + * with the MWAIT instruction may not be triggered, leading to a processor + * hang. + * + * LNL030: Problem P-cores may not exit power state Core C6 on monitor hit. + * + * Force the sending of an IPI in those cases. + */ +static void __init probe_mwait_errata(void) +{ + static const struct x86_cpu_id __initconst models[] = { + INTEL_FAM6_MODEL(INTEL_FAM6_ATOM_GOLDMONT), /* APL30 */ + INTEL_FAM6_MODEL(INTEL_FAM6_ICELAKE_X), /* ICX143 */ + INTEL_FAM6_MODEL(INTEL_FAM6_LUNARLAKE_M), /* LNL030 */ + { } + }; +#undef INTEL_FAM6_MODEL + + if ( boot_cpu_has(X86_FEATURE_MONITOR) && x86_match_cpu(models) ) + { + printk(XENLOG_WARNING + "Forcing IPI MWAIT wakeup due to CPU erratum\n"); + setup_force_cpu_cap(X86_BUG_MONITOR); + } +} + +/* * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. 
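/*
 * (Illustrative aside, not part of the patch: the shape of the model match
 * done by probe_mwait_errata() above.  Family/model values here are
 * placeholders, not the real INTEL_FAM6_* encodings:
 *
 *     struct cpu_model { uint8_t family, model; };
 *
 *     static const struct cpu_model affected[] = {
 *         { 6, 0x01 },    // placeholder for ATOM_GOLDMONT (APL30)
 *         { 6, 0x02 },    // placeholder for ICELAKE_X     (ICX143)
 *         { 6, 0x03 },    // placeholder for LUNARLAKE_M   (LNL030)
 *     };
 *
 *     static bool match(uint8_t family, uint8_t model)
 *     {
 *         for ( size_t i = 0; i < ARRAY_SIZE(affected); i++ )
 *             if ( affected[i].family == family &&
 *                  affected[i].model == model )
 *                 return true;
 *         return false;
 *     }
 *
 * A hit forces X86_BUG_MONITOR, i.e. the IPI wakeup path, for the whole
 * system.)
 */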
* * Xeon 7400 erratum AAI65 (and further newer Xeons) * MONITOR/MWAIT may have excessive false wakeups + * https://web.archive.org/web/20090219054841/http://download.intel.com/design/xeon/specupdt/32033601.pdf */ static void Intel_errata_workarounds(struct cpuinfo_x86 *c) { @@ -398,9 +431,11 @@ if (c->x86 == 6 && cpu_has_clflush && (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) - __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability); + setup_force_cpu_cap(X86_BUG_CLFLUSH_MONITOR); probe_c3_errata(c); + if (system_state < SYS_STATE_smp_boot) + probe_mwait_errata(); } diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/amd-patch-digests.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/amd-patch-digests.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/amd-patch-digests.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/amd-patch-digests.c 2025-11-13 16:58:23.000000000 +0000 @@ -0,0 +1,370 @@ +/* Generated from linux-firmware. */ +{ + .patch_id = 0x0800126f, + .digest = { + 0x2b, 0x5a, 0xf2, 0x9c, 0xdd, 0xd2, 0x7f, 0xec, + 0xec, 0x96, 0x09, 0x57, 0xb0, 0x96, 0x29, 0x8b, + 0x2e, 0x26, 0x91, 0xf0, 0x49, 0x33, 0x42, 0x18, + 0xdd, 0x4b, 0x65, 0x5a, 0xd4, 0x15, 0x3d, 0x33, + }, +}, +{ + .patch_id = 0x0800820d, + .digest = { + 0x68, 0x98, 0x83, 0xcd, 0x22, 0x0d, 0xdd, 0x59, + 0x73, 0x2c, 0x5b, 0x37, 0x1f, 0x84, 0x0e, 0x67, + 0x96, 0x43, 0x83, 0x0c, 0x46, 0x44, 0xab, 0x7c, + 0x7b, 0x65, 0x9e, 0x57, 0xb5, 0x90, 0x4b, 0x0e, + }, +}, +{ + .patch_id = 0x0830107c, + .digest = { + 0x21, 0x64, 0xde, 0xfb, 0x9f, 0x68, 0x96, 0x47, + 0x70, 0x5c, 0xe2, 0x8f, 0x18, 0x52, 0x6a, 0xac, + 0xa4, 0xd2, 0x2e, 0xe0, 0xde, 0x68, 0x66, 0xc3, + 0xeb, 0x1e, 0xd3, 0x3f, 0xbc, 0x51, 0x1d, 0x38, + }, +}, +{ + .patch_id = 0x0860010d, + .digest = { + 0x86, 0xb6, 0x15, 0x83, 0xbc, 0x3b, 0x9c, 0xe0, + 0xb3, 0xef, 0x1d, 0x99, 0x84, 0x35, 0x15, 0xf7, + 0x7c, 0x2a, 0xc6, 0x42, 0xdb, 0x73, 0x07, 0x5c, + 0x7d, 0xc3, 0x02, 0xb5, 0x43, 0x06, 0x5e, 0xf8, + }, +}, +{ + .patch_id = 0x08608108, + .digest = { + 0x14, 0xfe, 0x57, 0x86, 0x49, 0xc8, 0x68, 0xe2, + 0x11, 0xa3, 0xcb, 0x6e, 0xff, 0x6e, 0xd5, 0x38, + 0xfe, 0x89, 0x1a, 0xe0, 0x67, 0xbf, 0xc4, 0xcc, + 0x1b, 0x9f, 0x84, 0x77, 0x2b, 0x9f, 0xaa, 0xbd, + }, +}, +{ + .patch_id = 0x08701034, + .digest = { + 0xc3, 0x14, 0x09, 0xa8, 0x9c, 0x3f, 0x8d, 0x83, + 0x9b, 0x4c, 0xa5, 0xb7, 0x64, 0x8b, 0x91, 0x5d, + 0x85, 0x6a, 0x39, 0x26, 0x1e, 0x14, 0x41, 0xa8, + 0x75, 0xea, 0xa6, 0xf9, 0xc9, 0xd1, 0xea, 0x2b, + }, +}, +{ + .patch_id = 0x08a0000a, + .digest = { + 0x73, 0x31, 0x26, 0x22, 0xd4, 0xf9, 0xee, 0x3c, + 0x07, 0x06, 0xe7, 0xb9, 0xad, 0xd8, 0x72, 0x44, + 0x33, 0x31, 0xaa, 0x7d, 0xc3, 0x67, 0x0e, 0xdb, + 0x47, 0xb5, 0xaa, 0xbc, 0xf5, 0xbb, 0xd9, 0x20, + }, +}, +{ + .patch_id = 0x0a00107a, + .digest = { + 0x5f, 0x92, 0xca, 0xff, 0xc3, 0x59, 0x22, 0x5f, + 0x02, 0xa0, 0x91, 0x3b, 0x4a, 0x45, 0x10, 0xfd, + 0x19, 0xe1, 0x8a, 0x6d, 0x9a, 0x92, 0xc1, 0x3f, + 0x75, 0x78, 0xac, 0x78, 0x03, 0x1d, 0xdb, 0x18, + }, +}, +{ + .patch_id = 0x0a0011d5, + .digest = { + 0xed, 0x69, 0x89, 0xf4, 0xeb, 0x64, 0xc2, 0x13, + 0xe0, 0x51, 0x1f, 0x03, 0x26, 0x52, 0x7d, 0xb7, + 0x93, 0x5d, 0x65, 0xca, 0xb8, 0x12, 0x1d, 0x62, + 0x0d, 0x5b, 0x65, 0x34, 0x69, 0xb2, 0x62, 0x21, + }, +}, +{ + .patch_id = 0x0a0011d7, + .digest = { + 0x35, 0x07, 0xcd, 0x40, 0x94, 0xbc, 0x81, 0x6b, + 0xfc, 0x61, 0x56, 0x1a, 0xe2, 0xdb, 0x96, 0x12, + 0x1c, 0x1c, 0x31, 0xb1, 0x02, 0x6f, 0xe5, 0xd2, + 0xfe, 0x1b, 0x04, 0x03, 0x2c, 0x8f, 0x4c, 0x36, + }, +}, 
+{ + .patch_id = 0x0a001238, + .digest = { + 0x72, 0xf7, 0x4b, 0x0c, 0x7d, 0x58, 0x65, 0xcc, + 0x00, 0xcc, 0x57, 0x16, 0x68, 0x16, 0xf8, 0x2a, + 0x1b, 0xb3, 0x8b, 0xe1, 0xb6, 0x83, 0x8c, 0x7e, + 0xc0, 0xcd, 0x33, 0xf2, 0x8d, 0xf9, 0xef, 0x59, + }, +}, +{ + .patch_id = 0x0a00123b, + .digest = { + 0xef, 0xa1, 0x1e, 0x71, 0xf1, 0xc3, 0x2c, 0xe2, + 0xc3, 0xef, 0x69, 0x41, 0x7a, 0x54, 0xca, 0xc3, + 0x8f, 0x62, 0x84, 0xee, 0xc2, 0x39, 0xd9, 0x28, + 0x95, 0xa7, 0x12, 0x49, 0x1e, 0x30, 0x71, 0x72, + }, +}, +{ + .patch_id = 0x0a00820c, + .digest = { + 0xa8, 0x0c, 0x81, 0xc0, 0xa6, 0x00, 0xe7, 0xf3, + 0x5f, 0x65, 0xd3, 0xb9, 0x6f, 0xea, 0x93, 0x63, + 0xf1, 0x8c, 0x88, 0x45, 0xd7, 0x82, 0x80, 0xd1, + 0xe1, 0x3b, 0x8d, 0xb2, 0xf8, 0x22, 0x03, 0xe2, + }, +}, +{ + .patch_id = 0x0a00820d, + .digest = { + 0xf9, 0x2a, 0xc0, 0xf4, 0x9e, 0xa4, 0x87, 0xa4, + 0x7d, 0x87, 0x00, 0xfd, 0xab, 0xda, 0x19, 0xca, + 0x26, 0x51, 0x32, 0xc1, 0x57, 0x91, 0xdf, 0xc1, + 0x05, 0xeb, 0x01, 0x7c, 0x5a, 0x95, 0x21, 0xb7, + }, +}, +{ + .patch_id = 0x0a101148, + .digest = { + 0x20, 0xd5, 0x6f, 0x40, 0x4a, 0xf6, 0x48, 0x90, + 0xc2, 0x93, 0x9a, 0xc2, 0xfd, 0xac, 0xef, 0x4f, + 0xfa, 0xc0, 0x3d, 0x92, 0x3c, 0x6d, 0x01, 0x08, + 0xf1, 0x5e, 0xb0, 0xde, 0xb4, 0x98, 0xae, 0xc4, + }, +}, +{ + .patch_id = 0x0a10114c, + .digest = { + 0x9e, 0xb6, 0xa2, 0xd9, 0x87, 0x38, 0xc5, 0x64, + 0xd8, 0x88, 0xfa, 0x78, 0x98, 0xf9, 0x6f, 0x74, + 0x39, 0x90, 0x1b, 0xa5, 0xcf, 0x5e, 0xb4, 0x2a, + 0x02, 0xff, 0xd4, 0x8c, 0x71, 0x8b, 0xe2, 0xc0, + }, +}, +{ + .patch_id = 0x0a101248, + .digest = { + 0xed, 0x3b, 0x95, 0xa6, 0x68, 0xa7, 0x77, 0x3e, + 0xfc, 0x17, 0x26, 0xe2, 0x7b, 0xd5, 0x56, 0x22, + 0x2c, 0x1d, 0xef, 0xeb, 0x56, 0xdd, 0xba, 0x6e, + 0x1b, 0x7d, 0x64, 0x9d, 0x4b, 0x53, 0x13, 0x75, + }, +}, +{ + .patch_id = 0x0a10124c, + .digest = { + 0x29, 0xea, 0xf1, 0x2c, 0xb2, 0xe4, 0xef, 0x90, + 0xa4, 0xcd, 0x1d, 0x86, 0x97, 0x17, 0x61, 0x46, + 0xfc, 0x22, 0xcb, 0x57, 0x75, 0x19, 0xc8, 0xcc, + 0x0c, 0xf5, 0xbc, 0xac, 0x81, 0x9d, 0x9a, 0xd2, + }, +}, +{ + .patch_id = 0x0a108108, + .digest = { + 0xed, 0xc2, 0xec, 0xa1, 0x15, 0xc6, 0x65, 0xe9, + 0xd0, 0xef, 0x39, 0xaa, 0x7f, 0x55, 0x06, 0xc6, + 0xf5, 0xd4, 0x3f, 0x7b, 0x14, 0xd5, 0x60, 0x2c, + 0x28, 0x1e, 0x9c, 0x59, 0x69, 0x99, 0x4d, 0x16, + }, +}, +{ + .patch_id = 0x0a108109, + .digest = { + 0x85, 0xb4, 0xbd, 0x7c, 0x49, 0xa7, 0xbd, 0xfa, + 0x49, 0x36, 0x80, 0x81, 0xc5, 0xb7, 0x39, 0x1b, + 0x9a, 0xaa, 0x50, 0xde, 0x9b, 0xe9, 0x32, 0x35, + 0x42, 0x7e, 0x51, 0x4f, 0x52, 0x2c, 0x28, 0x59, + }, +}, +{ + .patch_id = 0x0a20102d, + .digest = { + 0xf9, 0x6e, 0xf2, 0x32, 0xd3, 0x0f, 0x5f, 0x11, + 0x59, 0xa1, 0xfe, 0xcc, 0xcd, 0x9b, 0x42, 0x89, + 0x8b, 0x89, 0x2f, 0xb5, 0xbb, 0x82, 0xef, 0x23, + 0x8c, 0xe9, 0x19, 0x3e, 0xcc, 0x3f, 0x7b, 0xb4, + }, +}, +{ + .patch_id = 0x0a20102e, + .digest = { + 0xbe, 0x1f, 0x32, 0x04, 0x0d, 0x3c, 0x9c, 0xdd, + 0xe1, 0xa4, 0xbf, 0x76, 0x3a, 0xec, 0xc2, 0xf6, + 0x11, 0x00, 0xa7, 0xaf, 0x0f, 0xe5, 0x02, 0xc5, + 0x54, 0x3a, 0x1f, 0x8c, 0x16, 0xb5, 0xff, 0xbe, + }, +}, +{ + .patch_id = 0x0a201210, + .digest = { + 0xe8, 0x6d, 0x51, 0x6a, 0x8e, 0x72, 0xf3, 0xfe, + 0x6e, 0x16, 0xbc, 0x62, 0x59, 0x40, 0x17, 0xe9, + 0x6d, 0x3d, 0x0e, 0x6b, 0xa7, 0xac, 0xe3, 0x68, + 0xf7, 0x55, 0xf0, 0x13, 0xbb, 0x22, 0xf6, 0x41, + }, +}, +{ + .patch_id = 0x0a201211, + .digest = { + 0x69, 0xa1, 0x17, 0xec, 0xd0, 0xf6, 0x6c, 0x95, + 0xe2, 0x1e, 0xc5, 0x59, 0x1a, 0x52, 0x0a, 0x27, + 0xc4, 0xed, 0xd5, 0x59, 0x1f, 0xbf, 0x00, 0xff, + 0x08, 0x88, 0xb5, 0xe1, 0x12, 0xb6, 0xcc, 0x27, + }, +}, +{ + .patch_id = 
0x0a404107, + .digest = { + 0xbb, 0x04, 0x4e, 0x47, 0xdd, 0x5e, 0x26, 0x45, + 0x1a, 0xc9, 0x56, 0x24, 0xa4, 0x4c, 0x82, 0xb0, + 0x8b, 0x0d, 0x9f, 0xf9, 0x3a, 0xdf, 0xc6, 0x81, + 0x13, 0xbc, 0xc5, 0x25, 0xe4, 0xc5, 0xc3, 0x99, + }, +}, +{ + .patch_id = 0x0a404108, + .digest = { + 0x69, 0x67, 0x43, 0x06, 0xf8, 0x0c, 0x62, 0xdc, + 0xa4, 0x21, 0x30, 0x4f, 0x0f, 0x21, 0x2c, 0xcb, + 0xcc, 0x37, 0xf1, 0x1c, 0xc3, 0xf8, 0x2f, 0x19, + 0xdf, 0x53, 0x53, 0x46, 0xb1, 0x15, 0xea, 0x00, + }, +}, +{ + .patch_id = 0x0a500011, + .digest = { + 0x23, 0x3d, 0x70, 0x7d, 0x03, 0xc3, 0xc4, 0xf4, + 0x2b, 0x82, 0xc6, 0x05, 0xda, 0x80, 0x0a, 0xf1, + 0xd7, 0x5b, 0x65, 0x3a, 0x7d, 0xab, 0xdf, 0xa2, + 0x11, 0x5e, 0x96, 0x7e, 0x71, 0xe9, 0xfc, 0x74, + }, +}, +{ + .patch_id = 0x0a500012, + .digest = { + 0xeb, 0x74, 0x0d, 0x47, 0xa1, 0x8e, 0x09, 0xe4, + 0x93, 0x4c, 0xad, 0x03, 0x32, 0x4c, 0x38, 0x16, + 0x10, 0x39, 0xdd, 0x06, 0xaa, 0xce, 0xd6, 0x0f, + 0x62, 0x83, 0x9d, 0x8e, 0x64, 0x55, 0xbe, 0x63, + }, +}, +{ + .patch_id = 0x0a601209, + .digest = { + 0x66, 0x48, 0xd4, 0x09, 0x05, 0xcb, 0x29, 0x32, + 0x66, 0xb7, 0x9a, 0x76, 0xcd, 0x11, 0xf3, 0x30, + 0x15, 0x86, 0xcc, 0x5d, 0x97, 0x0f, 0xc0, 0x46, + 0xe8, 0x73, 0xe2, 0xd6, 0xdb, 0xd2, 0x77, 0x1d, + }, +}, +{ + .patch_id = 0x0a60120a, + .digest = { + 0x0c, 0x8b, 0x3d, 0xfd, 0x52, 0x52, 0x85, 0x7d, + 0x20, 0x3a, 0xe1, 0x7e, 0xa4, 0x21, 0x3b, 0x7b, + 0x17, 0x86, 0xae, 0xac, 0x13, 0xb8, 0x63, 0x9d, + 0x06, 0x01, 0xd0, 0xa0, 0x51, 0x9a, 0x91, 0x2c, + }, +}, +{ + .patch_id = 0x0a704107, + .digest = { + 0xf3, 0xc6, 0x58, 0x26, 0xee, 0xac, 0x3f, 0xd6, + 0xce, 0xa1, 0x72, 0x47, 0x3b, 0xba, 0x2b, 0x93, + 0x2a, 0xad, 0x8e, 0x6b, 0xea, 0x9b, 0xb7, 0xc2, + 0x64, 0x39, 0x71, 0x8c, 0xce, 0xe7, 0x41, 0x39, + }, +}, +{ + .patch_id = 0x0a704108, + .digest = { + 0xd7, 0x55, 0x15, 0x2b, 0xfe, 0xc4, 0xbc, 0x93, + 0xec, 0x91, 0xa0, 0xae, 0x45, 0xb7, 0xc3, 0x98, + 0x4e, 0xff, 0x61, 0x77, 0x88, 0xc2, 0x70, 0x49, + 0xe0, 0x3a, 0x1d, 0x84, 0x38, 0x52, 0xbf, 0x5a, + }, +}, +{ + .patch_id = 0x0a705206, + .digest = { + 0x8d, 0xc0, 0x76, 0xbd, 0x58, 0x9f, 0x8f, 0xa4, + 0x12, 0x9d, 0x21, 0xfb, 0x48, 0x21, 0xbc, 0xe7, + 0x67, 0x6f, 0x04, 0x18, 0xae, 0x20, 0x87, 0x4b, + 0x03, 0x35, 0xe9, 0xbe, 0xfb, 0x06, 0xdf, 0xfc, + }, +}, +{ + .patch_id = 0x0a705208, + .digest = { + 0x30, 0x1d, 0x55, 0x24, 0xbc, 0x6b, 0x5a, 0x19, + 0x0c, 0x7d, 0x1d, 0x74, 0xaa, 0xd1, 0xeb, 0xd2, + 0x16, 0x62, 0xf7, 0x5b, 0xe1, 0x1f, 0x18, 0x11, + 0x5c, 0xf0, 0x94, 0x90, 0x26, 0xec, 0x69, 0xff, + }, +}, +{ + .patch_id = 0x0a708007, + .digest = { + 0x6b, 0x76, 0xcc, 0x78, 0xc5, 0x8a, 0xa3, 0xe3, + 0x32, 0x2d, 0x79, 0xe4, 0xc3, 0x80, 0xdb, 0xb2, + 0x07, 0xaa, 0x3a, 0xe0, 0x57, 0x13, 0x72, 0x80, + 0xdf, 0x92, 0x73, 0x84, 0x87, 0x3c, 0x73, 0x93, + }, +}, +{ + .patch_id = 0x0a708008, + .digest = { + 0x08, 0x6e, 0xf0, 0x22, 0x4b, 0x8e, 0xc4, 0x46, + 0x58, 0x34, 0xe6, 0x47, 0xa2, 0x28, 0xfd, 0xab, + 0x22, 0x3d, 0xdd, 0xd8, 0x52, 0x9e, 0x1d, 0x16, + 0xfa, 0x01, 0x68, 0x14, 0x79, 0x3e, 0xe8, 0x6b, + }, +}, +{ + .patch_id = 0x0a70c005, + .digest = { + 0x88, 0x5d, 0xfb, 0x79, 0x64, 0xd8, 0x46, 0x3b, + 0x4a, 0x83, 0x8e, 0x77, 0x7e, 0xcf, 0xb3, 0x0f, + 0x1f, 0x1f, 0xf1, 0x97, 0xeb, 0xfe, 0x56, 0x55, + 0xee, 0x49, 0xac, 0xe1, 0x8b, 0x13, 0xc5, 0x13, + }, +}, +{ + .patch_id = 0x0a70c008, + .digest = { + 0x0f, 0xdb, 0x37, 0xa1, 0x10, 0xaf, 0xd4, 0x21, + 0x94, 0x0d, 0xa4, 0xa2, 0xe9, 0x86, 0x6c, 0x0e, + 0x85, 0x7c, 0x36, 0x30, 0xa3, 0x3a, 0x78, 0x66, + 0x18, 0x10, 0x60, 0x0d, 0x78, 0x3d, 0x44, 0xd0, + }, +}, +{ + .patch_id = 0x0aa00116, + .digest = { 
+ 0xe8, 0x4c, 0x2c, 0x88, 0xa1, 0xac, 0x24, 0x63, + 0x65, 0xe5, 0xaa, 0x2d, 0x16, 0xa9, 0xc3, 0xf5, + 0xfe, 0x1d, 0x5e, 0x65, 0xc7, 0xaa, 0x92, 0x4d, + 0x91, 0xee, 0x76, 0xbb, 0x4c, 0x66, 0x78, 0xc9, + }, +}, +{ + .patch_id = 0x0aa00215, + .digest = { + 0x55, 0xd3, 0x28, 0xcb, 0x87, 0xa9, 0x32, 0xe9, + 0x4e, 0x85, 0x4b, 0x7c, 0x6b, 0xd5, 0x7c, 0xd4, + 0x1b, 0x51, 0x71, 0x3a, 0x0e, 0x0b, 0xdc, 0x9b, + 0x68, 0x2f, 0x46, 0xee, 0xfe, 0xc6, 0x6d, 0xef, + }, +}, +{ + .patch_id = 0x0aa00216, + .digest = { + 0x79, 0xfb, 0x5b, 0x9f, 0xb6, 0xe6, 0xa8, 0xf5, + 0x4e, 0x7c, 0x4f, 0x8e, 0x1d, 0xad, 0xd0, 0x08, + 0xc2, 0x43, 0x7c, 0x8b, 0xe6, 0xdb, 0xd0, 0xd2, + 0xe8, 0x39, 0x26, 0xc1, 0xe5, 0x5a, 0x48, 0xf1, + }, +}, diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/amd.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/amd.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/amd.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/amd.c 2025-11-13 16:58:23.000000000 +0000 @@ -16,7 +16,9 @@ #include #include +#include #include /* TODO: Fix asm/tlbflush.h breakage */ +#include #include @@ -40,7 +42,10 @@ uint8_t mc_patch_data_id[2]; uint8_t mc_patch_data_len; uint8_t init_flag; - uint32_t mc_patch_data_checksum; + union { + uint32_t checksum; /* Fam12h and earlier */ + uint32_t min_rev; /* Zen3-5, post Entrysign */ + }; uint32_t nb_dev_id; uint32_t sb_dev_id; uint16_t processor_rev_id; @@ -91,6 +96,64 @@ uint16_t id; } equiv __read_mostly; +static const struct patch_digest { + uint32_t patch_id; + uint8_t digest[SHA2_256_DIGEST_SIZE]; +} patch_digests[] = { +#include "amd-patch-digests.c" +}; +static bool __ro_after_init entrysign_mitigiated_in_firmware; + +static int cf_check cmp_patch_id(const void *key, const void *elem) +{ + const struct patch_digest *pd = elem; + uint32_t patch_id = *(uint32_t *)key; + + if ( patch_id == pd->patch_id ) + return 0; + else if ( patch_id < pd->patch_id ) + return -1; + return 1; +} + +static bool check_digest(const struct container_microcode *mc) +{ + const struct microcode_patch *patch = mc->patch; + const struct patch_digest *pd; + uint8_t digest[SHA2_256_DIGEST_SIZE]; + + /* + * Zen1 thru Zen5 CPUs are known to use a weak signature algorithm on + * microcode updates. If this has not been mitigated in firmware, check + * the digest of the patch against a list of known provenance. + */ + if ( boot_cpu_data.x86 < 0x17 || boot_cpu_data.x86 > 0x1a || + entrysign_mitigiated_in_firmware || !opt_digest_check ) + return true; + + pd = bsearch(&patch->patch_id, patch_digests, ARRAY_SIZE(patch_digests), + sizeof(struct patch_digest), cmp_patch_id); + if ( !pd ) + { + printk(XENLOG_WARNING "No digest found for patch_id %08x\n", + patch->patch_id); + return false; + } + + sha2_256_digest(digest, patch, mc->len); + + if ( memcmp(digest, pd->digest, sizeof(digest)) ) + { + printk(XENLOG_WARNING "Patch %08x SHA256 mismatch:\n" + " expected %" STR(SHA2_256_DIGEST_SIZE) "phN\n" + " got %" STR(SHA2_256_DIGEST_SIZE) "phN\n", + patch->patch_id, pd->digest, digest); + return false; + } + + return true; +} + static void cf_check collect_cpu_info(void) { struct cpu_signature *csig = &this_cpu(cpu_sig); @@ -204,6 +267,42 @@ return compare_revisions(old->patch_id, new->patch_id); } +/* + * Check whether this patch has a minimum revision given, and whether the + * condition is satisfied. 
+ *
+ * In linux-firmware for CPUs suffering from the Entrysign vulnerability,
+ * ucodes signed with the updated signature algorithm have reused the checksum
+ * field as a min-revision field.  From public archives, the checksum field
+ * appears to have been unused since Fam12h.
+ *
+ * Returns false if there is a min revision given, and it suggests that
+ * the patch cannot be loaded on the current system.  True otherwise.
+ */
+static bool check_min_rev(const struct microcode_patch *patch)
+{
+    ASSERT(microcode_fits(patch));
+
+    if ( patch->processor_rev_id < 0xa000 || /* pre Zen3? */
+         patch->min_rev == 0 )               /* No min rev specified */
+        return true;
+
+    /*
+     * Sanity check, as this is a reused field.  If this is a true
+     * min_revision field, it will differ only in the bottom byte from the
+     * patch_id.  Otherwise, it's probably a checksum.
+     */
+    if ( (patch->patch_id ^ patch->min_rev) & ~0xff )
+    {
+        printk(XENLOG_WARNING
+               "microcode: patch %#x has unexpected min_rev %#x\n",
+               patch->patch_id, patch->min_rev);
+        return true;
+    }
+
+    return this_cpu(cpu_sig).rev >= patch->min_rev;
+}
+
 static enum microcode_match_result cf_check compare_patch(
     const struct microcode_patch *new, const struct microcode_patch *old)
 {
@@ -240,6 +339,14 @@
         return -ENXIO;
     }
 
+    if ( !check_min_rev(patch) )
+    {
+        printk(XENLOG_ERR
+               "microcode: CPU%u current rev %#x below patch min_rev %#x\n",
+               cpu, sig->rev, patch->min_rev);
+        return -ENXIO;
+    }
+
     hw_err = wrmsr_safe(MSR_AMD_PATCHLOADER, (unsigned long)patch);
 
     /* get patch id after patching */
@@ -391,7 +498,8 @@
          * one with higher revision.
          */
         if ( (microcode_fits(mc->patch) != MIS_UCODE) &&
-             (!saved || (compare_header(mc->patch, saved) == NEW_UCODE)) )
+             (!saved || (compare_header(mc->patch, saved) == NEW_UCODE)) &&
+             check_digest(mc) )
         {
             saved = mc->patch;
             saved_size = mc->len;
@@ -438,3 +546,82 @@
     .apply_microcode = apply_microcode,
     .compare_patch = compare_patch,
 };
+
+/*
+ * The Entrysign vulnerability affects all Zen1 thru Zen5 CPUs.  Firmware
+ * fixes were produced from Nov 2024.  Zen3 thru Zen5 can continue to take
+ * OS-loadable microcode updates using a new signature scheme, as long as
+ * firmware has been updated first.
+ */
+void __init amd_check_entrysign(void)
+{
+    unsigned int curr_rev;
+    uint8_t fixed_rev;
+
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+         boot_cpu_data.x86 < 0x17 ||
+         boot_cpu_data.x86 > 0x1a )
+        return;
+
+    /*
+     * Table taken from Linux, which is the only known source of information
+     * about client revisions.  Note, Linux expresses "last-vulnerable-rev"
+     * while Xen wants "first-fixed-rev".
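/*
 * (Illustrative aside, not part of the patch: the two conversions at work
 * around the table below.  Linux records the last vulnerable low byte,
 * while this switch wants the first fixed one, i.e. last-vulnerable + 1.
 * Separately, check_min_rev() above accepts a min_rev as genuine only when
 * it differs from the patch_id in no more than the low byte:
 *
 *     uint8_t first_fixed(uint8_t last_vulnerable)
 *     {
 *         return last_vulnerable + 1;
 *     }
 *
 *     bool min_rev_plausible(uint32_t patch_id, uint32_t min_rev)
 *     {
 *         return !((patch_id ^ min_rev) & ~0xffu);
 *     }
 * )
 */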
+ */ + curr_rev = this_cpu(cpu_sig).rev; + switch ( curr_rev >> 8 ) + { + case 0x080012: fixed_rev = 0x78; break; + case 0x080082: fixed_rev = 0x10; break; + case 0x083010: fixed_rev = 0x7d; break; + case 0x086001: fixed_rev = 0x0f; break; + case 0x086081: fixed_rev = 0x09; break; + case 0x087010: fixed_rev = 0x35; break; + case 0x08a000: fixed_rev = 0x0b; break; + case 0x0a0010: fixed_rev = 0x7b; break; + case 0x0a0011: fixed_rev = 0xdb; break; + case 0x0a0012: fixed_rev = 0x44; break; + case 0x0a0082: fixed_rev = 0x0f; break; + case 0x0a1011: fixed_rev = 0x54; break; + case 0x0a1012: fixed_rev = 0x4f; break; + case 0x0a1081: fixed_rev = 0x0a; break; + case 0x0a2010: fixed_rev = 0x30; break; + case 0x0a2012: fixed_rev = 0x13; break; + case 0x0a4041: fixed_rev = 0x0a; break; + case 0x0a5000: fixed_rev = 0x14; break; + case 0x0a6012: fixed_rev = 0x0b; break; + case 0x0a7041: fixed_rev = 0x0a; break; + case 0x0a7052: fixed_rev = 0x09; break; + case 0x0a7080: fixed_rev = 0x0a; break; + case 0x0a70c0: fixed_rev = 0x0a; break; + case 0x0aa001: fixed_rev = 0x17; break; + case 0x0aa002: fixed_rev = 0x19; break; + case 0x0b0021: fixed_rev = 0x47; break; + case 0x0b1010: fixed_rev = 0x47; break; + case 0x0b2040: fixed_rev = 0x32; break; + case 0x0b4040: fixed_rev = 0x32; break; + case 0x0b6000: fixed_rev = 0x32; break; + case 0x0b7000: fixed_rev = 0x32; break; + default: + printk(XENLOG_WARNING + "Unrecognised CPU %02x-%02x-%02x ucode 0x%08x, assuming vulnerable to Entrysign\n", + boot_cpu_data.x86, boot_cpu_data.x86_model, + boot_cpu_data.x86_mask, curr_rev); + return; + } + + /* + * This check is best-effort. If the platform looks to be out of date, it + * probably is. If the platform looks to be fixed, it either genuinely + * is, or malware has gotten in before Xen booted and all bets are off. + */ + if ( (uint8_t)curr_rev >= fixed_rev ) + { + entrysign_mitigiated_in_firmware = true; + return; + } + + printk(XENLOG_WARNING + "WARNING: Platform vulnerable to Entrysign (SB-7033, CVE-2024-36347) - firmware update required\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC); +} diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/core.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/core.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/core.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/core.c 2025-11-13 16:58:23.000000000 +0000 @@ -97,6 +97,7 @@ static bool ucode_in_nmi = true; bool __read_mostly opt_ucode_allow_same; +bool __ro_after_init opt_digest_check = true; /* Protected by microcode_mutex */ static struct microcode_patch *microcode_cache; @@ -126,6 +127,8 @@ ucode_in_nmi = val; else if ( (val = parse_boolean("allow-same", s, ss)) >= 0 ) opt_ucode_allow_same = val; + else if ( (val = parse_boolean("digest-check", s, ss)) >= 0 ) + opt_digest_check = val; else if ( !ucode_mod_forced ) /* Not forced by EFI */ { if ( (val = parse_boolean("scan", s, ss)) >= 0 ) @@ -498,10 +501,10 @@ atomic_inc(&cpu_updated); atomic_inc(&cpu_out); - if ( ret == -EIO ) + if ( ret ) { printk(XENLOG_ERR - "Late loading aborted: CPU%u failed to update ucode\n", cpu); + "Late loading aborted: CPU%u failed to update ucode: %d\n", cpu, ret); goto out; } @@ -801,6 +804,18 @@ switch ( c->x86_vendor ) { case X86_VENDOR_AMD: + /* + * The Entrysign vulnerability (SB-7033, CVE-2024-36347) affects + * Zen1-5 CPUs. Taint Xen if digest checking is turned off. 
+ */ + if ( boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 <= 0x1a && + !opt_digest_check ) + { + printk(XENLOG_WARNING + "Microcode patch additional digest checks disabled\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC); + } + if ( c->x86 >= 0x10 ) ucode_ops = amd_ucode_ops; break; @@ -821,6 +836,8 @@ printk(XENLOG_INFO "BSP microcode revision: 0x%08x\n", this_cpu(cpu_sig).rev); + amd_check_entrysign(); + if ( ucode_mod.mod_end || ucode_blob.size ) rc = early_microcode_update_cpu(); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/private.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/private.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/microcode/private.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/microcode/private.h 2025-11-13 16:58:23.000000000 +0000 @@ -54,6 +54,10 @@ const struct microcode_patch *new, const struct microcode_patch *old); }; +extern bool opt_digest_check; + +void amd_check_entrysign(void); + extern const struct microcode_ops amd_ucode_ops, intel_ucode_ops; #endif /* ASM_X86_MICROCODE_PRIVATE_H */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/mwait-idle.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/mwait-idle.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu/mwait-idle.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu/mwait-idle.c 2025-11-13 16:58:23.000000000 +0000 @@ -902,7 +902,6 @@ static void cf_check mwait_idle(void) { unsigned int cpu = smp_processor_id(); - struct cpu_info *info = get_cpu_info(); struct acpi_processor_power *power = processor_powers[cpu]; struct acpi_processor_cx *cx = NULL; unsigned int next_state; @@ -929,6 +928,8 @@ pm_idle_save(); else { + struct cpu_info *info = get_cpu_info(); + spec_ctrl_enter_idle(info); safe_halt(); spec_ctrl_exit_idle(info); @@ -955,11 +956,6 @@ if ((cx->type >= 3) && errata_c6_workaround()) cx = power->safe_state; - if (cx->ibrs_disable) { - ASSERT(!cx->irq_enable_early); - spec_ctrl_enter_idle(info); - } - #if 0 /* XXX Can we/do we need to do something similar on Xen? */ /* * leave_mm() to avoid costly and often unnecessary wakeups @@ -977,12 +973,8 @@ update_last_cx_stat(power, cx, before); - if (cx->irq_enable_early) - local_irq_enable(); - - mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK); - - local_irq_disable(); + mwait_idle_with_hints(cx->address, + cx->irq_enable_early ? 0 : MWAIT_ECX_INTERRUPT_BREAK); after = alternative_call(cpuidle_get_tick); @@ -991,10 +983,6 @@ /* Now back in C0. 
*/ update_idle_stats(power, cx, before, after); - - if (cx->ibrs_disable) - spec_ctrl_exit_idle(info); - local_irq_enable(); TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after, diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu-policy.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu-policy.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/cpu-policy.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/cpu-policy.c 2025-11-13 16:58:23.000000000 +0000 @@ -340,6 +340,8 @@ p->extd.raw[0x1e] = EMPTY_LEAF; /* TopoExt APIC ID/Core/Node */ p->extd.raw[0x1f] = EMPTY_LEAF; /* SEV */ p->extd.raw[0x20] = EMPTY_LEAF; /* Platform QoS */ + p->extd.raw[0x21].b = 0; + p->extd.raw[0x21].d = 0; break; } } @@ -411,8 +413,9 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) { - if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + switch ( boot_cpu_data.x86_vendor ) { + case X86_VENDOR_INTEL: /* * MSR_ARCH_CAPS is just feature data, and we can offer it to guests * unconditionally, although limit it to Intel systems as it is highly @@ -457,6 +460,33 @@ boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X && raw_cpu_policy.feat.clwb ) __set_bit(X86_FEATURE_CLWB, fs); + + /* + * To mitigate Native-BHI, one option is to use a TSX Abort on capable + * systems. This is safe even if RTM has been disabled for other + * reasons via MSR_TSX_{CTRL,FORCE_ABORT}. However, a guest kernel + * doesn't get to know this type of information. + * + * Therefore the meaning of RTM_ALWAYS_ABORT has been adjusted, to + * instead mean "XBEGIN won't fault". This is enough for a guest + * kernel to make an informed choice WRT mitigating Native-BHI. + * + * If RTM-capable, we can run a VM which has seen RTM_ALWAYS_ABORT. + */ + if ( test_bit(X86_FEATURE_RTM, fs) ) + __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs); + break; + + case X86_VENDOR_AMD: + /* + * This bit indicates that the VERW instruction may have gained + * scrubbing side effects. With pooling, it means "you might migrate + * somewhere where scrubbing is necessary", and may need exposing on + * unaffected hardware. This is fine, because the VERW instruction + * has been around since the 286. + */ + __set_bit(X86_FEATURE_VERW_CLEAR, fs); + break; } /* @@ -468,27 +498,13 @@ */ __set_bit(X86_FEATURE_HTT, fs); __set_bit(X86_FEATURE_CMP_LEGACY, fs); - - /* - * To mitigate Native-BHI, one option is to use a TSX Abort on capable - * systems. This is safe even if RTM has been disabled for other reasons - * via MSR_TSX_{CTRL,FORCE_ABORT}. However, a guest kernel doesn't get to - * know this type of information. - * - * Therefore the meaning of RTM_ALWAYS_ABORT has been adjusted, to instead - * mean "XBEGIN won't fault". This is enough for a guest kernel to make - * an informed choice WRT mitigating Native-BHI. - * - * If RTM-capable, we can run a VM which has seen RTM_ALWAYS_ABORT. - */ - if ( test_bit(X86_FEATURE_RTM, fs) ) - __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs); } static void __init guest_common_default_feature_adjustments(uint32_t *fs) { - if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + switch ( boot_cpu_data.x86_vendor ) { + case X86_VENDOR_INTEL: /* * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to @@ -511,17 +527,14 @@ * reasons, so reset the default policy back to the host values in * case we're unaffected. 
      */
-        __clear_bit(X86_FEATURE_MD_CLEAR, fs);
-        if ( cpu_has_md_clear )
-            __set_bit(X86_FEATURE_MD_CLEAR, fs);
-
-        __clear_bit(X86_FEATURE_FB_CLEAR, fs);
-        if ( cpu_has_fb_clear )
-            __set_bit(X86_FEATURE_FB_CLEAR, fs);
-
-        __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
-        if ( cpu_has_rfds_clear )
-            __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
+        if ( !cpu_has_md_clear )
+            __clear_bit(X86_FEATURE_MD_CLEAR, fs);
+
+        if ( !cpu_has_fb_clear )
+            __clear_bit(X86_FEATURE_FB_CLEAR, fs);
+
+        if ( !cpu_has_rfds_clear )
+            __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
 
         /*
          * The Gather Data Sampling microcode mitigation (August 2023) has an
@@ -535,34 +548,45 @@
              boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X &&
              raw_cpu_policy.feat.clwb )
             __clear_bit(X86_FEATURE_CLWB, fs);
+
+        /*
+         * On certain hardware, speculative or errata workarounds can result
+         * in TSX being placed in "force-abort" mode, where it doesn't
+         * actually function as expected, but is technically compatible with
+         * the ISA.
+         *
+         * Do not advertise RTM to guests by default if it won't actually
+         * work. Instead, advertise RTM_ALWAYS_ABORT indicating that TSX
+         * Aborts are safe to use, e.g. for mitigating Native-BHI.
+         */
+        if ( rtm_disabled )
+        {
+            __clear_bit(X86_FEATURE_RTM, fs);
+            __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs);
+        }
+        break;
+
+    case X86_VENDOR_AMD:
+        /*
+         * This bit indicates that the VERW instruction may have gained
+         * scrubbing side effects. The max policy has it set for migration
+         * reasons, so reset the default policy back to the host value in case
+         * we're unaffected.
+         */
+        if ( !cpu_has_verw_clear )
+            __clear_bit(X86_FEATURE_VERW_CLEAR, fs);
+        break;
     }
 
     /*
      * Topology information is at the toolstack's discretion so these are
      * unconditionally set in max, but pick a default which matches the host.
      */
-    __clear_bit(X86_FEATURE_HTT, fs);
-    if ( cpu_has_htt )
-        __set_bit(X86_FEATURE_HTT, fs);
-
-    __clear_bit(X86_FEATURE_CMP_LEGACY, fs);
-    if ( cpu_has_cmp_legacy )
-        __set_bit(X86_FEATURE_CMP_LEGACY, fs);
+    if ( !cpu_has_htt )
+        __clear_bit(X86_FEATURE_HTT, fs);
 
-    /*
-     * On certain hardware, speculative or errata workarounds can result in
-     * TSX being placed in "force-abort" mode, where it doesn't actually
-     * function as expected, but is technically compatible with the ISA.
-     *
-     * Do not advertise RTM to guests by default if it won't actually work.
-     * Instead, advertise RTM_ALWAYS_ABORT indicating that TSX Aborts are safe
-     * to use, e.g. for mitigating Native-BHI.
-     */
-    if ( rtm_disabled )
-    {
-        __clear_bit(X86_FEATURE_RTM, fs);
-        __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs);
-    }
+    if ( !cpu_has_cmp_legacy )
+        __clear_bit(X86_FEATURE_CMP_LEGACY, fs);
 }
 
 static void __init guest_common_feature_adjustments(uint32_t *fs)
diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/efi/check.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/efi/check.c
--- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/efi/check.c 2024-11-12 13:09:34.000000000 +0000
+++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/efi/check.c 2025-11-13 16:58:23.000000000 +0000
@@ -3,6 +3,9 @@
     return i;
 }
 
+/* In case -mfunction-return is in use. */
+void __x86_return_thunk(void) {};
+
 /*
  * Populate an array with "addresses" of relocatable and absolute values.
* This is to probe ld for (a) emitting base relocations at all and (b) not diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/extable.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/extable.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/extable.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/extable.c 2025-11-13 16:58:23.000000000 +0000 @@ -135,20 +135,20 @@ int __init cf_check stub_selftest(void) { static const struct { - uint8_t opc[8]; + uint8_t opc[7]; uint64_t rax; union stub_exception_token res; } tests[] __initconst = { #define endbr64 0xf3, 0x0f, 0x1e, 0xfa - { .opc = { endbr64, 0x0f, 0xb9, 0xc3, 0xc3 }, /* ud1 */ + { .opc = { endbr64, 0x0f, 0xb9, 0x90 }, /* ud1 */ .res.fields.trapnr = TRAP_invalid_op }, - { .opc = { endbr64, 0x90, 0x02, 0x00, 0xc3 }, /* nop; add (%rax),%al */ + { .opc = { endbr64, 0x90, 0x02, 0x00 }, /* nop; add (%rax),%al */ .rax = 0x0123456789abcdef, .res.fields.trapnr = TRAP_gp_fault }, - { .opc = { endbr64, 0x02, 0x04, 0x04, 0xc3 }, /* add (%rsp,%rax),%al */ + { .opc = { endbr64, 0x02, 0x04, 0x04 }, /* add (%rsp,%rax),%al */ .rax = 0xfedcba9876543210, .res.fields.trapnr = TRAP_stack_error }, - { .opc = { endbr64, 0xcc, 0xc3, 0xc3, 0xc3 }, /* int3 */ + { .opc = { endbr64, 0xcc, 0x90, 0x90 }, /* int3 */ .res.fields.trapnr = TRAP_int3 }, #undef endbr64 }; @@ -167,6 +167,7 @@ memset(ptr, 0xcc, STUB_BUF_SIZE / 2); memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc)); + place_ret(ptr + ARRAY_SIZE(tests[i].opc)); unmap_domain_page(ptr); asm volatile ( "INDIRECT_CALL %[stb]\n" diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/Makefile xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/Makefile --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/Makefile 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/Makefile 2025-11-13 16:58:23.000000000 +0000 @@ -1,4 +1,4 @@ -obj-y += hypercall_page.o +obj-bin-y += hypercall.init.o obj-y += xen.o obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/hypercall.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/hypercall.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/hypercall.S 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/hypercall.S 2025-11-13 16:58:23.000000000 +0000 @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include + + .section .init.text, "ax", @progbits + + /* + * Used during early boot, before alternatives have run and inlined + * the appropriate instruction. Called using the hypercall ABI. + */ +ENTRY(early_hypercall) + cmpb $0, early_hypercall_insn(%rip) + jl .L_setup + je 1f + + vmmcall + ret + +1: vmcall + ret + +.L_setup: + /* + * When setting up the first time around, all registers need + * preserving. Save the non-callee-saved ones. + */ + push %r11 + push %r10 + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + call early_hypercall_setup + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + + jmp early_hypercall + + .type early_hypercall, @function + .size early_hypercall, . 
- early_hypercall diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/hypercall_page.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/hypercall_page.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/hypercall_page.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/hypercall_page.S 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -#include -#include -#include - - .section ".text.page_aligned", "ax", @progbits - .p2align PAGE_SHIFT - -GLOBAL(hypercall_page) - /* Poisoned with `ret` for safety before hypercalls are set up. */ - .fill PAGE_SIZE, 1, 0xc3 - .type hypercall_page, STT_OBJECT - .size hypercall_page, PAGE_SIZE - -/* - * Identify a specific hypercall in the hypercall page - * @param name Hypercall name. - */ -#define DECLARE_HYPERCALL(name) \ - .globl HYPERCALL_ ## name; \ - .type HYPERCALL_ ## name, STT_FUNC; \ - .size HYPERCALL_ ## name, 32; \ - .set HYPERCALL_ ## name, hypercall_page + __HYPERVISOR_ ## name * 32 - -DECLARE_HYPERCALL(set_trap_table) -DECLARE_HYPERCALL(mmu_update) -DECLARE_HYPERCALL(set_gdt) -DECLARE_HYPERCALL(stack_switch) -DECLARE_HYPERCALL(set_callbacks) -DECLARE_HYPERCALL(fpu_taskswitch) -DECLARE_HYPERCALL(sched_op_compat) -DECLARE_HYPERCALL(platform_op) -DECLARE_HYPERCALL(set_debugreg) -DECLARE_HYPERCALL(get_debugreg) -DECLARE_HYPERCALL(update_descriptor) -DECLARE_HYPERCALL(memory_op) -DECLARE_HYPERCALL(multicall) -DECLARE_HYPERCALL(update_va_mapping) -DECLARE_HYPERCALL(set_timer_op) -DECLARE_HYPERCALL(event_channel_op_compat) -DECLARE_HYPERCALL(xen_version) -DECLARE_HYPERCALL(console_io) -DECLARE_HYPERCALL(physdev_op_compat) -DECLARE_HYPERCALL(grant_table_op) -DECLARE_HYPERCALL(vm_assist) -DECLARE_HYPERCALL(update_va_mapping_otherdomain) -DECLARE_HYPERCALL(iret) -DECLARE_HYPERCALL(vcpu_op) -DECLARE_HYPERCALL(set_segment_base) -DECLARE_HYPERCALL(mmuext_op) -DECLARE_HYPERCALL(xsm_op) -DECLARE_HYPERCALL(nmi_op) -DECLARE_HYPERCALL(sched_op) -DECLARE_HYPERCALL(callback_op) -DECLARE_HYPERCALL(xenoprof_op) -DECLARE_HYPERCALL(event_channel_op) -DECLARE_HYPERCALL(physdev_op) -DECLARE_HYPERCALL(hvm_op) -DECLARE_HYPERCALL(sysctl) -DECLARE_HYPERCALL(domctl) -DECLARE_HYPERCALL(kexec_op) -DECLARE_HYPERCALL(argo_op) -DECLARE_HYPERCALL(xenpmu_op) - -DECLARE_HYPERCALL(arch_0) -DECLARE_HYPERCALL(arch_1) -DECLARE_HYPERCALL(arch_2) -DECLARE_HYPERCALL(arch_3) -DECLARE_HYPERCALL(arch_4) -DECLARE_HYPERCALL(arch_5) -DECLARE_HYPERCALL(arch_6) -DECLARE_HYPERCALL(arch_7) - -/* - * Local variables: - * tab-width: 8 - * indent-tabs-mode: nil - * End: - */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/xen.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/xen.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/guest/xen/xen.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/guest/xen/xen.c 2025-11-13 16:58:23.000000000 +0000 @@ -38,7 +38,6 @@ bool __read_mostly xen_guest; uint32_t __read_mostly xen_cpuid_base; -extern char hypercall_page[]; static struct rangeset *mem; DEFINE_PER_CPU(unsigned int, vcpu_id); @@ -47,6 +46,50 @@ static unsigned long vcpu_info_mapped[BITS_TO_LONGS(NR_CPUS)]; DEFINE_PER_CPU(struct vcpu_info *, vcpu_info); +/* + * Which instruction to use for early hypercalls: + * < 0 setup + * 0 vmcall + * > 0 vmmcall + */ +int8_t __initdata early_hypercall_insn = -1; + +/* + * Called once during the first hypercall to figure out which instruction to + * use. Error handling options are limited. 
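In C terms, the three-state dispatch which hypercall.S implements reduces to
the sketch below. This is illustrative only: do_vmcall()/do_vmmcall() are
stubs standing in for the raw VMCALL/VMMCALL instructions (which have no
portable C equivalent), and early_setup() compresses the vendor probe that
the real early_hypercall_setup() performs.

#include <stdint.h>

static long do_vmcall(long nr)  { return nr; }  /* stub: VMCALL  */
static long do_vmmcall(long nr) { return nr; }  /* stub: VMMCALL */

static int8_t early_insn = -1;   /* < 0 setup, 0 vmcall, > 0 vmmcall */

static void early_setup(void)
{
    /* Intel/Centaur/Shanghai take VMCALL; AMD/Hygon take VMMCALL. */
    early_insn = 0;
}

static long early_hypercall_model(long nr)
{
    if ( early_insn < 0 )
        early_setup();           /* decided once, on the first hypercall */

    return early_insn > 0 ? do_vmmcall(nr) : do_vmcall(nr);
}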
+ */ +void __init early_hypercall_setup(void) +{ + BUG_ON(early_hypercall_insn != -1); + + if ( !boot_cpu_data.x86_vendor ) + { + unsigned int eax, ebx, ecx, edx; + + cpuid(0, &eax, &ebx, &ecx, &edx); + + boot_cpu_data.x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); + } + + switch ( boot_cpu_data.x86_vendor ) + { + case X86_VENDOR_INTEL: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_SHANGHAI: + early_hypercall_insn = 0; + setup_force_cpu_cap(X86_FEATURE_USE_VMCALL); + break; + + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + early_hypercall_insn = 1; + break; + + default: + BUG(); + } +} + static void __init find_xen_leaves(void) { uint32_t eax, ebx, ecx, edx, base; @@ -349,9 +392,6 @@ if ( !xen_cpuid_base ) return NULL; - /* Fill the hypercall page. */ - wrmsrl(cpuid_ebx(xen_cpuid_base + 2), __pa(hypercall_page)); - xen_guest = true; return &ops; diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hpet.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/hpet.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hpet.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/hpet.c 2025-11-13 16:58:23.000000000 +0000 @@ -187,8 +187,6 @@ if ( __cpumask_test_and_clear_cpu(cpu, mask) ) raise_softirq(TIMER_SOFTIRQ); - cpuidle_wakeup_mwait(mask); - if ( !cpumask_empty(mask) ) cpumask_raise_softirq(mask, TIMER_SOFTIRQ); } diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/svm/entry.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/svm/entry.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/svm/entry.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/svm/entry.S 2025-11-13 16:58:23.000000000 +0000 @@ -94,6 +94,9 @@ pop %rdi sti + + SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ + vmrun SAVE_ALL diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/viridian/synic.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/viridian/synic.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/viridian/synic.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/viridian/synic.c 2025-11-13 16:58:23.000000000 +0000 @@ -339,6 +339,10 @@ .DeliveryTime = delivery, }; + /* Don't assume SIM page to be mapped. */ + if ( !msg ) + return false; + /* * To avoid using an atomic test-and-set, and barrier before calling * vlapic_set_irq(), this function must be called in context of the diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/viridian/time.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/viridian/time.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/viridian/time.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/viridian/time.c 2025-11-13 16:58:23.000000000 +0000 @@ -27,6 +27,10 @@ HV_REFERENCE_TSC_PAGE *p = rt->ptr; uint32_t seq; + /* Reference TSC page might not be mapped even if the MSR is enabled. 
*/ + if ( !p ) + return; + if ( initialize ) clear_page(p); @@ -105,8 +109,10 @@ trc->off = (int64_t)trc->val - trc_val(d, 0); + spin_lock(&vd->lock); if ( vd->reference_tsc.msr.enabled ) update_reference_tsc(d, false); + spin_unlock(&vd->lock); } static uint64_t time_ref_count(const struct domain *d) @@ -328,6 +334,7 @@ if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) ) return X86EMUL_EXCEPTION; + spin_lock(&vd->lock); viridian_unmap_guest_page(&vd->reference_tsc); vd->reference_tsc.msr.raw = val; viridian_dump_guest_page(v, "REFERENCE_TSC", &vd->reference_tsc); @@ -336,6 +343,7 @@ viridian_map_guest_page(d, &vd->reference_tsc); update_reference_tsc(d, true); } + spin_unlock(&vd->lock); break; case HV_X64_MSR_TIME_REF_COUNT: diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/viridian/viridian.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/viridian/viridian.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/hvm/viridian/viridian.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/hvm/viridian/viridian.c 2025-11-13 16:58:23.000000000 +0000 @@ -494,6 +494,8 @@ if ( !d->arch.hvm.viridian ) return -ENOMEM; + spin_lock_init(&d->arch.hvm.viridian->lock); + rc = viridian_synic_domain_init(d); if ( rc ) goto fail; @@ -560,7 +562,8 @@ if ( mask & 1 ) { - ASSERT(vp < HVM_MAX_VCPUS); + if ( vp >= HVM_MAX_VCPUS ) + break; __set_bit(vp, vpmask->mask); } @@ -574,26 +577,6 @@ bitmap_fill(vpmask->mask, HVM_MAX_VCPUS); } -static unsigned int vpmask_first(const struct hypercall_vpmask *vpmask) -{ - return find_first_bit(vpmask->mask, HVM_MAX_VCPUS); -} - -static unsigned int vpmask_next(const struct hypercall_vpmask *vpmask, - unsigned int vp) -{ - /* - * If vp + 1 > HVM_MAX_VCPUS then find_next_bit() will return - * HVM_MAX_VCPUS, ensuring the for_each_vp ( ... ) loop terminates. - */ - return find_next_bit(vpmask->mask, HVM_MAX_VCPUS, vp + 1); -} - -#define for_each_vp(vpmask, vp) \ - for ( (vp) = vpmask_first(vpmask); \ - (vp) < HVM_MAX_VCPUS; \ - (vp) = vpmask_next(vpmask, vp) ) - static unsigned int vpmask_nr(const struct hypercall_vpmask *vpmask) { return bitmap_weight(vpmask->mask, HVM_MAX_VCPUS); @@ -810,7 +793,7 @@ if ( nr > 1 ) cpu_raise_softirq_batch_begin(); - for_each_vp ( vpmask, vp ) + for_each_set_bit ( vp, vpmask->mask, currd->max_vcpus ) { struct vlapic *vlapic = vcpu_vlapic(currd->vcpu[vp]); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/alternative.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/alternative.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/alternative.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/alternative.h 2025-11-13 16:58:23.000000000 +0000 @@ -1,6 +1,13 @@ #ifndef __X86_ALTERNATIVE_H__ #define __X86_ALTERNATIVE_H__ +/* + * Common to both C and ASM. Express a replacement when a feature is not + * available. 
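The definitions just below encode this polarity in the top bit of the 16-bit
cpuid field of struct alt_instr. At patch time the decision reduces to
roughly the following sketch; want_replacement() and has_feature() are
invented stand-ins, and the real test is of course against the boot CPU's
feature bits rather than a stub:

#include <stdbool.h>
#include <stdint.h>

#define ALT_FLAG_NOT (1 << 15)
#define ALT_NOT(x)   (ALT_FLAG_NOT | (x))

static bool has_feature(unsigned int feat) { return feat & 1; } /* stub */

static bool want_replacement(uint16_t cpuid)
{
    bool feat = has_feature(cpuid & ~ALT_FLAG_NOT);

    /* Top bit set: apply the replacement when the feature is absent. */
    return (cpuid & ALT_FLAG_NOT) ? !feat : feat;
}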
+ */ +#define ALT_FLAG_NOT (1 << 15) +#define ALT_NOT(x) (ALT_FLAG_NOT | (x)) + #ifdef __ASSEMBLY__ #include #else @@ -11,7 +18,7 @@ struct __packed alt_instr { int32_t orig_offset; /* original instruction */ int32_t repl_offset; /* offset to replacement instruction */ - uint16_t cpuid; /* cpuid bit set for replacement */ + uint16_t cpuid; /* cpuid bit set for replacement (top bit is polarity) */ uint8_t orig_len; /* length of original instruction */ uint8_t repl_len; /* length of new instruction */ uint8_t pad_len; /* length of build-time padding */ @@ -23,6 +30,8 @@ #define ALT_REPL_PTR(a) __ALT_PTR(a, repl_offset) extern void add_nops(void *insns, unsigned int len); +void *place_ret(void *ptr); + /* Similar to alternative_instructions except it can be run with IRQs enabled. */ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void alternative_instructions(void); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/asm-defns.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/asm-defns.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/asm-defns.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/asm-defns.h 2025-11-13 16:58:23.000000000 +0000 @@ -47,6 +47,12 @@ .endif .endm +#ifdef CONFIG_RETURN_THUNK +# define RET jmp __x86_return_thunk +#else +# define RET ret +#endif + #ifdef CONFIG_XEN_IBT # define ENDBR64 endbr64 #else diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/cpufeature.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/cpufeature.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/cpufeature.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/cpufeature.h 2025-11-13 16:58:23.000000000 +0000 @@ -148,6 +148,11 @@ #define cpu_has_avx_vnni boot_cpu_has(X86_FEATURE_AVX_VNNI) #define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16) +/* CPUID level 0x80000021.eax */ +#define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH) +#define cpu_has_verw_clear boot_cpu_has(X86_FEATURE_VERW_CLEAR) +#define cpu_has_nscb boot_cpu_has(X86_FEATURE_NSCB) + /* MSR_ARCH_CAPS */ #define cpu_has_rdcl_no boot_cpu_has(X86_FEATURE_RDCL_NO) #define cpu_has_eibrs boot_cpu_has(X86_FEATURE_EIBRS) @@ -164,13 +169,16 @@ #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) #define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) #define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) +#define cpu_has_its_no boot_cpu_has(X86_FEATURE_ITS_NO) + +/* CPUID level 0x80000021.ecx */ +#define cpu_has_tsa_sq_no boot_cpu_has(X86_FEATURE_TSA_SQ_NO) +#define cpu_has_tsa_l1_no boot_cpu_has(X86_FEATURE_TSA_L1_NO) /* Synthesized. 
*/ #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) #define cpu_has_aperfmperf boot_cpu_has(X86_FEATURE_APERFMPERF) -#define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH) -#define cpu_has_nscb boot_cpu_has(X86_FEATURE_NSCB) #define cpu_has_xen_lbr boot_cpu_has(X86_FEATURE_XEN_LBR) #define cpu_has_xen_shstk boot_cpu_has(X86_FEATURE_XEN_SHSTK) #define cpu_has_xen_ibt boot_cpu_has(X86_FEATURE_XEN_IBT) diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/cpufeatures.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/cpufeatures.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/cpufeatures.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/cpufeatures.h 2025-11-13 16:58:23.000000000 +0000 @@ -19,7 +19,7 @@ XEN_CPUFEATURE(TSC_RELIABLE, X86_SYNTH( 4)) /* TSC is known to be reliable */ XEN_CPUFEATURE(XTOPOLOGY, X86_SYNTH( 5)) /* cpu topology enum extensions */ XEN_CPUFEATURE(CPUID_FAULTING, X86_SYNTH( 6)) /* cpuid faulting */ -XEN_CPUFEATURE(CLFLUSH_MONITOR, X86_SYNTH( 7)) /* clflush reqd with monitor */ +/* Bit 7 unused */ XEN_CPUFEATURE(APERFMPERF, X86_SYNTH( 8)) /* APERFMPERF */ XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */ XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */ @@ -42,6 +42,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SYNTH(28)) /* MSR_PRED_CMD used by Xen for PV */ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */ +XEN_CPUFEATURE(USE_VMCALL, X86_SYNTH(30)) /* Use VMCALL instead of VMMCALL */ /* Bug words follow the synthetic words. */ #define X86_NR_BUG 1 @@ -51,6 +52,8 @@ #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */ #define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ #define X86_BUG_IBPB_NO_RET X86_BUG( 3) /* IBPB doesn't flush the RSB/RAS */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG( 4) /* MONITOR requires CLFLUSH */ +#define X86_BUG_MONITOR X86_BUG( 5) /* MONITOR doesn't always notice writes (force IPIs) */ #define X86_SPEC_NO_LFENCE_ENTRY_PV X86_BUG(16) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_PV. */ #define X86_SPEC_NO_LFENCE_ENTRY_INTR X86_BUG(17) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_INTR. 
*/ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/guest/xen-hcall.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/guest/xen-hcall.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/guest/xen-hcall.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/guest/xen-hcall.h 2025-11-13 16:58:23.000000000 +0000 @@ -41,9 +41,11 @@ ({ \ long res, tmp__; \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__) ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)) \ : "memory" ); \ (type)res; \ @@ -53,10 +55,12 @@ ({ \ long res, tmp__; \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__), "=S" (tmp__) \ ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)), "2" ((long)(a2)) \ : "memory" ); \ (type)res; \ @@ -66,10 +70,12 @@ ({ \ long res, tmp__; \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__), "=S" (tmp__), "=d" (tmp__) \ ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)) \ : "memory" ); \ (type)res; \ @@ -80,10 +86,12 @@ long res, tmp__; \ register long _a4 asm ("r10") = ((long)(a4)); \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__), "=S" (tmp__), "=d" (tmp__), \ "=&r" (tmp__) ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)), \ "4" (_a4) \ : "memory" ); \ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/hardirq.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/hardirq.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/hardirq.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/hardirq.h 2025-11-13 16:58:23.000000000 +0000 @@ -5,11 +5,22 @@ #include typedef struct { - unsigned int __softirq_pending; - unsigned int __local_irq_count; - unsigned int nmi_count; - unsigned int mce_count; - bool_t __mwait_wakeup; + /* + * The layout is important. Any CPU can set bits in __softirq_pending, + * but in_mwait is a status bit owned by the CPU. softirq_mwait_raw must + * cover both, and must be in a single cacheline. 
+ */ + union { + struct { + unsigned int __softirq_pending; + bool in_mwait; + }; + uint64_t softirq_mwait_raw; + }; + + unsigned int __local_irq_count; + unsigned int nmi_count; + unsigned int mce_count; } __cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/hvm/viridian.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/hvm/viridian.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/hvm/viridian.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/hvm/viridian.h 2025-11-13 16:58:23.000000000 +0000 @@ -71,6 +71,7 @@ DECLARE_BITMAP(hypercall_flags, _HCALL_nr); struct viridian_time_ref_count time_ref_count; struct viridian_page reference_tsc; + spinlock_t lock; }; void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf, diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/processor.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/processor.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/processor.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/processor.h 2025-11-13 16:58:23.000000000 +0000 @@ -405,23 +405,6 @@ return (pkru >> (pkey * PKRU_ATTRS + PKRU_WRITE)) & 1; } -static always_inline void __monitor(const void *eax, unsigned long ecx, - unsigned long edx) -{ - /* "monitor %eax,%ecx,%edx;" */ - asm volatile ( - ".byte 0x0f,0x01,0xc8;" - : : "a" (eax), "c" (ecx), "d"(edx) ); -} - -static always_inline void __mwait(unsigned long eax, unsigned long ecx) -{ - /* "mwait %eax,%ecx;" */ - asm volatile ( - ".byte 0x0f,0x01,0xc9;" - : : "a" (eax), "c" (ecx) ); -} - #define IOBMP_BYTES 8192 #define IOBMP_INVALID_OFFSET 0x8000 diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/softirq.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/softirq.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/softirq.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/softirq.h 2025-11-13 16:58:23.000000000 +0000 @@ -1,6 +1,8 @@ #ifndef __ASM_SOFTIRQ_H__ #define __ASM_SOFTIRQ_H__ +#include + #define NMI_SOFTIRQ (NR_COMMON_SOFTIRQS + 0) #define TIME_CALIBRATE_SOFTIRQ (NR_COMMON_SOFTIRQS + 1) #define VCPU_KICK_SOFTIRQ (NR_COMMON_SOFTIRQS + 2) @@ -9,6 +11,50 @@ #define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 4) #define NR_ARCH_SOFTIRQS 5 -bool arch_skip_send_event_check(unsigned int cpu); +/* + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be + * skipped, false if the IPI cannot be skipped. + * + * We use a CMPXCHG covering both __softirq_pending and in_mwait, in order to + * set softirq @nr while also observing in_mwait in a race-free way. + */ +static always_inline bool arch_set_softirq(unsigned int nr, unsigned int cpu) +{ + uint64_t *ptr = &irq_stat[cpu].softirq_mwait_raw; + uint64_t prev, old, new; + unsigned int softirq = 1U << nr; + + old = ACCESS_ONCE(*ptr); + + for ( ;; ) + { + if ( old & softirq ) + /* Softirq already pending, nothing to do. */ + return true; + + new = old | softirq; + + prev = cmpxchg(ptr, old, new); + if ( prev == old ) + break; + + old = prev; + } + + /* + * We have caused the softirq to become pending. If in_mwait was set, the + * target CPU will notice the modification and act on it. + * + * We can't access the in_mwait field nicely, so use some BUILD_BUG_ON()'s + * to cross-check the (1UL << 32) opencoding. 
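The idle-side half of this protocol lives in the cpu_idle.c changes listed in
the diffstat but not shown in this hunk. A minimal sketch of the intended
ordering, with all names invented and MONITOR/MWAIT reduced to stubs (they
have no portable C form):

#include <stdbool.h>
#include <stdint.h>

struct softirq_mwait {               /* models the union in irq_cpustat_t */
    uint32_t pending;
    bool in_mwait;
};

static void monitor_stub(const void *addr) { (void)addr; }
static void mwait_stub(void) { }

/* Arm MONITOR, advertise in_mwait, then re-check pending: a remote
 * arch_set_softirq() either observes in_mwait (and skips the IPI, as its
 * write to the monitored line will wake us) or we observe its softirq
 * bit here and never enter MWAIT at all. */
static void mwait_idle_sketch(struct softirq_mwait *st)
{
    monitor_stub(st);
    st->in_mwait = true;             /* the real code orders this carefully */

    if ( !st->pending )
        mwait_stub();

    st->in_mwait = false;
}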
+ */ + BUILD_BUG_ON(sizeof(irq_stat[0].softirq_mwait_raw) != 8); + BUILD_BUG_ON((offsetof(irq_cpustat_t, in_mwait) - + offsetof(irq_cpustat_t, softirq_mwait_raw)) != 4); + + return new & (1UL << 32) /* in_mwait */; + +} +#define arch_set_softirq arch_set_softirq #endif /* __ASM_SOFTIRQ_H__ */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/spec_ctrl.h xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/spec_ctrl.h --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/include/asm/spec_ctrl.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/include/asm/spec_ctrl.h 2025-11-13 16:58:23.000000000 +0000 @@ -126,8 +126,22 @@ info->verw_sel = __HYPERVISOR_DS32; } +static always_inline void __spec_ctrl_enter_idle_verw(struct cpu_info *info) +{ + /* + * Flush/scrub structures which are statically partitioned between active + * threads. Otherwise data of ours (of unknown sensitivity) will become + * available to our sibling when we go idle. + * + * Note: VERW must be encoded with a memory operand, as it is only that + * form with side effects. + */ + alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE, + [sel] "m" (info->verw_sel)); +} + /* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ -static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) +static always_inline void __spec_ctrl_enter_idle(struct cpu_info *info, bool verw) { uint32_t val = 0; @@ -146,21 +160,8 @@ "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); barrier(); - /* - * Microarchitectural Store Buffer Data Sampling: - * - * On vulnerable systems, store buffer entries are statically partitioned - * between active threads. When entering idle, our store buffer entries - * are re-partitioned to allow the other threads to use them. - * - * Flush the buffers to ensure that no sensitive data of ours can be - * leaked by a sibling after it gets our store buffer entries. - * - * Note: VERW must be encoded with a memory operand, as it is only that - * form which causes a flush. - */ - alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE, - [sel] "m" (info->verw_sel)); + if ( verw ) /* Expected to be const-propagated. */ + __spec_ctrl_enter_idle_verw(info); /* * Cross-Thread Return Address Predictions: @@ -178,6 +179,12 @@ : "rax", "rcx"); } +/* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ +static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) +{ + __spec_ctrl_enter_idle(info, true /* VERW */); +} + /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) { diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/indirect-thunk.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/indirect-thunk.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/indirect-thunk.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/indirect-thunk.S 2025-11-13 16:58:23.000000000 +0000 @@ -11,6 +11,9 @@ #include + +#ifdef CONFIG_INDIRECT_THUNK + .macro IND_THUNK_RETPOLINE reg:req call 1f int3 @@ -35,6 +38,16 @@ .macro GEN_INDIRECT_THUNK reg:req .section .text.__x86_indirect_thunk_\reg, "ax", @progbits + /* + * The Indirect Target Selection speculative vulnerability means that + * indirect branches (including RETs) are unsafe when in the first + * half of a cacheline. Arrange for them to be in the second half. + * + * Align to 64, then skip 32. 
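The resulting layout is simple to check arithmetically: after .balign 64 and
a 32-byte fill, the thunk entry lands at byte 32 of its cacheline. A minimal
sketch of the invariant (its_safe_address() is an invented helper):

#include <stdbool.h>
#include <stdint.h>

/* ITS-safe: the branch sits in the second half of a 64-byte cacheline. */
static bool its_safe_address(uintptr_t addr)
{
    return (addr & 0x3f) >= 32;
}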
+ */ + .balign 64 + .fill 32, 1, 0xcc + ENTRY(__x86_indirect_thunk_\reg) ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg), \ __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \ @@ -50,3 +63,27 @@ .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 GEN_INDIRECT_THUNK reg=r\reg .endr + +#endif /* CONFIG_INDIRECT_THUNK */ + +#ifdef CONFIG_RETURN_THUNK + .section .text.entry.__x86_return_thunk, "ax", @progbits + + /* + * The Indirect Target Selection speculative vulnerability means that + * indirect branches (including RETs) are unsafe when in the first + * half of a cacheline. Arrange for them to be in the second half. + * + * Align to 64, then skip 32. + */ + .balign 64 + .fill 32, 1, 0xcc + +ENTRY(__x86_return_thunk) + ret + int3 /* Halt straight-line speculation */ + + .size __x86_return_thunk, . - __x86_return_thunk + .type __x86_return_thunk, @function + +#endif /* CONFIG_RETURN_THUNK */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/pv/emul-priv-op.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/pv/emul-priv-op.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/pv/emul-priv-op.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/pv/emul-priv-op.c 2025-11-13 16:58:23.000000000 +0000 @@ -88,7 +88,6 @@ 0x41, 0x5c, /* pop %r12 */ 0x5d, /* pop %rbp */ 0x5b, /* pop %rbx */ - 0xc3, /* ret */ }; const struct stubs *this_stubs = &this_cpu(stubs); @@ -138,11 +137,13 @@ APPEND_CALL(save_guest_gprs); APPEND_BUFF(epilogue); + p = place_ret(p); /* Build-time best effort attempt to catch problems. */ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < (sizeof(prologue) + sizeof(epilogue) + 10 /* 2x call */ + - MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES))); + MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES) + + (IS_ENABLED(CONFIG_RETURN_THUNK) ? 5 : 1) /* ret */)); /* Runtime confirmation that we haven't clobbered an adjacent stub. */ BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub)); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/pv/gpr_switch.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/pv/gpr_switch.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/pv/gpr_switch.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/pv/gpr_switch.S 2025-11-13 16:58:23.000000000 +0000 @@ -26,7 +26,7 @@ movq UREGS_r15(%rdi), %r15 movq UREGS_rcx(%rdi), %rcx movq UREGS_rdi(%rdi), %rdi - ret + RET .size load_guest_gprs, . - load_guest_gprs .type load_guest_gprs, STT_FUNC @@ -51,7 +51,7 @@ movq %rbx, UREGS_rbx(%rdi) movq %rdx, UREGS_rdx(%rdi) movq %rcx, UREGS_rcx(%rdi) - ret + RET .size save_guest_gprs, . - save_guest_gprs .type save_guest_gprs, STT_FUNC diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/spec_ctrl.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/spec_ctrl.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/spec_ctrl.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/spec_ctrl.c 2025-11-13 16:58:23.000000000 +0000 @@ -496,7 +496,7 @@ static void __init print_details(enum ind_thunk thunk) { - unsigned int _7d0 = 0, _7d2 = 0, e8b = 0, e21a = 0, max = 0, tmp; + unsigned int _7d0 = 0, _7d2 = 0, e8b = 0, e21a = 0, e21c = 0, max = 0, tmp; uint64_t caps = 0; /* Collect diagnostics about available mitigations. 
*/ @@ -507,7 +507,7 @@ if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); if ( boot_cpu_data.extended_cpuid_level >= 0x80000021 ) - cpuid(0x80000021, &e21a, &tmp, &tmp, &tmp); + cpuid(0x80000021U, &e21a, &tmp, &e21c, &tmp); if ( cpu_has_arch_caps ) rdmsrl(MSR_ARCH_CAPABILITIES, caps); @@ -517,7 +517,7 @@ * Hardware read-only information, stating immunity to certain issues, or * suggestions of which mitigation to use. */ - printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", @@ -541,10 +541,12 @@ (e8b & cpufeat_mask(X86_FEATURE_BTC_NO)) ? " BTC_NO" : "", (e8b & cpufeat_mask(X86_FEATURE_IBPB_RET)) ? " IBPB_RET" : "", (e21a & cpufeat_mask(X86_FEATURE_IBPB_BRTYPE)) ? " IBPB_BRTYPE" : "", - (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : ""); + (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : "", + (e21c & cpufeat_mask(X86_FEATURE_TSA_SQ_NO)) ? " TSA_SQ_NO" : "", + (e21c & cpufeat_mask(X86_FEATURE_TSA_L1_NO)) ? " TSA_L1_NO" : ""); /* Hardware features which need driving to mitigate issues. */ - printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || @@ -563,7 +565,8 @@ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "", (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "", (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "", - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); + (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : "", + (e21a & cpufeat_mask(X86_FEATURE_VERW_CLEAR)) ? " VERW_CLEAR" : ""); /* Compiled-in support which pertains to mitigations. */ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || @@ -575,6 +578,9 @@ #ifdef CONFIG_INDIRECT_THUNK " INDIRECT_THUNK" #endif +#ifdef CONFIG_RETURN_THUNK + " RETURN_THUNK" +#endif #ifdef CONFIG_SHADOW_PAGING " SHADOW_PAGING" #endif @@ -1523,6 +1529,77 @@ setup_force_cpu_cap(X86_FEATURE_RFDS_NO); } +/* + * Transient Scheduler Attacks + * + * https://www.amd.com/content/dam/amd/en/documents/resources/bulletin/technical-guidance-for-mitigating-transient-scheduler-attacks.pdf + */ +static void __init tsa_calculations(void) +{ + unsigned int curr_rev, min_rev; + + /* TSA is only known to affect AMD processors at this time. */ + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) + return; + + /* If we're virtualised, don't attempt to synthesise anything. */ + if ( cpu_has_hypervisor ) + return; + + /* + * According to the whitepaper, some Fam1A CPUs (Models 0x00...0x4f, + * 0x60...0x7f) are not vulnerable but don't enumerate TSA_{SQ,L1}_NO. If + * we see either enumerated, assume both are correct ... + */ + if ( cpu_has_tsa_sq_no || cpu_has_tsa_l1_no ) + return; + + /* + * ... otherwise, synthesise them. CPUs other than Fam19 (Zen3/4) are + * stated to be not vulnerable. + */ + if ( boot_cpu_data.x86 != 0x19 ) + { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + return; + } + + /* + * Fam19 CPUs get VERW_CLEAR with new enough microcode, but must + * synthesise the CPUID bit. 
+ */ + curr_rev = this_cpu(cpu_sig).rev; + switch ( curr_rev >> 8 ) + { + case 0x0a0011: min_rev = 0x0a0011d7; break; + case 0x0a0012: min_rev = 0x0a00123b; break; + case 0x0a0082: min_rev = 0x0a00820d; break; + case 0x0a1011: min_rev = 0x0a10114c; break; + case 0x0a1012: min_rev = 0x0a10124c; break; + case 0x0a1081: min_rev = 0x0a108109; break; + case 0x0a2010: min_rev = 0x0a20102e; break; + case 0x0a2012: min_rev = 0x0a201211; break; + case 0x0a4041: min_rev = 0x0a404108; break; + case 0x0a5000: min_rev = 0x0a500012; break; + case 0x0a6012: min_rev = 0x0a60120a; break; + case 0x0a7041: min_rev = 0x0a704108; break; + case 0x0a7052: min_rev = 0x0a705208; break; + case 0x0a7080: min_rev = 0x0a708008; break; + case 0x0a70c0: min_rev = 0x0a70c008; break; + case 0x0aa002: min_rev = 0x0aa00216; break; + default: + printk(XENLOG_WARNING + "Unrecognised CPU %02x-%02x-%02x, ucode 0x%08x for TSA mitigation\n", + boot_cpu_data.x86, boot_cpu_data.x86_model, + boot_cpu_data.x86_mask, curr_rev); + return; + } + + if ( curr_rev >= min_rev ) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); +} + static bool __init cpu_has_gds(void) { /* @@ -1763,6 +1840,90 @@ } } +/* + * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/indirect-target-selection.html + */ +static void __init its_calculations(void) +{ + /* + * Indirect Target Selection is a Branch Prediction bug whereby certain + * indirect branches (including RETs) get predicted using a direct branch + * target, rather than a suitable indirect target, bypassing hardware + * isolation protections. + * + * ITS affects Core (but not Atom) processors starting from the + * introduction of eIBRS, up to but not including Golden Cove cores + * (checked here with BHI_CTRL). + * + * The ITS_NO feature is not expected to be enumerated by hardware, and is + * only for VMMs to synthesise for guests. + * + * ITS comes in 3 flavours: + * + * 1) Across-IBPB. Indirect branches after the IBPB can be controlled + * by direct targets which existed prior to the IBPB. This is + * addressed in the IPU 2025.1 microcode drop, and has no other + * software interaction. + * + * 2) Guest/Host. Indirect branches in the VMM can be controlled by + * direct targets from the guest. This applies equally to PV guests + * (Ring3) and HVM guests (VMX), and applies to all Skylake-uarch + * cores with eIBRS. + * + * 3) Intra-mode. Indirect branches in the VMM can be controlled by + * other execution in the same mode. + */ + + /* + * If we can see ITS_NO, or we're virtualised, do nothing. We are or may + * migrate somewhere unsafe. + */ + if ( cpu_has_its_no || cpu_has_hypervisor ) + return; + + /* ITS is only known to affect Intel processors at this time. */ + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) + return; + + /* + * ITS does not exist on: + * - non-Family 6 CPUs + * - those without eIBRS + * - those with BHI_CTRL + * but we still need to synthesise ITS_NO. + */ + if ( boot_cpu_data.x86 != 6 || !cpu_has_eibrs || + boot_cpu_has(X86_FEATURE_BHI_CTRL) ) + goto synthesise; + + switch ( boot_cpu_data.x86_model ) + { + /* These Skylake-uarch cores suffer cases #2 and #3. */ + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE: + case INTEL_FAM6_COMETLAKE_L: + return; + + /* These Sunny/Willow/Cypress Cove cores suffer case #3. 
      */
+    case INTEL_FAM6_ICELAKE_X:
+    case INTEL_FAM6_ICELAKE_D:
+    case INTEL_FAM6_ICELAKE_L:
+    case INTEL_FAM6_TIGERLAKE_L:
+    case INTEL_FAM6_TIGERLAKE:
+    case INTEL_FAM6_ROCKETLAKE:
+        return;
+
+    default:
+        break;
+    }
+
+    /* Platforms remaining are not believed to be vulnerable to ITS. */
+ synthesise:
+    setup_force_cpu_cap(X86_FEATURE_ITS_NO);
+}
+
 void spec_ctrl_init_domain(struct domain *d)
 {
     bool pv = is_pv_domain(d);
@@ -2132,6 +2293,7 @@
      * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
      * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
      * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
+     * https://www.amd.com/content/dam/amd/en/documents/resources/bulletin/technical-guidance-for-mitigating-transient-scheduler-attacks.pdf
      *
      * Relevant ucodes:
      *
@@ -2164,9 +2326,18 @@
      *
      * - March 2023, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now
      *   scrubs non-architectural entries from certain register files.
+     *
+     * - July 2025, for TSA. Introduces VERW side effects to mitigate
+     *   TSA_{SQ/L1}. Xen must synthesise the VERW_CLEAR feature based on
+     *   microcode version.
+     *
+     * Note, these microcode updates were produced before the remediation of
+     * the microcode signature issues, and are included in the firmware
+     * updates fixing the Entrysign vulnerability from ~December 2024.
      */
     mds_calculations();
     rfds_calculations();
+    tsa_calculations();
 
     /*
      * Parts which enumerate FB_CLEAR are those with now-updated microcode
@@ -2198,21 +2369,27 @@
      * MLPDS/MFBDS when SMT is enabled.
      */
     if ( opt_verw_pv == -1 )
-        opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear;
+        opt_verw_pv = (cpu_has_useful_md_clear || cpu_has_rfds_clear ||
+                       cpu_has_verw_clear);
 
     if ( opt_verw_hvm == -1 )
-        opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear;
+        opt_verw_hvm = (cpu_has_useful_md_clear || cpu_has_rfds_clear ||
+                        cpu_has_verw_clear);
 
     /*
-     * If SMT is active, and we're protecting against MDS or MMIO stale data,
+     * If SMT is active, and we're protecting against any of:
+     * - MSBDS
+     * - MMIO stale data
+     * - TSA-SQ
      * we need to scrub before going idle as well as on return to guest.
      * Various pipeline resources are repartitioned amongst non-idle threads.
      *
-     * We don't need to scrub on idle for RFDS. There are no affected cores
-     * which support SMT, despite there being affected cores in hybrid systems
-     * which have SMT elsewhere in the platform.
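With the TSA inputs folded in, the scrub-on-idle decision this hunk builds up
is just a boolean predicate. A sketch with every input parameterised (in Xen
these are the cpu_has_* predicates and opt_verw_* tunables used here;
want_verw_on_idle() is an invented name):

#include <stdbool.h>

static bool want_verw_on_idle(bool smt, bool useful_md_clear,
                              bool verw_pv_or_hvm, bool verw_clear,
                              bool tsa_sq_no, bool verw_mmio)
{
    return smt &&
           ((useful_md_clear && verw_pv_or_hvm) ||
            (verw_clear && !tsa_sq_no) ||   /* TSA-SQ; TSA-L1 needs no idle scrub */
            verw_mmio);
}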
+ * We don't need to scrub on idle for: + * - RFDS (no SMT affected cores) + * - TSA-L1 (utags never shared between threads) */ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || + (cpu_has_verw_clear && !cpu_has_tsa_sq_no) || opt_verw_mmio) && hw_smt_enabled ) setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); @@ -2313,6 +2490,8 @@ bhi_calculations(); + its_calculations(); + print_details(thunk); /* diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/x86_64/compat/entry.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/x86_64/compat/entry.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/x86_64/compat/entry.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/x86_64/compat/entry.S 2025-11-13 16:58:23.000000000 +0000 @@ -183,7 +183,7 @@ mov %rax, %cr4 mov %rax, (%rdx) pop %rdx - ret + RET 0: #ifndef NDEBUG /* Check that _all_ of the bits intended to be set actually are. */ @@ -202,7 +202,7 @@ #endif pop %rdx xor %eax, %eax - ret + RET ENTRY(compat_syscall) /* Fix up reported %cs/%ss for compat domains. */ @@ -329,7 +329,7 @@ xor %eax, %eax mov %ax, TRAPBOUNCE_cs(%rdx) mov %al, TRAPBOUNCE_flags(%rdx) - ret + RET .section .fixup,"ax" .Lfx13: diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/x86_64/entry.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/x86_64/entry.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/x86_64/entry.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/x86_64/entry.S 2025-11-13 16:58:23.000000000 +0000 @@ -598,7 +598,7 @@ xor %eax, %eax mov %rax, TRAPBOUNCE_eip(%rdx) mov %al, TRAPBOUNCE_flags(%rdx) - ret + RET .pushsection .fixup, "ax", @progbits # Numeric tags below represent the intended overall %rsi adjustment. diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/x86_emulate/x86_emulate.c xen-4.17.5+72-g01140da4e8/xen/arch/x86/x86_emulate/x86_emulate.c --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/x86_emulate/x86_emulate.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/x86_emulate/x86_emulate.c 2025-11-13 16:58:23.000000000 +0000 @@ -1258,12 +1258,15 @@ stub_exn.info = (union stub_exception_token) { .raw = ~0 }; \ stub_exn.line = __LINE__; /* Utility outweighs livepatching cost */ \ block_speculation(); /* SCSB */ \ - asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \ + asm volatile ( pre "\n\t" \ + "INDIRECT_CALL %[stub]\n" \ ".Lret%=:\n\t" \ + post "\n\t" \ + ".Lskip%=:\n\t" \ ".pushsection .fixup,\"ax\"\n" \ ".Lfix%=:\n\t" \ "pop %[exn]\n\t" \ - "jmp .Lret%=\n\t" \ + "jmp .Lskip%=\n\t" \ ".popsection\n\t" \ _ASM_EXTABLE(.Lret%=, .Lfix%=) \ : [exn] "+g" (stub_exn.info) ASM_CALL_CONSTRAINT, \ @@ -1533,36 +1536,42 @@ #define emulate_fpu_insn_memdst(opc, ext, arg) \ do { \ + void *_p = get_stub(stub); \ /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \ insn_bytes = 2; \ - memcpy(get_stub(stub), \ - ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \ + memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \ + place_ret(_p); \ invoke_stub("", "", "+m" (arg) : "a" (&(arg))); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_memsrc(opc, ext, arg) \ do { \ + void *_p = get_stub(stub); \ /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \ - memcpy(get_stub(stub), \ - ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \ + memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \ + place_ret(_p); \ invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg))); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_stub(bytes...) 
\ do { \ + void *_p = get_stub(stub); \ unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \ - memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \ + memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \ + place_ret(_p); \ invoke_stub("", "", "=m" (dummy) : "i" (0)); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_stub_eflags(bytes...) \ do { \ + void *_p = get_stub(stub); \ unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \ unsigned long tmp_; \ - memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \ + memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \ + place_ret(_p); \ invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"), \ _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), \ [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_) \ @@ -3852,7 +3861,7 @@ stb[3] = 0x91; stb[4] = evex.opmsk << 3; insn_bytes = 5; - stb[5] = 0xc3; + place_ret(&stb[5]); invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); @@ -6751,7 +6760,7 @@ evex.lr = 0; opc[1] = (modrm & 0x38) | 0xc0; insn_bytes = EVEX_PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_EVEX(opc, evex); invoke_stub("", "", "=g" (dummy) : "a" (src.val)); @@ -6816,7 +6825,7 @@ insn_bytes = PFX_BYTES + 2; copy_REX_VEX(opc, rex_prefix, vex); } - opc[2] = 0xc3; + place_ret(&opc[2]); ea.reg = decode_gpr(&_regs, modrm_reg); invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp)); @@ -6884,7 +6893,7 @@ insn_bytes = PFX_BYTES + 2; copy_REX_VEX(opc, rex_prefix, vex); } - opc[2] = 0xc3; + place_ret(&opc[2]); _regs.eflags &= ~EFLAGS_MASK; invoke_stub("", @@ -7113,7 +7122,7 @@ opc[1] = modrm & 0xc7; insn_bytes = PFX_BYTES + 2; simd_0f_to_gpr: - opc[insn_bytes - PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - PFX_BYTES]); generate_exception_if(ea.type != OP_REG, EXC_UD); @@ -7510,7 +7519,7 @@ vex.w = 0; opc[1] = modrm & 0x38; insn_bytes = PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_REX_VEX(opc, rex_prefix, vex); invoke_stub("", "", "+m" (src.val) : "a" (&src.val)); @@ -7538,7 +7547,7 @@ evex.w = 0; opc[1] = modrm & 0x38; insn_bytes = EVEX_PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_EVEX(opc, evex); invoke_stub("", "", "+m" (src.val) : "a" (&src.val)); @@ -7733,7 +7742,7 @@ #endif /* X86EMUL_NO_SIMD */ simd_0f_reg_only: - opc[insn_bytes - PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - PFX_BYTES]); copy_REX_VEX(opc, rex_prefix, vex); invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) ); @@ -8058,7 +8067,7 @@ if ( !mode_64bit() ) vex.w = 0; opc[1] = modrm & 0xf8; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_VEX(opc, vex); ea.reg = decode_gpr(&_regs, modrm_rm); @@ -8101,7 +8110,7 @@ if ( !mode_64bit() ) vex.w = 0; opc[1] = modrm & 0xc7; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_VEX(opc, vex); invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0)); @@ -8131,7 +8140,7 @@ opc = init_prefixes(stub); opc[0] = b; opc[1] = modrm; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_VEX(opc, vex); _regs.eflags &= ~EFLAGS_MASK; @@ -9027,7 +9036,7 @@ if ( !mode_64bit() ) vex.w = 0; opc[1] = modrm & 0xc7; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_REX_VEX(opc, rex_prefix, vex); invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0)); @@ -9145,7 +9154,7 @@ opc[1] &= 0x38; } insn_bytes = PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); if ( vex.opcx == vex_none ) { /* Cover for extra prefix byte. 
*/ @@ -9424,7 +9433,7 @@ pvex->b = !mode_64bit() || (vex.reg >> 3); opc[1] = 0xc0 | (~vex.reg & 7); pvex->reg = 0xf; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0)); put_stub(stub); @@ -9684,7 +9693,7 @@ evex.w = 0; opc[1] = modrm & 0xf8; insn_bytes = EVEX_PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_EVEX(opc, evex); invoke_stub("", "", "=g" (dummy) : "a" (src.val)); @@ -9783,7 +9792,7 @@ pvex->b = 1; opc[1] = (modrm_reg & 7) << 3; pvex->reg = 0xf; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); @@ -9853,7 +9862,7 @@ pvex->b = 1; opc[1] = (modrm_reg & 7) << 3; pvex->reg = 0xf; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); @@ -9909,7 +9918,7 @@ pevex->b = 1; opc[1] = (modrm_reg & 7) << 3; pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); @@ -9974,7 +9983,7 @@ pevex->b = 1; opc[1] = (modrm_reg & 7) << 3; pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); @@ -9988,7 +9997,7 @@ opc[2] = 0x90; /* Use (%rax) as source. */ opc[3] = evex.opmsk << 3; - opc[4] = 0xc3; + place_ret(&opc[4]); invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); put_stub(stub); @@ -10082,7 +10091,7 @@ pevex->b = 1; opc[1] = (modrm_reg & 7) << 3; pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); @@ -10160,7 +10169,7 @@ opc[2] = 0x90; /* Use (%rax) as source. */ opc[3] = evex.opmsk << 3; - opc[4] = 0xc3; + place_ret(&opc[4]); invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); put_stub(stub); @@ -10229,7 +10238,7 @@ pevex->r = !mode_64bit() || !(state->sib_index & 0x08); pevex->R = !mode_64bit() || !(state->sib_index & 0x10); pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (index) : "a" (&index)); put_stub(stub); @@ -10404,7 +10413,7 @@ pvex->reg = 0xf; /* rAX */ buf[3] = b; buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg)); @@ -10438,7 +10447,7 @@ pvex->reg = 0xf; /* rAX */ buf[3] = b; buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); emulate_stub("=&a" (dst.val), "c" (&src.val)); @@ -10670,7 +10679,7 @@ evex.w = vex.w = 0; opc[1] = modrm & 0x38; opc[2] = imm1; - opc[3] = 0xc3; + place_ret(&opc[3]); if ( vex.opcx == vex_none ) { /* Cover for extra prefix byte. */ @@ -10837,7 +10846,7 @@ insn_bytes = PFX_BYTES + 3; copy_VEX(opc, vex); } - opc[3] = 0xc3; + place_ret(&opc[3]); /* Latch MXCSR - we may need to restore it below. */ invoke_stub("stmxcsr %[mxcsr]", "", @@ -11065,7 +11074,7 @@ } opc[2] = imm1; insn_bytes = PFX_BYTES + 3; - opc[3] = 0xc3; + place_ret(&opc[3]); if ( vex.opcx == vex_none ) { /* Cover for extra prefix byte. 
*/ @@ -11225,7 +11234,7 @@ pxop->reg = 0xf; /* rAX */ buf[3] = b; buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); emulate_stub([dst] "=&a" (dst.val), "c" (&src.val)); @@ -11334,7 +11343,7 @@ buf[3] = b; buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ *(uint32_t *)(buf + 5) = imm1; - buf[9] = 0xc3; + place_ret(&buf[9]); emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val)); @@ -11401,12 +11410,12 @@ BUG(); if ( evex_encoded() ) { - opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - EVEX_PFX_BYTES]); copy_EVEX(opc, evex); } else { - opc[insn_bytes - PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - PFX_BYTES]); copy_REX_VEX(opc, rex_prefix, vex); } diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/xen.lds.S xen-4.17.5+72-g01140da4e8/xen/arch/x86/xen.lds.S --- xen-4.17.5+23-ga4e5191dc0/xen/arch/x86/xen.lds.S 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/arch/x86/xen.lds.S 2025-11-13 16:58:23.000000000 +0000 @@ -86,6 +86,7 @@ . = ALIGN(PAGE_SIZE); _stextentry = .; *(.text.entry) + *(.text.entry.*) . = ALIGN(PAGE_SIZE); _etextentry = .; diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/common/Kconfig xen-4.17.5+72-g01140da4e8/xen/common/Kconfig --- xen-4.17.5+23-ga4e5191dc0/xen/common/Kconfig 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/common/Kconfig 2025-11-13 16:58:23.000000000 +0000 @@ -112,6 +112,17 @@ When enabled, indirect branches are implemented using a new construct called "retpoline" that prevents speculation. +config RETURN_THUNK + bool "Out-of-line Returns" + depends on CC_HAS_RETURN_THUNK + default INDIRECT_THUNK + help + Compile Xen with out-of-line returns. + + This allows Xen to mitigate a variety of speculative vulnerabilities + by choosing a hardware-dependent instruction sequence to implement + function returns safely. 
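For context on the mechanism: the repeated "opc[...] = 0xc3" to "place_ret(...)" conversions in x86_emulate.c above, together with this new CONFIG_RETURN_THUNK option, are parts of the XSA-469 (Indirect Target Selection) mitigation — every RET written into a runtime-generated stub is now emitted via a helper that can substitute a jump to a shared, patchable return thunk. A minimal sketch of such a helper follows, assuming a __x86_return_thunk symbol and standard x86 encodings; the real implementation lives in xen/arch/x86/alternative.c (per the diffstat) and may differ in detail:

    #include <xen/kconfig.h>   /* IS_ENABLED() */
    #include <xen/string.h>    /* memcpy() */
    #include <xen/types.h>

    extern char __x86_return_thunk[];   /* assumed thunk symbol */

    /* Emit a return sequence at @ptr; return the first byte after it. */
    void *place_ret(void *ptr)
    {
        uint8_t *p = ptr;

        if ( IS_ENABLED(CONFIG_RETURN_THUNK) )
        {
            /*
             * jmp rel32 to the shared thunk instead of a bare ret.
             * Assumes the thunk is within +/-2G of the stub.
             */
            int32_t rel = (unsigned long)__x86_return_thunk -
                          ((unsigned long)p + 5);

            *p++ = 0xe9;                     /* jmp disp32 */
            memcpy(p, &rel, sizeof(rel));
            p += sizeof(rel);
        }
        else
            *p++ = 0xc3;                     /* ret */

        return p;
    }

Routing all stub returns through one out-of-line sequence lets boot-time code patch that single location with whatever hardware-dependent return sequence the CPU requires, which is why the literal 0xc3 stores above all had to go.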
+ config SPECULATIVE_HARDEN_ARRAY bool "Speculative Array Hardening" default y diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/common/softirq.c xen-4.17.5+72-g01140da4e8/xen/common/softirq.c --- xen-4.17.5+23-ga4e5191dc0/xen/common/softirq.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/common/softirq.c 2025-11-13 16:58:23.000000000 +0000 @@ -94,9 +94,7 @@ raise_mask = &per_cpu(batch_mask, this_cpu); for_each_cpu(cpu, mask) - if ( !test_and_set_bit(nr, &softirq_pending(cpu)) && - cpu != this_cpu && - !arch_skip_send_event_check(cpu) ) + if ( !arch_set_softirq(nr, cpu) && cpu != this_cpu ) __cpumask_set_cpu(cpu, raise_mask); if ( raise_mask == &send_mask ) @@ -107,9 +105,7 @@ { unsigned int this_cpu = smp_processor_id(); - if ( test_and_set_bit(nr, &softirq_pending(cpu)) - || (cpu == this_cpu) - || arch_skip_send_event_check(cpu) ) + if ( arch_set_softirq(nr, cpu) || cpu == this_cpu ) return; if ( !per_cpu(batching, this_cpu) || in_irq() ) diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/drivers/passthrough/pci.c xen-4.17.5+72-g01140da4e8/xen/drivers/passthrough/pci.c --- xen-4.17.5+23-ga4e5191dc0/xen/drivers/passthrough/pci.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/drivers/passthrough/pci.c 2025-11-13 16:58:23.000000000 +0000 @@ -345,20 +345,21 @@ switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) ) { unsigned int cap, sec_bus, sub_bus; + unsigned long flags; case DEV_TYPE_PCIe2PCI_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS); - spin_lock(&pseg->bus2bridge_lock); + spin_lock_irqsave(&pseg->bus2bridge_lock, flags); for ( ; sec_bus <= sub_bus; sec_bus++ ) { pseg->bus2bridge[sec_bus].map = 1; pseg->bus2bridge[sec_bus].bus = bus; pseg->bus2bridge[sec_bus].devfn = devfn; } - spin_unlock(&pseg->bus2bridge_lock); + spin_unlock_irqrestore(&pseg->bus2bridge_lock, flags); break; case DEV_TYPE_PCIe_ENDPOINT: @@ -429,16 +430,17 @@ switch ( pdev->type ) { unsigned int sec_bus, sub_bus; + unsigned long flags; case DEV_TYPE_PCIe2PCI_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS); - spin_lock(&pseg->bus2bridge_lock); + spin_lock_irqsave(&pseg->bus2bridge_lock, flags); for ( ; sec_bus <= sub_bus; sec_bus++ ) pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus]; - spin_unlock(&pseg->bus2bridge_lock); + spin_unlock_irqrestore(&pseg->bus2bridge_lock, flags); break; default: @@ -954,8 +956,9 @@ int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus) { struct pci_seg *pseg = get_pseg(seg); - int ret = 0; - int cnt = 0; + int ret = 1; + unsigned long flags; + unsigned int cnt = 0; if ( *bus == 0 ) return 0; @@ -966,8 +969,7 @@ if ( !pseg->bus2bridge[*bus].map ) return 0; - ret = 1; - spin_lock(&pseg->bus2bridge_lock); + spin_lock_irqsave(&pseg->bus2bridge_lock, flags); while ( pseg->bus2bridge[*bus].map ) { *secbus = *bus; @@ -981,7 +983,7 @@ } out: - spin_unlock(&pseg->bus2bridge_lock); + spin_unlock_irqrestore(&pseg->bus2bridge_lock, flags); return ret; } diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/include/public/arch-x86/cpufeatureset.h xen-4.17.5+72-g01140da4e8/xen/include/public/arch-x86/cpufeatureset.h --- xen-4.17.5+23-ga4e5191dc0/xen/include/public/arch-x86/cpufeatureset.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/include/public/arch-x86/cpufeatureset.h 2025-11-13 16:58:23.000000000 +0000 @@ -284,6 +284,7 @@ /* 
AMD-defined CPU features, CPUID level 0x80000021.eax, word 11 */ XEN_CPUFEATURE(LFENCE_DISPATCH, 11*32+ 2) /*A LFENCE always serializing */ +XEN_CPUFEATURE(VERW_CLEAR, 11*32+ 5) /*!A VERW clears microarchitectural buffers */ XEN_CPUFEATURE(NSCB, 11*32+ 6) /*A Null Selector Clears Base (and limit too) */ XEN_CPUFEATURE(AUTO_IBRS, 11*32+ 8) /* Automatic IBRS */ XEN_CPUFEATURE(SBPB, 11*32+27) /*A Selective Branch Predictor Barrier */ @@ -334,7 +335,12 @@ XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */ -/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ +/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 (express in terms of word 16) */ +XEN_CPUFEATURE(ITS_NO, 16*32+62) /*!A No Indirect Target Selection */ + +/* AMD-defined CPU features, CPUID level 0x80000021.ecx, word 18 */ +XEN_CPUFEATURE(TSA_SQ_NO, 18*32+ 1) /*A No Store Queue Transitive Scheduler Attacks */ +XEN_CPUFEATURE(TSA_L1_NO, 18*32+ 2) /*A No L1D Transitive Scheduler Attacks */ #endif /* XEN_CPUFEATURE */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/include/xen/cpuidle.h xen-4.17.5+72-g01140da4e8/xen/include/xen/cpuidle.h --- xen-4.17.5+23-ga4e5191dc0/xen/include/xen/cpuidle.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/include/xen/cpuidle.h 2025-11-13 16:58:23.000000000 +0000 @@ -92,8 +92,6 @@ bool cpuidle_using_deep_cstate(void); void cpuidle_disable_deep_cstate(void); -extern void cpuidle_wakeup_mwait(cpumask_t *mask); - #define CPUIDLE_DRIVER_STATE_START 1 extern void menu_get_trace_data(u32 *expected, u32 *pred); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/include/xen/irq_cpustat.h xen-4.17.5+72-g01140da4e8/xen/include/xen/irq_cpustat.h --- xen-4.17.5+23-ga4e5191dc0/xen/include/xen/irq_cpustat.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/include/xen/irq_cpustat.h 2025-11-13 16:58:23.000000000 +0000 @@ -24,6 +24,5 @@ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_irq_count(cpu) __IRQ_STAT((cpu), __local_irq_count) -#define mwait_wakeup(cpu) __IRQ_STAT((cpu), __mwait_wakeup) #endif /* __irq_cpustat_h */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/include/xen/lib/x86/cpu-policy.h xen-4.17.5+72-g01140da4e8/xen/include/xen/lib/x86/cpu-policy.h --- xen-4.17.5+23-ga4e5191dc0/xen/include/xen/lib/x86/cpu-policy.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/include/xen/lib/x86/cpu-policy.h 2025-11-13 16:58:23.000000000 +0000 @@ -22,6 +22,7 @@ #define FEATURESET_7d1 15 /* 0x00000007:1.edx */ #define FEATURESET_m10Al 16 /* 0x0000010a.eax */ #define FEATURESET_m10Ah 17 /* 0x0000010a.edx */ +#define FEATURESET_e21c 18 /* 0x80000021.ecx */ struct cpuid_leaf { @@ -324,7 +325,14 @@ uint32_t e21a; struct { DECL_BITFIELD(e21a); }; }; - uint32_t /* b */:32, /* c */:32, /* d */:32; + uint16_t ucode_size; /* Units of 16 bytes */ + uint8_t rap_size; /* Units of 8 entries */ + uint8_t :8; + union { + uint32_t e21c; + struct { DECL_BITFIELD(e21c); }; + }; + uint32_t /* d */:32; }; } extd; diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/include/xen/sha2.h xen-4.17.5+72-g01140da4e8/xen/include/xen/sha2.h --- xen-4.17.5+23-ga4e5191dc0/xen/include/xen/sha2.h 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/include/xen/sha2.h 2025-11-13 16:58:23.000000000 +0000 @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SHA2-256: 
https://csrc.nist.gov/pubs/fips/180-2/upd1/final + */ +#ifndef XEN_SHA2_H +#define XEN_SHA2_H + +#include <xen/types.h> + +#define SHA2_256_DIGEST_SIZE 32 + +void sha2_256_digest(uint8_t digest[SHA2_256_DIGEST_SIZE], + const void *msg, size_t len); + +#endif /* XEN_SHA2_H */ diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/include/xen/softirq.h xen-4.17.5+72-g01140da4e8/xen/include/xen/softirq.h --- xen-4.17.5+23-ga4e5191dc0/xen/include/xen/softirq.h 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/include/xen/softirq.h 2025-11-13 16:58:23.000000000 +0000 @@ -21,6 +21,22 @@ #define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS) +/* + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be + * skipped, false if the IPI cannot be skipped. + */ +#ifndef arch_set_softirq +static always_inline bool arch_set_softirq(unsigned int nr, unsigned int cpu) +{ + /* + * Try to set the softirq pending. If we set the bit (i.e. the old bit + * was 0), we're responsible to send the IPI. If the softirq was already + * pending (i.e. the old bit was 1), no IPI is needed. + */ + return test_and_set_bit(nr, &softirq_pending(cpu)); +} +#endif + typedef void (*softirq_handler)(void); void do_softirq(void); diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/lib/Makefile xen-4.17.5+72-g01140da4e8/xen/lib/Makefile --- xen-4.17.5+23-ga4e5191dc0/xen/lib/Makefile 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/lib/Makefile 2025-11-13 16:58:23.000000000 +0000 @@ -13,6 +13,7 @@ lib-y += muldiv64.o lib-y += parse-size.o lib-y += rbtree.o +lib-$(CONFIG_X86) += sha2-256.o lib-y += sort.o lib-y += strcasecmp.o lib-y += strchr.o diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/lib/sha2-256.c xen-4.17.5+72-g01140da4e8/xen/lib/sha2-256.c --- xen-4.17.5+23-ga4e5191dc0/xen/lib/sha2-256.c 1970-01-01 00:00:00.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/lib/sha2-256.c 2025-11-13 16:58:23.000000000 +0000 @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SHA2-256: https://csrc.nist.gov/pubs/fips/180-2/upd1/final + * + * Originally derived from Linux. Modified substantially to optimise for size + * and Xen's expected usecases. + */ +#include <xen/bitops.h> +#include <xen/string.h> +#include <xen/unaligned.h> + +#include <xen/sha2.h> + +struct sha2_256_state { + uint32_t state[SHA2_256_DIGEST_SIZE / sizeof(uint32_t)]; + uint8_t buf[64]; + size_t count; /* Byte count.
*/ +}; + +static uint32_t choose(uint32_t x, uint32_t y, uint32_t z) +{ + return z ^ (x & (y ^ z)); +} + +static uint32_t majority(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | (z & (x | y)); +} + +static uint32_t e0(uint32_t x) +{ + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); +} + +static uint32_t e1(uint32_t x) +{ + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); +} + +static uint32_t s0(uint32_t x) +{ + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); +} + +static uint32_t s1(uint32_t x) +{ + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); +} + +static uint32_t blend(uint32_t W[16], unsigned int i) +{ +#define W(i) W[(i) & 15] + + return W(i) += s1(W(i - 2)) + W(i - 7) + s0(W(i - 15)); + +#undef W +} + +static const uint32_t K[] = { + 0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U, + 0x3956c25bU, 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U, + 0xd807aa98U, 0x12835b01U, 0x243185beU, 0x550c7dc3U, + 0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, 0xc19bf174U, + 0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU, + 0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU, + 0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U, + 0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U, + 0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U, + 0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U, + 0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U, + 0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U, + 0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U, + 0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U, + 0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U, + 0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U, +}; + +static void sha2_256_transform(uint32_t *state, const void *_input) +{ + const uint32_t *input = _input; + uint32_t a, b, c, d, e, f, g, h, t1, t2; + uint32_t W[16]; + unsigned int i; + + for ( i = 0; i < 16; i++ ) + W[i] = get_unaligned_be32(&input[i]); + + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + for ( i = 0; i < 16; i += 8 ) + { + t1 = h + e1(e) + choose(e, f, g) + K[i + 0] + W[i + 0]; + t2 = e0(a) + majority(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + choose(d, e, f) + K[i + 1] + W[i + 1]; + t2 = e0(h) + majority(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + choose(c, d, e) + K[i + 2] + W[i + 2]; + t2 = e0(g) + majority(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + choose(b, c, d) + K[i + 3] + W[i + 3]; + t2 = e0(f) + majority(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + choose(a, b, c) + K[i + 4] + W[i + 4]; + t2 = e0(e) + majority(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + choose(h, a, b) + K[i + 5] + W[i + 5]; + t2 = e0(d) + majority(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + choose(g, h, a) + K[i + 6] + W[i + 6]; + t2 = e0(c) + majority(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + choose(f, g, h) + K[i + 7] + W[i + 7]; + t2 = e0(b) + majority(b, c, d); e += t1; a = t1 + t2; + } + + for ( ; i < 64; i += 8 ) + { + t1 = h + e1(e) + choose(e, f, g) + K[i + 0] + blend(W, i + 0); + t2 = e0(a) + majority(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + choose(d, e, f) + K[i + 1] + blend(W, i + 1); + t2 = e0(h) + majority(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + choose(c, d, e) + K[i + 2] + blend(W, i + 2); + t2 = e0(g) + majority(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + choose(b, c, d) + K[i + 3] + blend(W, i + 3); + t2 = e0(f) + majority(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + choose(a, b, c) + K[i + 4] + blend(W, i + 4); + t2 = 
e0(e) + majority(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + choose(h, a, b) + K[i + 5] + blend(W, i + 5); + t2 = e0(d) + majority(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + choose(g, h, a) + K[i + 6] + blend(W, i + 6); + t2 = e0(c) + majority(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + choose(f, g, h) + K[i + 7] + blend(W, i + 7); + t2 = e0(b) + majority(b, c, d); e += t1; a = t1 + t2; + } + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; +} + +static void sha2_256_init(struct sha2_256_state *s) +{ + *s = (struct sha2_256_state){ + .state = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL, + }, + }; +} + +static void sha2_256_update(struct sha2_256_state *s, const void *msg, + size_t len) +{ + unsigned int partial = s->count & 63; + + s->count += len; + + if ( (partial + len) >= 64 ) + { + if ( partial ) + { + unsigned int rem = 64 - partial; + + /* Fill the partial block. */ + memcpy(s->buf + partial, msg, rem); + msg += rem; + len -= rem; + + sha2_256_transform(s->state, s->buf); + partial = 0; + } + + for ( ; len >= 64; msg += 64, len -= 64 ) + sha2_256_transform(s->state, msg); + } + + /* Remaining data becomes partial. */ + memcpy(s->buf + partial, msg, len); +} + +static void sha2_256_final(struct sha2_256_state *s, void *_dst) +{ + uint32_t *dst = _dst; + unsigned int i, partial = s->count & 63; + + /* Start padding */ + s->buf[partial++] = 0x80; + + if ( partial > 56 ) + { + /* Need one extra block - pad to 64 */ + memset(s->buf + partial, 0, 64 - partial); + sha2_256_transform(s->state, s->buf); + partial = 0; + } + /* Pad to 56 */ + memset(s->buf + partial, 0, 56 - partial); + + /* Append the bit count */ + put_unaligned_be64((uint64_t)s->count << 3, &s->buf[56]); + sha2_256_transform(s->state, s->buf); + + /* Store state in digest */ + for ( i = 0; i < 8; i++ ) + put_unaligned_be32(s->state[i], &dst[i]); +} + +void sha2_256_digest(uint8_t digest[SHA2_256_DIGEST_SIZE], + const void *msg, size_t len) +{ + struct sha2_256_state s; + + sha2_256_init(&s); + sha2_256_update(&s, msg, len); + sha2_256_final(&s, digest); +} diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/lib/x86/cpuid.c xen-4.17.5+72-g01140da4e8/xen/lib/x86/cpuid.c --- xen-4.17.5+23-ga4e5191dc0/xen/lib/x86/cpuid.c 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/lib/x86/cpuid.c 2025-11-13 16:58:23.000000000 +0000 @@ -81,6 +81,7 @@ fs[FEATURESET_7d1] = p->feat._7d1; fs[FEATURESET_m10Al] = p->arch_caps.lo; fs[FEATURESET_m10Ah] = p->arch_caps.hi; + fs[FEATURESET_e21c] = p->extd.e21c; } void x86_cpu_featureset_to_policy( @@ -104,6 +105,7 @@ p->feat._7d1 = fs[FEATURESET_7d1]; p->arch_caps.lo = fs[FEATURESET_m10Al]; p->arch_caps.hi = fs[FEATURESET_m10Ah]; + p->extd.e21c = fs[FEATURESET_e21c]; } void x86_cpu_policy_recalc_synth(struct cpu_policy *p) diff -Nru xen-4.17.5+23-ga4e5191dc0/xen/tools/gen-cpuid.py xen-4.17.5+72-g01140da4e8/xen/tools/gen-cpuid.py --- xen-4.17.5+23-ga4e5191dc0/xen/tools/gen-cpuid.py 2024-11-12 13:09:34.000000000 +0000 +++ xen-4.17.5+72-g01140da4e8/xen/tools/gen-cpuid.py 2025-11-13 16:58:23.000000000 +0000 @@ -51,7 +51,7 @@ r"\s+/\*([\w!]*) .*$") word_regex = re.compile( - r"^/\* .* word (\d*) \*/$") + r"^/\* .* word (\d*) .*\*/$") last_word = -1 this = sys.modules[__name__]
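For reference, the sha2_256_digest() entry point added above is the primitive that the new AMD microcode digest checking (amd-patch-digests.c and amd.c in the diffstat) builds on. A minimal, hypothetical caller is sketched below — the function name and surrounding structure are illustrative only; just the sha2_256_digest() signature comes from the diff:

    #include <xen/sha2.h>
    #include <xen/string.h>
    #include <xen/types.h>

    /* Check a blob against a known-good SHA2-256 digest. */
    static bool digest_matches(const void *blob, size_t len,
                               const uint8_t expected[SHA2_256_DIGEST_SIZE])
    {
        uint8_t digest[SHA2_256_DIGEST_SIZE];

        sha2_256_digest(digest, blob, len);

        return memcmp(digest, expected, SHA2_256_DIGEST_SIZE) == 0;
    }

The one-shot init/update/final wrapper keeps the state machine internal to xen/lib/sha2-256.c, which fits the expected usecase: callers only ever hash one complete buffer at a time.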