Version in base suite: 4.20.2+7-g1badcf5035-0+deb13u1 Base version: xen_4.20.2+7-g1badcf5035-0+deb13u1 Target version: xen_4.20.2+37-g61ff35323e-0+deb13u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/x/xen/xen_4.20.2+7-g1badcf5035-0+deb13u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/x/xen/xen_4.20.2+37-g61ff35323e-0+deb13u1.dsc .cirrus.yml | 4 SUPPORT.md | 4 debian/changelog | 13 + xen/Makefile | 9 - xen/arch/arm/Makefile | 4 xen/arch/arm/include/asm/mm.h | 10 - xen/arch/ppc/include/asm/mm.h | 10 - xen/arch/riscv/include/asm/mm.h | 10 - xen/arch/x86/cpu/amd.c | 240 ++++++++++++++++------------------- xen/arch/x86/cpu/common.c | 2 xen/arch/x86/cpu/cpu.h | 3 xen/arch/x86/cpu/hygon.c | 6 xen/arch/x86/cpu/microcode/amd.c | 2 xen/arch/x86/domain.c | 42 +----- xen/arch/x86/hvm/dom0_build.c | 4 xen/arch/x86/hvm/hpet.c | 15 +- xen/arch/x86/hvm/vmsi.c | 2 xen/arch/x86/hvm/vmx/vmx.c | 1 xen/arch/x86/include/asm/mm.h | 18 +- xen/arch/x86/include/asm/processor.h | 2 xen/arch/x86/mm/p2m-pod.c | 2 xen/arch/x86/mm/p2m.c | 15 +- xen/arch/x86/mm/shadow/multi.c | 18 +- xen/arch/x86/mm/shadow/private.h | 8 - xen/arch/x86/pv/domain.c | 28 ++++ xen/arch/x86/time.c | 9 + xen/arch/x86/xen.lds.S | 19 ++ xen/common/domain.c | 27 +++ xen/common/kexec.c | 6 xen/common/memory.c | 112 ++++++++++++++++ xen/common/page_alloc.c | 60 ++++---- xen/drivers/passthrough/pci.c | 2 xen/include/acpi/actbl3.h | 4 xen/include/xen/domain.h | 2 xen/include/xen/mm.h | 11 + xen/include/xen/sched.h | 8 + xen/xsm/flask/ss/policydb.c | 9 + 37 files changed, 475 insertions(+), 266 deletions(-) dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmpwhh63468/xen_4.20.2+7-g1badcf5035-0+deb13u1.dsc: no acceptable signature found dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmpwhh63468/xen_4.20.2+37-g61ff35323e-0+deb13u1.dsc: no acceptable signature found diff -Nru xen-4.20.2+7-g1badcf5035/.cirrus.yml 
xen-4.20.2+37-g61ff35323e/.cirrus.yml --- xen-4.20.2+7-g1badcf5035/.cirrus.yml 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/.cirrus.yml 2026-02-12 12:58:50.000000000 +0000 @@ -2,8 +2,8 @@ freebsd_versions: &FREEBSD_VERSIONS env: FREEBSD_LEGACY: freebsd-13-5 - FREEBSD_PRODUCTION: freebsd-14-2 - FREEBSD_CURRENT: freebsd-15-0-snap + FREEBSD_PRODUCTION: freebsd-14-3 + FREEBSD_CURRENT: freebsd-15-0-amd64-ufs # Build jobs diff -Nru xen-4.20.2+7-g1badcf5035/SUPPORT.md xen-4.20.2+37-g61ff35323e/SUPPORT.md --- xen-4.20.2+7-g1badcf5035/SUPPORT.md 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/SUPPORT.md 2026-02-12 12:58:50.000000000 +0000 @@ -11,8 +11,8 @@ Xen-Version: 4.20 Initial-Release: 2025-03-05 - Supported-Until: 2026-09-05 - Security-Support-Until: 2028-03-05 + Supported-Until: 2028-03-05 + Security-Support-Until: 2030-03-05 Release Notes : RN diff -Nru xen-4.20.2+7-g1badcf5035/debian/changelog xen-4.20.2+37-g61ff35323e/debian/changelog --- xen-4.20.2+7-g1badcf5035/debian/changelog 2025-11-30 15:57:07.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/debian/changelog 2026-03-05 20:05:14.000000000 +0000 @@ -1,3 +1,16 @@ +xen (4.20.2+37-g61ff35323e-0+deb13u1) trixie; urgency=medium + + * Update to new upstream version 4.20.2+37-g61ff35323e, which also contains + security fixes for the following issues: + - x86: buffer overrun with shadow paging + tracing + XSA-477 CVE-2025-58150 + - x86: incomplete IBPB for vCPU isolation + XSA-479 CVE-2026-23553 + * Note that the following XSA are not listed, because... 
+ - XSA-478 applies to XAPI which is not included in Debian + + -- Maximilian Engelhardt Thu, 05 Mar 2026 21:05:14 +0100 + xen (4.20.2+7-g1badcf5035-0+deb13u1) trixie-security; urgency=medium Significant changes: diff -Nru xen-4.20.2+7-g1badcf5035/xen/Makefile xen-4.20.2+37-g61ff35323e/xen/Makefile --- xen-4.20.2+7-g1badcf5035/xen/Makefile 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/Makefile 2026-02-12 12:58:50.000000000 +0000 @@ -113,10 +113,11 @@ Q := @ endif -# If the user is running make -s (silent mode), suppress echoing of -# commands - -ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),) +# If the user is running make -s (silent mode), suppress echoing of commands. +# Use MFLAGS (options only). MAKEFLAGS may include variable overrides after +# “--” (GNU make 4.4 and newer), which can contain an “s” and falsely trigger +# silent mode. +ifneq ($(findstring s,$(filter-out --%,$(MFLAGS))),) quiet := silent_ endif diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/arm/Makefile xen-4.20.2+37-g61ff35323e/xen/arch/arm/Makefile --- xen-4.20.2+7-g1badcf5035/xen/arch/arm/Makefile 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/arm/Makefile 2026-02-12 12:58:50.000000000 +0000 @@ -96,13 +96,13 @@ endif $(TARGET)-syms: $(objtree)/prelink.o $(obj)/xen.lds - $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds $< \ + $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds $< $(build_id_linker) \ $(objtree)/common/symbols-dummy.o -o $(dot-target).0 $(NM) -pa --format=sysv $(dot-target).0 \ | $(objtree)/tools/symbols $(all_symbols) --sysv --sort \ > $(dot-target).0.S $(MAKE) $(build)=$(@D) $(dot-target).0.o - $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds $< \ + $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds $< $(build_id_linker) \ $(dot-target).0.o -o $(dot-target).1 $(NM) -pa --format=sysv $(dot-target).1 \ | $(objtree)/tools/symbols $(all_symbols) --sysv --sort \ diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/arm/include/asm/mm.h 
xen-4.20.2+37-g61ff35323e/xen/arch/arm/include/asm/mm.h --- xen-4.20.2+7-g1badcf5035/xen/arch/arm/include/asm/mm.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/arm/include/asm/mm.h 2026-02-12 12:58:50.000000000 +0000 @@ -150,6 +150,9 @@ #define _PGC_colored PG_shift(4) #define PGC_colored PG_mask(1, 4) #endif +/* Page needs to be scrubbed. */ +#define _PGC_need_scrub PG_shift(5) +#define PGC_need_scrub PG_mask(1, 5) /* ... */ /* Page is broken? */ #define _PGC_broken PG_shift(7) @@ -169,13 +172,6 @@ #define PGC_count_width PG_shift(10) #define PGC_count_mask ((1UL<count_info & PGC_xen_heap) #define is_xen_heap_mfn(mfn) \ (mfn_valid(mfn) && is_xen_heap_page(mfn_to_page(mfn))) diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/riscv/include/asm/mm.h xen-4.20.2+37-g61ff35323e/xen/arch/riscv/include/asm/mm.h --- xen-4.20.2+7-g1badcf5035/xen/arch/riscv/include/asm/mm.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/riscv/include/asm/mm.h 2026-02-12 12:58:50.000000000 +0000 @@ -221,19 +221,15 @@ #define PGT_count_width PG_shift(2) #define PGT_count_mask ((1UL << PGT_count_width) - 1) -/* - * Page needs to be scrubbed. Since this bit can only be set on a page that is - * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit. - */ -#define _PGC_need_scrub _PGC_allocated -#define PGC_need_scrub PGC_allocated - /* Cleared when the owning guest 'frees' this page. */ #define _PGC_allocated PG_shift(1) #define PGC_allocated PG_mask(1, 1) /* Page is Xen heap? */ #define _PGC_xen_heap PG_shift(2) #define PGC_xen_heap PG_mask(1, 2) +/* Page needs to be scrubbed. */ +#define _PGC_need_scrub PG_shift(4) +#define PGC_need_scrub PG_mask(1, 4) /* Page is broken? 
*/ #define _PGC_broken PG_shift(7) #define PGC_broken PG_mask(1, 7) diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/amd.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/amd.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/amd.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/amd.c 2026-02-12 12:58:50.000000000 +0000 @@ -681,45 +681,6 @@ ctxt_switch_levelling(NULL); } -void amd_init_lfence(struct cpuinfo_x86 *c) -{ - uint64_t value; - - /* - * Some hardware has LFENCE dispatch serialising always enabled, - * nothing to do on that case. - */ - if (test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability)) - return; - - /* - * Attempt to set lfence to be Dispatch Serialising. This MSR almost - * certainly isn't virtualised (and Xen at least will leak the real - * value in but silently discard writes), as well as being per-core - * rather than per-thread, so do a full safe read/write/readback cycle - * in the worst case. - */ - if (rdmsr_safe(MSR_AMD64_DE_CFG, value)) - /* Unable to read. Assume the safer default. */ - __clear_bit(X86_FEATURE_LFENCE_DISPATCH, - c->x86_capability); - else if (value & AMD64_DE_CFG_LFENCE_SERIALISE) - /* Already dispatch serialising. */ - __set_bit(X86_FEATURE_LFENCE_DISPATCH, - c->x86_capability); - else if (wrmsr_safe(MSR_AMD64_DE_CFG, - value | AMD64_DE_CFG_LFENCE_SERIALISE) || - rdmsr_safe(MSR_AMD64_DE_CFG, value) || - !(value & AMD64_DE_CFG_LFENCE_SERIALISE)) - /* Attempt to set failed. Assume the safer default. */ - __clear_bit(X86_FEATURE_LFENCE_DISPATCH, - c->x86_capability); - else - /* Successfully enabled! 
*/ - __set_bit(X86_FEATURE_LFENCE_DISPATCH, - c->x86_capability); -} - /* * Refer to the AMD Speculative Store Bypass whitepaper: * https://developer.amd.com/wp-content/resources/124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf @@ -913,76 +874,6 @@ } -void amd_check_zenbleed(void) -{ - const struct cpu_signature *sig = &this_cpu(cpu_sig); - unsigned int good_rev; - uint64_t val, old_val, chickenbit = (1 << 9); - - /* - * If we're virtualised, we can't do family/model checks safely, and - * we likely wouldn't have access to DE_CFG even if we could see a - * microcode revision. - * - * A hypervisor may hide AVX as a stopgap mitigation. We're not in a - * position to care either way. An admin doesn't want to be disabling - * AVX as a mitigation on any build of Xen with this logic present. - */ - if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17) - return; - - switch (boot_cpu_data.x86_model) { - case 0x30 ... 0x3f: good_rev = 0x0830107a; break; - case 0x60 ... 0x67: good_rev = 0x0860010b; break; - case 0x68 ... 0x6f: good_rev = 0x08608105; break; - case 0x70 ... 0x7f: good_rev = 0x08701032; break; - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; - default: - /* - * With the Fam17h check above, most parts getting here are - * Zen1. They're not affected. Assume Zen2 ones making it - * here are affected regardless of microcode version. - */ - if (is_zen1_uarch()) - return; - good_rev = ~0U; - break; - } - - rdmsrl(MSR_AMD64_DE_CFG, val); - old_val = val; - - /* - * Microcode is the preferred mitigation, in terms of performance. - * However, without microcode, this chickenbit (specific to the Zen2 - * uarch) disables Floating Point Mov-Elimination to mitigate the - * issue. - */ - val &= ~chickenbit; - if (sig->rev < good_rev) - val |= chickenbit; - - if (val == old_val) - /* Nothing to change. */ - return; - - /* - * DE_CFG is a Core-scoped MSR, and this write is racy during late - * microcode load. 
However, both threads calculate the new value from - * state which is shared, and unrelated to the old value, so the - * result should be consistent. - */ - wrmsrl(MSR_AMD64_DE_CFG, val); - - /* - * Inform the admin that we changed something, but don't spam, - * especially during a late microcode load. - */ - if (smp_processor_id() == 0) - printk(XENLOG_INFO "Zenbleed mitigation - using %s\n", - val & chickenbit ? "chickenbit" : "microcode"); -} - static void cf_check fam17_disable_c6(void *arg) { /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */ @@ -1009,6 +900,116 @@ wrmsrl(MSR_AMD_CSTATE_CFG, val & mask); } +static bool zenbleed_use_chickenbit(void) +{ + unsigned int curr_rev; + uint8_t fixed_rev; + + /* Zenbleed only affects Zen2. Nothing to do on non-Fam17h systems. */ + if ( boot_cpu_data.x86 != 0x17 ) + return false; + + curr_rev = this_cpu(cpu_sig).rev; + switch ( curr_rev >> 8 ) + { + case 0x083010: fixed_rev = 0x7a; break; + case 0x086001: fixed_rev = 0x0b; break; + case 0x086081: fixed_rev = 0x05; break; + case 0x087010: fixed_rev = 0x32; break; + case 0x08a000: fixed_rev = 0x08; break; + default: + /* + * With the Fam17h check above, most parts getting here are Zen1. + * They're not affected. Assume Zen2 ones making it here are affected + * regardless of microcode version. + */ + return is_zen2_uarch(); + } + + return (uint8_t)curr_rev >= fixed_rev; +} + +void amd_init_de_cfg(const struct cpuinfo_x86 *c) +{ + uint64_t val, new = 0; + + /* + * The MSR doesn't exist on Fam 0xf/0x11. If virtualised, we won't have + * mutable access even if we can read it. + */ + if ( c->x86 == 0xf || c->x86 == 0x11 || cpu_has_hypervisor ) + return; + + /* + * On Zen3 (Fam 0x19) and later CPUs, LFENCE is unconditionally dispatch + * serialising, and is enumerated in CPUID. + * + * On older systems, LFENCE is unconditionally dispatch serialising (when + * the MSR doesn't exist), or can be made so by setting this bit. 
+ */ + if ( !test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability) ) + new |= AMD64_DE_CFG_LFENCE_SERIALISE; + + /* + * If vulnerable to Zenbleed and not mitigated in microcode, use the + * bigger hammer. + */ + if ( zenbleed_use_chickenbit() ) + new |= (1 << 9); + + /* + * Erratum #665, doc 44739. Integer divide instructions may cause + * unpredictable behaviour. + */ + if ( c->x86 == 0x12 ) + new |= 1U << 31; + + /* Avoid reading DE_CFG if we don't intend to change anything. */ + if ( !new ) + return; + + rdmsrl(MSR_AMD64_DE_CFG, val); + + if ( (val & new) == new ) + return; + + /* + * DE_CFG is a Core-scoped MSR, and this write is racy. However, both + * threads calculate the new value from state which expected to be + * consistent across CPUs and unrelated to the old value, so the result + * should be consistent. + */ + wrmsrl(MSR_AMD64_DE_CFG, val | new); +} + +void __init amd_init_lfence_dispatch(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + uint64_t val; + + if ( test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability) ) + /* LFENCE is forced dispatch serialising and we can't control it. */ + return; + + if ( c->x86 == 0xf || c->x86 == 0x11 ) + /* MSR doesn't exist, LFENCE is dispatch serialising. */ + goto set; + + if ( rdmsr_safe(MSR_AMD64_DE_CFG, val) ) + /* Unable to read. Assume the safer default. 
*/ + goto clear; + + if ( val & AMD64_DE_CFG_LFENCE_SERIALISE ) + goto set; + + clear: + setup_clear_cpu_cap(X86_FEATURE_LFENCE_DISPATCH); + return; + + set: + setup_force_cpu_cap(X86_FEATURE_LFENCE_DISPATCH); +} + static void amd_check_bp_cfg(void) { uint64_t val, new = 0; @@ -1053,6 +1054,11 @@ unsigned long long value; + amd_init_de_cfg(c); + + if (c == &boot_cpu_data) + amd_init_lfence_dispatch(); /* Needs amd_init_de_cfg() */ + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 * bit 6 of msr C001_0015 * @@ -1091,12 +1097,6 @@ if (c == &boot_cpu_data && !cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS)) setup_force_cpu_cap(X86_BUG_FPU_PTRS); - if (c->x86 == 0x0f || c->x86 == 0x11) - /* Always dispatch serialising on this hardare. */ - __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); - else /* Implicily "== 0x10 || >= 0x12" by being 64bit. */ - amd_init_lfence(c); - amd_init_ssbd(c); if (c->x86 == 0x17) @@ -1252,15 +1252,6 @@ smp_processor_id()); wrmsrl(MSR_AMD64_LS_CFG, value | (1 << 15)); } - } else if (c->x86 == 0x12) { - rdmsrl(MSR_AMD64_DE_CFG, value); - if (!(value & (1U << 31))) { - if (c == &boot_cpu_data || opt_cpu_info) - printk_once(XENLOG_WARNING - "CPU%u: Applying workaround for erratum 665\n", - smp_processor_id()); - wrmsrl(MSR_AMD64_DE_CFG, value | (1U << 31)); - } } /* AMD CPUs do not support SYSENTER outside of legacy mode. 
*/ @@ -1313,7 +1304,6 @@ if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC)) disable_c1_ramping(); - amd_check_zenbleed(); amd_check_bp_cfg(); if (fam17_c6_disabled) diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/common.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/common.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/common.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/common.c 2026-02-12 12:58:50.000000000 +0000 @@ -103,7 +103,7 @@ bool __init is_forced_cpu_cap(unsigned int cap) { - return test_bit(cap, forced_caps); + return test_bit(cap, forced_caps) || test_bit(cap, cleared_caps); } static void cf_check default_init(struct cpuinfo_x86 * c) diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/cpu.h xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/cpu.h --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/cpu.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/cpu.h 2026-02-12 12:58:50.000000000 +0000 @@ -20,7 +20,8 @@ void cf_check early_init_amd(struct cpuinfo_x86 *c); void amd_log_freq(const struct cpuinfo_x86 *c); -void amd_init_lfence(struct cpuinfo_x86 *c); +void amd_init_de_cfg(const struct cpuinfo_x86 *c); +void amd_init_lfence_dispatch(void); void amd_init_ssbd(const struct cpuinfo_x86 *c); void amd_init_spectral_chicken(void); void detect_zen2_null_seg_behaviour(void); diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/hygon.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/hygon.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/hygon.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/hygon.c 2026-02-12 12:58:50.000000000 +0000 @@ -31,7 +31,11 @@ { unsigned long long value; - amd_init_lfence(c); + amd_init_de_cfg(c); + + if (c == &boot_cpu_data) + amd_init_lfence_dispatch(); /* Needs amd_init_de_cfg() */ + amd_init_ssbd(c); /* Probe for NSCB on Zen2 CPUs when not virtualised */ diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/microcode/amd.c 
xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/microcode/amd.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/cpu/microcode/amd.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/cpu/microcode/amd.c 2026-02-12 12:58:50.000000000 +0000 @@ -373,8 +373,6 @@ "microcode: CPU%u updated from revision %#x to %#x, date = %04x-%02x-%02x\n", cpu, old_rev, rev, patch->year, patch->month, patch->day); - amd_check_zenbleed(); - return 0; } diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/domain.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/domain.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/domain.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/domain.c 2026-02-12 12:58:50.000000000 +0000 @@ -845,11 +845,7 @@ if ( d->arch.ioport_caps == NULL ) goto fail; - /* - * The shared_info machine address must fit in a 32-bit field within a - * 32-bit guest's start_info structure. Hence we specify MEMF_bits(32). - */ - if ( (d->shared_info = alloc_xenheap_pages(0, MEMF_bits(32))) == NULL ) + if ( (d->shared_info = alloc_xenheap_page()) == NULL ) goto fail; clear_page(d->shared_info); @@ -2100,33 +2096,15 @@ ctxt_switch_levelling(next); - if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) - { - static DEFINE_PER_CPU(unsigned int, last); - unsigned int *last_id = &this_cpu(last); - - /* - * Squash the domid and vcpu id together for comparison - * efficiency. We could in principle stash and compare the struct - * vcpu pointer, but this risks a false alias if a domain has died - * and the same 4k page gets reused for a new vcpu. - */ - unsigned int next_id = (((unsigned int)nextd->domain_id << 16) | - (uint16_t)next->vcpu_id); - BUILD_BUG_ON(MAX_VIRT_CPUS > 0xffff); - - /* - * When scheduling from a vcpu, to idle, and back to the same vcpu - * (which might be common in a lightly loaded system, or when - * using vcpu pinning), there is no need to issue IBPB, as we are - * returning to the same security context. 
- */ - if ( *last_id != next_id ) - { - spec_ctrl_new_guest_context(); - *last_id = next_id; - } - } + /* + * Issue an IBPB when scheduling a different vCPU if required. + * + * IBPB clears the RSB/RAS/RAP, but that's fine as we leave this + * function via reset_stack_and_call_ind() rather than via a RET + * instruction. + */ + if ( opt_ibpb_ctxt_switch ) + spec_ctrl_new_guest_context(); /* Update the top-of-stack block with the new speculation settings. */ info->scf = diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/dom0_build.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/dom0_build.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/dom0_build.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/dom0_build.c 2026-02-12 12:58:50.000000000 +0000 @@ -1014,7 +1014,7 @@ ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_FACS, ACPI_SIG_PSDT, ACPI_SIG_SSDT, ACPI_SIG_SBST, ACPI_SIG_MCFG, ACPI_SIG_SLIC, ACPI_SIG_MSDM, ACPI_SIG_WDAT, ACPI_SIG_FPDT, ACPI_SIG_S3PT, - ACPI_SIG_VFCT, + ACPI_SIG_TCPA, ACPI_SIG_TPM2, ACPI_SIG_VFCT, }; unsigned int i; @@ -1311,7 +1311,7 @@ return 0; } -static void __hwdom_init pvh_setup_mmcfg(struct domain *d) +static void __init pvh_setup_mmcfg(struct domain *d) { unsigned int i; int rc; diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/hpet.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/hpet.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/hpet.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/hpet.c 2026-02-12 12:58:50.000000000 +0000 @@ -48,6 +48,8 @@ #define timer_is_32bit(h, n) (timer_config(h, n) & HPET_TN_32BIT) #define hpet_enabled(h) ((h)->hpet.config & HPET_CFG_ENABLE) #define timer_level(h, n) (timer_config(h, n) & HPET_TN_LEVEL) +#define timer_is_legacy(h, n) \ + (((n) <= 1) && ((h)->hpet.config & HPET_CFG_LEGACY)) #define timer_int_route(h, n) MASK_EXTR(timer_config(h, n), HPET_TN_ROUTE) @@ -55,7 +57,8 @@ MASK_EXTR(timer_config(h, n), HPET_TN_INT_ROUTE_CAP) #define 
timer_int_route_valid(h, n) \ - ((1u << timer_int_route(h, n)) & timer_int_route_cap(h, n)) + (timer_is_legacy(h, n) || \ + ((1u << timer_int_route(h, n)) & timer_int_route_cap(h, n))) static inline uint64_t hpet_read_maincounter(HPETState *h, uint64_t guest_time) { @@ -275,7 +278,7 @@ ? (uint32_t)diff : 0; destroy_periodic_time(&h->pt[tn]); - if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) ) + if ( timer_is_legacy(h, tn) ) { /* if LegacyReplacementRoute bit is set, HPET specification requires timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC, @@ -379,6 +382,14 @@ h->hpet.config = hpet_fixup_reg(new_val, old_val, HPET_CFG_ENABLE | HPET_CFG_LEGACY); + /* + * The first 2 channels' interrupt route values only matter when + * HPET_CFG_LEGACY is disabled. However, for simplicity's sake, always + * resanitize all channels anyway. + */ + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + timer_sanitize_int_route(h, i); + if ( !(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE) ) { /* Enable main counter and interrupt generation. 
*/ diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/vmsi.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/vmsi.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/vmsi.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/vmsi.c 2026-02-12 12:58:50.000000000 +0000 @@ -758,7 +758,7 @@ return; found: - if ( !atomic_dec_and_test(&entry->refcnt) ) + if ( atomic_dec_and_test(&entry->refcnt) ) del_msixtbl_entry(entry); spin_unlock_irq(&irqd->lock); diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/vmx/vmx.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/vmx/vmx.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/hvm/vmx/vmx.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/hvm/vmx/vmx.c 2026-02-12 12:58:50.000000000 +0000 @@ -2979,6 +2979,7 @@ case 0x75: /* Lightning Mountain */ case 0x7a: /* Gemini Lake */ case 0x86: /* Jacobsville */ + return false; default: printk("Unrecognised CPU model %#x - assuming vulnerable to IF_PSCHANGE_MC\n", diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/include/asm/mm.h xen-4.20.2+37-g61ff35323e/xen/arch/x86/include/asm/mm.h --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/include/asm/mm.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/include/asm/mm.h 2026-02-12 12:58:50.000000000 +0000 @@ -83,29 +83,25 @@ #define PGC_state_offlined PG_mask(2, 6) #define PGC_state_free PG_mask(3, 6) #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) +/* Page needs to be scrubbed. */ +#define _PGC_need_scrub PG_shift(7) +#define PGC_need_scrub PG_mask(1, 7) #ifdef CONFIG_SHADOW_PAGING /* Set when a page table page has been shadowed. */ -#define _PGC_shadowed_pt PG_shift(7) -#define PGC_shadowed_pt PG_mask(1, 7) +#define _PGC_shadowed_pt PG_shift(8) +#define PGC_shadowed_pt PG_mask(1, 8) #else #define PGC_shadowed_pt 0 #endif /* Count of references to this frame. 
*/ #if PGC_shadowed_pt -#define PGC_count_width PG_shift(7) +#define PGC_count_width PG_shift(8) #else -#define PGC_count_width PG_shift(6) +#define PGC_count_width PG_shift(7) #endif #define PGC_count_mask ((1UL< sizeof(this_cpu(trace_emulate_write_val)) ) + bytes = sizeof(this_cpu(trace_emulate_write_val)); + #if GUEST_PAGING_LEVELS == 3 if ( vaddr == this_cpu(trace_emulate_initial_va) ) memcpy(&this_cpu(trace_emulate_write_val), src, bytes); @@ -2072,13 +2075,16 @@ /* * For GUEST_PAGING_LEVELS=3 (PAE paging), guest_l1e is 64 while * guest_va is 32. Put it first to avoid padding. + * + * Note: .write_val is an arbitrary set of written bytes, possibly + * misaligned and possibly spanning the next gl1e. */ guest_l1e_t gl1e, write_val; guest_va_t va; uint32_t flags:29, emulation_count:3; } d = { .gl1e = gl1e, - .write_val.l1 = this_cpu(trace_emulate_write_val), + .write_val = this_cpu(trace_emulate_write_val), .va = va, #if GUEST_PAGING_LEVELS == 3 .emulation_count = this_cpu(trace_extra_emulation_count), @@ -2659,7 +2665,7 @@ paging_unlock(d); put_gfn(d, gfn_x(gfn)); - this_cpu(trace_emulate_write_val) = 0; + this_cpu(trace_emulate_write_val) = (guest_l1e_t){}; #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION early_emulation: diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/mm/shadow/private.h xen-4.20.2+37-g61ff35323e/xen/arch/x86/mm/shadow/private.h --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/mm/shadow/private.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/mm/shadow/private.h 2026-02-12 12:58:50.000000000 +0000 @@ -120,14 +120,6 @@ TRCE_SFLAG_OOS_FIXUP_EVICT, }; - -/* Size (in bytes) of a guest PTE */ -#if GUEST_PAGING_LEVELS >= 3 -# define GUEST_PTE_SIZE 8 -#else -# define GUEST_PTE_SIZE 4 -#endif - /****************************************************************************** * Auditing routines */ diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/pv/domain.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/pv/domain.c --- 
xen-4.20.2+7-g1badcf5035/xen/arch/x86/pv/domain.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/pv/domain.c 2026-02-12 12:58:50.000000000 +0000 @@ -246,6 +246,34 @@ d->arch.has_32bit_shinfo = 1; d->arch.pv.is_32bit = true; + /* + * For 32bit PV guests the shared_info machine address must fit in a 32-bit + * field within the guest's start_info structure. We might need to free + * the current page and allocate a new one that fulfills this requirement. + */ + if ( virt_to_maddr(d->shared_info) >> 32 ) + { + shared_info_t *prev = d->shared_info; + + d->shared_info = alloc_xenheap_pages(0, MEMF_bits(32)); + if ( !d->shared_info ) + { + d->shared_info = prev; + rc = -ENOMEM; + goto undo_and_fail; + } + clear_page(d->shared_info); + share_xen_page_with_guest(virt_to_page(d->shared_info), d, SHARE_rw); + /* + * Ensure all pointers to the old shared_info page are replaced. vCPUs + * below XEN_LEGACY_MAX_VCPUS may have stashed a pointer to + * shared_info->vcpu_info[id]. 
+ */ + for_each_vcpu ( d, v ) + vcpu_info_reset(v); + put_page(virt_to_page(prev)); + } + for_each_vcpu( d, v ) { if ( (rc = setup_compat_arg_xlat(v)) || diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/time.c xen-4.20.2+37-g61ff35323e/xen/arch/x86/time.c --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/time.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/time.c 2026-02-12 12:58:50.000000000 +0000 @@ -1649,8 +1649,13 @@ tsc = at_tsc; else tsc = rdtsc_ordered(); - delta = tsc - t->stamp.local_tsc; - return t->stamp.local_stime + scale_delta(delta, &t->tsc_scale); + + if ( likely(tsc >= t->stamp.local_tsc) ) + delta = scale_delta(tsc - t->stamp.local_tsc, &t->tsc_scale); + else + delta = -scale_delta(t->stamp.local_tsc - tsc, &t->tsc_scale); + + return t->stamp.local_stime + delta; } s_time_t get_s_time(void) diff -Nru xen-4.20.2+7-g1badcf5035/xen/arch/x86/xen.lds.S xen-4.20.2+37-g61ff35323e/xen/arch/x86/xen.lds.S --- xen-4.20.2+7-g1badcf5035/xen/arch/x86/xen.lds.S 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/arch/x86/xen.lds.S 2026-02-12 12:58:50.000000000 +0000 @@ -340,6 +340,25 @@ *(.reloc) __base_relocs_end = .; } + + /* + * When efi/relocs-dummy.o is linked into the first-pass binary, the two + * symbols supplied by it (for ./Makefile to use) may appear in the symbol + * table (GNU ld 2.37 and newer strip them, for not being properly + * representable). No such symbols would appear during subsequent passes. + * At least some of those older ld versions emit VIRT_START as absolute, but + * ALT_START as if it was part of .text. The symbols tool generating our + * own symbol table would hence not ignore it when passed --all-symbols, + * leading to the 2nd pass binary having one more symbol than the final (3rd + * pass) one. + * + * Arrange for both (just in case) symbols to always be there, and to always + * be absolute (zero). 
+ */ + PROVIDE(VIRT_START = 0); + PROVIDE(ALT_START = 0); + VIRT_START &= 0; + ALT_START &= 0; #elif defined(XEN_BUILD_EFI) /* * Due to the way EFI support is currently implemented, these two symbols diff -Nru xen-4.20.2+7-g1badcf5035/xen/common/domain.c xen-4.20.2+37-g61ff35323e/xen/common/domain.c --- xen-4.20.2+7-g1badcf5035/xen/common/domain.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/common/domain.c 2026-02-12 12:58:50.000000000 +0000 @@ -177,7 +177,7 @@ spin_unlock(&d->shutdown_lock); } -static void vcpu_info_reset(struct vcpu *v) +void vcpu_info_reset(struct vcpu *v) { struct domain *d = v->domain; @@ -446,6 +446,18 @@ } custom_param("dom0", parse_dom0_param); +static void domain_pending_scrub_free(struct domain *d) +{ + rspin_lock(&d->page_alloc_lock); + if ( d->pending_scrub ) + { + FREE_DOMHEAP_PAGES(d->pending_scrub, d->pending_scrub_order); + d->pending_scrub_order = 0; + d->pending_scrub_index = 0; + } + rspin_unlock(&d->page_alloc_lock); +} + /* * Release resources held by a domain. There may or may not be live * references to the domain, and it may or may not be fully constructed. @@ -505,6 +517,9 @@ case PROG_none: BUILD_BUG_ON(PROG_none != 0); + /* Trivial teardown, not long-running enough to need a preemption check. 
*/ + domain_pending_scrub_free(d); + PROGRESS(gnttab_mappings): rc = gnttab_release_mappings(d); if ( rc ) @@ -547,6 +562,7 @@ { BUG_ON(!d->is_dying); BUG_ON(atomic_read(&d->refcnt) != DOMAIN_DESTROYED); + ASSERT(!d->pending_scrub); xfree(d->pbuf); @@ -1478,6 +1494,15 @@ */ if ( new == 0 && !d->creation_finished ) { + if ( d->pending_scrub ) + { + printk(XENLOG_ERR + "%pd: cannot be started with pending unscrubbed pages, destroying\n", + d); + domain_crash(d); + domain_pending_scrub_free(d); + return -EBUSY; + } d->creation_finished = true; arch_domain_creation_finished(d); } diff -Nru xen-4.20.2+7-g1badcf5035/xen/common/kexec.c xen-4.20.2+37-g61ff35323e/xen/common/kexec.c --- xen-4.20.2+7-g1badcf5035/xen/common/kexec.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/common/kexec.c 2026-02-12 12:58:50.000000000 +0000 @@ -1217,7 +1217,7 @@ XEN_GUEST_HANDLE_PARAM(void) uarg, bool compat) { - int ret = -EINVAL; + int ret; ret = xsm_kexec(XSM_PRIV); if ( ret ) @@ -1258,6 +1258,10 @@ case KEXEC_CMD_kexec_status: ret = kexec_status(uarg); break; + + default: + ret = -EOPNOTSUPP; + break; } clear_bit(KEXEC_FLAG_IN_HYPERCALL, &kexec_flags); diff -Nru xen-4.20.2+7-g1badcf5035/xen/common/memory.c xen-4.20.2+37-g61ff35323e/xen/common/memory.c --- xen-4.20.2+7-g1badcf5035/xen/common/memory.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/common/memory.c 2026-02-12 12:58:50.000000000 +0000 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +157,73 @@ a->nr_done = i; } +/* + * Temporary storage for a domain assigned page that's not been fully scrubbed. + * Stored pages must be domheap ones. + * + * The stashed page can be freed at any time by Xen, the caller must pass the + * order and NUMA node requirement to the fetch function to ensure the + * currently stashed page matches it's requirements. 
+ */ +static void stash_allocation(struct domain *d, struct page_info *page, + unsigned int order, unsigned int scrub_index) +{ + rspin_lock(&d->page_alloc_lock); + + /* + * Drop the passed page in preference for the already stashed one. This + * interface is designed to be used for single-threaded domain creation. + */ + if ( d->pending_scrub || d->is_dying ) + free_domheap_pages(page, order); + else + { + d->pending_scrub_index = scrub_index; + d->pending_scrub_order = order; + d->pending_scrub = page; + } + + rspin_unlock(&d->page_alloc_lock); +} + +static struct page_info *get_stashed_allocation(struct domain *d, + unsigned int order, + nodeid_t node, + unsigned int *scrub_index) +{ + struct page_info *page = NULL; + + rspin_lock(&d->page_alloc_lock); + + /* + * If there's a pending page to scrub check if it satisfies the current + * request. If it doesn't free it and return NULL. + */ + if ( d->pending_scrub ) + { + if ( d->pending_scrub_order == order && + (node == NUMA_NO_NODE || node == page_to_nid(d->pending_scrub)) ) + { + page = d->pending_scrub; + *scrub_index = d->pending_scrub_index; + } + else + free_domheap_pages(d->pending_scrub, d->pending_scrub_order); + + /* + * The caller now owns the page or it has been freed, clear stashed + * information. Prevent concurrent usages of get_stashed_allocation() + * from returning the same page to different contexts. + */ + d->pending_scrub_index = 0; + d->pending_scrub_order = 0; + d->pending_scrub = NULL; + } + + rspin_unlock(&d->page_alloc_lock); + return page; +} + static void populate_physmap(struct memop_args *a) { struct page_info *page; @@ -272,7 +340,19 @@ } else { - page = alloc_domheap_pages(d, a->extent_order, a->memflags); + unsigned int scrub_start = 0; + unsigned int memflags = + a->memflags | (d->creation_finished ? 0 + : MEMF_no_scrub); + nodeid_t node = + (a->memflags & MEMF_exact_node) ? 
MEMF_get_node(a->memflags) + : NUMA_NO_NODE; + + page = get_stashed_allocation(d, a->extent_order, node, + &scrub_start); + + if ( !page ) + page = alloc_domheap_pages(d, a->extent_order, memflags); if ( unlikely(!page) ) { @@ -283,6 +363,30 @@ goto out; } + if ( memflags & MEMF_no_scrub ) + { + unsigned int dirty_cnt = 0; + + /* Check if there's anything to scrub. */ + for ( j = scrub_start; j < (1U << a->extent_order); j++ ) + { + if ( !test_and_clear_bit(_PGC_need_scrub, + &page[j].count_info) ) + continue; + + scrub_one_page(&page[j]); + + if ( (j + 1) != (1U << a->extent_order) && + !(++dirty_cnt & 0xff) && + hypercall_preempt_check() ) + { + a->preempted = 1; + stash_allocation(d, page, a->extent_order, j + 1); + goto out; + } + } + } + if ( unlikely(a->memflags & MEMF_no_tlbflush) ) { for ( j = 0; j < (1U << a->extent_order); j++ ) @@ -1658,6 +1762,9 @@ break; case XENMEM_claim_pages: + if ( llc_coloring_enabled ) + return -EOPNOTSUPP; + if ( unlikely(start_extent) ) return -EINVAL; @@ -1679,6 +1786,9 @@ rc = xsm_claim_pages(XSM_PRIV, d); + if ( !rc && d->is_dying ) + rc = -EINVAL; + if ( !rc ) rc = domain_set_outstanding_pages(d, reservation.nr_extents); diff -Nru xen-4.20.2+7-g1badcf5035/xen/common/page_alloc.c xen-4.20.2+37-g61ff35323e/xen/common/page_alloc.c --- xen-4.20.2+7-g1badcf5035/xen/common/page_alloc.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/common/page_alloc.c 2026-02-12 12:58:50.000000000 +0000 @@ -169,7 +169,7 @@ /* * Flags that are preserved in assign_pages() (and only there) */ -#define PGC_preserved (PGC_extra | PGC_static | PGC_colored) +#define PGC_preserved (PGC_extra | PGC_static | PGC_colored | PGC_need_scrub) #ifndef PGT_TYPE_INFO_INITIALIZER #define PGT_TYPE_INFO_INITIALIZER 0 @@ -490,34 +490,9 @@ unsigned long domain_adjust_tot_pages(struct domain *d, long pages) { - long dom_before, dom_after, dom_claimed, sys_before, sys_after; - ASSERT(rspin_is_locked(&d->page_alloc_lock)); d->tot_pages += pages; - 
/* - * can test d->claimed_pages race-free because it can only change - * if d->page_alloc_lock and heap_lock are both held, see also - * domain_set_outstanding_pages below - */ - if ( !d->outstanding_pages ) - goto out; - - spin_lock(&heap_lock); - /* adjust domain outstanding pages; may not go negative */ - dom_before = d->outstanding_pages; - dom_after = dom_before - pages; - BUG_ON(dom_before < 0); - dom_claimed = dom_after < 0 ? 0 : dom_after; - d->outstanding_pages = dom_claimed; - /* flag accounting bug if system outstanding_claims would go negative */ - sys_before = outstanding_claims; - sys_after = sys_before - (dom_before - dom_claimed); - BUG_ON(sys_after < 0); - outstanding_claims = sys_after; - spin_unlock(&heap_lock); - -out: return d->tot_pages; } @@ -527,9 +502,10 @@ unsigned long claim, avail_pages; /* - * take the domain's page_alloc_lock, else all d->tot_page adjustments - * must always take the global heap_lock rather than only in the much - * rarer case that d->outstanding_pages is non-zero + * Two locks are needed here: + * - d->page_alloc_lock: protects accesses to d->{tot,max,extra}_pages. + * - heap_lock: protects accesses to d->outstanding_pages, total_avail_pages + * and outstanding_claims. */ nrspin_lock(&d->page_alloc_lock); spin_lock(&heap_lock); @@ -788,7 +764,7 @@ #endif #define SCRUB_BYTE_PATTERN (SCRUB_PATTERN & 0xff) -static void scrub_one_page(const struct page_info *pg) +void scrub_one_page(const struct page_info *pg) { if ( unlikely(pg->count_info & PGC_broken) ) return; @@ -1044,6 +1020,30 @@ total_avail_pages -= request; ASSERT(total_avail_pages >= 0); + if ( d && d->outstanding_pages && !(memflags & MEMF_no_refcount) ) + { + /* + * Adjust claims in the same locked region where total_avail_pages is + * adjusted, not doing so would lead to a window where the amount of + * free memory (avail - claimed) would be incorrect. 
+ * + * Note that by adjusting the claimed amount here it's possible for + * pages to fail to be assigned to the claiming domain while already + * having been subtracted from d->outstanding_pages. Such claimed + * amount is then lost, as the pages that fail to be assigned to the + * domain are freed without replenishing the claim. This is fine given + * claims are only to be used during physmap population as part of + * domain build, and any failure in assign_pages() there will result in + * the domain being destroyed before creation is finished. Losing part + * of the claim makes no difference. + */ + unsigned long outstanding = min(d->outstanding_pages + 0UL, request); + + BUG_ON(outstanding > outstanding_claims); + outstanding_claims -= outstanding; + d->outstanding_pages -= outstanding; + } + check_low_mem_virq(); if ( d != NULL ) diff -Nru xen-4.20.2+7-g1badcf5035/xen/drivers/passthrough/pci.c xen-4.20.2+37-g61ff35323e/xen/drivers/passthrough/pci.c --- xen-4.20.2+7-g1badcf5035/xen/drivers/passthrough/pci.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/drivers/passthrough/pci.c 2026-02-12 12:58:50.000000000 +0000 @@ -357,6 +357,7 @@ unsigned long flags; case DEV_TYPE_PCIe2PCI_BRIDGE: + case DEV_TYPE_PCI2PCIe_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS); @@ -441,6 +442,7 @@ unsigned long flags; case DEV_TYPE_PCIe2PCI_BRIDGE: + case DEV_TYPE_PCI2PCIe_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS); diff -Nru xen-4.20.2+7-g1badcf5035/xen/include/acpi/actbl3.h xen-4.20.2+37-g61ff35323e/xen/include/acpi/actbl3.h --- xen-4.20.2+7-g1badcf5035/xen/include/acpi/actbl3.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/include/acpi/actbl3.h 2026-02-12 12:58:50.000000000 +0000 @@ -78,8 +78,10 @@ #define 
ACPI_SIG_CSRT "CSRT" /* Core System Resources Table */ #define ACPI_SIG_MATR "MATR" /* Memory Address Translation Table */ #define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */ -#define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */ +#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_VFCT "VFCT" /* AMD Video BIOS */ +#define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */ /* * All tables must be byte-packed to match the ACPI specification, since diff -Nru xen-4.20.2+7-g1badcf5035/xen/include/xen/domain.h xen-4.20.2+37-g61ff35323e/xen/include/xen/domain.h --- xen-4.20.2+7-g1badcf5035/xen/include/xen/domain.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/include/xen/domain.h 2026-02-12 12:58:50.000000000 +0000 @@ -83,6 +83,8 @@ int arch_vcpu_create(struct vcpu *v); void arch_vcpu_destroy(struct vcpu *v); +void vcpu_info_reset(struct vcpu *v); + int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size, struct guest_area *area, void (*populate)(void *dst, struct vcpu *v)); diff -Nru xen-4.20.2+7-g1badcf5035/xen/include/xen/mm.h xen-4.20.2+37-g61ff35323e/xen/include/xen/mm.h --- xen-4.20.2+7-g1badcf5035/xen/include/xen/mm.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/include/xen/mm.h 2026-02-12 12:58:50.000000000 +0000 @@ -144,6 +144,17 @@ unsigned long avail_node_heap_pages(unsigned int nodeid); #define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f)) #define free_domheap_page(p) (free_domheap_pages(p,0)) + +/* Free an allocation, and zero the pointer to it. 
*/ +#define FREE_DOMHEAP_PAGES(p, o) do { \ + void *_ptr_ = (p); \ + (p) = NULL; \ + free_domheap_pages(_ptr_, o); \ +} while ( false ) +#define FREE_DOMHEAP_PAGE(p) FREE_DOMHEAP_PAGES(p, 0) + +void scrub_one_page(const struct page_info *pg); + unsigned int online_page(mfn_t mfn, uint32_t *status); int offline_page(mfn_t mfn, int broken, uint32_t *status); int query_page_offline(mfn_t mfn, uint32_t *status); diff -Nru xen-4.20.2+7-g1badcf5035/xen/include/xen/sched.h xen-4.20.2+37-g61ff35323e/xen/include/xen/sched.h --- xen-4.20.2+7-g1badcf5035/xen/include/xen/sched.h 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/include/xen/sched.h 2026-02-12 12:58:50.000000000 +0000 @@ -396,7 +396,8 @@ unsigned int tot_pages; unsigned int xenheap_pages; /* pages allocated from Xen heap */ - unsigned int outstanding_pages; /* pages claimed but not possessed */ + /* Pages claimed but not possessed, protected by global heap_lock. */ + unsigned int outstanding_pages; unsigned int max_pages; /* maximum value for domain_tot_pages() */ unsigned int extra_pages; /* pages not included in domain_tot_pages() */ @@ -645,6 +646,11 @@ unsigned int num_llc_colors; const unsigned int *llc_colors; #endif + + /* Pointer to allocated domheap page that possibly needs scrubbing. 
*/ + struct page_info *pending_scrub; + unsigned int pending_scrub_order; + unsigned int pending_scrub_index; }; static inline struct page_list_head *page_to_list( diff -Nru xen-4.20.2+7-g1badcf5035/xen/xsm/flask/ss/policydb.c xen-4.20.2+37-g61ff35323e/xen/xsm/flask/ss/policydb.c --- xen-4.20.2+7-g1badcf5035/xen/xsm/flask/ss/policydb.c 2025-11-25 08:37:47.000000000 +0000 +++ xen-4.20.2+37-g61ff35323e/xen/xsm/flask/ss/policydb.c 2026-02-12 12:58:50.000000000 +0000 @@ -1272,7 +1272,10 @@ if ( ver >= POLICYDB_VERSION_BOUNDARY ) rc = next_entry(buf, fp, sizeof(buf[0]) * 3); else + { rc = next_entry(buf, fp, sizeof(buf[0]) * 2); + buf[2] = cpu_to_le32(0); /* gcc14 onwards */ + } if ( rc < 0 ) goto bad; @@ -1343,7 +1346,10 @@ if ( ver >= POLICYDB_VERSION_BOUNDARY ) rc = next_entry(buf, fp, sizeof(buf[0]) * 4); else + { rc = next_entry(buf, fp, sizeof(buf[0]) * 3); + buf[3] = cpu_to_le32(0); /* gcc14 onwards */ + } if ( rc < 0 ) goto bad; @@ -1437,7 +1443,10 @@ if ( ver >= POLICYDB_VERSION_BOUNDARY ) rc = next_entry(buf, fp, sizeof(buf[0]) * 3); else + { rc = next_entry(buf, fp, sizeof(buf[0]) * 2); + buf[2] = cpu_to_le32(0); /* gcc14 onwards */ + } if ( rc < 0 ) goto bad;